diff options
Diffstat (limited to 'fs')
197 files changed, 2525 insertions, 1805 deletions
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index b84c291ba1eb..d7b78d531e63 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -74,7 +74,7 @@ int v9fs_file_open(struct inode *inode, struct file *file) v9fs_proto_dotu(v9ses)); fid = file->private_data; if (!fid) { - fid = v9fs_fid_clone(file->f_path.dentry); + fid = v9fs_fid_clone(file_dentry(file)); if (IS_ERR(fid)) return PTR_ERR(fid); @@ -100,7 +100,7 @@ int v9fs_file_open(struct inode *inode, struct file *file) * because we want write after unlink usecase * to work. */ - fid = v9fs_writeback_fid(file->f_path.dentry); + fid = v9fs_writeback_fid(file_dentry(file)); if (IS_ERR(fid)) { err = PTR_ERR(fid); mutex_unlock(&v9inode->v_mutex); @@ -516,7 +516,7 @@ v9fs_mmap_file_mmap(struct file *filp, struct vm_area_struct *vma) * because we want write after unlink usecase * to work. */ - fid = v9fs_writeback_fid(filp->f_path.dentry); + fid = v9fs_writeback_fid(file_dentry(filp)); if (IS_ERR(fid)) { retval = PTR_ERR(fid); mutex_unlock(&v9inode->v_mutex); diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index f4645c515262..e2e7c749925a 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -853,7 +853,7 @@ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry, struct p9_fid *fid, *inode_fid; struct dentry *res = NULL; - if (d_unhashed(dentry)) { + if (d_in_lookup(dentry)) { res = v9fs_vfs_lookup(dir, dentry, 0); if (IS_ERR(res)) return PTR_ERR(res); diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c index a34702c998f5..1b51eaa5e2dd 100644 --- a/fs/9p/vfs_inode_dotl.c +++ b/fs/9p/vfs_inode_dotl.c @@ -254,7 +254,7 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry, struct posix_acl *pacl = NULL, *dacl = NULL; struct dentry *res = NULL; - if (d_unhashed(dentry)) { + if (d_in_lookup(dentry)) { res = v9fs_vfs_lookup(dir, dentry, 0); if (IS_ERR(res)) return PTR_ERR(res); diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h index f0d268b97d19..a439548de785 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h @@ -70,9 +70,13 @@ struct autofs_info { }; #define AUTOFS_INF_EXPIRING (1<<0) /* dentry in the process of expiring */ -#define AUTOFS_INF_NO_RCU (1<<1) /* the dentry is being considered +#define AUTOFS_INF_WANT_EXPIRE (1<<1) /* the dentry is being considered * for expiry, so RCU_walk is - * not permitted + * not permitted. If it progresses to + * actual expiry attempt, the flag is + * not cleared when EXPIRING is set - + * in that case it gets cleared only + * when it comes to clearing EXPIRING. */ #define AUTOFS_INF_PENDING (1<<2) /* dentry pending mount */ diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c index 9510d8d2e9cd..b493909e7492 100644 --- a/fs/autofs4/expire.c +++ b/fs/autofs4/expire.c @@ -316,19 +316,17 @@ struct dentry *autofs4_expire_direct(struct super_block *sb, if (ino->flags & AUTOFS_INF_PENDING) goto out; if (!autofs4_direct_busy(mnt, root, timeout, do_now)) { - ino->flags |= AUTOFS_INF_NO_RCU; + ino->flags |= AUTOFS_INF_WANT_EXPIRE; spin_unlock(&sbi->fs_lock); synchronize_rcu(); spin_lock(&sbi->fs_lock); if (!autofs4_direct_busy(mnt, root, timeout, do_now)) { ino->flags |= AUTOFS_INF_EXPIRING; - smp_mb(); - ino->flags &= ~AUTOFS_INF_NO_RCU; init_completion(&ino->expire_complete); spin_unlock(&sbi->fs_lock); return root; } - ino->flags &= ~AUTOFS_INF_NO_RCU; + ino->flags &= ~AUTOFS_INF_WANT_EXPIRE; } out: spin_unlock(&sbi->fs_lock); @@ -446,7 +444,7 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb, while ((dentry = get_next_positive_subdir(dentry, root))) { spin_lock(&sbi->fs_lock); ino = autofs4_dentry_ino(dentry); - if (ino->flags & AUTOFS_INF_NO_RCU) + if (ino->flags & AUTOFS_INF_WANT_EXPIRE) expired = NULL; else expired = should_expire(dentry, mnt, timeout, how); @@ -455,7 +453,7 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb, continue; } ino = autofs4_dentry_ino(expired); - ino->flags |= AUTOFS_INF_NO_RCU; + ino->flags |= AUTOFS_INF_WANT_EXPIRE; spin_unlock(&sbi->fs_lock); synchronize_rcu(); spin_lock(&sbi->fs_lock); @@ -465,7 +463,7 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb, goto found; } - ino->flags &= ~AUTOFS_INF_NO_RCU; + ino->flags &= ~AUTOFS_INF_WANT_EXPIRE; if (expired != dentry) dput(expired); spin_unlock(&sbi->fs_lock); @@ -475,17 +473,8 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb, found: pr_debug("returning %p %pd\n", expired, expired); ino->flags |= AUTOFS_INF_EXPIRING; - smp_mb(); - ino->flags &= ~AUTOFS_INF_NO_RCU; init_completion(&ino->expire_complete); spin_unlock(&sbi->fs_lock); - spin_lock(&sbi->lookup_lock); - spin_lock(&expired->d_parent->d_lock); - spin_lock_nested(&expired->d_lock, DENTRY_D_LOCK_NESTED); - list_move(&expired->d_parent->d_subdirs, &expired->d_child); - spin_unlock(&expired->d_lock); - spin_unlock(&expired->d_parent->d_lock); - spin_unlock(&sbi->lookup_lock); return expired; } @@ -496,7 +485,7 @@ int autofs4_expire_wait(struct dentry *dentry, int rcu_walk) int status; /* Block on any pending expire */ - if (!(ino->flags & (AUTOFS_INF_EXPIRING | AUTOFS_INF_NO_RCU))) + if (!(ino->flags & AUTOFS_INF_WANT_EXPIRE)) return 0; if (rcu_walk) return -ECHILD; @@ -554,7 +543,7 @@ int autofs4_expire_run(struct super_block *sb, ino = autofs4_dentry_ino(dentry); /* avoid rapid-fire expire attempts if expiry fails */ ino->last_used = now; - ino->flags &= ~AUTOFS_INF_EXPIRING; + ino->flags &= ~(AUTOFS_INF_EXPIRING|AUTOFS_INF_WANT_EXPIRE); complete_all(&ino->expire_complete); spin_unlock(&sbi->fs_lock); @@ -583,7 +572,7 @@ int autofs4_do_expire_multi(struct super_block *sb, struct vfsmount *mnt, spin_lock(&sbi->fs_lock); /* avoid rapid-fire expire attempts if expiry fails */ ino->last_used = now; - ino->flags &= ~AUTOFS_INF_EXPIRING; + ino->flags &= ~(AUTOFS_INF_EXPIRING|AUTOFS_INF_WANT_EXPIRE); complete_all(&ino->expire_complete); spin_unlock(&sbi->fs_lock); dput(dentry); diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index 78bd80298528..3767f6641af1 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -458,7 +458,7 @@ static int autofs4_d_manage(struct dentry *dentry, bool rcu_walk) */ struct inode *inode; - if (ino->flags & (AUTOFS_INF_EXPIRING | AUTOFS_INF_NO_RCU)) + if (ino->flags & AUTOFS_INF_WANT_EXPIRE) return 0; if (d_mountpoint(dentry)) return 0; diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c index 0146d911f468..631f1554c87b 100644 --- a/fs/autofs4/waitq.c +++ b/fs/autofs4/waitq.c @@ -66,11 +66,12 @@ static int autofs4_write(struct autofs_sb_info *sbi, set_fs(KERNEL_DS); mutex_lock(&sbi->pipe_mutex); - wr = __vfs_write(file, data, bytes, &file->f_pos); - while (bytes && wr) { + while (bytes) { + wr = __vfs_write(file, data, bytes, &file->f_pos); + if (wr <= 0) + break; data += wr; bytes -= wr; - wr = __vfs_write(file, data, bytes, &file->f_pos); } mutex_unlock(&sbi->pipe_mutex); diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index e158b22ef32f..a7a28110dc80 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -2275,7 +2275,7 @@ static int elf_core_dump(struct coredump_params *cprm) goto end_coredump; /* Align to page */ - if (!dump_skip(cprm, dataoff - cprm->file->f_pos)) + if (!dump_skip(cprm, dataoff - cprm->pos)) goto end_coredump; for (i = 0, vma = first_vma(current, gate_vma); vma != NULL; diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 71ade0e556b7..203589311bf8 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -1787,7 +1787,7 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) goto end_coredump; } - if (!dump_skip(cprm, dataoff - cprm->file->f_pos)) + if (!dump_skip(cprm, dataoff - cprm->pos)) goto end_coredump; if (!elf_fdpic_dump_segments(cprm)) diff --git a/fs/block_dev.c b/fs/block_dev.c index 71ccab1d22c6..d012be4ab977 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -493,7 +493,7 @@ long bdev_direct_access(struct block_device *bdev, struct blk_dax_ctl *dax) if (size < 0) return size; - if (!ops->direct_access) + if (!blk_queue_dax(bdev_get_queue(bdev)) || !ops->direct_access) return -EOPNOTSUPP; if ((sector + DIV_ROUND_UP(size, 512)) > part_nr_sects_read(bdev->bd_part)) @@ -1287,7 +1287,8 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) bdev->bd_disk = disk; bdev->bd_queue = disk->queue; bdev->bd_contains = bdev; - if (IS_ENABLED(CONFIG_BLK_DEV_DAX) && disk->fops->direct_access) + if (IS_ENABLED(CONFIG_BLK_DEV_DAX) && + blk_queue_dax(disk->queue)) bdev->bd_inode->i_flags = S_DAX; else bdev->bd_inode->i_flags = 0; diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c index b677a6ea6001..5d5cae05818d 100644 --- a/fs/btrfs/check-integrity.c +++ b/fs/btrfs/check-integrity.c @@ -1673,6 +1673,7 @@ static int btrfsic_read_block(struct btrfsic_state *state, } bio->bi_bdev = block_ctx->dev->bdev; bio->bi_iter.bi_sector = dev_bytenr >> 9; + bio_set_op_attrs(bio, REQ_OP_READ, 0); for (j = i; j < num_pages; j++) { ret = bio_add_page(bio, block_ctx->pagev[j], @@ -1685,7 +1686,7 @@ static int btrfsic_read_block(struct btrfsic_state *state, "btrfsic: error, failed to add a single page!\n"); return -1; } - if (submit_bio_wait(READ, bio)) { + if (submit_bio_wait(bio)) { printk(KERN_INFO "btrfsic: read error at logical %llu dev %s!\n", block_ctx->start, block_ctx->dev->name); @@ -2206,7 +2207,7 @@ static void btrfsic_bio_end_io(struct bio *bp) block->dev_bytenr, block->mirror_num); next_block = block->next_in_same_bio; block->iodone_w_error = iodone_w_error; - if (block->submit_bio_bh_rw & REQ_FLUSH) { + if (block->submit_bio_bh_rw & REQ_PREFLUSH) { dev_state->last_flush_gen++; if ((dev_state->state->print_mask & BTRFSIC_PRINT_MASK_END_IO_BIO_BH)) @@ -2242,7 +2243,7 @@ static void btrfsic_bh_end_io(struct buffer_head *bh, int uptodate) block->dev_bytenr, block->mirror_num); block->iodone_w_error = iodone_w_error; - if (block->submit_bio_bh_rw & REQ_FLUSH) { + if (block->submit_bio_bh_rw & REQ_PREFLUSH) { dev_state->last_flush_gen++; if ((dev_state->state->print_mask & BTRFSIC_PRINT_MASK_END_IO_BIO_BH)) @@ -2645,7 +2646,7 @@ static void btrfsic_dump_tree_sub(const struct btrfsic_state *state, * This algorithm is recursive because the amount of used stack space * is very small and the max recursion depth is limited. */ - indent_add = sprintf(buf, "%c-%llu(%s/%llu/%d)", + indent_add = sprintf(buf, "%c-%llu(%s/%llu/%u)", btrfsic_get_block_type(state, block), block->logical_bytenr, block->dev_state->name, block->dev_bytenr, block->mirror_num); @@ -2855,12 +2856,12 @@ static struct btrfsic_dev_state *btrfsic_dev_state_lookup( return ds; } -int btrfsic_submit_bh(int rw, struct buffer_head *bh) +int btrfsic_submit_bh(int op, int op_flags, struct buffer_head *bh) { struct btrfsic_dev_state *dev_state; if (!btrfsic_is_initialized) - return submit_bh(rw, bh); + return submit_bh(op, op_flags, bh); mutex_lock(&btrfsic_mutex); /* since btrfsic_submit_bh() might also be called before @@ -2869,26 +2870,26 @@ int btrfsic_submit_bh(int rw, struct buffer_head *bh) /* Only called to write the superblock (incl. FLUSH/FUA) */ if (NULL != dev_state && - (rw & WRITE) && bh->b_size > 0) { + (op == REQ_OP_WRITE) && bh->b_size > 0) { u64 dev_bytenr; dev_bytenr = 4096 * bh->b_blocknr; if (dev_state->state->print_mask & BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH) printk(KERN_INFO - "submit_bh(rw=0x%x, blocknr=%llu (bytenr %llu)," - " size=%zu, data=%p, bdev=%p)\n", - rw, (unsigned long long)bh->b_blocknr, + "submit_bh(op=0x%x,0x%x, blocknr=%llu " + "(bytenr %llu), size=%zu, data=%p, bdev=%p)\n", + op, op_flags, (unsigned long long)bh->b_blocknr, dev_bytenr, bh->b_size, bh->b_data, bh->b_bdev); btrfsic_process_written_block(dev_state, dev_bytenr, &bh->b_data, 1, NULL, - NULL, bh, rw); - } else if (NULL != dev_state && (rw & REQ_FLUSH)) { + NULL, bh, op_flags); + } else if (NULL != dev_state && (op_flags & REQ_PREFLUSH)) { if (dev_state->state->print_mask & BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH) printk(KERN_INFO - "submit_bh(rw=0x%x FLUSH, bdev=%p)\n", - rw, bh->b_bdev); + "submit_bh(op=0x%x,0x%x FLUSH, bdev=%p)\n", + op, op_flags, bh->b_bdev); if (!dev_state->dummy_block_for_bio_bh_flush.is_iodone) { if ((dev_state->state->print_mask & (BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH | @@ -2906,7 +2907,7 @@ int btrfsic_submit_bh(int rw, struct buffer_head *bh) block->never_written = 0; block->iodone_w_error = 0; block->flush_gen = dev_state->last_flush_gen + 1; - block->submit_bio_bh_rw = rw; + block->submit_bio_bh_rw = op_flags; block->orig_bio_bh_private = bh->b_private; block->orig_bio_bh_end_io.bh = bh->b_end_io; block->next_in_same_bio = NULL; @@ -2915,10 +2916,10 @@ int btrfsic_submit_bh(int rw, struct buffer_head *bh) } } mutex_unlock(&btrfsic_mutex); - return submit_bh(rw, bh); + return submit_bh(op, op_flags, bh); } -static void __btrfsic_submit_bio(int rw, struct bio *bio) +static void __btrfsic_submit_bio(struct bio *bio) { struct btrfsic_dev_state *dev_state; @@ -2930,7 +2931,7 @@ static void __btrfsic_submit_bio(int rw, struct bio *bio) * btrfsic_mount(), this might return NULL */ dev_state = btrfsic_dev_state_lookup(bio->bi_bdev); if (NULL != dev_state && - (rw & WRITE) && NULL != bio->bi_io_vec) { + (bio_op(bio) == REQ_OP_WRITE) && NULL != bio->bi_io_vec) { unsigned int i; u64 dev_bytenr; u64 cur_bytenr; @@ -2942,9 +2943,9 @@ static void __btrfsic_submit_bio(int rw, struct bio *bio) if (dev_state->state->print_mask & BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH) printk(KERN_INFO - "submit_bio(rw=0x%x, bi_vcnt=%u," + "submit_bio(rw=%d,0x%x, bi_vcnt=%u," " bi_sector=%llu (bytenr %llu), bi_bdev=%p)\n", - rw, bio->bi_vcnt, + bio_op(bio), bio->bi_rw, bio->bi_vcnt, (unsigned long long)bio->bi_iter.bi_sector, dev_bytenr, bio->bi_bdev); @@ -2975,18 +2976,18 @@ static void __btrfsic_submit_bio(int rw, struct bio *bio) btrfsic_process_written_block(dev_state, dev_bytenr, mapped_datav, bio->bi_vcnt, bio, &bio_is_patched, - NULL, rw); + NULL, bio->bi_rw); while (i > 0) { i--; kunmap(bio->bi_io_vec[i].bv_page); } kfree(mapped_datav); - } else if (NULL != dev_state && (rw & REQ_FLUSH)) { + } else if (NULL != dev_state && (bio->bi_rw & REQ_PREFLUSH)) { if (dev_state->state->print_mask & BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH) printk(KERN_INFO - "submit_bio(rw=0x%x FLUSH, bdev=%p)\n", - rw, bio->bi_bdev); + "submit_bio(rw=%d,0x%x FLUSH, bdev=%p)\n", + bio_op(bio), bio->bi_rw, bio->bi_bdev); if (!dev_state->dummy_block_for_bio_bh_flush.is_iodone) { if ((dev_state->state->print_mask & (BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH | @@ -3004,7 +3005,7 @@ static void __btrfsic_submit_bio(int rw, struct bio *bio) block->never_written = 0; block->iodone_w_error = 0; block->flush_gen = dev_state->last_flush_gen + 1; - block->submit_bio_bh_rw = rw; + block->submit_bio_bh_rw = bio->bi_rw; block->orig_bio_bh_private = bio->bi_private; block->orig_bio_bh_end_io.bio = bio->bi_end_io; block->next_in_same_bio = NULL; @@ -3016,16 +3017,16 @@ leave: mutex_unlock(&btrfsic_mutex); } -void btrfsic_submit_bio(int rw, struct bio *bio) +void btrfsic_submit_bio(struct bio *bio) { - __btrfsic_submit_bio(rw, bio); - submit_bio(rw, bio); + __btrfsic_submit_bio(bio); + submit_bio(bio); } -int btrfsic_submit_bio_wait(int rw, struct bio *bio) +int btrfsic_submit_bio_wait(struct bio *bio) { - __btrfsic_submit_bio(rw, bio); - return submit_bio_wait(rw, bio); + __btrfsic_submit_bio(bio); + return submit_bio_wait(bio); } int btrfsic_mount(struct btrfs_root *root, diff --git a/fs/btrfs/check-integrity.h b/fs/btrfs/check-integrity.h index 13b8566c97ab..f78dff1c7e86 100644 --- a/fs/btrfs/check-integrity.h +++ b/fs/btrfs/check-integrity.h @@ -20,9 +20,9 @@ #define __BTRFS_CHECK_INTEGRITY__ #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY -int btrfsic_submit_bh(int rw, struct buffer_head *bh); -void btrfsic_submit_bio(int rw, struct bio *bio); -int btrfsic_submit_bio_wait(int rw, struct bio *bio); +int btrfsic_submit_bh(int op, int op_flags, struct buffer_head *bh); +void btrfsic_submit_bio(struct bio *bio); +int btrfsic_submit_bio_wait(struct bio *bio); #else #define btrfsic_submit_bh submit_bh #define btrfsic_submit_bio submit_bio diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 658c39b70fba..cefedabf0a92 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -363,6 +363,7 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start, kfree(cb); return -ENOMEM; } + bio_set_op_attrs(bio, REQ_OP_WRITE, 0); bio->bi_private = cb; bio->bi_end_io = end_compressed_bio_write; atomic_inc(&cb->pending_bios); @@ -373,7 +374,7 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start, page = compressed_pages[pg_index]; page->mapping = inode->i_mapping; if (bio->bi_iter.bi_size) - ret = io_tree->ops->merge_bio_hook(WRITE, page, 0, + ret = io_tree->ops->merge_bio_hook(page, 0, PAGE_SIZE, bio, 0); else @@ -401,13 +402,14 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start, BUG_ON(ret); /* -ENOMEM */ } - ret = btrfs_map_bio(root, WRITE, bio, 0, 1); + ret = btrfs_map_bio(root, bio, 0, 1); BUG_ON(ret); /* -ENOMEM */ bio_put(bio); bio = compressed_bio_alloc(bdev, first_byte, GFP_NOFS); BUG_ON(!bio); + bio_set_op_attrs(bio, REQ_OP_WRITE, 0); bio->bi_private = cb; bio->bi_end_io = end_compressed_bio_write; bio_add_page(bio, page, PAGE_SIZE, 0); @@ -431,7 +433,7 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start, BUG_ON(ret); /* -ENOMEM */ } - ret = btrfs_map_bio(root, WRITE, bio, 0, 1); + ret = btrfs_map_bio(root, bio, 0, 1); BUG_ON(ret); /* -ENOMEM */ bio_put(bio); @@ -646,6 +648,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, comp_bio = compressed_bio_alloc(bdev, cur_disk_byte, GFP_NOFS); if (!comp_bio) goto fail2; + bio_set_op_attrs (comp_bio, REQ_OP_READ, 0); comp_bio->bi_private = cb; comp_bio->bi_end_io = end_compressed_bio_read; atomic_inc(&cb->pending_bios); @@ -656,7 +659,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, page->index = em_start >> PAGE_SHIFT; if (comp_bio->bi_iter.bi_size) - ret = tree->ops->merge_bio_hook(READ, page, 0, + ret = tree->ops->merge_bio_hook(page, 0, PAGE_SIZE, comp_bio, 0); else @@ -687,8 +690,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, sums += DIV_ROUND_UP(comp_bio->bi_iter.bi_size, root->sectorsize); - ret = btrfs_map_bio(root, READ, comp_bio, - mirror_num, 0); + ret = btrfs_map_bio(root, comp_bio, mirror_num, 0); if (ret) { bio->bi_error = ret; bio_endio(comp_bio); @@ -699,6 +701,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, comp_bio = compressed_bio_alloc(bdev, cur_disk_byte, GFP_NOFS); BUG_ON(!comp_bio); + bio_set_op_attrs(comp_bio, REQ_OP_READ, 0); comp_bio->bi_private = cb; comp_bio->bi_end_io = end_compressed_bio_read; @@ -717,7 +720,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, BUG_ON(ret); /* -ENOMEM */ } - ret = btrfs_map_bio(root, READ, comp_bio, mirror_num, 0); + ret = btrfs_map_bio(root, comp_bio, mirror_num, 0); if (ret) { bio->bi_error = ret; bio_endio(comp_bio); diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 427c36b430a6..a85cf7d23309 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1373,7 +1373,8 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct btrfs_path *path, if (tm->op == MOD_LOG_KEY_REMOVE_WHILE_FREEING) { BUG_ON(tm->slot != 0); - eb_rewin = alloc_dummy_extent_buffer(fs_info, eb->start); + eb_rewin = alloc_dummy_extent_buffer(fs_info, eb->start, + eb->len); if (!eb_rewin) { btrfs_tree_read_unlock_blocking(eb); free_extent_buffer(eb); @@ -1454,7 +1455,8 @@ get_old_root(struct btrfs_root *root, u64 time_seq) } else if (old_root) { btrfs_tree_read_unlock(eb_root); free_extent_buffer(eb_root); - eb = alloc_dummy_extent_buffer(root->fs_info, logical); + eb = alloc_dummy_extent_buffer(root->fs_info, logical, + root->nodesize); } else { btrfs_set_lock_blocking_rw(eb_root, BTRFS_READ_LOCK); eb = btrfs_clone_extent_buffer(eb_root); @@ -1552,6 +1554,7 @@ noinline int btrfs_cow_block(struct btrfs_trans_handle *trans, trans->transid, root->fs_info->generation); if (!should_cow_block(trans, root, buf)) { + trans->dirty = true; *cow_ret = buf; return 0; } @@ -1783,10 +1786,12 @@ static noinline int generic_bin_search(struct extent_buffer *eb, if (!err) { tmp = (struct btrfs_disk_key *)(kaddr + offset - map_start); - } else { + } else if (err == 1) { read_extent_buffer(eb, &unaligned, offset, sizeof(unaligned)); tmp = &unaligned; + } else { + return err; } } else { @@ -2510,6 +2515,8 @@ read_block_for_search(struct btrfs_trans_handle *trans, if (!btrfs_buffer_uptodate(tmp, 0, 0)) ret = -EIO; free_extent_buffer(tmp); + } else { + ret = PTR_ERR(tmp); } return ret; } @@ -2773,8 +2780,10 @@ again: * then we don't want to set the path blocking, * so we test it here */ - if (!should_cow_block(trans, root, b)) + if (!should_cow_block(trans, root, b)) { + trans->dirty = true; goto cow_done; + } /* * must have write locks on this node and the @@ -2823,6 +2832,8 @@ cow_done: } ret = key_search(b, key, level, &prev_cmp, &slot); + if (ret < 0) + goto done; if (level != 0) { int dec = 0; diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 101c3cfd3f7c..b2620d1f883f 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2518,7 +2518,7 @@ void btrfs_put_block_group(struct btrfs_block_group_cache *cache); int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, struct btrfs_root *root, unsigned long count); int btrfs_async_run_delayed_refs(struct btrfs_root *root, - unsigned long count, int wait); + unsigned long count, u64 transid, int wait); int btrfs_lookup_data_extent(struct btrfs_root *root, u64 start, u64 len); int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, @@ -3091,7 +3091,7 @@ int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, struct btrfs_root *new_root, struct btrfs_root *parent_root, u64 new_dirid); -int btrfs_merge_bio_hook(int rw, struct page *page, unsigned long offset, +int btrfs_merge_bio_hook(struct page *page, unsigned long offset, size_t size, struct bio *bio, unsigned long bio_flags); int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index 61561c2a3f96..d3aaabbfada0 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -1606,15 +1606,23 @@ int btrfs_inode_delayed_dir_index_count(struct inode *inode) return 0; } -void btrfs_get_delayed_items(struct inode *inode, struct list_head *ins_list, - struct list_head *del_list) +bool btrfs_readdir_get_delayed_items(struct inode *inode, + struct list_head *ins_list, + struct list_head *del_list) { struct btrfs_delayed_node *delayed_node; struct btrfs_delayed_item *item; delayed_node = btrfs_get_delayed_node(inode); if (!delayed_node) - return; + return false; + + /* + * We can only do one readdir with delayed items at a time because of + * item->readdir_list. + */ + inode_unlock_shared(inode); + inode_lock(inode); mutex_lock(&delayed_node->mutex); item = __btrfs_first_delayed_insertion_item(delayed_node); @@ -1641,10 +1649,13 @@ void btrfs_get_delayed_items(struct inode *inode, struct list_head *ins_list, * requeue or dequeue this delayed node. */ atomic_dec(&delayed_node->refs); + + return true; } -void btrfs_put_delayed_items(struct list_head *ins_list, - struct list_head *del_list) +void btrfs_readdir_put_delayed_items(struct inode *inode, + struct list_head *ins_list, + struct list_head *del_list) { struct btrfs_delayed_item *curr, *next; @@ -1659,6 +1670,12 @@ void btrfs_put_delayed_items(struct list_head *ins_list, if (atomic_dec_and_test(&curr->refs)) kfree(curr); } + + /* + * The VFS is going to do up_read(), so we need to downgrade back to a + * read lock. + */ + downgrade_write(&inode->i_rwsem); } int btrfs_should_delete_dir_index(struct list_head *del_list, diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h index 0167853c84ae..2495b3d4075f 100644 --- a/fs/btrfs/delayed-inode.h +++ b/fs/btrfs/delayed-inode.h @@ -137,10 +137,12 @@ void btrfs_kill_all_delayed_nodes(struct btrfs_root *root); void btrfs_destroy_delayed_inodes(struct btrfs_root *root); /* Used for readdir() */ -void btrfs_get_delayed_items(struct inode *inode, struct list_head *ins_list, - struct list_head *del_list); -void btrfs_put_delayed_items(struct list_head *ins_list, - struct list_head *del_list); +bool btrfs_readdir_get_delayed_items(struct inode *inode, + struct list_head *ins_list, + struct list_head *del_list); +void btrfs_readdir_put_delayed_items(struct inode *inode, + struct list_head *ins_list, + struct list_head *del_list); int btrfs_should_delete_dir_index(struct list_head *del_list, u64 index); int btrfs_readdir_delayed_dir_index(struct dir_context *ctx, diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 6628fca9f4ed..9a726ded2c6d 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -124,7 +124,6 @@ struct async_submit_bio { struct list_head list; extent_submit_bio_hook_t *submit_bio_start; extent_submit_bio_hook_t *submit_bio_done; - int rw; int mirror_num; unsigned long bio_flags; /* @@ -727,7 +726,7 @@ static void end_workqueue_bio(struct bio *bio) fs_info = end_io_wq->info; end_io_wq->error = bio->bi_error; - if (bio->bi_rw & REQ_WRITE) { + if (bio_op(bio) == REQ_OP_WRITE) { if (end_io_wq->metadata == BTRFS_WQ_ENDIO_METADATA) { wq = fs_info->endio_meta_write_workers; func = btrfs_endio_meta_write_helper; @@ -797,7 +796,7 @@ static void run_one_async_start(struct btrfs_work *work) int ret; async = container_of(work, struct async_submit_bio, work); - ret = async->submit_bio_start(async->inode, async->rw, async->bio, + ret = async->submit_bio_start(async->inode, async->bio, async->mirror_num, async->bio_flags, async->bio_offset); if (ret) @@ -830,9 +829,8 @@ static void run_one_async_done(struct btrfs_work *work) return; } - async->submit_bio_done(async->inode, async->rw, async->bio, - async->mirror_num, async->bio_flags, - async->bio_offset); + async->submit_bio_done(async->inode, async->bio, async->mirror_num, + async->bio_flags, async->bio_offset); } static void run_one_async_free(struct btrfs_work *work) @@ -844,7 +842,7 @@ static void run_one_async_free(struct btrfs_work *work) } int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, - int rw, struct bio *bio, int mirror_num, + struct bio *bio, int mirror_num, unsigned long bio_flags, u64 bio_offset, extent_submit_bio_hook_t *submit_bio_start, @@ -857,7 +855,6 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, return -ENOMEM; async->inode = inode; - async->rw = rw; async->bio = bio; async->mirror_num = mirror_num; async->submit_bio_start = submit_bio_start; @@ -873,7 +870,7 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, atomic_inc(&fs_info->nr_async_submits); - if (rw & REQ_SYNC) + if (bio->bi_rw & REQ_SYNC) btrfs_set_work_high_priority(&async->work); btrfs_queue_work(fs_info->workers, &async->work); @@ -903,9 +900,8 @@ static int btree_csum_one_bio(struct bio *bio) return ret; } -static int __btree_submit_bio_start(struct inode *inode, int rw, - struct bio *bio, int mirror_num, - unsigned long bio_flags, +static int __btree_submit_bio_start(struct inode *inode, struct bio *bio, + int mirror_num, unsigned long bio_flags, u64 bio_offset) { /* @@ -915,7 +911,7 @@ static int __btree_submit_bio_start(struct inode *inode, int rw, return btree_csum_one_bio(bio); } -static int __btree_submit_bio_done(struct inode *inode, int rw, struct bio *bio, +static int __btree_submit_bio_done(struct inode *inode, struct bio *bio, int mirror_num, unsigned long bio_flags, u64 bio_offset) { @@ -925,7 +921,7 @@ static int __btree_submit_bio_done(struct inode *inode, int rw, struct bio *bio, * when we're called for a write, we're already in the async * submission context. Just jump into btrfs_map_bio */ - ret = btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, mirror_num, 1); + ret = btrfs_map_bio(BTRFS_I(inode)->root, bio, mirror_num, 1); if (ret) { bio->bi_error = ret; bio_endio(bio); @@ -944,14 +940,14 @@ static int check_async_write(struct inode *inode, unsigned long bio_flags) return 1; } -static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, +static int btree_submit_bio_hook(struct inode *inode, struct bio *bio, int mirror_num, unsigned long bio_flags, u64 bio_offset) { int async = check_async_write(inode, bio_flags); int ret; - if (!(rw & REQ_WRITE)) { + if (bio_op(bio) != REQ_OP_WRITE) { /* * called for a read, do the setup so that checksum validation * can happen in the async kernel threads @@ -960,21 +956,19 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, bio, BTRFS_WQ_ENDIO_METADATA); if (ret) goto out_w_error; - ret = btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, - mirror_num, 0); + ret = btrfs_map_bio(BTRFS_I(inode)->root, bio, mirror_num, 0); } else if (!async) { ret = btree_csum_one_bio(bio); if (ret) goto out_w_error; - ret = btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, - mirror_num, 0); + ret = btrfs_map_bio(BTRFS_I(inode)->root, bio, mirror_num, 0); } else { /* * kthread helpers are used to submit writes so that * checksumming can happen in parallel across all CPUs */ ret = btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info, - inode, rw, bio, mirror_num, 0, + inode, bio, mirror_num, 0, bio_offset, __btree_submit_bio_start, __btree_submit_bio_done); @@ -1098,7 +1092,7 @@ void readahead_tree_block(struct btrfs_root *root, u64 bytenr) struct inode *btree_inode = root->fs_info->btree_inode; buf = btrfs_find_create_tree_block(root, bytenr); - if (!buf) + if (IS_ERR(buf)) return; read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree, buf, 0, WAIT_NONE, btree_get_extent, 0); @@ -1114,7 +1108,7 @@ int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr, int ret; buf = btrfs_find_create_tree_block(root, bytenr); - if (!buf) + if (IS_ERR(buf)) return 0; set_bit(EXTENT_BUFFER_READAHEAD, &buf->bflags); @@ -1147,7 +1141,8 @@ struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, u64 bytenr) { if (btrfs_test_is_dummy_root(root)) - return alloc_test_extent_buffer(root->fs_info, bytenr); + return alloc_test_extent_buffer(root->fs_info, bytenr, + root->nodesize); return alloc_extent_buffer(root->fs_info, bytenr); } @@ -1171,8 +1166,8 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, int ret; buf = btrfs_find_create_tree_block(root, bytenr); - if (!buf) - return ERR_PTR(-ENOMEM); + if (IS_ERR(buf)) + return buf; ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid); if (ret) { @@ -1314,14 +1309,16 @@ static struct btrfs_root *btrfs_alloc_root(struct btrfs_fs_info *fs_info, #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS /* Should only be used by the testing infrastructure */ -struct btrfs_root *btrfs_alloc_dummy_root(void) +struct btrfs_root *btrfs_alloc_dummy_root(u32 sectorsize, u32 nodesize) { struct btrfs_root *root; root = btrfs_alloc_root(NULL, GFP_KERNEL); if (!root) return ERR_PTR(-ENOMEM); - __setup_root(4096, 4096, 4096, root, NULL, 1); + /* We don't use the stripesize in selftest, set it as sectorsize */ + __setup_root(nodesize, sectorsize, sectorsize, root, NULL, + BTRFS_ROOT_TREE_OBJECTID); set_bit(BTRFS_ROOT_DUMMY_ROOT, &root->state); root->alloc_bytenr = 0; @@ -1803,6 +1800,13 @@ static int cleaner_kthread(void *arg) if (btrfs_need_cleaner_sleep(root)) goto sleep; + /* + * Do not do anything if we might cause open_ctree() to block + * before we have finished mounting the filesystem. + */ + if (!root->fs_info->open) + goto sleep; + if (!mutex_trylock(&root->fs_info->cleaner_mutex)) goto sleep; @@ -2517,7 +2521,6 @@ int open_ctree(struct super_block *sb, int num_backups_tried = 0; int backup_index = 0; int max_active; - bool cleaner_mutex_locked = false; tree_root = fs_info->tree_root = btrfs_alloc_root(fs_info, GFP_KERNEL); chunk_root = fs_info->chunk_root = btrfs_alloc_root(fs_info, GFP_KERNEL); @@ -2797,7 +2800,7 @@ int open_ctree(struct super_block *sb, nodesize = btrfs_super_nodesize(disk_super); sectorsize = btrfs_super_sectorsize(disk_super); - stripesize = btrfs_super_stripesize(disk_super); + stripesize = sectorsize; fs_info->dirty_metadata_batch = nodesize * (1 + ilog2(nr_cpu_ids)); fs_info->delalloc_batch = sectorsize * 512 * (1 + ilog2(nr_cpu_ids)); @@ -2996,13 +2999,6 @@ retry_root_backup: goto fail_sysfs; } - /* - * Hold the cleaner_mutex thread here so that we don't block - * for a long time on btrfs_recover_relocation. cleaner_kthread - * will wait for us to finish mounting the filesystem. - */ - mutex_lock(&fs_info->cleaner_mutex); - cleaner_mutex_locked = true; fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root, "btrfs-cleaner"); if (IS_ERR(fs_info->cleaner_kthread)) @@ -3062,8 +3058,10 @@ retry_root_backup: ret = btrfs_cleanup_fs_roots(fs_info); if (ret) goto fail_qgroup; - /* We locked cleaner_mutex before creating cleaner_kthread. */ + + mutex_lock(&fs_info->cleaner_mutex); ret = btrfs_recover_relocation(tree_root); + mutex_unlock(&fs_info->cleaner_mutex); if (ret < 0) { btrfs_warn(fs_info, "failed to recover relocation: %d", ret); @@ -3071,8 +3069,6 @@ retry_root_backup: goto fail_qgroup; } } - mutex_unlock(&fs_info->cleaner_mutex); - cleaner_mutex_locked = false; location.objectid = BTRFS_FS_TREE_OBJECTID; location.type = BTRFS_ROOT_ITEM_KEY; @@ -3186,10 +3182,6 @@ fail_cleaner: filemap_write_and_wait(fs_info->btree_inode->i_mapping); fail_sysfs: - if (cleaner_mutex_locked) { - mutex_unlock(&fs_info->cleaner_mutex); - cleaner_mutex_locked = false; - } btrfs_sysfs_remove_mounted(fs_info); fail_fsdev_sysfs: @@ -3420,9 +3412,9 @@ static int write_dev_supers(struct btrfs_device *device, * to go down lazy. */ if (i == 0) - ret = btrfsic_submit_bh(WRITE_FUA, bh); + ret = btrfsic_submit_bh(REQ_OP_WRITE, WRITE_FUA, bh); else - ret = btrfsic_submit_bh(WRITE_SYNC, bh); + ret = btrfsic_submit_bh(REQ_OP_WRITE, WRITE_SYNC, bh); if (ret) errors++; } @@ -3486,12 +3478,13 @@ static int write_dev_flush(struct btrfs_device *device, int wait) bio->bi_end_io = btrfs_end_empty_barrier; bio->bi_bdev = device->bdev; + bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_FLUSH); init_completion(&device->flush_wait); bio->bi_private = &device->flush_wait; device->flush_bio = bio; bio_get(bio); - btrfsic_submit_bio(WRITE_FLUSH, bio); + btrfsic_submit_bio(bio); return 0; } @@ -4130,6 +4123,16 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info, * Hint to catch really bogus numbers, bitflips or so, more exact checks are * done later */ + if (btrfs_super_bytes_used(sb) < 6 * btrfs_super_nodesize(sb)) { + btrfs_err(fs_info, "bytes_used is too small %llu", + btrfs_super_bytes_used(sb)); + ret = -EINVAL; + } + if (!is_power_of_2(btrfs_super_stripesize(sb))) { + btrfs_err(fs_info, "invalid stripesize %u", + btrfs_super_stripesize(sb)); + ret = -EINVAL; + } if (btrfs_super_num_devices(sb) > (1UL << 31)) printk(KERN_WARNING "BTRFS: suspicious number of devices: %llu\n", btrfs_super_num_devices(sb)); diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 8e79d0070bcf..dbf3e1aab69e 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -90,7 +90,7 @@ void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info, void btrfs_free_fs_root(struct btrfs_root *root); #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS -struct btrfs_root *btrfs_alloc_dummy_root(void); +struct btrfs_root *btrfs_alloc_dummy_root(u32 sectorsize, u32 nodesize); #endif /* @@ -122,7 +122,7 @@ void btrfs_csum_final(u32 crc, char *result); int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio, enum btrfs_wq_endio_type metadata); int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, - int rw, struct bio *bio, int mirror_num, + struct bio *bio, int mirror_num, unsigned long bio_flags, u64 bio_offset, extent_submit_bio_hook_t *submit_bio_start, extent_submit_bio_hook_t *submit_bio_done); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index a400951e8678..b480fd555774 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2042,8 +2042,13 @@ int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, struct btrfs_bio *bbio = NULL; + /* + * Avoid races with device replace and make sure our bbio has devices + * associated to its stripes that don't go away while we are discarding. + */ + btrfs_bio_counter_inc_blocked(root->fs_info); /* Tell the block device(s) that the sectors can be discarded */ - ret = btrfs_map_block(root->fs_info, REQ_DISCARD, + ret = btrfs_map_block(root->fs_info, REQ_OP_DISCARD, bytenr, &num_bytes, &bbio, 0); /* Error condition is -ENOMEM */ if (!ret) { @@ -2074,6 +2079,7 @@ int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, } btrfs_put_bbio(bbio); } + btrfs_bio_counter_dec(root->fs_info); if (actual_bytes) *actual_bytes = discarded_bytes; @@ -2829,6 +2835,7 @@ int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans, struct async_delayed_refs { struct btrfs_root *root; + u64 transid; int count; int error; int sync; @@ -2844,6 +2851,10 @@ static void delayed_ref_async_start(struct btrfs_work *work) async = container_of(work, struct async_delayed_refs, work); + /* if the commit is already started, we don't need to wait here */ + if (btrfs_transaction_blocked(async->root->fs_info)) + goto done; + trans = btrfs_join_transaction(async->root); if (IS_ERR(trans)) { async->error = PTR_ERR(trans); @@ -2855,10 +2866,15 @@ static void delayed_ref_async_start(struct btrfs_work *work) * wait on delayed refs */ trans->sync = true; + + /* Don't bother flushing if we got into a different transaction */ + if (trans->transid > async->transid) + goto end; + ret = btrfs_run_delayed_refs(trans, async->root, async->count); if (ret) async->error = ret; - +end: ret = btrfs_end_transaction(trans, async->root); if (ret && !async->error) async->error = ret; @@ -2870,7 +2886,7 @@ done: } int btrfs_async_run_delayed_refs(struct btrfs_root *root, - unsigned long count, int wait) + unsigned long count, u64 transid, int wait) { struct async_delayed_refs *async; int ret; @@ -2882,6 +2898,7 @@ int btrfs_async_run_delayed_refs(struct btrfs_root *root, async->root = root->fs_info->tree_root; async->count = count; async->error = 0; + async->transid = transid; if (wait) async->sync = 1; else @@ -8010,8 +8027,9 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf; buf = btrfs_find_create_tree_block(root, bytenr); - if (!buf) - return ERR_PTR(-ENOMEM); + if (IS_ERR(buf)) + return buf; + btrfs_set_header_generation(buf, trans->transid); btrfs_set_buffer_lockdep_class(root->root_key.objectid, buf, level); btrfs_tree_lock(buf); @@ -8038,7 +8056,7 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root, set_extent_dirty(&trans->transaction->dirty_pages, buf->start, buf->start + buf->len - 1, GFP_NOFS); } - trans->blocks_used++; + trans->dirty = true; /* this returns a buffer locked for blocking */ return buf; } @@ -8653,8 +8671,9 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans, next = btrfs_find_tree_block(root->fs_info, bytenr); if (!next) { next = btrfs_find_create_tree_block(root, bytenr); - if (!next) - return -ENOMEM; + if (IS_ERR(next)) + return PTR_ERR(next); + btrfs_set_buffer_lockdep_class(root->root_key.objectid, next, level - 1); reada = 1; diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 3cd57825c75f..27c214941004 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2025,9 +2025,16 @@ int repair_io_failure(struct inode *inode, u64 start, u64 length, u64 logical, bio->bi_iter.bi_size = 0; map_length = length; + /* + * Avoid races with device replace and make sure our bbio has devices + * associated to its stripes that don't go away while we are doing the + * read repair operation. + */ + btrfs_bio_counter_inc_blocked(fs_info); ret = btrfs_map_block(fs_info, WRITE, logical, &map_length, &bbio, mirror_num); if (ret) { + btrfs_bio_counter_dec(fs_info); bio_put(bio); return -EIO; } @@ -2037,14 +2044,17 @@ int repair_io_failure(struct inode *inode, u64 start, u64 length, u64 logical, dev = bbio->stripes[mirror_num-1].dev; btrfs_put_bbio(bbio); if (!dev || !dev->bdev || !dev->writeable) { + btrfs_bio_counter_dec(fs_info); bio_put(bio); return -EIO; } bio->bi_bdev = dev->bdev; + bio->bi_rw = WRITE_SYNC; bio_add_page(bio, page, length, pg_offset); - if (btrfsic_submit_bio_wait(WRITE_SYNC, bio)) { + if (btrfsic_submit_bio_wait(bio)) { /* try to remap that extent elsewhere? */ + btrfs_bio_counter_dec(fs_info); bio_put(bio); btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS); return -EIO; @@ -2054,6 +2064,7 @@ int repair_io_failure(struct inode *inode, u64 start, u64 length, u64 logical, "read error corrected: ino %llu off %llu (dev %s sector %llu)", btrfs_ino(inode), start, rcu_str_deref(dev->name), sector); + btrfs_bio_counter_dec(fs_info); bio_put(bio); return 0; } @@ -2376,7 +2387,7 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset, int read_mode; int ret; - BUG_ON(failed_bio->bi_rw & REQ_WRITE); + BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE); ret = btrfs_get_io_failure_record(inode, start, end, &failrec); if (ret) @@ -2402,12 +2413,12 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset, free_io_failure(inode, failrec); return -EIO; } + bio_set_op_attrs(bio, REQ_OP_READ, read_mode); pr_debug("Repair Read Error: submitting new read[%#x] to this_mirror=%d, in_validation=%d\n", read_mode, failrec->this_mirror, failrec->in_validation); - ret = tree->ops->submit_bio_hook(inode, read_mode, bio, - failrec->this_mirror, + ret = tree->ops->submit_bio_hook(inode, bio, failrec->this_mirror, failrec->bio_flags, 0); if (ret) { free_io_failure(inode, failrec); @@ -2713,8 +2724,8 @@ struct bio *btrfs_io_bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs) } -static int __must_check submit_one_bio(int rw, struct bio *bio, - int mirror_num, unsigned long bio_flags) +static int __must_check submit_one_bio(struct bio *bio, int mirror_num, + unsigned long bio_flags) { int ret = 0; struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; @@ -2725,33 +2736,32 @@ static int __must_check submit_one_bio(int rw, struct bio *bio, start = page_offset(page) + bvec->bv_offset; bio->bi_private = NULL; - bio_get(bio); if (tree->ops && tree->ops->submit_bio_hook) - ret = tree->ops->submit_bio_hook(page->mapping->host, rw, bio, + ret = tree->ops->submit_bio_hook(page->mapping->host, bio, mirror_num, bio_flags, start); else - btrfsic_submit_bio(rw, bio); + btrfsic_submit_bio(bio); bio_put(bio); return ret; } -static int merge_bio(int rw, struct extent_io_tree *tree, struct page *page, +static int merge_bio(struct extent_io_tree *tree, struct page *page, unsigned long offset, size_t size, struct bio *bio, unsigned long bio_flags) { int ret = 0; if (tree->ops && tree->ops->merge_bio_hook) - ret = tree->ops->merge_bio_hook(rw, page, offset, size, bio, + ret = tree->ops->merge_bio_hook(page, offset, size, bio, bio_flags); BUG_ON(ret < 0); return ret; } -static int submit_extent_page(int rw, struct extent_io_tree *tree, +static int submit_extent_page(int op, int op_flags, struct extent_io_tree *tree, struct writeback_control *wbc, struct page *page, sector_t sector, size_t size, unsigned long offset, @@ -2779,10 +2789,9 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree, if (prev_bio_flags != bio_flags || !contig || force_bio_submit || - merge_bio(rw, tree, page, offset, page_size, bio, bio_flags) || + merge_bio(tree, page, offset, page_size, bio, bio_flags) || bio_add_page(bio, page, page_size, offset) < page_size) { - ret = submit_one_bio(rw, bio, mirror_num, - prev_bio_flags); + ret = submit_one_bio(bio, mirror_num, prev_bio_flags); if (ret < 0) { *bio_ret = NULL; return ret; @@ -2803,6 +2812,7 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree, bio_add_page(bio, page, page_size, offset); bio->bi_end_io = end_io_func; bio->bi_private = tree; + bio_set_op_attrs(bio, op, op_flags); if (wbc) { wbc_init_bio(wbc, bio); wbc_account_io(wbc, page, page_size); @@ -2811,7 +2821,7 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree, if (bio_ret) *bio_ret = bio; else - ret = submit_one_bio(rw, bio, mirror_num, bio_flags); + ret = submit_one_bio(bio, mirror_num, bio_flags); return ret; } @@ -2875,7 +2885,7 @@ static int __do_readpage(struct extent_io_tree *tree, get_extent_t *get_extent, struct extent_map **em_cached, struct bio **bio, int mirror_num, - unsigned long *bio_flags, int rw, + unsigned long *bio_flags, int read_flags, u64 *prev_em_start) { struct inode *inode = page->mapping->host; @@ -3058,8 +3068,8 @@ static int __do_readpage(struct extent_io_tree *tree, } pnr -= page->index; - ret = submit_extent_page(rw, tree, NULL, page, - sector, disk_io_size, pg_offset, + ret = submit_extent_page(REQ_OP_READ, read_flags, tree, NULL, + page, sector, disk_io_size, pg_offset, bdev, bio, pnr, end_bio_extent_readpage, mirror_num, *bio_flags, @@ -3090,7 +3100,7 @@ static inline void __do_contiguous_readpages(struct extent_io_tree *tree, get_extent_t *get_extent, struct extent_map **em_cached, struct bio **bio, int mirror_num, - unsigned long *bio_flags, int rw, + unsigned long *bio_flags, u64 *prev_em_start) { struct inode *inode; @@ -3111,7 +3121,7 @@ static inline void __do_contiguous_readpages(struct extent_io_tree *tree, for (index = 0; index < nr_pages; index++) { __do_readpage(tree, pages[index], get_extent, em_cached, bio, - mirror_num, bio_flags, rw, prev_em_start); + mirror_num, bio_flags, 0, prev_em_start); put_page(pages[index]); } } @@ -3121,7 +3131,7 @@ static void __extent_readpages(struct extent_io_tree *tree, int nr_pages, get_extent_t *get_extent, struct extent_map **em_cached, struct bio **bio, int mirror_num, - unsigned long *bio_flags, int rw, + unsigned long *bio_flags, u64 *prev_em_start) { u64 start = 0; @@ -3143,7 +3153,7 @@ static void __extent_readpages(struct extent_io_tree *tree, index - first_index, start, end, get_extent, em_cached, bio, mirror_num, bio_flags, - rw, prev_em_start); + prev_em_start); start = page_start; end = start + PAGE_SIZE - 1; first_index = index; @@ -3154,7 +3164,7 @@ static void __extent_readpages(struct extent_io_tree *tree, __do_contiguous_readpages(tree, &pages[first_index], index - first_index, start, end, get_extent, em_cached, bio, - mirror_num, bio_flags, rw, + mirror_num, bio_flags, prev_em_start); } @@ -3162,7 +3172,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree, struct page *page, get_extent_t *get_extent, struct bio **bio, int mirror_num, - unsigned long *bio_flags, int rw) + unsigned long *bio_flags, int read_flags) { struct inode *inode = page->mapping->host; struct btrfs_ordered_extent *ordered; @@ -3182,7 +3192,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree, } ret = __do_readpage(tree, page, get_extent, NULL, bio, mirror_num, - bio_flags, rw, NULL); + bio_flags, read_flags, NULL); return ret; } @@ -3194,9 +3204,9 @@ int extent_read_full_page(struct extent_io_tree *tree, struct page *page, int ret; ret = __extent_read_full_page(tree, page, get_extent, &bio, mirror_num, - &bio_flags, READ); + &bio_flags, 0); if (bio) - ret = submit_one_bio(READ, bio, mirror_num, bio_flags); + ret = submit_one_bio(bio, mirror_num, bio_flags); return ret; } @@ -3430,8 +3440,8 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode, page->index, cur, end); } - ret = submit_extent_page(write_flags, tree, wbc, page, - sector, iosize, pg_offset, + ret = submit_extent_page(REQ_OP_WRITE, write_flags, tree, wbc, + page, sector, iosize, pg_offset, bdev, &epd->bio, max_nr, end_bio_extent_writepage, 0, 0, 0, false); @@ -3470,13 +3480,11 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, size_t pg_offset = 0; loff_t i_size = i_size_read(inode); unsigned long end_index = i_size >> PAGE_SHIFT; - int write_flags; + int write_flags = 0; unsigned long nr_written = 0; if (wbc->sync_mode == WB_SYNC_ALL) write_flags = WRITE_SYNC; - else - write_flags = WRITE; trace___extent_writepage(page, inode, wbc); @@ -3720,7 +3728,7 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb, u64 offset = eb->start; unsigned long i, num_pages; unsigned long bio_flags = 0; - int rw = (epd->sync_io ? WRITE_SYNC : WRITE) | REQ_META; + int write_flags = (epd->sync_io ? WRITE_SYNC : 0) | REQ_META; int ret = 0; clear_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags); @@ -3734,9 +3742,10 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb, clear_page_dirty_for_io(p); set_page_writeback(p); - ret = submit_extent_page(rw, tree, wbc, p, offset >> 9, - PAGE_SIZE, 0, bdev, &epd->bio, - -1, end_bio_extent_buffer_writepage, + ret = submit_extent_page(REQ_OP_WRITE, write_flags, tree, wbc, + p, offset >> 9, PAGE_SIZE, 0, bdev, + &epd->bio, -1, + end_bio_extent_buffer_writepage, 0, epd->bio_flags, bio_flags, false); epd->bio_flags = bio_flags; if (ret) { @@ -4046,13 +4055,12 @@ retry: static void flush_epd_write_bio(struct extent_page_data *epd) { if (epd->bio) { - int rw = WRITE; int ret; - if (epd->sync_io) - rw = WRITE_SYNC; + bio_set_op_attrs(epd->bio, REQ_OP_WRITE, + epd->sync_io ? WRITE_SYNC : 0); - ret = submit_one_bio(rw, epd->bio, 0, epd->bio_flags); + ret = submit_one_bio(epd->bio, 0, epd->bio_flags); BUG_ON(ret < 0); /* -ENOMEM */ epd->bio = NULL; } @@ -4179,19 +4187,19 @@ int extent_readpages(struct extent_io_tree *tree, if (nr < ARRAY_SIZE(pagepool)) continue; __extent_readpages(tree, pagepool, nr, get_extent, &em_cached, - &bio, 0, &bio_flags, READ, &prev_em_start); + &bio, 0, &bio_flags, &prev_em_start); nr = 0; } if (nr) __extent_readpages(tree, pagepool, nr, get_extent, &em_cached, - &bio, 0, &bio_flags, READ, &prev_em_start); + &bio, 0, &bio_flags, &prev_em_start); if (em_cached) free_extent_map(em_cached); BUG_ON(!list_empty(pages)); if (bio) - return submit_one_bio(READ, bio, 0, bio_flags); + return submit_one_bio(bio, 0, bio_flags); return 0; } @@ -4718,16 +4726,16 @@ err: } struct extent_buffer *alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info, - u64 start) + u64 start, u32 nodesize) { unsigned long len; if (!fs_info) { /* * Called only from tests that don't always have a fs_info - * available, but we know that nodesize is 4096 + * available */ - len = 4096; + len = nodesize; } else { len = fs_info->tree_root->nodesize; } @@ -4823,7 +4831,7 @@ struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info, #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info, - u64 start) + u64 start, u32 nodesize) { struct extent_buffer *eb, *exists = NULL; int ret; @@ -4831,7 +4839,7 @@ struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info, eb = find_extent_buffer(fs_info, start); if (eb) return eb; - eb = alloc_dummy_extent_buffer(fs_info, start); + eb = alloc_dummy_extent_buffer(fs_info, start, nodesize); if (!eb) return NULL; eb->fs_info = fs_info; @@ -4882,18 +4890,25 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info, int uptodate = 1; int ret; + if (!IS_ALIGNED(start, fs_info->tree_root->sectorsize)) { + btrfs_err(fs_info, "bad tree block start %llu", start); + return ERR_PTR(-EINVAL); + } + eb = find_extent_buffer(fs_info, start); if (eb) return eb; eb = __alloc_extent_buffer(fs_info, start, len); if (!eb) - return NULL; + return ERR_PTR(-ENOMEM); for (i = 0; i < num_pages; i++, index++) { p = find_or_create_page(mapping, index, GFP_NOFS|__GFP_NOFAIL); - if (!p) + if (!p) { + exists = ERR_PTR(-ENOMEM); goto free_eb; + } spin_lock(&mapping->private_lock); if (PagePrivate(p)) { @@ -4938,8 +4953,10 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info, set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); again: ret = radix_tree_preload(GFP_NOFS); - if (ret) + if (ret) { + exists = ERR_PTR(ret); goto free_eb; + } spin_lock(&fs_info->buffer_lock); ret = radix_tree_insert(&fs_info->buffer_radix, @@ -5217,7 +5234,7 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, err = __extent_read_full_page(tree, page, get_extent, &bio, mirror_num, &bio_flags, - READ | REQ_META); + REQ_META); if (err) ret = err; } else { @@ -5226,8 +5243,7 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, } if (bio) { - err = submit_one_bio(READ | REQ_META, bio, mirror_num, - bio_flags); + err = submit_one_bio(bio, mirror_num, bio_flags); if (err) return err; } @@ -5323,6 +5339,11 @@ int read_extent_buffer_to_user(struct extent_buffer *eb, void __user *dstv, return ret; } +/* + * return 0 if the item is found within a page. + * return 1 if the item spans two pages. + * return -EINVAL otherwise. + */ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start, unsigned long min_len, char **map, unsigned long *map_start, @@ -5337,7 +5358,7 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start, PAGE_SHIFT; if (i != end_i) - return -EINVAL; + return 1; if (i == 0) { offset = start_offset; diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 1baf19c9b79d..bc2729a7612d 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -63,16 +63,16 @@ struct btrfs_root; struct btrfs_io_bio; struct io_failure_record; -typedef int (extent_submit_bio_hook_t)(struct inode *inode, int rw, - struct bio *bio, int mirror_num, - unsigned long bio_flags, u64 bio_offset); +typedef int (extent_submit_bio_hook_t)(struct inode *inode, struct bio *bio, + int mirror_num, unsigned long bio_flags, + u64 bio_offset); struct extent_io_ops { int (*fill_delalloc)(struct inode *inode, struct page *locked_page, u64 start, u64 end, int *page_started, unsigned long *nr_written); int (*writepage_start_hook)(struct page *page, u64 start, u64 end); extent_submit_bio_hook_t *submit_bio_hook; - int (*merge_bio_hook)(int rw, struct page *page, unsigned long offset, + int (*merge_bio_hook)(struct page *page, unsigned long offset, size_t size, struct bio *bio, unsigned long bio_flags); int (*readpage_io_failed_hook)(struct page *page, int failed_mirror); @@ -348,7 +348,7 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info, struct extent_buffer *__alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info, u64 start, unsigned long len); struct extent_buffer *alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info, - u64 start); + u64 start, u32 nodesize); struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src); struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info, u64 start); @@ -468,5 +468,5 @@ noinline u64 find_lock_delalloc_range(struct inode *inode, u64 *end, u64 max_bytes); #endif struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info, - u64 start); + u64 start, u32 nodesize); #endif diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index e0c9bd3fb02d..2234e88cf674 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1534,30 +1534,30 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, reserve_bytes = round_up(write_bytes + sector_offset, root->sectorsize); - if ((BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW | - BTRFS_INODE_PREALLOC)) && - check_can_nocow(inode, pos, &write_bytes) > 0) { - /* - * For nodata cow case, no need to reserve - * data space. - */ - only_release_metadata = true; - /* - * our prealloc extent may be smaller than - * write_bytes, so scale down. - */ - num_pages = DIV_ROUND_UP(write_bytes + offset, - PAGE_SIZE); - reserve_bytes = round_up(write_bytes + sector_offset, - root->sectorsize); - goto reserve_metadata; - } - ret = btrfs_check_data_free_space(inode, pos, write_bytes); - if (ret < 0) - break; + if (ret < 0) { + if ((BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW | + BTRFS_INODE_PREALLOC)) && + check_can_nocow(inode, pos, &write_bytes) > 0) { + /* + * For nodata cow case, no need to reserve + * data space. + */ + only_release_metadata = true; + /* + * our prealloc extent may be smaller than + * write_bytes, so scale down. + */ + num_pages = DIV_ROUND_UP(write_bytes + offset, + PAGE_SIZE); + reserve_bytes = round_up(write_bytes + + sector_offset, + root->sectorsize); + } else { + break; + } + } -reserve_metadata: ret = btrfs_delalloc_reserve_metadata(inode, reserve_bytes); if (ret) { if (!only_release_metadata) diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index c6dc1183f542..69d270f6602c 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -29,7 +29,7 @@ #include "inode-map.h" #include "volumes.h" -#define BITS_PER_BITMAP (PAGE_SIZE * 8) +#define BITS_PER_BITMAP (PAGE_SIZE * 8UL) #define MAX_CACHE_BYTES_PER_GIG SZ_32K struct btrfs_trim_range { @@ -1415,11 +1415,11 @@ static inline u64 offset_to_bitmap(struct btrfs_free_space_ctl *ctl, u64 offset) { u64 bitmap_start; - u32 bytes_per_bitmap; + u64 bytes_per_bitmap; bytes_per_bitmap = BITS_PER_BITMAP * ctl->unit; bitmap_start = offset - ctl->start; - bitmap_start = div_u64(bitmap_start, bytes_per_bitmap); + bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap); bitmap_start *= bytes_per_bitmap; bitmap_start += ctl->start; @@ -1638,10 +1638,10 @@ static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl) u64 bitmap_bytes; u64 extent_bytes; u64 size = block_group->key.offset; - u32 bytes_per_bg = BITS_PER_BITMAP * ctl->unit; - u32 max_bitmaps = div_u64(size + bytes_per_bg - 1, bytes_per_bg); + u64 bytes_per_bg = BITS_PER_BITMAP * ctl->unit; + u64 max_bitmaps = div64_u64(size + bytes_per_bg - 1, bytes_per_bg); - max_bitmaps = max_t(u32, max_bitmaps, 1); + max_bitmaps = max_t(u64, max_bitmaps, 1); ASSERT(ctl->total_bitmaps <= max_bitmaps); @@ -1660,7 +1660,7 @@ static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl) * sure we don't go over our overall goal of MAX_CACHE_BYTES_PER_GIG as * we add more bitmaps. */ - bitmap_bytes = (ctl->total_bitmaps + 1) * PAGE_SIZE; + bitmap_bytes = (ctl->total_bitmaps + 1) * ctl->unit; if (bitmap_bytes >= max_bytes) { ctl->extents_thresh = 0; @@ -3662,7 +3662,7 @@ have_info: if (tmp->offset + tmp->bytes < offset) break; if (offset + bytes < tmp->offset) { - n = rb_prev(&info->offset_index); + n = rb_prev(&tmp->offset_index); continue; } info = tmp; @@ -3676,7 +3676,7 @@ have_info: if (offset + bytes < tmp->offset) break; if (tmp->offset + tmp->bytes < offset) { - n = rb_next(&info->offset_index); + n = rb_next(&tmp->offset_index); continue; } info = tmp; diff --git a/fs/btrfs/hash.c b/fs/btrfs/hash.c index aae520b2aee5..a97fdc156a03 100644 --- a/fs/btrfs/hash.c +++ b/fs/btrfs/hash.c @@ -24,6 +24,11 @@ int __init btrfs_hash_init(void) return PTR_ERR_OR_ZERO(tfm); } +const char* btrfs_crc32c_impl(void) +{ + return crypto_tfm_alg_driver_name(crypto_shash_tfm(tfm)); +} + void btrfs_hash_exit(void) { crypto_free_shash(tfm); diff --git a/fs/btrfs/hash.h b/fs/btrfs/hash.h index 118a2316e5d3..c3a2ec554361 100644 --- a/fs/btrfs/hash.h +++ b/fs/btrfs/hash.h @@ -22,6 +22,7 @@ int __init btrfs_hash_init(void); void btrfs_hash_exit(void); +const char* btrfs_crc32c_impl(void); u32 btrfs_crc32c(u32 crc, const void *address, unsigned int length); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 270499598ed4..df731c0ebec7 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1823,7 +1823,7 @@ static void btrfs_clear_bit_hook(struct inode *inode, * extent_io.c merge_bio_hook, this must check the chunk tree to make sure * we don't create bios that span stripes or chunks */ -int btrfs_merge_bio_hook(int rw, struct page *page, unsigned long offset, +int btrfs_merge_bio_hook(struct page *page, unsigned long offset, size_t size, struct bio *bio, unsigned long bio_flags) { @@ -1838,7 +1838,7 @@ int btrfs_merge_bio_hook(int rw, struct page *page, unsigned long offset, length = bio->bi_iter.bi_size; map_length = length; - ret = btrfs_map_block(root->fs_info, rw, logical, + ret = btrfs_map_block(root->fs_info, bio_op(bio), logical, &map_length, NULL, 0); /* Will always return 0 with map_multi == NULL */ BUG_ON(ret < 0); @@ -1855,9 +1855,8 @@ int btrfs_merge_bio_hook(int rw, struct page *page, unsigned long offset, * At IO completion time the cums attached on the ordered extent record * are inserted into the btree */ -static int __btrfs_submit_bio_start(struct inode *inode, int rw, - struct bio *bio, int mirror_num, - unsigned long bio_flags, +static int __btrfs_submit_bio_start(struct inode *inode, struct bio *bio, + int mirror_num, unsigned long bio_flags, u64 bio_offset) { struct btrfs_root *root = BTRFS_I(inode)->root; @@ -1876,14 +1875,14 @@ static int __btrfs_submit_bio_start(struct inode *inode, int rw, * At IO completion time the cums attached on the ordered extent record * are inserted into the btree */ -static int __btrfs_submit_bio_done(struct inode *inode, int rw, struct bio *bio, +static int __btrfs_submit_bio_done(struct inode *inode, struct bio *bio, int mirror_num, unsigned long bio_flags, u64 bio_offset) { struct btrfs_root *root = BTRFS_I(inode)->root; int ret; - ret = btrfs_map_bio(root, rw, bio, mirror_num, 1); + ret = btrfs_map_bio(root, bio, mirror_num, 1); if (ret) { bio->bi_error = ret; bio_endio(bio); @@ -1895,7 +1894,7 @@ static int __btrfs_submit_bio_done(struct inode *inode, int rw, struct bio *bio, * extent_io.c submission hook. This does the right thing for csum calculation * on write, or reading the csums from the tree before a read */ -static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, +static int btrfs_submit_bio_hook(struct inode *inode, struct bio *bio, int mirror_num, unsigned long bio_flags, u64 bio_offset) { @@ -1910,7 +1909,7 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, if (btrfs_is_free_space_inode(inode)) metadata = BTRFS_WQ_ENDIO_FREE_SPACE; - if (!(rw & REQ_WRITE)) { + if (bio_op(bio) != REQ_OP_WRITE) { ret = btrfs_bio_wq_end_io(root->fs_info, bio, metadata); if (ret) goto out; @@ -1932,7 +1931,7 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, goto mapit; /* we're doing a write, do the async checksumming */ ret = btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info, - inode, rw, bio, mirror_num, + inode, bio, mirror_num, bio_flags, bio_offset, __btrfs_submit_bio_start, __btrfs_submit_bio_done); @@ -1944,7 +1943,7 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, } mapit: - ret = btrfs_map_bio(root, rw, bio, mirror_num, 0); + ret = btrfs_map_bio(root, bio, mirror_num, 0); out: if (ret < 0) { @@ -3271,7 +3270,16 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode) /* grab metadata reservation from transaction handle */ if (reserve) { ret = btrfs_orphan_reserve_metadata(trans, inode); - BUG_ON(ret); /* -ENOSPC in reservation; Logic error? JDM */ + ASSERT(!ret); + if (ret) { + atomic_dec(&root->orphan_inodes); + clear_bit(BTRFS_INODE_ORPHAN_META_RESERVED, + &BTRFS_I(inode)->runtime_flags); + if (insert) + clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, + &BTRFS_I(inode)->runtime_flags); + return ret; + } } /* insert an orphan item to track this unlinked/truncated file */ @@ -4549,6 +4557,7 @@ delete: BUG_ON(ret); if (btrfs_should_throttle_delayed_refs(trans, root)) btrfs_async_run_delayed_refs(root, + trans->transid, trans->delayed_ref_updates * 2, 0); if (be_nice) { if (truncate_space_check(trans, root, @@ -5748,6 +5757,7 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx) int name_len; int is_curr = 0; /* ctx->pos points to the current index? */ bool emitted; + bool put = false; /* FIXME, use a real flag for deciding about the key type */ if (root->fs_info->tree_root == root) @@ -5765,7 +5775,8 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx) if (key_type == BTRFS_DIR_INDEX_KEY) { INIT_LIST_HEAD(&ins_list); INIT_LIST_HEAD(&del_list); - btrfs_get_delayed_items(inode, &ins_list, &del_list); + put = btrfs_readdir_get_delayed_items(inode, &ins_list, + &del_list); } key.type = key_type; @@ -5912,8 +5923,8 @@ next: nopos: ret = 0; err: - if (key_type == BTRFS_DIR_INDEX_KEY) - btrfs_put_delayed_items(&ins_list, &del_list); + if (put) + btrfs_readdir_put_delayed_items(inode, &ins_list, &del_list); btrfs_free_path(path); return ret; } @@ -6979,7 +6990,18 @@ insert: * existing will always be non-NULL, since there must be * extent causing the -EEXIST. */ - if (start >= extent_map_end(existing) || + if (existing->start == em->start && + extent_map_end(existing) == extent_map_end(em) && + em->block_start == existing->block_start) { + /* + * these two extents are the same, it happens + * with inlines especially + */ + free_extent_map(em); + em = existing; + err = 0; + + } else if (start >= extent_map_end(existing) || start <= existing->start) { /* * The existing extent map is the one nearest to @@ -7767,12 +7789,12 @@ err: } static inline int submit_dio_repair_bio(struct inode *inode, struct bio *bio, - int rw, int mirror_num) + int mirror_num) { struct btrfs_root *root = BTRFS_I(inode)->root; int ret; - BUG_ON(rw & REQ_WRITE); + BUG_ON(bio_op(bio) == REQ_OP_WRITE); bio_get(bio); @@ -7781,7 +7803,7 @@ static inline int submit_dio_repair_bio(struct inode *inode, struct bio *bio, if (ret) goto err; - ret = btrfs_map_bio(root, rw, bio, mirror_num, 0); + ret = btrfs_map_bio(root, bio, mirror_num, 0); err: bio_put(bio); return ret; @@ -7832,7 +7854,7 @@ static int dio_read_error(struct inode *inode, struct bio *failed_bio, int read_mode; int ret; - BUG_ON(failed_bio->bi_rw & REQ_WRITE); + BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE); ret = btrfs_get_io_failure_record(inode, start, end, &failrec); if (ret) @@ -7860,13 +7882,13 @@ static int dio_read_error(struct inode *inode, struct bio *failed_bio, free_io_failure(inode, failrec); return -EIO; } + bio_set_op_attrs(bio, REQ_OP_READ, read_mode); btrfs_debug(BTRFS_I(inode)->root->fs_info, "Repair DIO Read Error: submitting new dio read[%#x] to this_mirror=%d, in_validation=%d\n", read_mode, failrec->this_mirror, failrec->in_validation); - ret = submit_dio_repair_bio(inode, bio, read_mode, - failrec->this_mirror); + ret = submit_dio_repair_bio(inode, bio, failrec->this_mirror); if (ret) { free_io_failure(inode, failrec); bio_put(bio); @@ -8156,7 +8178,7 @@ static void btrfs_endio_direct_write(struct bio *bio) bio_put(bio); } -static int __btrfs_submit_bio_start_direct_io(struct inode *inode, int rw, +static int __btrfs_submit_bio_start_direct_io(struct inode *inode, struct bio *bio, int mirror_num, unsigned long bio_flags, u64 offset) { @@ -8174,8 +8196,8 @@ static void btrfs_end_dio_bio(struct bio *bio) if (err) btrfs_warn(BTRFS_I(dip->inode)->root->fs_info, - "direct IO failed ino %llu rw %lu sector %#Lx len %u err no %d", - btrfs_ino(dip->inode), bio->bi_rw, + "direct IO failed ino %llu rw %d,%u sector %#Lx len %u err no %d", + btrfs_ino(dip->inode), bio_op(bio), bio->bi_rw, (unsigned long long)bio->bi_iter.bi_sector, bio->bi_iter.bi_size, err); @@ -8249,11 +8271,11 @@ static inline int btrfs_lookup_and_bind_dio_csum(struct btrfs_root *root, } static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, - int rw, u64 file_offset, int skip_sum, + u64 file_offset, int skip_sum, int async_submit) { struct btrfs_dio_private *dip = bio->bi_private; - int write = rw & REQ_WRITE; + bool write = bio_op(bio) == REQ_OP_WRITE; struct btrfs_root *root = BTRFS_I(inode)->root; int ret; @@ -8274,8 +8296,7 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, if (write && async_submit) { ret = btrfs_wq_submit_bio(root->fs_info, - inode, rw, bio, 0, 0, - file_offset, + inode, bio, 0, 0, file_offset, __btrfs_submit_bio_start_direct_io, __btrfs_submit_bio_done); goto err; @@ -8294,13 +8315,13 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, goto err; } map: - ret = btrfs_map_bio(root, rw, bio, 0, async_submit); + ret = btrfs_map_bio(root, bio, 0, async_submit); err: bio_put(bio); return ret; } -static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip, +static int btrfs_submit_direct_hook(struct btrfs_dio_private *dip, int skip_sum) { struct inode *inode = dip->inode; @@ -8319,8 +8340,8 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip, int i; map_length = orig_bio->bi_iter.bi_size; - ret = btrfs_map_block(root->fs_info, rw, start_sector << 9, - &map_length, NULL, 0); + ret = btrfs_map_block(root->fs_info, bio_op(orig_bio), + start_sector << 9, &map_length, NULL, 0); if (ret) return -EIO; @@ -8340,6 +8361,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip, if (!bio) return -ENOMEM; + bio_set_op_attrs(bio, bio_op(orig_bio), orig_bio->bi_rw); bio->bi_private = dip; bio->bi_end_io = btrfs_end_dio_bio; btrfs_io_bio(bio)->logical = file_offset; @@ -8359,7 +8381,7 @@ next_block: * before we're done setting it up */ atomic_inc(&dip->pending_bios); - ret = __btrfs_submit_dio_bio(bio, inode, rw, + ret = __btrfs_submit_dio_bio(bio, inode, file_offset, skip_sum, async_submit); if (ret) { @@ -8377,12 +8399,13 @@ next_block: start_sector, GFP_NOFS); if (!bio) goto out_err; + bio_set_op_attrs(bio, bio_op(orig_bio), orig_bio->bi_rw); bio->bi_private = dip; bio->bi_end_io = btrfs_end_dio_bio; btrfs_io_bio(bio)->logical = file_offset; map_length = orig_bio->bi_iter.bi_size; - ret = btrfs_map_block(root->fs_info, rw, + ret = btrfs_map_block(root->fs_info, bio_op(orig_bio), start_sector << 9, &map_length, NULL, 0); if (ret) { @@ -8402,7 +8425,7 @@ next_block: } submit: - ret = __btrfs_submit_dio_bio(bio, inode, rw, file_offset, skip_sum, + ret = __btrfs_submit_dio_bio(bio, inode, file_offset, skip_sum, async_submit); if (!ret) return 0; @@ -8422,14 +8445,14 @@ out_err: return 0; } -static void btrfs_submit_direct(int rw, struct bio *dio_bio, - struct inode *inode, loff_t file_offset) +static void btrfs_submit_direct(struct bio *dio_bio, struct inode *inode, + loff_t file_offset) { struct btrfs_dio_private *dip = NULL; struct bio *io_bio = NULL; struct btrfs_io_bio *btrfs_bio; int skip_sum; - int write = rw & REQ_WRITE; + bool write = (bio_op(dio_bio) == REQ_OP_WRITE); int ret = 0; skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; @@ -8480,7 +8503,7 @@ static void btrfs_submit_direct(int rw, struct bio *dio_bio, dio_data->unsubmitted_oe_range_end; } - ret = btrfs_submit_direct_hook(rw, dip, skip_sum); + ret = btrfs_submit_direct_hook(dip, skip_sum); if (!ret) return; @@ -10514,7 +10537,7 @@ static const struct inode_operations btrfs_dir_ro_inode_operations = { static const struct file_operations btrfs_dir_file_operations = { .llseek = generic_file_llseek, .read = generic_read_dir, - .iterate = btrfs_real_readdir, + .iterate_shared = btrfs_real_readdir, .unlocked_ioctl = btrfs_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = btrfs_compat_ioctl, diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 559170464d7c..aca8264f4a49 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -718,12 +718,13 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr, return count; } -void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr, +int btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr, const u64 range_start, const u64 range_len) { struct btrfs_root *root; struct list_head splice; int done; + int total_done = 0; INIT_LIST_HEAD(&splice); @@ -742,6 +743,7 @@ void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr, done = btrfs_wait_ordered_extents(root, nr, range_start, range_len); btrfs_put_fs_root(root); + total_done += done; spin_lock(&fs_info->ordered_root_lock); if (nr != -1) { @@ -752,6 +754,8 @@ void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr, list_splice_tail(&splice, &fs_info->ordered_roots); spin_unlock(&fs_info->ordered_root_lock); mutex_unlock(&fs_info->ordered_operations_mutex); + + return total_done; } /* @@ -964,6 +968,7 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset, struct rb_node *prev = NULL; struct btrfs_ordered_extent *test; int ret = 1; + u64 orig_offset = offset; spin_lock_irq(&tree->lock); if (ordered) { @@ -979,7 +984,7 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset, /* truncate file */ if (disk_i_size > i_size) { - BTRFS_I(inode)->disk_i_size = i_size; + BTRFS_I(inode)->disk_i_size = orig_offset; ret = 0; goto out; } diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index 2049c9be85ee..451507776ff5 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h @@ -199,7 +199,7 @@ int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, u32 *sum, int len); int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr, const u64 range_start, const u64 range_len); -void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr, +int btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr, const u64 range_start, const u64 range_len); void btrfs_get_logged_extents(struct inode *inode, struct list_head *logged_list, diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index f8b6d411a034..cd8d302a1f61 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c @@ -1320,7 +1320,9 @@ write_data: bio->bi_private = rbio; bio->bi_end_io = raid_write_end_io; - submit_bio(WRITE, bio); + bio_set_op_attrs(bio, REQ_OP_WRITE, 0); + + submit_bio(bio); } return; @@ -1573,11 +1575,12 @@ static int raid56_rmw_stripe(struct btrfs_raid_bio *rbio) bio->bi_private = rbio; bio->bi_end_io = raid_rmw_end_io; + bio_set_op_attrs(bio, REQ_OP_READ, 0); btrfs_bio_wq_end_io(rbio->fs_info, bio, BTRFS_WQ_ENDIO_RAID56); - submit_bio(READ, bio); + submit_bio(bio); } /* the actual write will happen once the reads are done */ return 0; @@ -2097,11 +2100,12 @@ static int __raid56_parity_recover(struct btrfs_raid_bio *rbio) bio->bi_private = rbio; bio->bi_end_io = raid_recover_end_io; + bio_set_op_attrs(bio, REQ_OP_READ, 0); btrfs_bio_wq_end_io(rbio->fs_info, bio, BTRFS_WQ_ENDIO_RAID56); - submit_bio(READ, bio); + submit_bio(bio); } out: return 0; @@ -2433,7 +2437,9 @@ submit_write: bio->bi_private = rbio; bio->bi_end_io = raid_write_end_io; - submit_bio(WRITE, bio); + bio_set_op_attrs(bio, REQ_OP_WRITE, 0); + + submit_bio(bio); } return; @@ -2610,11 +2616,12 @@ static void raid56_parity_scrub_stripe(struct btrfs_raid_bio *rbio) bio->bi_private = rbio; bio->bi_end_io = raid56_parity_scrub_end_io; + bio_set_op_attrs(bio, REQ_OP_READ, 0); btrfs_bio_wq_end_io(rbio->fs_info, bio, BTRFS_WQ_ENDIO_RAID56); - submit_bio(READ, bio); + submit_bio(bio); } /* the actual write will happen once the reads are done */ return; diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c index 298631eaee78..8428db7cd88f 100644 --- a/fs/btrfs/reada.c +++ b/fs/btrfs/reada.c @@ -761,12 +761,14 @@ static void __reada_start_machine(struct btrfs_fs_info *fs_info) do { enqueued = 0; + mutex_lock(&fs_devices->device_list_mutex); list_for_each_entry(device, &fs_devices->devices, dev_list) { if (atomic_read(&device->reada_in_flight) < MAX_IN_FLIGHT) enqueued += reada_start_machine_dev(fs_info, device); } + mutex_unlock(&fs_devices->device_list_mutex); total += enqueued; } while (enqueued && total < 10000); diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 46d847f66e4b..e08b6bc676e3 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -1504,8 +1504,9 @@ static void scrub_recheck_block(struct btrfs_fs_info *fs_info, sblock->no_io_error_seen = 0; } else { bio->bi_iter.bi_sector = page->physical >> 9; + bio_set_op_attrs(bio, REQ_OP_READ, 0); - if (btrfsic_submit_bio_wait(READ, bio)) + if (btrfsic_submit_bio_wait(bio)) sblock->no_io_error_seen = 0; } @@ -1583,6 +1584,7 @@ static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad, return -EIO; bio->bi_bdev = page_bad->dev->bdev; bio->bi_iter.bi_sector = page_bad->physical >> 9; + bio_set_op_attrs(bio, REQ_OP_WRITE, 0); ret = bio_add_page(bio, page_good->page, PAGE_SIZE, 0); if (PAGE_SIZE != ret) { @@ -1590,7 +1592,7 @@ static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad, return -EIO; } - if (btrfsic_submit_bio_wait(WRITE, bio)) { + if (btrfsic_submit_bio_wait(bio)) { btrfs_dev_stat_inc_and_print(page_bad->dev, BTRFS_DEV_STAT_WRITE_ERRS); btrfs_dev_replace_stats_inc( @@ -1684,6 +1686,7 @@ again: bio->bi_end_io = scrub_wr_bio_end_io; bio->bi_bdev = sbio->dev->bdev; bio->bi_iter.bi_sector = sbio->physical >> 9; + bio_set_op_attrs(bio, REQ_OP_WRITE, 0); sbio->err = 0; } else if (sbio->physical + sbio->page_count * PAGE_SIZE != spage->physical_for_dev_replace || @@ -1731,7 +1734,7 @@ static void scrub_wr_submit(struct scrub_ctx *sctx) * orders the requests before sending them to the driver which * doubled the write performance on spinning disks when measured * with Linux 3.5 */ - btrfsic_submit_bio(WRITE, sbio->bio); + btrfsic_submit_bio(sbio->bio); } static void scrub_wr_bio_end_io(struct bio *bio) @@ -2041,7 +2044,7 @@ static void scrub_submit(struct scrub_ctx *sctx) sbio = sctx->bios[sctx->curr]; sctx->curr = -1; scrub_pending_bio_inc(sctx); - btrfsic_submit_bio(READ, sbio->bio); + btrfsic_submit_bio(sbio->bio); } static int scrub_add_page_to_rd_bio(struct scrub_ctx *sctx, @@ -2088,6 +2091,7 @@ again: bio->bi_end_io = scrub_bio_end_io; bio->bi_bdev = sbio->dev->bdev; bio->bi_iter.bi_sector = sbio->physical >> 9; + bio_set_op_attrs(bio, REQ_OP_READ, 0); sbio->err = 0; } else if (sbio->physical + sbio->page_count * PAGE_SIZE != spage->physical || @@ -3582,6 +3586,46 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx, */ scrub_pause_on(fs_info); ret = btrfs_inc_block_group_ro(root, cache); + if (!ret && is_dev_replace) { + /* + * If we are doing a device replace wait for any tasks + * that started dellaloc right before we set the block + * group to RO mode, as they might have just allocated + * an extent from it or decided they could do a nocow + * write. And if any such tasks did that, wait for their + * ordered extents to complete and then commit the + * current transaction, so that we can later see the new + * extent items in the extent tree - the ordered extents + * create delayed data references (for cow writes) when + * they complete, which will be run and insert the + * corresponding extent items into the extent tree when + * we commit the transaction they used when running + * inode.c:btrfs_finish_ordered_io(). We later use + * the commit root of the extent tree to find extents + * to copy from the srcdev into the tgtdev, and we don't + * want to miss any new extents. + */ + btrfs_wait_block_group_reservations(cache); + btrfs_wait_nocow_writers(cache); + ret = btrfs_wait_ordered_roots(fs_info, -1, + cache->key.objectid, + cache->key.offset); + if (ret > 0) { + struct btrfs_trans_handle *trans; + + trans = btrfs_join_transaction(root); + if (IS_ERR(trans)) + ret = PTR_ERR(trans); + else + ret = btrfs_commit_transaction(trans, + root); + if (ret) { + scrub_pause_off(fs_info); + btrfs_put_block_group(cache); + break; + } + } + } scrub_pause_off(fs_info); if (ret == 0) { @@ -3602,9 +3646,11 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx, break; } + btrfs_dev_replace_lock(&fs_info->dev_replace, 1); dev_replace->cursor_right = found_key.offset + length; dev_replace->cursor_left = found_key.offset; dev_replace->item_needs_writeback = 1; + btrfs_dev_replace_unlock(&fs_info->dev_replace, 1); ret = scrub_chunk(sctx, scrub_dev, chunk_offset, length, found_key.offset, cache, is_dev_replace); @@ -3640,6 +3686,11 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx, scrub_pause_off(fs_info); + btrfs_dev_replace_lock(&fs_info->dev_replace, 1); + dev_replace->cursor_left = dev_replace->cursor_right; + dev_replace->item_needs_writeback = 1; + btrfs_dev_replace_unlock(&fs_info->dev_replace, 1); + if (ro_set) btrfs_dec_block_group_ro(root, cache); @@ -3677,9 +3728,6 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx, ret = -ENOMEM; break; } - - dev_replace->cursor_left = dev_replace->cursor_right; - dev_replace->item_needs_writeback = 1; skip: key.offset = found_key.offset + length; btrfs_release_path(path); @@ -4392,6 +4440,7 @@ static int write_page_nocow(struct scrub_ctx *sctx, bio->bi_iter.bi_size = 0; bio->bi_iter.bi_sector = physical_for_dev_replace >> 9; bio->bi_bdev = dev->bdev; + bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_SYNC); ret = bio_add_page(bio, page, PAGE_SIZE, 0); if (ret != PAGE_SIZE) { leave_with_eio: @@ -4400,7 +4449,7 @@ leave_with_eio: return -EIO; } - if (btrfsic_submit_bio_wait(WRITE_SYNC, bio)) + if (btrfsic_submit_bio_wait(bio)) goto leave_with_eio; bio_put(bio); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 4e59a91a11e0..60e7179ed4b7 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -235,7 +235,7 @@ void __btrfs_abort_transaction(struct btrfs_trans_handle *trans, trans->aborted = errno; /* Nothing used. The other threads that have joined this * transaction may be able to continue. */ - if (!trans->blocks_used && list_empty(&trans->new_bgs)) { + if (!trans->dirty && list_empty(&trans->new_bgs)) { const char *errstr; errstr = btrfs_decode_error(errno); @@ -1807,6 +1807,8 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) } } sb->s_flags &= ~MS_RDONLY; + + fs_info->open = 1; } out: wake_up_process(fs_info->transaction_kthread); @@ -2303,7 +2305,7 @@ static void btrfs_interface_exit(void) static void btrfs_print_mod_info(void) { - printk(KERN_INFO "Btrfs loaded" + printk(KERN_INFO "Btrfs loaded, crc32c=%s" #ifdef CONFIG_BTRFS_DEBUG ", debug=on" #endif @@ -2313,33 +2315,48 @@ static void btrfs_print_mod_info(void) #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY ", integrity-checker=on" #endif - "\n"); + "\n", + btrfs_crc32c_impl()); } static int btrfs_run_sanity_tests(void) { - int ret; - + int ret, i; + u32 sectorsize, nodesize; + u32 test_sectorsize[] = { + PAGE_SIZE, + }; ret = btrfs_init_test_fs(); if (ret) return ret; - - ret = btrfs_test_free_space_cache(); - if (ret) - goto out; - ret = btrfs_test_extent_buffer_operations(); - if (ret) - goto out; - ret = btrfs_test_extent_io(); - if (ret) - goto out; - ret = btrfs_test_inodes(); - if (ret) - goto out; - ret = btrfs_test_qgroups(); - if (ret) - goto out; - ret = btrfs_test_free_space_tree(); + for (i = 0; i < ARRAY_SIZE(test_sectorsize); i++) { + sectorsize = test_sectorsize[i]; + for (nodesize = sectorsize; + nodesize <= BTRFS_MAX_METADATA_BLOCKSIZE; + nodesize <<= 1) { + pr_info("BTRFS: selftest: sectorsize: %u nodesize: %u\n", + sectorsize, nodesize); + ret = btrfs_test_free_space_cache(sectorsize, nodesize); + if (ret) + goto out; + ret = btrfs_test_extent_buffer_operations(sectorsize, + nodesize); + if (ret) + goto out; + ret = btrfs_test_extent_io(sectorsize, nodesize); + if (ret) + goto out; + ret = btrfs_test_inodes(sectorsize, nodesize); + if (ret) + goto out; + ret = btrfs_test_qgroups(sectorsize, nodesize); + if (ret) + goto out; + ret = btrfs_test_free_space_tree(sectorsize, nodesize); + if (ret) + goto out; + } + } out: btrfs_destroy_test_fs(); return ret; diff --git a/fs/btrfs/tests/btrfs-tests.c b/fs/btrfs/tests/btrfs-tests.c index f54bf450bad3..02223f3f78f4 100644 --- a/fs/btrfs/tests/btrfs-tests.c +++ b/fs/btrfs/tests/btrfs-tests.c @@ -68,7 +68,7 @@ int btrfs_init_test_fs(void) if (IS_ERR(test_mnt)) { printk(KERN_ERR "btrfs: cannot mount test file system\n"); unregister_filesystem(&test_type); - return ret; + return PTR_ERR(test_mnt); } return 0; } @@ -175,7 +175,7 @@ void btrfs_free_dummy_root(struct btrfs_root *root) } struct btrfs_block_group_cache * -btrfs_alloc_dummy_block_group(unsigned long length) +btrfs_alloc_dummy_block_group(unsigned long length, u32 sectorsize) { struct btrfs_block_group_cache *cache; @@ -192,8 +192,8 @@ btrfs_alloc_dummy_block_group(unsigned long length) cache->key.objectid = 0; cache->key.offset = length; cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; - cache->sectorsize = 4096; - cache->full_stripe_len = 4096; + cache->sectorsize = sectorsize; + cache->full_stripe_len = sectorsize; INIT_LIST_HEAD(&cache->list); INIT_LIST_HEAD(&cache->cluster_list); diff --git a/fs/btrfs/tests/btrfs-tests.h b/fs/btrfs/tests/btrfs-tests.h index 054b8c73c951..66fb6b701eb7 100644 --- a/fs/btrfs/tests/btrfs-tests.h +++ b/fs/btrfs/tests/btrfs-tests.h @@ -26,27 +26,28 @@ struct btrfs_root; struct btrfs_trans_handle; -int btrfs_test_free_space_cache(void); -int btrfs_test_extent_buffer_operations(void); -int btrfs_test_extent_io(void); -int btrfs_test_inodes(void); -int btrfs_test_qgroups(void); -int btrfs_test_free_space_tree(void); +int btrfs_test_free_space_cache(u32 sectorsize, u32 nodesize); +int btrfs_test_extent_buffer_operations(u32 sectorsize, u32 nodesize); +int btrfs_test_extent_io(u32 sectorsize, u32 nodesize); +int btrfs_test_inodes(u32 sectorsize, u32 nodesize); +int btrfs_test_qgroups(u32 sectorsize, u32 nodesize); +int btrfs_test_free_space_tree(u32 sectorsize, u32 nodesize); int btrfs_init_test_fs(void); void btrfs_destroy_test_fs(void); struct inode *btrfs_new_test_inode(void); struct btrfs_fs_info *btrfs_alloc_dummy_fs_info(void); void btrfs_free_dummy_root(struct btrfs_root *root); struct btrfs_block_group_cache * -btrfs_alloc_dummy_block_group(unsigned long length); +btrfs_alloc_dummy_block_group(unsigned long length, u32 sectorsize); void btrfs_free_dummy_block_group(struct btrfs_block_group_cache *cache); void btrfs_init_dummy_trans(struct btrfs_trans_handle *trans); #else -static inline int btrfs_test_free_space_cache(void) +static inline int btrfs_test_free_space_cache(u32 sectorsize, u32 nodesize) { return 0; } -static inline int btrfs_test_extent_buffer_operations(void) +static inline int btrfs_test_extent_buffer_operations(u32 sectorsize, + u32 nodesize) { return 0; } @@ -57,19 +58,19 @@ static inline int btrfs_init_test_fs(void) static inline void btrfs_destroy_test_fs(void) { } -static inline int btrfs_test_extent_io(void) +static inline int btrfs_test_extent_io(u32 sectorsize, u32 nodesize) { return 0; } -static inline int btrfs_test_inodes(void) +static inline int btrfs_test_inodes(u32 sectorsize, u32 nodesize) { return 0; } -static inline int btrfs_test_qgroups(void) +static inline int btrfs_test_qgroups(u32 sectorsize, u32 nodesize) { return 0; } -static inline int btrfs_test_free_space_tree(void) +static inline int btrfs_test_free_space_tree(u32 sectorsize, u32 nodesize) { return 0; } diff --git a/fs/btrfs/tests/extent-buffer-tests.c b/fs/btrfs/tests/extent-buffer-tests.c index f51963a8f929..4f8cbd1ec5ee 100644 --- a/fs/btrfs/tests/extent-buffer-tests.c +++ b/fs/btrfs/tests/extent-buffer-tests.c @@ -22,7 +22,7 @@ #include "../extent_io.h" #include "../disk-io.h" -static int test_btrfs_split_item(void) +static int test_btrfs_split_item(u32 sectorsize, u32 nodesize) { struct btrfs_path *path; struct btrfs_root *root; @@ -40,7 +40,7 @@ static int test_btrfs_split_item(void) test_msg("Running btrfs_split_item tests\n"); - root = btrfs_alloc_dummy_root(); + root = btrfs_alloc_dummy_root(sectorsize, nodesize); if (IS_ERR(root)) { test_msg("Could not allocate root\n"); return PTR_ERR(root); @@ -53,7 +53,8 @@ static int test_btrfs_split_item(void) return -ENOMEM; } - path->nodes[0] = eb = alloc_dummy_extent_buffer(NULL, 4096); + path->nodes[0] = eb = alloc_dummy_extent_buffer(NULL, nodesize, + nodesize); if (!eb) { test_msg("Could not allocate dummy buffer\n"); ret = -ENOMEM; @@ -222,8 +223,8 @@ out: return ret; } -int btrfs_test_extent_buffer_operations(void) +int btrfs_test_extent_buffer_operations(u32 sectorsize, u32 nodesize) { - test_msg("Running extent buffer operation tests"); - return test_btrfs_split_item(); + test_msg("Running extent buffer operation tests\n"); + return test_btrfs_split_item(sectorsize, nodesize); } diff --git a/fs/btrfs/tests/extent-io-tests.c b/fs/btrfs/tests/extent-io-tests.c index 55724607f79b..d19ab0317283 100644 --- a/fs/btrfs/tests/extent-io-tests.c +++ b/fs/btrfs/tests/extent-io-tests.c @@ -21,6 +21,7 @@ #include <linux/slab.h> #include <linux/sizes.h> #include "btrfs-tests.h" +#include "../ctree.h" #include "../extent_io.h" #define PROCESS_UNLOCK (1 << 0) @@ -65,7 +66,7 @@ static noinline int process_page_range(struct inode *inode, u64 start, u64 end, return count; } -static int test_find_delalloc(void) +static int test_find_delalloc(u32 sectorsize) { struct inode *inode; struct extent_io_tree tmp; @@ -113,7 +114,7 @@ static int test_find_delalloc(void) * |--- delalloc ---| * |--- search ---| */ - set_extent_delalloc(&tmp, 0, 4095, NULL); + set_extent_delalloc(&tmp, 0, sectorsize - 1, NULL); start = 0; end = 0; found = find_lock_delalloc_range(inode, &tmp, locked_page, &start, @@ -122,9 +123,9 @@ static int test_find_delalloc(void) test_msg("Should have found at least one delalloc\n"); goto out_bits; } - if (start != 0 || end != 4095) { - test_msg("Expected start 0 end 4095, got start %Lu end %Lu\n", - start, end); + if (start != 0 || end != (sectorsize - 1)) { + test_msg("Expected start 0 end %u, got start %llu end %llu\n", + sectorsize - 1, start, end); goto out_bits; } unlock_extent(&tmp, start, end); @@ -144,7 +145,7 @@ static int test_find_delalloc(void) test_msg("Couldn't find the locked page\n"); goto out_bits; } - set_extent_delalloc(&tmp, 4096, max_bytes - 1, NULL); + set_extent_delalloc(&tmp, sectorsize, max_bytes - 1, NULL); start = test_start; end = 0; found = find_lock_delalloc_range(inode, &tmp, locked_page, &start, @@ -172,7 +173,7 @@ static int test_find_delalloc(void) * |--- delalloc ---| * |--- search ---| */ - test_start = max_bytes + 4096; + test_start = max_bytes + sectorsize; locked_page = find_lock_page(inode->i_mapping, test_start >> PAGE_SHIFT); if (!locked_page) { @@ -272,6 +273,16 @@ out: return ret; } +/** + * test_bit_in_byte - Determine whether a bit is set in a byte + * @nr: bit number to test + * @addr: Address to start counting from + */ +static inline int test_bit_in_byte(int nr, const u8 *addr) +{ + return 1UL & (addr[nr / BITS_PER_BYTE] >> (nr & (BITS_PER_BYTE - 1))); +} + static int __test_eb_bitmaps(unsigned long *bitmap, struct extent_buffer *eb, unsigned long len) { @@ -298,25 +309,29 @@ static int __test_eb_bitmaps(unsigned long *bitmap, struct extent_buffer *eb, return -EINVAL; } - bitmap_set(bitmap, (PAGE_SIZE - sizeof(long) / 2) * BITS_PER_BYTE, - sizeof(long) * BITS_PER_BYTE); - extent_buffer_bitmap_set(eb, PAGE_SIZE - sizeof(long) / 2, 0, - sizeof(long) * BITS_PER_BYTE); - if (memcmp_extent_buffer(eb, bitmap, 0, len) != 0) { - test_msg("Setting straddling pages failed\n"); - return -EINVAL; - } + /* Straddling pages test */ + if (len > PAGE_SIZE) { + bitmap_set(bitmap, + (PAGE_SIZE - sizeof(long) / 2) * BITS_PER_BYTE, + sizeof(long) * BITS_PER_BYTE); + extent_buffer_bitmap_set(eb, PAGE_SIZE - sizeof(long) / 2, 0, + sizeof(long) * BITS_PER_BYTE); + if (memcmp_extent_buffer(eb, bitmap, 0, len) != 0) { + test_msg("Setting straddling pages failed\n"); + return -EINVAL; + } - bitmap_set(bitmap, 0, len * BITS_PER_BYTE); - bitmap_clear(bitmap, - (PAGE_SIZE - sizeof(long) / 2) * BITS_PER_BYTE, - sizeof(long) * BITS_PER_BYTE); - extent_buffer_bitmap_set(eb, 0, 0, len * BITS_PER_BYTE); - extent_buffer_bitmap_clear(eb, PAGE_SIZE - sizeof(long) / 2, 0, - sizeof(long) * BITS_PER_BYTE); - if (memcmp_extent_buffer(eb, bitmap, 0, len) != 0) { - test_msg("Clearing straddling pages failed\n"); - return -EINVAL; + bitmap_set(bitmap, 0, len * BITS_PER_BYTE); + bitmap_clear(bitmap, + (PAGE_SIZE - sizeof(long) / 2) * BITS_PER_BYTE, + sizeof(long) * BITS_PER_BYTE); + extent_buffer_bitmap_set(eb, 0, 0, len * BITS_PER_BYTE); + extent_buffer_bitmap_clear(eb, PAGE_SIZE - sizeof(long) / 2, 0, + sizeof(long) * BITS_PER_BYTE); + if (memcmp_extent_buffer(eb, bitmap, 0, len) != 0) { + test_msg("Clearing straddling pages failed\n"); + return -EINVAL; + } } /* @@ -333,7 +348,7 @@ static int __test_eb_bitmaps(unsigned long *bitmap, struct extent_buffer *eb, for (i = 0; i < len * BITS_PER_BYTE; i++) { int bit, bit1; - bit = !!test_bit(i, bitmap); + bit = !!test_bit_in_byte(i, (u8 *)bitmap); bit1 = !!extent_buffer_test_bit(eb, 0, i); if (bit1 != bit) { test_msg("Testing bit pattern failed\n"); @@ -351,15 +366,22 @@ static int __test_eb_bitmaps(unsigned long *bitmap, struct extent_buffer *eb, return 0; } -static int test_eb_bitmaps(void) +static int test_eb_bitmaps(u32 sectorsize, u32 nodesize) { - unsigned long len = PAGE_SIZE * 4; + unsigned long len; unsigned long *bitmap; struct extent_buffer *eb; int ret; test_msg("Running extent buffer bitmap tests\n"); + /* + * In ppc64, sectorsize can be 64K, thus 4 * 64K will be larger than + * BTRFS_MAX_METADATA_BLOCKSIZE. + */ + len = (sectorsize < BTRFS_MAX_METADATA_BLOCKSIZE) + ? sectorsize * 4 : sectorsize; + bitmap = kmalloc(len, GFP_KERNEL); if (!bitmap) { test_msg("Couldn't allocate test bitmap\n"); @@ -379,7 +401,7 @@ static int test_eb_bitmaps(void) /* Do it over again with an extent buffer which isn't page-aligned. */ free_extent_buffer(eb); - eb = __alloc_dummy_extent_buffer(NULL, PAGE_SIZE / 2, len); + eb = __alloc_dummy_extent_buffer(NULL, nodesize / 2, len); if (!eb) { test_msg("Couldn't allocate test extent buffer\n"); kfree(bitmap); @@ -393,17 +415,17 @@ out: return ret; } -int btrfs_test_extent_io(void) +int btrfs_test_extent_io(u32 sectorsize, u32 nodesize) { int ret; test_msg("Running extent I/O tests\n"); - ret = test_find_delalloc(); + ret = test_find_delalloc(sectorsize); if (ret) goto out; - ret = test_eb_bitmaps(); + ret = test_eb_bitmaps(sectorsize, nodesize); out: test_msg("Extent I/O tests finished\n"); return ret; diff --git a/fs/btrfs/tests/free-space-tests.c b/fs/btrfs/tests/free-space-tests.c index 0eeb8f3d6b67..3956bb2ff84c 100644 --- a/fs/btrfs/tests/free-space-tests.c +++ b/fs/btrfs/tests/free-space-tests.c @@ -22,7 +22,7 @@ #include "../disk-io.h" #include "../free-space-cache.h" -#define BITS_PER_BITMAP (PAGE_SIZE * 8) +#define BITS_PER_BITMAP (PAGE_SIZE * 8UL) /* * This test just does basic sanity checking, making sure we can add an extent @@ -99,7 +99,8 @@ static int test_extents(struct btrfs_block_group_cache *cache) return 0; } -static int test_bitmaps(struct btrfs_block_group_cache *cache) +static int test_bitmaps(struct btrfs_block_group_cache *cache, + u32 sectorsize) { u64 next_bitmap_offset; int ret; @@ -139,7 +140,7 @@ static int test_bitmaps(struct btrfs_block_group_cache *cache) * The first bitmap we have starts at offset 0 so the next one is just * at the end of the first bitmap. */ - next_bitmap_offset = (u64)(BITS_PER_BITMAP * 4096); + next_bitmap_offset = (u64)(BITS_PER_BITMAP * sectorsize); /* Test a bit straddling two bitmaps */ ret = test_add_free_space_entry(cache, next_bitmap_offset - SZ_2M, @@ -167,9 +168,10 @@ static int test_bitmaps(struct btrfs_block_group_cache *cache) } /* This is the high grade jackassery */ -static int test_bitmaps_and_extents(struct btrfs_block_group_cache *cache) +static int test_bitmaps_and_extents(struct btrfs_block_group_cache *cache, + u32 sectorsize) { - u64 bitmap_offset = (u64)(BITS_PER_BITMAP * 4096); + u64 bitmap_offset = (u64)(BITS_PER_BITMAP * sectorsize); int ret; test_msg("Running bitmap and extent tests\n"); @@ -401,7 +403,8 @@ static int check_cache_empty(struct btrfs_block_group_cache *cache) * requests. */ static int -test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache) +test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache, + u32 sectorsize) { int ret; u64 offset; @@ -539,7 +542,7 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache) * The goal is to test that the bitmap entry space stealing doesn't * steal this space region. */ - ret = btrfs_add_free_space(cache, SZ_128M + SZ_16M, 4096); + ret = btrfs_add_free_space(cache, SZ_128M + SZ_16M, sectorsize); if (ret) { test_msg("Error adding free space: %d\n", ret); return ret; @@ -597,8 +600,8 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache) return -ENOENT; } - if (cache->free_space_ctl->free_space != (SZ_1M + 4096)) { - test_msg("Cache free space is not 1Mb + 4Kb\n"); + if (cache->free_space_ctl->free_space != (SZ_1M + sectorsize)) { + test_msg("Cache free space is not 1Mb + %u\n", sectorsize); return -EINVAL; } @@ -611,22 +614,25 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache) return -EINVAL; } - /* All that remains is a 4Kb free space region in a bitmap. Confirm. */ + /* + * All that remains is a sectorsize free space region in a bitmap. + * Confirm. + */ ret = check_num_extents_and_bitmaps(cache, 1, 1); if (ret) return ret; - if (cache->free_space_ctl->free_space != 4096) { - test_msg("Cache free space is not 4Kb\n"); + if (cache->free_space_ctl->free_space != sectorsize) { + test_msg("Cache free space is not %u\n", sectorsize); return -EINVAL; } offset = btrfs_find_space_for_alloc(cache, - 0, 4096, 0, + 0, sectorsize, 0, &max_extent_size); if (offset != (SZ_128M + SZ_16M)) { - test_msg("Failed to allocate 4Kb from space cache, returned offset is: %llu\n", - offset); + test_msg("Failed to allocate %u, returned offset : %llu\n", + sectorsize, offset); return -EINVAL; } @@ -733,7 +739,7 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache) * The goal is to test that the bitmap entry space stealing doesn't * steal this space region. */ - ret = btrfs_add_free_space(cache, SZ_32M, 8192); + ret = btrfs_add_free_space(cache, SZ_32M, 2 * sectorsize); if (ret) { test_msg("Error adding free space: %d\n", ret); return ret; @@ -757,7 +763,7 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache) /* * Confirm that our extent entry didn't stole all free space from the - * bitmap, because of the small 8Kb free space region. + * bitmap, because of the small 2 * sectorsize free space region. */ ret = check_num_extents_and_bitmaps(cache, 2, 1); if (ret) @@ -783,8 +789,8 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache) return -ENOENT; } - if (cache->free_space_ctl->free_space != (SZ_1M + 8192)) { - test_msg("Cache free space is not 1Mb + 8Kb\n"); + if (cache->free_space_ctl->free_space != (SZ_1M + 2 * sectorsize)) { + test_msg("Cache free space is not 1Mb + %u\n", 2 * sectorsize); return -EINVAL; } @@ -796,21 +802,25 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache) return -EINVAL; } - /* All that remains is a 8Kb free space region in a bitmap. Confirm. */ + /* + * All that remains is 2 * sectorsize free space region + * in a bitmap. Confirm. + */ ret = check_num_extents_and_bitmaps(cache, 1, 1); if (ret) return ret; - if (cache->free_space_ctl->free_space != 8192) { - test_msg("Cache free space is not 8Kb\n"); + if (cache->free_space_ctl->free_space != 2 * sectorsize) { + test_msg("Cache free space is not %u\n", 2 * sectorsize); return -EINVAL; } offset = btrfs_find_space_for_alloc(cache, - 0, 8192, 0, + 0, 2 * sectorsize, 0, &max_extent_size); if (offset != SZ_32M) { - test_msg("Failed to allocate 8Kb from space cache, returned offset is: %llu\n", + test_msg("Failed to allocate %u, offset: %llu\n", + 2 * sectorsize, offset); return -EINVAL; } @@ -825,7 +835,7 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache) return 0; } -int btrfs_test_free_space_cache(void) +int btrfs_test_free_space_cache(u32 sectorsize, u32 nodesize) { struct btrfs_block_group_cache *cache; struct btrfs_root *root = NULL; @@ -833,13 +843,19 @@ int btrfs_test_free_space_cache(void) test_msg("Running btrfs free space cache tests\n"); - cache = btrfs_alloc_dummy_block_group(1024 * 1024 * 1024); + /* + * For ppc64 (with 64k page size), bytes per bitmap might be + * larger than 1G. To make bitmap test available in ppc64, + * alloc dummy block group whose size cross bitmaps. + */ + cache = btrfs_alloc_dummy_block_group(BITS_PER_BITMAP * sectorsize + + PAGE_SIZE, sectorsize); if (!cache) { test_msg("Couldn't run the tests\n"); return 0; } - root = btrfs_alloc_dummy_root(); + root = btrfs_alloc_dummy_root(sectorsize, nodesize); if (IS_ERR(root)) { ret = PTR_ERR(root); goto out; @@ -855,14 +871,14 @@ int btrfs_test_free_space_cache(void) ret = test_extents(cache); if (ret) goto out; - ret = test_bitmaps(cache); + ret = test_bitmaps(cache, sectorsize); if (ret) goto out; - ret = test_bitmaps_and_extents(cache); + ret = test_bitmaps_and_extents(cache, sectorsize); if (ret) goto out; - ret = test_steal_space_from_bitmap_to_extent(cache); + ret = test_steal_space_from_bitmap_to_extent(cache, sectorsize); out: btrfs_free_dummy_block_group(cache); btrfs_free_dummy_root(root); diff --git a/fs/btrfs/tests/free-space-tree-tests.c b/fs/btrfs/tests/free-space-tree-tests.c index 7cea4462acd5..aac507085ab0 100644 --- a/fs/btrfs/tests/free-space-tree-tests.c +++ b/fs/btrfs/tests/free-space-tree-tests.c @@ -16,6 +16,7 @@ * Boston, MA 021110-1307, USA. */ +#include <linux/types.h> #include "btrfs-tests.h" #include "../ctree.h" #include "../disk-io.h" @@ -30,7 +31,7 @@ struct free_space_extent { * The test cases align their operations to this in order to hit some of the * edge cases in the bitmap code. */ -#define BITMAP_RANGE (BTRFS_FREE_SPACE_BITMAP_BITS * 4096) +#define BITMAP_RANGE (BTRFS_FREE_SPACE_BITMAP_BITS * PAGE_SIZE) static int __check_free_space_extents(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, @@ -439,7 +440,8 @@ typedef int (*test_func_t)(struct btrfs_trans_handle *, struct btrfs_block_group_cache *, struct btrfs_path *); -static int run_test(test_func_t test_func, int bitmaps) +static int run_test(test_func_t test_func, int bitmaps, + u32 sectorsize, u32 nodesize) { struct btrfs_root *root = NULL; struct btrfs_block_group_cache *cache = NULL; @@ -447,7 +449,7 @@ static int run_test(test_func_t test_func, int bitmaps) struct btrfs_path *path = NULL; int ret; - root = btrfs_alloc_dummy_root(); + root = btrfs_alloc_dummy_root(sectorsize, nodesize); if (IS_ERR(root)) { test_msg("Couldn't allocate dummy root\n"); ret = PTR_ERR(root); @@ -466,7 +468,8 @@ static int run_test(test_func_t test_func, int bitmaps) root->fs_info->free_space_root = root; root->fs_info->tree_root = root; - root->node = alloc_test_extent_buffer(root->fs_info, 4096); + root->node = alloc_test_extent_buffer(root->fs_info, + nodesize, nodesize); if (!root->node) { test_msg("Couldn't allocate dummy buffer\n"); ret = -ENOMEM; @@ -474,9 +477,9 @@ static int run_test(test_func_t test_func, int bitmaps) } btrfs_set_header_level(root->node, 0); btrfs_set_header_nritems(root->node, 0); - root->alloc_bytenr += 8192; + root->alloc_bytenr += 2 * nodesize; - cache = btrfs_alloc_dummy_block_group(8 * BITMAP_RANGE); + cache = btrfs_alloc_dummy_block_group(8 * BITMAP_RANGE, sectorsize); if (!cache) { test_msg("Couldn't allocate dummy block group cache\n"); ret = -ENOMEM; @@ -534,17 +537,18 @@ out: return ret; } -static int run_test_both_formats(test_func_t test_func) +static int run_test_both_formats(test_func_t test_func, + u32 sectorsize, u32 nodesize) { int ret; - ret = run_test(test_func, 0); + ret = run_test(test_func, 0, sectorsize, nodesize); if (ret) return ret; - return run_test(test_func, 1); + return run_test(test_func, 1, sectorsize, nodesize); } -int btrfs_test_free_space_tree(void) +int btrfs_test_free_space_tree(u32 sectorsize, u32 nodesize) { test_func_t tests[] = { test_empty_block_group, @@ -561,9 +565,11 @@ int btrfs_test_free_space_tree(void) test_msg("Running free space tree tests\n"); for (i = 0; i < ARRAY_SIZE(tests); i++) { - int ret = run_test_both_formats(tests[i]); + int ret = run_test_both_formats(tests[i], sectorsize, + nodesize); if (ret) { - test_msg("%pf failed\n", tests[i]); + test_msg("%pf : sectorsize %u failed\n", + tests[i], sectorsize); return ret; } } diff --git a/fs/btrfs/tests/inode-tests.c b/fs/btrfs/tests/inode-tests.c index 8a25fe8b7c45..29648c0a39f1 100644 --- a/fs/btrfs/tests/inode-tests.c +++ b/fs/btrfs/tests/inode-tests.c @@ -16,6 +16,7 @@ * Boston, MA 021110-1307, USA. */ +#include <linux/types.h> #include "btrfs-tests.h" #include "../ctree.h" #include "../btrfs_inode.h" @@ -86,19 +87,19 @@ static void insert_inode_item_key(struct btrfs_root *root) * diagram of how the extents will look though this may not be possible we still * want to make sure everything acts normally (the last number is not inclusive) * - * [0 - 5][5 - 6][6 - 10][10 - 4096][ 4096 - 8192 ][8192 - 12288] - * [hole ][inline][ hole ][ regular ][regular1 split][ hole ] + * [0 - 5][5 - 6][ 6 - 4096 ][ 4096 - 4100][4100 - 8195][8195 - 12291] + * [hole ][inline][hole but no extent][ hole ][ regular ][regular1 split] * - * [ 12288 - 20480][20480 - 24576][ 24576 - 28672 ][28672 - 36864][36864 - 45056] - * [regular1 split][ prealloc1 ][prealloc1 written][ prealloc1 ][ compressed ] + * [12291 - 16387][16387 - 24579][24579 - 28675][ 28675 - 32771][32771 - 36867 ] + * [ hole ][regular1 split][ prealloc ][ prealloc1 ][prealloc1 written] * - * [45056 - 49152][49152-53248][53248-61440][61440-65536][ 65536+81920 ] - * [ compressed1 ][ regular ][compressed1][ regular ][ hole but no extent] + * [36867 - 45059][45059 - 53251][53251 - 57347][57347 - 61443][61443- 69635] + * [ prealloc1 ][ compressed ][ compressed1 ][ regular ][ compressed1] * - * [81920-86016] - * [ regular ] + * [69635-73731][ 73731 - 86019 ][86019-90115] + * [ regular ][ hole but no extent][ regular ] */ -static void setup_file_extents(struct btrfs_root *root) +static void setup_file_extents(struct btrfs_root *root, u32 sectorsize) { int slot = 0; u64 disk_bytenr = SZ_1M; @@ -119,7 +120,7 @@ static void setup_file_extents(struct btrfs_root *root) insert_extent(root, offset, 1, 1, 0, 0, 0, BTRFS_FILE_EXTENT_INLINE, 0, slot); slot++; - offset = 4096; + offset = sectorsize; /* Now another hole */ insert_extent(root, offset, 4, 4, 0, 0, 0, BTRFS_FILE_EXTENT_REG, 0, @@ -128,99 +129,106 @@ static void setup_file_extents(struct btrfs_root *root) offset += 4; /* Now for a regular extent */ - insert_extent(root, offset, 4095, 4095, 0, disk_bytenr, 4096, - BTRFS_FILE_EXTENT_REG, 0, slot); + insert_extent(root, offset, sectorsize - 1, sectorsize - 1, 0, + disk_bytenr, sectorsize, BTRFS_FILE_EXTENT_REG, 0, slot); slot++; - disk_bytenr += 4096; - offset += 4095; + disk_bytenr += sectorsize; + offset += sectorsize - 1; /* * Now for 3 extents that were split from a hole punch so we test * offsets properly. */ - insert_extent(root, offset, 4096, 16384, 0, disk_bytenr, 16384, - BTRFS_FILE_EXTENT_REG, 0, slot); + insert_extent(root, offset, sectorsize, 4 * sectorsize, 0, disk_bytenr, + 4 * sectorsize, BTRFS_FILE_EXTENT_REG, 0, slot); slot++; - offset += 4096; - insert_extent(root, offset, 4096, 4096, 0, 0, 0, BTRFS_FILE_EXTENT_REG, - 0, slot); + offset += sectorsize; + insert_extent(root, offset, sectorsize, sectorsize, 0, 0, 0, + BTRFS_FILE_EXTENT_REG, 0, slot); slot++; - offset += 4096; - insert_extent(root, offset, 8192, 16384, 8192, disk_bytenr, 16384, + offset += sectorsize; + insert_extent(root, offset, 2 * sectorsize, 4 * sectorsize, + 2 * sectorsize, disk_bytenr, 4 * sectorsize, BTRFS_FILE_EXTENT_REG, 0, slot); slot++; - offset += 8192; - disk_bytenr += 16384; + offset += 2 * sectorsize; + disk_bytenr += 4 * sectorsize; /* Now for a unwritten prealloc extent */ - insert_extent(root, offset, 4096, 4096, 0, disk_bytenr, 4096, - BTRFS_FILE_EXTENT_PREALLOC, 0, slot); + insert_extent(root, offset, sectorsize, sectorsize, 0, disk_bytenr, + sectorsize, BTRFS_FILE_EXTENT_PREALLOC, 0, slot); slot++; - offset += 4096; + offset += sectorsize; /* * We want to jack up disk_bytenr a little more so the em stuff doesn't * merge our records. */ - disk_bytenr += 8192; + disk_bytenr += 2 * sectorsize; /* * Now for a partially written prealloc extent, basically the same as * the hole punch example above. Ram_bytes never changes when you mark * extents written btw. */ - insert_extent(root, offset, 4096, 16384, 0, disk_bytenr, 16384, - BTRFS_FILE_EXTENT_PREALLOC, 0, slot); + insert_extent(root, offset, sectorsize, 4 * sectorsize, 0, disk_bytenr, + 4 * sectorsize, BTRFS_FILE_EXTENT_PREALLOC, 0, slot); slot++; - offset += 4096; - insert_extent(root, offset, 4096, 16384, 4096, disk_bytenr, 16384, - BTRFS_FILE_EXTENT_REG, 0, slot); + offset += sectorsize; + insert_extent(root, offset, sectorsize, 4 * sectorsize, sectorsize, + disk_bytenr, 4 * sectorsize, BTRFS_FILE_EXTENT_REG, 0, + slot); slot++; - offset += 4096; - insert_extent(root, offset, 8192, 16384, 8192, disk_bytenr, 16384, + offset += sectorsize; + insert_extent(root, offset, 2 * sectorsize, 4 * sectorsize, + 2 * sectorsize, disk_bytenr, 4 * sectorsize, BTRFS_FILE_EXTENT_PREALLOC, 0, slot); slot++; - offset += 8192; - disk_bytenr += 16384; + offset += 2 * sectorsize; + disk_bytenr += 4 * sectorsize; /* Now a normal compressed extent */ - insert_extent(root, offset, 8192, 8192, 0, disk_bytenr, 4096, - BTRFS_FILE_EXTENT_REG, BTRFS_COMPRESS_ZLIB, slot); + insert_extent(root, offset, 2 * sectorsize, 2 * sectorsize, 0, + disk_bytenr, sectorsize, BTRFS_FILE_EXTENT_REG, + BTRFS_COMPRESS_ZLIB, slot); slot++; - offset += 8192; + offset += 2 * sectorsize; /* No merges */ - disk_bytenr += 8192; + disk_bytenr += 2 * sectorsize; /* Now a split compressed extent */ - insert_extent(root, offset, 4096, 16384, 0, disk_bytenr, 4096, - BTRFS_FILE_EXTENT_REG, BTRFS_COMPRESS_ZLIB, slot); + insert_extent(root, offset, sectorsize, 4 * sectorsize, 0, disk_bytenr, + sectorsize, BTRFS_FILE_EXTENT_REG, + BTRFS_COMPRESS_ZLIB, slot); slot++; - offset += 4096; - insert_extent(root, offset, 4096, 4096, 0, disk_bytenr + 4096, 4096, + offset += sectorsize; + insert_extent(root, offset, sectorsize, sectorsize, 0, + disk_bytenr + sectorsize, sectorsize, BTRFS_FILE_EXTENT_REG, 0, slot); slot++; - offset += 4096; - insert_extent(root, offset, 8192, 16384, 8192, disk_bytenr, 4096, + offset += sectorsize; + insert_extent(root, offset, 2 * sectorsize, 4 * sectorsize, + 2 * sectorsize, disk_bytenr, sectorsize, BTRFS_FILE_EXTENT_REG, BTRFS_COMPRESS_ZLIB, slot); slot++; - offset += 8192; - disk_bytenr += 8192; + offset += 2 * sectorsize; + disk_bytenr += 2 * sectorsize; /* Now extents that have a hole but no hole extent */ - insert_extent(root, offset, 4096, 4096, 0, disk_bytenr, 4096, - BTRFS_FILE_EXTENT_REG, 0, slot); + insert_extent(root, offset, sectorsize, sectorsize, 0, disk_bytenr, + sectorsize, BTRFS_FILE_EXTENT_REG, 0, slot); slot++; - offset += 16384; - disk_bytenr += 4096; - insert_extent(root, offset, 4096, 4096, 0, disk_bytenr, 4096, - BTRFS_FILE_EXTENT_REG, 0, slot); + offset += 4 * sectorsize; + disk_bytenr += sectorsize; + insert_extent(root, offset, sectorsize, sectorsize, 0, disk_bytenr, + sectorsize, BTRFS_FILE_EXTENT_REG, 0, slot); } static unsigned long prealloc_only = 0; static unsigned long compressed_only = 0; static unsigned long vacancy_only = 0; -static noinline int test_btrfs_get_extent(void) +static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize) { struct inode *inode = NULL; struct btrfs_root *root = NULL; @@ -240,7 +248,7 @@ static noinline int test_btrfs_get_extent(void) BTRFS_I(inode)->location.objectid = BTRFS_FIRST_FREE_OBJECTID; BTRFS_I(inode)->location.offset = 0; - root = btrfs_alloc_dummy_root(); + root = btrfs_alloc_dummy_root(sectorsize, nodesize); if (IS_ERR(root)) { test_msg("Couldn't allocate root\n"); goto out; @@ -256,7 +264,7 @@ static noinline int test_btrfs_get_extent(void) goto out; } - root->node = alloc_dummy_extent_buffer(NULL, 4096); + root->node = alloc_dummy_extent_buffer(NULL, nodesize, nodesize); if (!root->node) { test_msg("Couldn't allocate dummy buffer\n"); goto out; @@ -273,7 +281,7 @@ static noinline int test_btrfs_get_extent(void) /* First with no extents */ BTRFS_I(inode)->root = root; - em = btrfs_get_extent(inode, NULL, 0, 0, 4096, 0); + em = btrfs_get_extent(inode, NULL, 0, 0, sectorsize, 0); if (IS_ERR(em)) { em = NULL; test_msg("Got an error when we shouldn't have\n"); @@ -295,7 +303,7 @@ static noinline int test_btrfs_get_extent(void) * setup_file_extents, so if you change anything there you need to * update the comment and update the expected values below. */ - setup_file_extents(root); + setup_file_extents(root, sectorsize); em = btrfs_get_extent(inode, NULL, 0, 0, (u64)-1, 0); if (IS_ERR(em)) { @@ -318,7 +326,7 @@ static noinline int test_btrfs_get_extent(void) offset = em->start + em->len; free_extent_map(em); - em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0); + em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; @@ -327,7 +335,8 @@ static noinline int test_btrfs_get_extent(void) test_msg("Expected an inline, got %llu\n", em->block_start); goto out; } - if (em->start != offset || em->len != 4091) { + + if (em->start != offset || em->len != (sectorsize - 5)) { test_msg("Unexpected extent wanted start %llu len 1, got start " "%llu len %llu\n", offset, em->start, em->len); goto out; @@ -344,7 +353,7 @@ static noinline int test_btrfs_get_extent(void) offset = em->start + em->len; free_extent_map(em); - em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0); + em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; @@ -366,7 +375,7 @@ static noinline int test_btrfs_get_extent(void) free_extent_map(em); /* Regular extent */ - em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0); + em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; @@ -375,7 +384,7 @@ static noinline int test_btrfs_get_extent(void) test_msg("Expected a real extent, got %llu\n", em->block_start); goto out; } - if (em->start != offset || em->len != 4095) { + if (em->start != offset || em->len != sectorsize - 1) { test_msg("Unexpected extent wanted start %llu len 4095, got " "start %llu len %llu\n", offset, em->start, em->len); goto out; @@ -393,7 +402,7 @@ static noinline int test_btrfs_get_extent(void) free_extent_map(em); /* The next 3 are split extents */ - em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0); + em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; @@ -402,9 +411,10 @@ static noinline int test_btrfs_get_extent(void) test_msg("Expected a real extent, got %llu\n", em->block_start); goto out; } - if (em->start != offset || em->len != 4096) { - test_msg("Unexpected extent wanted start %llu len 4096, got " - "start %llu len %llu\n", offset, em->start, em->len); + if (em->start != offset || em->len != sectorsize) { + test_msg("Unexpected extent start %llu len %u, " + "got start %llu len %llu\n", + offset, sectorsize, em->start, em->len); goto out; } if (em->flags != 0) { @@ -421,7 +431,7 @@ static noinline int test_btrfs_get_extent(void) offset = em->start + em->len; free_extent_map(em); - em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0); + em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; @@ -430,9 +440,10 @@ static noinline int test_btrfs_get_extent(void) test_msg("Expected a hole, got %llu\n", em->block_start); goto out; } - if (em->start != offset || em->len != 4096) { - test_msg("Unexpected extent wanted start %llu len 4096, got " - "start %llu len %llu\n", offset, em->start, em->len); + if (em->start != offset || em->len != sectorsize) { + test_msg("Unexpected extent wanted start %llu len %u, " + "got start %llu len %llu\n", + offset, sectorsize, em->start, em->len); goto out; } if (em->flags != 0) { @@ -442,7 +453,7 @@ static noinline int test_btrfs_get_extent(void) offset = em->start + em->len; free_extent_map(em); - em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0); + em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; @@ -451,9 +462,10 @@ static noinline int test_btrfs_get_extent(void) test_msg("Expected a real extent, got %llu\n", em->block_start); goto out; } - if (em->start != offset || em->len != 8192) { - test_msg("Unexpected extent wanted start %llu len 8192, got " - "start %llu len %llu\n", offset, em->start, em->len); + if (em->start != offset || em->len != 2 * sectorsize) { + test_msg("Unexpected extent wanted start %llu len %u, " + "got start %llu len %llu\n", + offset, 2 * sectorsize, em->start, em->len); goto out; } if (em->flags != 0) { @@ -475,7 +487,7 @@ static noinline int test_btrfs_get_extent(void) free_extent_map(em); /* Prealloc extent */ - em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0); + em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; @@ -484,9 +496,10 @@ static noinline int test_btrfs_get_extent(void) test_msg("Expected a real extent, got %llu\n", em->block_start); goto out; } - if (em->start != offset || em->len != 4096) { - test_msg("Unexpected extent wanted start %llu len 4096, got " - "start %llu len %llu\n", offset, em->start, em->len); + if (em->start != offset || em->len != sectorsize) { + test_msg("Unexpected extent wanted start %llu len %u, " + "got start %llu len %llu\n", + offset, sectorsize, em->start, em->len); goto out; } if (em->flags != prealloc_only) { @@ -503,7 +516,7 @@ static noinline int test_btrfs_get_extent(void) free_extent_map(em); /* The next 3 are a half written prealloc extent */ - em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0); + em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; @@ -512,9 +525,10 @@ static noinline int test_btrfs_get_extent(void) test_msg("Expected a real extent, got %llu\n", em->block_start); goto out; } - if (em->start != offset || em->len != 4096) { - test_msg("Unexpected extent wanted start %llu len 4096, got " - "start %llu len %llu\n", offset, em->start, em->len); + if (em->start != offset || em->len != sectorsize) { + test_msg("Unexpected extent wanted start %llu len %u, " + "got start %llu len %llu\n", + offset, sectorsize, em->start, em->len); goto out; } if (em->flags != prealloc_only) { @@ -532,7 +546,7 @@ static noinline int test_btrfs_get_extent(void) offset = em->start + em->len; free_extent_map(em); - em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0); + em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; @@ -541,9 +555,10 @@ static noinline int test_btrfs_get_extent(void) test_msg("Expected a real extent, got %llu\n", em->block_start); goto out; } - if (em->start != offset || em->len != 4096) { - test_msg("Unexpected extent wanted start %llu len 4096, got " - "start %llu len %llu\n", offset, em->start, em->len); + if (em->start != offset || em->len != sectorsize) { + test_msg("Unexpected extent wanted start %llu len %u, " + "got start %llu len %llu\n", + offset, sectorsize, em->start, em->len); goto out; } if (em->flags != 0) { @@ -564,7 +579,7 @@ static noinline int test_btrfs_get_extent(void) offset = em->start + em->len; free_extent_map(em); - em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0); + em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; @@ -573,9 +588,10 @@ static noinline int test_btrfs_get_extent(void) test_msg("Expected a real extent, got %llu\n", em->block_start); goto out; } - if (em->start != offset || em->len != 8192) { - test_msg("Unexpected extent wanted start %llu len 8192, got " - "start %llu len %llu\n", offset, em->start, em->len); + if (em->start != offset || em->len != 2 * sectorsize) { + test_msg("Unexpected extent wanted start %llu len %u, " + "got start %llu len %llu\n", + offset, 2 * sectorsize, em->start, em->len); goto out; } if (em->flags != prealloc_only) { @@ -598,7 +614,7 @@ static noinline int test_btrfs_get_extent(void) free_extent_map(em); /* Now for the compressed extent */ - em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0); + em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; @@ -607,9 +623,10 @@ static noinline int test_btrfs_get_extent(void) test_msg("Expected a real extent, got %llu\n", em->block_start); goto out; } - if (em->start != offset || em->len != 8192) { - test_msg("Unexpected extent wanted start %llu len 8192, got " - "start %llu len %llu\n", offset, em->start, em->len); + if (em->start != offset || em->len != 2 * sectorsize) { + test_msg("Unexpected extent wanted start %llu len %u," + "got start %llu len %llu\n", + offset, 2 * sectorsize, em->start, em->len); goto out; } if (em->flags != compressed_only) { @@ -631,7 +648,7 @@ static noinline int test_btrfs_get_extent(void) free_extent_map(em); /* Split compressed extent */ - em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0); + em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; @@ -640,9 +657,10 @@ static noinline int test_btrfs_get_extent(void) test_msg("Expected a real extent, got %llu\n", em->block_start); goto out; } - if (em->start != offset || em->len != 4096) { - test_msg("Unexpected extent wanted start %llu len 4096, got " - "start %llu len %llu\n", offset, em->start, em->len); + if (em->start != offset || em->len != sectorsize) { + test_msg("Unexpected extent wanted start %llu len %u," + "got start %llu len %llu\n", + offset, sectorsize, em->start, em->len); goto out; } if (em->flags != compressed_only) { @@ -665,7 +683,7 @@ static noinline int test_btrfs_get_extent(void) offset = em->start + em->len; free_extent_map(em); - em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0); + em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; @@ -674,9 +692,10 @@ static noinline int test_btrfs_get_extent(void) test_msg("Expected a real extent, got %llu\n", em->block_start); goto out; } - if (em->start != offset || em->len != 4096) { - test_msg("Unexpected extent wanted start %llu len 4096, got " - "start %llu len %llu\n", offset, em->start, em->len); + if (em->start != offset || em->len != sectorsize) { + test_msg("Unexpected extent wanted start %llu len %u, " + "got start %llu len %llu\n", + offset, sectorsize, em->start, em->len); goto out; } if (em->flags != 0) { @@ -691,7 +710,7 @@ static noinline int test_btrfs_get_extent(void) offset = em->start + em->len; free_extent_map(em); - em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0); + em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; @@ -701,9 +720,10 @@ static noinline int test_btrfs_get_extent(void) disk_bytenr, em->block_start); goto out; } - if (em->start != offset || em->len != 8192) { - test_msg("Unexpected extent wanted start %llu len 8192, got " - "start %llu len %llu\n", offset, em->start, em->len); + if (em->start != offset || em->len != 2 * sectorsize) { + test_msg("Unexpected extent wanted start %llu len %u, " + "got start %llu len %llu\n", + offset, 2 * sectorsize, em->start, em->len); goto out; } if (em->flags != compressed_only) { @@ -725,7 +745,7 @@ static noinline int test_btrfs_get_extent(void) free_extent_map(em); /* A hole between regular extents but no hole extent */ - em = btrfs_get_extent(inode, NULL, 0, offset + 6, 4096, 0); + em = btrfs_get_extent(inode, NULL, 0, offset + 6, sectorsize, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; @@ -734,9 +754,10 @@ static noinline int test_btrfs_get_extent(void) test_msg("Expected a real extent, got %llu\n", em->block_start); goto out; } - if (em->start != offset || em->len != 4096) { - test_msg("Unexpected extent wanted start %llu len 4096, got " - "start %llu len %llu\n", offset, em->start, em->len); + if (em->start != offset || em->len != sectorsize) { + test_msg("Unexpected extent wanted start %llu len %u, " + "got start %llu len %llu\n", + offset, sectorsize, em->start, em->len); goto out; } if (em->flags != 0) { @@ -765,9 +786,10 @@ static noinline int test_btrfs_get_extent(void) * length of the actual hole, if this changes we'll have to change this * test. */ - if (em->start != offset || em->len != 12288) { - test_msg("Unexpected extent wanted start %llu len 12288, got " - "start %llu len %llu\n", offset, em->start, em->len); + if (em->start != offset || em->len != 3 * sectorsize) { + test_msg("Unexpected extent wanted start %llu len %u, " + "got start %llu len %llu\n", + offset, 3 * sectorsize, em->start, em->len); goto out; } if (em->flags != vacancy_only) { @@ -783,7 +805,7 @@ static noinline int test_btrfs_get_extent(void) offset = em->start + em->len; free_extent_map(em); - em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0); + em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; @@ -792,9 +814,10 @@ static noinline int test_btrfs_get_extent(void) test_msg("Expected a real extent, got %llu\n", em->block_start); goto out; } - if (em->start != offset || em->len != 4096) { - test_msg("Unexpected extent wanted start %llu len 4096, got " - "start %llu len %llu\n", offset, em->start, em->len); + if (em->start != offset || em->len != sectorsize) { + test_msg("Unexpected extent wanted start %llu len %u," + "got start %llu len %llu\n", + offset, sectorsize, em->start, em->len); goto out; } if (em->flags != 0) { @@ -815,7 +838,7 @@ out: return ret; } -static int test_hole_first(void) +static int test_hole_first(u32 sectorsize, u32 nodesize) { struct inode *inode = NULL; struct btrfs_root *root = NULL; @@ -832,7 +855,7 @@ static int test_hole_first(void) BTRFS_I(inode)->location.objectid = BTRFS_FIRST_FREE_OBJECTID; BTRFS_I(inode)->location.offset = 0; - root = btrfs_alloc_dummy_root(); + root = btrfs_alloc_dummy_root(sectorsize, nodesize); if (IS_ERR(root)) { test_msg("Couldn't allocate root\n"); goto out; @@ -844,7 +867,7 @@ static int test_hole_first(void) goto out; } - root->node = alloc_dummy_extent_buffer(NULL, 4096); + root->node = alloc_dummy_extent_buffer(NULL, nodesize, nodesize); if (!root->node) { test_msg("Couldn't allocate dummy buffer\n"); goto out; @@ -861,9 +884,9 @@ static int test_hole_first(void) * btrfs_get_extent. */ insert_inode_item_key(root); - insert_extent(root, 4096, 4096, 4096, 0, 4096, 4096, - BTRFS_FILE_EXTENT_REG, 0, 1); - em = btrfs_get_extent(inode, NULL, 0, 0, 8192, 0); + insert_extent(root, sectorsize, sectorsize, sectorsize, 0, sectorsize, + sectorsize, BTRFS_FILE_EXTENT_REG, 0, 1); + em = btrfs_get_extent(inode, NULL, 0, 0, 2 * sectorsize, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; @@ -872,9 +895,10 @@ static int test_hole_first(void) test_msg("Expected a hole, got %llu\n", em->block_start); goto out; } - if (em->start != 0 || em->len != 4096) { - test_msg("Unexpected extent wanted start 0 len 4096, got start " - "%llu len %llu\n", em->start, em->len); + if (em->start != 0 || em->len != sectorsize) { + test_msg("Unexpected extent wanted start 0 len %u, " + "got start %llu len %llu\n", + sectorsize, em->start, em->len); goto out; } if (em->flags != vacancy_only) { @@ -884,18 +908,19 @@ static int test_hole_first(void) } free_extent_map(em); - em = btrfs_get_extent(inode, NULL, 0, 4096, 8192, 0); + em = btrfs_get_extent(inode, NULL, 0, sectorsize, 2 * sectorsize, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; } - if (em->block_start != 4096) { + if (em->block_start != sectorsize) { test_msg("Expected a real extent, got %llu\n", em->block_start); goto out; } - if (em->start != 4096 || em->len != 4096) { - test_msg("Unexpected extent wanted start 4096 len 4096, got " - "start %llu len %llu\n", em->start, em->len); + if (em->start != sectorsize || em->len != sectorsize) { + test_msg("Unexpected extent wanted start %u len %u, " + "got start %llu len %llu\n", + sectorsize, sectorsize, em->start, em->len); goto out; } if (em->flags != 0) { @@ -912,7 +937,7 @@ out: return ret; } -static int test_extent_accounting(void) +static int test_extent_accounting(u32 sectorsize, u32 nodesize) { struct inode *inode = NULL; struct btrfs_root *root = NULL; @@ -924,7 +949,7 @@ static int test_extent_accounting(void) return ret; } - root = btrfs_alloc_dummy_root(); + root = btrfs_alloc_dummy_root(sectorsize, nodesize); if (IS_ERR(root)) { test_msg("Couldn't allocate root\n"); goto out; @@ -954,10 +979,11 @@ static int test_extent_accounting(void) goto out; } - /* [BTRFS_MAX_EXTENT_SIZE][4k] */ + /* [BTRFS_MAX_EXTENT_SIZE][sectorsize] */ BTRFS_I(inode)->outstanding_extents++; ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE, - BTRFS_MAX_EXTENT_SIZE + 4095, NULL); + BTRFS_MAX_EXTENT_SIZE + sectorsize - 1, + NULL); if (ret) { test_msg("btrfs_set_extent_delalloc returned %d\n", ret); goto out; @@ -969,10 +995,10 @@ static int test_extent_accounting(void) goto out; } - /* [BTRFS_MAX_EXTENT_SIZE/2][4K HOLE][the rest] */ + /* [BTRFS_MAX_EXTENT_SIZE/2][sectorsize HOLE][the rest] */ ret = clear_extent_bit(&BTRFS_I(inode)->io_tree, BTRFS_MAX_EXTENT_SIZE >> 1, - (BTRFS_MAX_EXTENT_SIZE >> 1) + 4095, + (BTRFS_MAX_EXTENT_SIZE >> 1) + sectorsize - 1, EXTENT_DELALLOC | EXTENT_DIRTY | EXTENT_UPTODATE | EXTENT_DO_ACCOUNTING, 0, 0, NULL, GFP_KERNEL); @@ -987,10 +1013,11 @@ static int test_extent_accounting(void) goto out; } - /* [BTRFS_MAX_EXTENT_SIZE][4K] */ + /* [BTRFS_MAX_EXTENT_SIZE][sectorsize] */ BTRFS_I(inode)->outstanding_extents++; ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE >> 1, - (BTRFS_MAX_EXTENT_SIZE >> 1) + 4095, + (BTRFS_MAX_EXTENT_SIZE >> 1) + + sectorsize - 1, NULL); if (ret) { test_msg("btrfs_set_extent_delalloc returned %d\n", ret); @@ -1004,16 +1031,17 @@ static int test_extent_accounting(void) } /* - * [BTRFS_MAX_EXTENT_SIZE+4K][4K HOLE][BTRFS_MAX_EXTENT_SIZE+4K] + * [BTRFS_MAX_EXTENT_SIZE+sectorsize][sectorsize HOLE][BTRFS_MAX_EXTENT_SIZE+sectorsize] * * I'm artificially adding 2 to outstanding_extents because in the * buffered IO case we'd add things up as we go, but I don't feel like * doing that here, this isn't the interesting case we want to test. */ BTRFS_I(inode)->outstanding_extents += 2; - ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE + 8192, - (BTRFS_MAX_EXTENT_SIZE << 1) + 12287, - NULL); + ret = btrfs_set_extent_delalloc(inode, + BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize, + (BTRFS_MAX_EXTENT_SIZE << 1) + 3 * sectorsize - 1, + NULL); if (ret) { test_msg("btrfs_set_extent_delalloc returned %d\n", ret); goto out; @@ -1025,10 +1053,13 @@ static int test_extent_accounting(void) goto out; } - /* [BTRFS_MAX_EXTENT_SIZE+4k][4k][BTRFS_MAX_EXTENT_SIZE+4k] */ + /* + * [BTRFS_MAX_EXTENT_SIZE+sectorsize][sectorsize][BTRFS_MAX_EXTENT_SIZE+sectorsize] + */ BTRFS_I(inode)->outstanding_extents++; - ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE+4096, - BTRFS_MAX_EXTENT_SIZE+8191, NULL); + ret = btrfs_set_extent_delalloc(inode, + BTRFS_MAX_EXTENT_SIZE + sectorsize, + BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize - 1, NULL); if (ret) { test_msg("btrfs_set_extent_delalloc returned %d\n", ret); goto out; @@ -1042,8 +1073,8 @@ static int test_extent_accounting(void) /* [BTRFS_MAX_EXTENT_SIZE+4k][4K HOLE][BTRFS_MAX_EXTENT_SIZE+4k] */ ret = clear_extent_bit(&BTRFS_I(inode)->io_tree, - BTRFS_MAX_EXTENT_SIZE+4096, - BTRFS_MAX_EXTENT_SIZE+8191, + BTRFS_MAX_EXTENT_SIZE + sectorsize, + BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize - 1, EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0, NULL, GFP_KERNEL); @@ -1063,8 +1094,9 @@ static int test_extent_accounting(void) * might fail and I'd rather satisfy my paranoia at this point. */ BTRFS_I(inode)->outstanding_extents++; - ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE+4096, - BTRFS_MAX_EXTENT_SIZE+8191, NULL); + ret = btrfs_set_extent_delalloc(inode, + BTRFS_MAX_EXTENT_SIZE + sectorsize, + BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize - 1, NULL); if (ret) { test_msg("btrfs_set_extent_delalloc returned %d\n", ret); goto out; @@ -1103,7 +1135,7 @@ out: return ret; } -int btrfs_test_inodes(void) +int btrfs_test_inodes(u32 sectorsize, u32 nodesize) { int ret; @@ -1112,13 +1144,13 @@ int btrfs_test_inodes(void) set_bit(EXTENT_FLAG_PREALLOC, &prealloc_only); test_msg("Running btrfs_get_extent tests\n"); - ret = test_btrfs_get_extent(); + ret = test_btrfs_get_extent(sectorsize, nodesize); if (ret) return ret; test_msg("Running hole first btrfs_get_extent test\n"); - ret = test_hole_first(); + ret = test_hole_first(sectorsize, nodesize); if (ret) return ret; test_msg("Running outstanding_extents tests\n"); - return test_extent_accounting(); + return test_extent_accounting(sectorsize, nodesize); } diff --git a/fs/btrfs/tests/qgroup-tests.c b/fs/btrfs/tests/qgroup-tests.c index 8aa4ded31326..57a12c0d680b 100644 --- a/fs/btrfs/tests/qgroup-tests.c +++ b/fs/btrfs/tests/qgroup-tests.c @@ -16,6 +16,7 @@ * Boston, MA 021110-1307, USA. */ +#include <linux/types.h> #include "btrfs-tests.h" #include "../ctree.h" #include "../transaction.h" @@ -216,7 +217,8 @@ static int remove_extent_ref(struct btrfs_root *root, u64 bytenr, return ret; } -static int test_no_shared_qgroup(struct btrfs_root *root) +static int test_no_shared_qgroup(struct btrfs_root *root, + u32 sectorsize, u32 nodesize) { struct btrfs_trans_handle trans; struct btrfs_fs_info *fs_info = root->fs_info; @@ -227,7 +229,7 @@ static int test_no_shared_qgroup(struct btrfs_root *root) btrfs_init_dummy_trans(&trans); test_msg("Qgroup basic add\n"); - ret = btrfs_create_qgroup(NULL, fs_info, 5); + ret = btrfs_create_qgroup(NULL, fs_info, BTRFS_FS_TREE_OBJECTID); if (ret) { test_msg("Couldn't create a qgroup %d\n", ret); return ret; @@ -238,18 +240,19 @@ static int test_no_shared_qgroup(struct btrfs_root *root) * we can only call btrfs_qgroup_account_extent() directly to test * quota. */ - ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &old_roots); + ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots); if (ret) { ulist_free(old_roots); test_msg("Couldn't find old roots: %d\n", ret); return ret; } - ret = insert_normal_tree_ref(root, 4096, 4096, 0, 5); + ret = insert_normal_tree_ref(root, nodesize, nodesize, 0, + BTRFS_FS_TREE_OBJECTID); if (ret) return ret; - ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &new_roots); + ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots); if (ret) { ulist_free(old_roots); ulist_free(new_roots); @@ -257,32 +260,33 @@ static int test_no_shared_qgroup(struct btrfs_root *root) return ret; } - ret = btrfs_qgroup_account_extent(&trans, fs_info, 4096, 4096, - old_roots, new_roots); + ret = btrfs_qgroup_account_extent(&trans, fs_info, nodesize, + nodesize, old_roots, new_roots); if (ret) { test_msg("Couldn't account space for a qgroup %d\n", ret); return ret; } - if (btrfs_verify_qgroup_counts(fs_info, 5, 4096, 4096)) { + if (btrfs_verify_qgroup_counts(fs_info, BTRFS_FS_TREE_OBJECTID, + nodesize, nodesize)) { test_msg("Qgroup counts didn't match expected values\n"); return -EINVAL; } old_roots = NULL; new_roots = NULL; - ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &old_roots); + ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots); if (ret) { ulist_free(old_roots); test_msg("Couldn't find old roots: %d\n", ret); return ret; } - ret = remove_extent_item(root, 4096, 4096); + ret = remove_extent_item(root, nodesize, nodesize); if (ret) return -EINVAL; - ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &new_roots); + ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots); if (ret) { ulist_free(old_roots); ulist_free(new_roots); @@ -290,14 +294,14 @@ static int test_no_shared_qgroup(struct btrfs_root *root) return ret; } - ret = btrfs_qgroup_account_extent(&trans, fs_info, 4096, 4096, - old_roots, new_roots); + ret = btrfs_qgroup_account_extent(&trans, fs_info, nodesize, + nodesize, old_roots, new_roots); if (ret) { test_msg("Couldn't account space for a qgroup %d\n", ret); return -EINVAL; } - if (btrfs_verify_qgroup_counts(fs_info, 5, 0, 0)) { + if (btrfs_verify_qgroup_counts(fs_info, BTRFS_FS_TREE_OBJECTID, 0, 0)) { test_msg("Qgroup counts didn't match expected values\n"); return -EINVAL; } @@ -310,7 +314,8 @@ static int test_no_shared_qgroup(struct btrfs_root *root) * right, also remove one of the roots and make sure the exclusive count is * adjusted properly. */ -static int test_multiple_refs(struct btrfs_root *root) +static int test_multiple_refs(struct btrfs_root *root, + u32 sectorsize, u32 nodesize) { struct btrfs_trans_handle trans; struct btrfs_fs_info *fs_info = root->fs_info; @@ -322,25 +327,29 @@ static int test_multiple_refs(struct btrfs_root *root) test_msg("Qgroup multiple refs test\n"); - /* We have 5 created already from the previous test */ - ret = btrfs_create_qgroup(NULL, fs_info, 256); + /* + * We have BTRFS_FS_TREE_OBJECTID created already from the + * previous test. + */ + ret = btrfs_create_qgroup(NULL, fs_info, BTRFS_FIRST_FREE_OBJECTID); if (ret) { test_msg("Couldn't create a qgroup %d\n", ret); return ret; } - ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &old_roots); + ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots); if (ret) { ulist_free(old_roots); test_msg("Couldn't find old roots: %d\n", ret); return ret; } - ret = insert_normal_tree_ref(root, 4096, 4096, 0, 5); + ret = insert_normal_tree_ref(root, nodesize, nodesize, 0, + BTRFS_FS_TREE_OBJECTID); if (ret) return ret; - ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &new_roots); + ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots); if (ret) { ulist_free(old_roots); ulist_free(new_roots); @@ -348,30 +357,32 @@ static int test_multiple_refs(struct btrfs_root *root) return ret; } - ret = btrfs_qgroup_account_extent(&trans, fs_info, 4096, 4096, - old_roots, new_roots); + ret = btrfs_qgroup_account_extent(&trans, fs_info, nodesize, + nodesize, old_roots, new_roots); if (ret) { test_msg("Couldn't account space for a qgroup %d\n", ret); return ret; } - if (btrfs_verify_qgroup_counts(fs_info, 5, 4096, 4096)) { + if (btrfs_verify_qgroup_counts(fs_info, BTRFS_FS_TREE_OBJECTID, + nodesize, nodesize)) { test_msg("Qgroup counts didn't match expected values\n"); return -EINVAL; } - ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &old_roots); + ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots); if (ret) { ulist_free(old_roots); test_msg("Couldn't find old roots: %d\n", ret); return ret; } - ret = add_tree_ref(root, 4096, 4096, 0, 256); + ret = add_tree_ref(root, nodesize, nodesize, 0, + BTRFS_FIRST_FREE_OBJECTID); if (ret) return ret; - ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &new_roots); + ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots); if (ret) { ulist_free(old_roots); ulist_free(new_roots); @@ -379,35 +390,38 @@ static int test_multiple_refs(struct btrfs_root *root) return ret; } - ret = btrfs_qgroup_account_extent(&trans, fs_info, 4096, 4096, - old_roots, new_roots); + ret = btrfs_qgroup_account_extent(&trans, fs_info, nodesize, + nodesize, old_roots, new_roots); if (ret) { test_msg("Couldn't account space for a qgroup %d\n", ret); return ret; } - if (btrfs_verify_qgroup_counts(fs_info, 5, 4096, 0)) { + if (btrfs_verify_qgroup_counts(fs_info, BTRFS_FS_TREE_OBJECTID, + nodesize, 0)) { test_msg("Qgroup counts didn't match expected values\n"); return -EINVAL; } - if (btrfs_verify_qgroup_counts(fs_info, 256, 4096, 0)) { + if (btrfs_verify_qgroup_counts(fs_info, BTRFS_FIRST_FREE_OBJECTID, + nodesize, 0)) { test_msg("Qgroup counts didn't match expected values\n"); return -EINVAL; } - ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &old_roots); + ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots); if (ret) { ulist_free(old_roots); test_msg("Couldn't find old roots: %d\n", ret); return ret; } - ret = remove_extent_ref(root, 4096, 4096, 0, 256); + ret = remove_extent_ref(root, nodesize, nodesize, 0, + BTRFS_FIRST_FREE_OBJECTID); if (ret) return ret; - ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &new_roots); + ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots); if (ret) { ulist_free(old_roots); ulist_free(new_roots); @@ -415,19 +429,21 @@ static int test_multiple_refs(struct btrfs_root *root) return ret; } - ret = btrfs_qgroup_account_extent(&trans, fs_info, 4096, 4096, - old_roots, new_roots); + ret = btrfs_qgroup_account_extent(&trans, fs_info, nodesize, + nodesize, old_roots, new_roots); if (ret) { test_msg("Couldn't account space for a qgroup %d\n", ret); return ret; } - if (btrfs_verify_qgroup_counts(fs_info, 256, 0, 0)) { + if (btrfs_verify_qgroup_counts(fs_info, BTRFS_FIRST_FREE_OBJECTID, + 0, 0)) { test_msg("Qgroup counts didn't match expected values\n"); return -EINVAL; } - if (btrfs_verify_qgroup_counts(fs_info, 5, 4096, 4096)) { + if (btrfs_verify_qgroup_counts(fs_info, BTRFS_FS_TREE_OBJECTID, + nodesize, nodesize)) { test_msg("Qgroup counts didn't match expected values\n"); return -EINVAL; } @@ -435,13 +451,13 @@ static int test_multiple_refs(struct btrfs_root *root) return 0; } -int btrfs_test_qgroups(void) +int btrfs_test_qgroups(u32 sectorsize, u32 nodesize) { struct btrfs_root *root; struct btrfs_root *tmp_root; int ret = 0; - root = btrfs_alloc_dummy_root(); + root = btrfs_alloc_dummy_root(sectorsize, nodesize); if (IS_ERR(root)) { test_msg("Couldn't allocate root\n"); return PTR_ERR(root); @@ -468,7 +484,8 @@ int btrfs_test_qgroups(void) * Can't use bytenr 0, some things freak out * *cough*backref walking code*cough* */ - root->node = alloc_test_extent_buffer(root->fs_info, 4096); + root->node = alloc_test_extent_buffer(root->fs_info, nodesize, + nodesize); if (!root->node) { test_msg("Couldn't allocate dummy buffer\n"); ret = -ENOMEM; @@ -476,16 +493,16 @@ int btrfs_test_qgroups(void) } btrfs_set_header_level(root->node, 0); btrfs_set_header_nritems(root->node, 0); - root->alloc_bytenr += 8192; + root->alloc_bytenr += 2 * nodesize; - tmp_root = btrfs_alloc_dummy_root(); + tmp_root = btrfs_alloc_dummy_root(sectorsize, nodesize); if (IS_ERR(tmp_root)) { test_msg("Couldn't allocate a fs root\n"); ret = PTR_ERR(tmp_root); goto out; } - tmp_root->root_key.objectid = 5; + tmp_root->root_key.objectid = BTRFS_FS_TREE_OBJECTID; root->fs_info->fs_root = tmp_root; ret = btrfs_insert_fs_root(root->fs_info, tmp_root); if (ret) { @@ -493,14 +510,14 @@ int btrfs_test_qgroups(void) goto out; } - tmp_root = btrfs_alloc_dummy_root(); + tmp_root = btrfs_alloc_dummy_root(sectorsize, nodesize); if (IS_ERR(tmp_root)) { test_msg("Couldn't allocate a fs root\n"); ret = PTR_ERR(tmp_root); goto out; } - tmp_root->root_key.objectid = 256; + tmp_root->root_key.objectid = BTRFS_FIRST_FREE_OBJECTID; ret = btrfs_insert_fs_root(root->fs_info, tmp_root); if (ret) { test_msg("Couldn't insert fs root %d\n", ret); @@ -508,10 +525,10 @@ int btrfs_test_qgroups(void) } test_msg("Running qgroup tests\n"); - ret = test_no_shared_qgroup(root); + ret = test_no_shared_qgroup(root, sectorsize, nodesize); if (ret) goto out; - ret = test_multiple_refs(root); + ret = test_multiple_refs(root, sectorsize, nodesize); out: btrfs_free_dummy_root(root); return ret; diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index f6e24cb423ae..948aa186b353 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -818,6 +818,7 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, { struct btrfs_transaction *cur_trans = trans->transaction; struct btrfs_fs_info *info = root->fs_info; + u64 transid = trans->transid; unsigned long cur = trans->delayed_ref_updates; int lock = (trans->type != TRANS_JOIN_NOLOCK); int err = 0; @@ -905,7 +906,7 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, kmem_cache_free(btrfs_trans_handle_cachep, trans); if (must_run_delayed_refs) { - btrfs_async_run_delayed_refs(root, cur, + btrfs_async_run_delayed_refs(root, cur, transid, must_run_delayed_refs == 1); } return err; @@ -1311,11 +1312,6 @@ int btrfs_defrag_root(struct btrfs_root *root) return ret; } -/* Bisesctability fixup, remove in 4.8 */ -#ifndef btrfs_std_error -#define btrfs_std_error btrfs_handle_fs_error -#endif - /* * Do all special snapshot related qgroup dirty hack. * @@ -1385,7 +1381,7 @@ static int qgroup_account_snapshot(struct btrfs_trans_handle *trans, switch_commit_roots(trans->transaction, fs_info); ret = btrfs_write_and_wait_transaction(trans, src); if (ret) - btrfs_std_error(fs_info, ret, + btrfs_handle_fs_error(fs_info, ret, "Error while writing out transaction for qgroup"); out: diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 9fe0ec2bf0fe..c5abee4f01ad 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -110,7 +110,6 @@ struct btrfs_trans_handle { u64 chunk_bytes_reserved; unsigned long use_count; unsigned long blocks_reserved; - unsigned long blocks_used; unsigned long delayed_ref_updates; struct btrfs_transaction *transaction; struct btrfs_block_rsv *block_rsv; @@ -121,6 +120,7 @@ struct btrfs_trans_handle { bool can_flush_pending_bgs; bool reloc_reserved; bool sync; + bool dirty; unsigned int type; /* * this root is only needed to validate that the root passed to diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index b7665af471d8..c05f69a8ec42 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -2422,8 +2422,8 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans, root_owner = btrfs_header_owner(parent); next = btrfs_find_create_tree_block(root, bytenr); - if (!next) - return -ENOMEM; + if (IS_ERR(next)) + return PTR_ERR(next); if (*level == 1) { ret = wc->process_func(root, next, wc, ptr_gen); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index bdc62561ede8..0fb4a959012e 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -462,7 +462,7 @@ loop_lock: sync_pending = 0; } - btrfsic_submit_bio(cur->bi_rw, cur); + btrfsic_submit_bio(cur); num_run++; batch_run++; @@ -2761,6 +2761,7 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans, u64 dev_extent_len = 0; u64 chunk_objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID; int i, ret = 0; + struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; /* Just in case */ root = root->fs_info->chunk_root; @@ -2787,12 +2788,19 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans, check_system_chunk(trans, extent_root, map->type); unlock_chunks(root->fs_info->chunk_root); + /* + * Take the device list mutex to prevent races with the final phase of + * a device replace operation that replaces the device object associated + * with map stripes (dev-replace.c:btrfs_dev_replace_finishing()). + */ + mutex_lock(&fs_devices->device_list_mutex); for (i = 0; i < map->num_stripes; i++) { struct btrfs_device *device = map->stripes[i].dev; ret = btrfs_free_dev_extent(trans, device, map->stripes[i].physical, &dev_extent_len); if (ret) { + mutex_unlock(&fs_devices->device_list_mutex); btrfs_abort_transaction(trans, root, ret); goto out; } @@ -2811,11 +2819,14 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans, if (map->stripes[i].dev) { ret = btrfs_update_device(trans, map->stripes[i].dev); if (ret) { + mutex_unlock(&fs_devices->device_list_mutex); btrfs_abort_transaction(trans, root, ret); goto out; } } } + mutex_unlock(&fs_devices->device_list_mutex); + ret = btrfs_free_chunk(trans, root, chunk_objectid, chunk_offset); if (ret) { btrfs_abort_transaction(trans, root, ret); @@ -4230,6 +4241,7 @@ int btrfs_create_uuid_tree(struct btrfs_fs_info *fs_info) if (IS_ERR(uuid_root)) { ret = PTR_ERR(uuid_root); btrfs_abort_transaction(trans, tree_root, ret); + btrfs_end_transaction(trans, tree_root); return ret; } @@ -4682,12 +4694,12 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, if (type & BTRFS_BLOCK_GROUP_RAID5) { raid_stripe_len = find_raid56_stripe_len(ndevs - 1, - btrfs_super_stripesize(info->super_copy)); + extent_root->stripesize); data_stripes = num_stripes - 1; } if (type & BTRFS_BLOCK_GROUP_RAID6) { raid_stripe_len = find_raid56_stripe_len(ndevs - 2, - btrfs_super_stripesize(info->super_copy)); + extent_root->stripesize); data_stripes = num_stripes - 2; } @@ -5248,7 +5260,7 @@ void btrfs_put_bbio(struct btrfs_bio *bbio) kfree(bbio); } -static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, +static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int op, u64 logical, u64 *length, struct btrfs_bio **bbio_ret, int mirror_num, int need_raid_map) @@ -5334,7 +5346,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, raid56_full_stripe_start *= full_stripe_len; } - if (rw & REQ_DISCARD) { + if (op == REQ_OP_DISCARD) { /* we don't discard raid56 yet */ if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) { ret = -EOPNOTSUPP; @@ -5347,7 +5359,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, For other RAID types and for RAID[56] reads, just allow a single stripe (on a single disk). */ if ((map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) && - (rw & REQ_WRITE)) { + (op == REQ_OP_WRITE)) { max_len = stripe_len * nr_data_stripes(map) - (offset - raid56_full_stripe_start); } else { @@ -5372,8 +5384,8 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, btrfs_dev_replace_set_lock_blocking(dev_replace); if (dev_replace_is_ongoing && mirror_num == map->num_stripes + 1 && - !(rw & (REQ_WRITE | REQ_DISCARD | REQ_GET_READ_MIRRORS)) && - dev_replace->tgtdev != NULL) { + op != REQ_OP_WRITE && op != REQ_OP_DISCARD && + op != REQ_GET_READ_MIRRORS && dev_replace->tgtdev != NULL) { /* * in dev-replace case, for repair case (that's the only * case where the mirror is selected explicitly when @@ -5460,15 +5472,17 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, (offset + *length); if (map->type & BTRFS_BLOCK_GROUP_RAID0) { - if (rw & REQ_DISCARD) + if (op == REQ_OP_DISCARD) num_stripes = min_t(u64, map->num_stripes, stripe_nr_end - stripe_nr_orig); stripe_nr = div_u64_rem(stripe_nr, map->num_stripes, &stripe_index); - if (!(rw & (REQ_WRITE | REQ_DISCARD | REQ_GET_READ_MIRRORS))) + if (op != REQ_OP_WRITE && op != REQ_OP_DISCARD && + op != REQ_GET_READ_MIRRORS) mirror_num = 1; } else if (map->type & BTRFS_BLOCK_GROUP_RAID1) { - if (rw & (REQ_WRITE | REQ_DISCARD | REQ_GET_READ_MIRRORS)) + if (op == REQ_OP_WRITE || op == REQ_OP_DISCARD || + op == REQ_GET_READ_MIRRORS) num_stripes = map->num_stripes; else if (mirror_num) stripe_index = mirror_num - 1; @@ -5481,7 +5495,8 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, } } else if (map->type & BTRFS_BLOCK_GROUP_DUP) { - if (rw & (REQ_WRITE | REQ_DISCARD | REQ_GET_READ_MIRRORS)) { + if (op == REQ_OP_WRITE || op == REQ_OP_DISCARD || + op == REQ_GET_READ_MIRRORS) { num_stripes = map->num_stripes; } else if (mirror_num) { stripe_index = mirror_num - 1; @@ -5495,9 +5510,9 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, stripe_nr = div_u64_rem(stripe_nr, factor, &stripe_index); stripe_index *= map->sub_stripes; - if (rw & (REQ_WRITE | REQ_GET_READ_MIRRORS)) + if (op == REQ_OP_WRITE || op == REQ_GET_READ_MIRRORS) num_stripes = map->sub_stripes; - else if (rw & REQ_DISCARD) + else if (op == REQ_OP_DISCARD) num_stripes = min_t(u64, map->sub_stripes * (stripe_nr_end - stripe_nr_orig), map->num_stripes); @@ -5515,7 +5530,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, } else if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) { if (need_raid_map && - ((rw & (REQ_WRITE | REQ_GET_READ_MIRRORS)) || + (op == REQ_OP_WRITE || op == REQ_GET_READ_MIRRORS || mirror_num > 1)) { /* push stripe_nr back to the start of the full stripe */ stripe_nr = div_u64(raid56_full_stripe_start, @@ -5543,8 +5558,8 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, /* We distribute the parity blocks across stripes */ div_u64_rem(stripe_nr + stripe_index, map->num_stripes, &stripe_index); - if (!(rw & (REQ_WRITE | REQ_DISCARD | - REQ_GET_READ_MIRRORS)) && mirror_num <= 1) + if ((op != REQ_OP_WRITE && op != REQ_OP_DISCARD && + op != REQ_GET_READ_MIRRORS) && mirror_num <= 1) mirror_num = 1; } } else { @@ -5567,9 +5582,9 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, num_alloc_stripes = num_stripes; if (dev_replace_is_ongoing) { - if (rw & (REQ_WRITE | REQ_DISCARD)) + if (op == REQ_OP_WRITE || op == REQ_OP_DISCARD) num_alloc_stripes <<= 1; - if (rw & REQ_GET_READ_MIRRORS) + if (op == REQ_GET_READ_MIRRORS) num_alloc_stripes++; tgtdev_indexes = num_stripes; } @@ -5584,7 +5599,8 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, /* build raid_map */ if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK && - need_raid_map && ((rw & (REQ_WRITE | REQ_GET_READ_MIRRORS)) || + need_raid_map && + ((op == REQ_OP_WRITE || op == REQ_GET_READ_MIRRORS) || mirror_num > 1)) { u64 tmp; unsigned rot; @@ -5609,7 +5625,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, RAID6_Q_STRIPE; } - if (rw & REQ_DISCARD) { + if (op == REQ_OP_DISCARD) { u32 factor = 0; u32 sub_stripes = 0; u64 stripes_per_dev = 0; @@ -5689,14 +5705,15 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, } } - if (rw & (REQ_WRITE | REQ_GET_READ_MIRRORS)) + if (op == REQ_OP_WRITE || op == REQ_GET_READ_MIRRORS) max_errors = btrfs_chunk_max_errors(map); if (bbio->raid_map) sort_parity_stripes(bbio, num_stripes); tgtdev_indexes = 0; - if (dev_replace_is_ongoing && (rw & (REQ_WRITE | REQ_DISCARD)) && + if (dev_replace_is_ongoing && + (op == REQ_OP_WRITE || op == REQ_OP_DISCARD) && dev_replace->tgtdev != NULL) { int index_where_to_add; u64 srcdev_devid = dev_replace->srcdev->devid; @@ -5731,7 +5748,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, } } num_stripes = index_where_to_add; - } else if (dev_replace_is_ongoing && (rw & REQ_GET_READ_MIRRORS) && + } else if (dev_replace_is_ongoing && (op == REQ_GET_READ_MIRRORS) && dev_replace->tgtdev != NULL) { u64 srcdev_devid = dev_replace->srcdev->devid; int index_srcdev = 0; @@ -5762,20 +5779,17 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, } } if (found) { - if (physical_of_found + map->stripe_len <= - dev_replace->cursor_left) { - struct btrfs_bio_stripe *tgtdev_stripe = - bbio->stripes + num_stripes; + struct btrfs_bio_stripe *tgtdev_stripe = + bbio->stripes + num_stripes; - tgtdev_stripe->physical = physical_of_found; - tgtdev_stripe->length = - bbio->stripes[index_srcdev].length; - tgtdev_stripe->dev = dev_replace->tgtdev; - bbio->tgtdev_map[index_srcdev] = num_stripes; + tgtdev_stripe->physical = physical_of_found; + tgtdev_stripe->length = + bbio->stripes[index_srcdev].length; + tgtdev_stripe->dev = dev_replace->tgtdev; + bbio->tgtdev_map[index_srcdev] = num_stripes; - tgtdev_indexes++; - num_stripes++; - } + tgtdev_indexes++; + num_stripes++; } } @@ -5806,21 +5820,21 @@ out: return ret; } -int btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, +int btrfs_map_block(struct btrfs_fs_info *fs_info, int op, u64 logical, u64 *length, struct btrfs_bio **bbio_ret, int mirror_num) { - return __btrfs_map_block(fs_info, rw, logical, length, bbio_ret, + return __btrfs_map_block(fs_info, op, logical, length, bbio_ret, mirror_num, 0); } /* For Scrub/replace */ -int btrfs_map_sblock(struct btrfs_fs_info *fs_info, int rw, +int btrfs_map_sblock(struct btrfs_fs_info *fs_info, int op, u64 logical, u64 *length, struct btrfs_bio **bbio_ret, int mirror_num, int need_raid_map) { - return __btrfs_map_block(fs_info, rw, logical, length, bbio_ret, + return __btrfs_map_block(fs_info, op, logical, length, bbio_ret, mirror_num, need_raid_map); } @@ -5934,7 +5948,7 @@ static void btrfs_end_bio(struct bio *bio) BUG_ON(stripe_index >= bbio->num_stripes); dev = bbio->stripes[stripe_index].dev; if (dev->bdev) { - if (bio->bi_rw & WRITE) + if (bio_op(bio) == REQ_OP_WRITE) btrfs_dev_stat_inc(dev, BTRFS_DEV_STAT_WRITE_ERRS); else @@ -5988,7 +6002,7 @@ static void btrfs_end_bio(struct bio *bio) */ static noinline void btrfs_schedule_bio(struct btrfs_root *root, struct btrfs_device *device, - int rw, struct bio *bio) + struct bio *bio) { int should_queue = 1; struct btrfs_pending_bios *pending_bios; @@ -5999,9 +6013,9 @@ static noinline void btrfs_schedule_bio(struct btrfs_root *root, } /* don't bother with additional async steps for reads, right now */ - if (!(rw & REQ_WRITE)) { + if (bio_op(bio) == REQ_OP_READ) { bio_get(bio); - btrfsic_submit_bio(rw, bio); + btrfsic_submit_bio(bio); bio_put(bio); return; } @@ -6015,7 +6029,6 @@ static noinline void btrfs_schedule_bio(struct btrfs_root *root, atomic_inc(&root->fs_info->nr_async_bios); WARN_ON(bio->bi_next); bio->bi_next = NULL; - bio->bi_rw |= rw; spin_lock(&device->io_lock); if (bio->bi_rw & REQ_SYNC) @@ -6041,7 +6054,7 @@ static noinline void btrfs_schedule_bio(struct btrfs_root *root, static void submit_stripe_bio(struct btrfs_root *root, struct btrfs_bio *bbio, struct bio *bio, u64 physical, int dev_nr, - int rw, int async) + int async) { struct btrfs_device *dev = bbio->stripes[dev_nr].dev; @@ -6055,8 +6068,8 @@ static void submit_stripe_bio(struct btrfs_root *root, struct btrfs_bio *bbio, rcu_read_lock(); name = rcu_dereference(dev->name); - pr_debug("btrfs_map_bio: rw %d, sector=%llu, dev=%lu " - "(%s id %llu), size=%u\n", rw, + pr_debug("btrfs_map_bio: rw %d 0x%x, sector=%llu, dev=%lu " + "(%s id %llu), size=%u\n", bio_op(bio), bio->bi_rw, (u64)bio->bi_iter.bi_sector, (u_long)dev->bdev->bd_dev, name->str, dev->devid, bio->bi_iter.bi_size); rcu_read_unlock(); @@ -6067,9 +6080,9 @@ static void submit_stripe_bio(struct btrfs_root *root, struct btrfs_bio *bbio, btrfs_bio_counter_inc_noblocked(root->fs_info); if (async) - btrfs_schedule_bio(root, dev, rw, bio); + btrfs_schedule_bio(root, dev, bio); else - btrfsic_submit_bio(rw, bio); + btrfsic_submit_bio(bio); } static void bbio_error(struct btrfs_bio *bbio, struct bio *bio, u64 logical) @@ -6086,7 +6099,7 @@ static void bbio_error(struct btrfs_bio *bbio, struct bio *bio, u64 logical) } } -int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, +int btrfs_map_bio(struct btrfs_root *root, struct bio *bio, int mirror_num, int async_submit) { struct btrfs_device *dev; @@ -6103,8 +6116,8 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, map_length = length; btrfs_bio_counter_inc_blocked(root->fs_info); - ret = __btrfs_map_block(root->fs_info, rw, logical, &map_length, &bbio, - mirror_num, 1); + ret = __btrfs_map_block(root->fs_info, bio_op(bio), logical, + &map_length, &bbio, mirror_num, 1); if (ret) { btrfs_bio_counter_dec(root->fs_info); return ret; @@ -6118,10 +6131,10 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, atomic_set(&bbio->stripes_pending, bbio->num_stripes); if ((bbio->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) && - ((rw & WRITE) || (mirror_num > 1))) { + ((bio_op(bio) == REQ_OP_WRITE) || (mirror_num > 1))) { /* In this case, map_length has been set to the length of a single stripe; not the whole write */ - if (rw & WRITE) { + if (bio_op(bio) == REQ_OP_WRITE) { ret = raid56_parity_write(root, bio, bbio, map_length); } else { ret = raid56_parity_recover(root, bio, bbio, map_length, @@ -6140,7 +6153,8 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, for (dev_nr = 0; dev_nr < total_devs; dev_nr++) { dev = bbio->stripes[dev_nr].dev; - if (!dev || !dev->bdev || (rw & WRITE && !dev->writeable)) { + if (!dev || !dev->bdev || + (bio_op(bio) == REQ_OP_WRITE && !dev->writeable)) { bbio_error(bbio, first_bio, logical); continue; } @@ -6152,7 +6166,7 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, bio = first_bio; submit_stripe_bio(root, bbio, bio, - bbio->stripes[dev_nr].physical, dev_nr, rw, + bbio->stripes[dev_nr].physical, dev_nr, async_submit); } btrfs_bio_counter_dec(root->fs_info); @@ -6250,27 +6264,23 @@ struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info, return dev; } -static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, - struct extent_buffer *leaf, - struct btrfs_chunk *chunk) +/* Return -EIO if any error, otherwise return 0. */ +static int btrfs_check_chunk_valid(struct btrfs_root *root, + struct extent_buffer *leaf, + struct btrfs_chunk *chunk, u64 logical) { - struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree; - struct map_lookup *map; - struct extent_map *em; - u64 logical; u64 length; u64 stripe_len; - u64 devid; - u8 uuid[BTRFS_UUID_SIZE]; - int num_stripes; - int ret; - int i; + u16 num_stripes; + u16 sub_stripes; + u64 type; - logical = key->offset; length = btrfs_chunk_length(leaf, chunk); stripe_len = btrfs_chunk_stripe_len(leaf, chunk); num_stripes = btrfs_chunk_num_stripes(leaf, chunk); - /* Validation check */ + sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk); + type = btrfs_chunk_type(leaf, chunk); + if (!num_stripes) { btrfs_err(root->fs_info, "invalid chunk num_stripes: %u", num_stripes); @@ -6281,6 +6291,11 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, "invalid chunk logical %llu", logical); return -EIO; } + if (btrfs_chunk_sector_size(leaf, chunk) != root->sectorsize) { + btrfs_err(root->fs_info, "invalid chunk sectorsize %u", + btrfs_chunk_sector_size(leaf, chunk)); + return -EIO; + } if (!length || !IS_ALIGNED(length, root->sectorsize)) { btrfs_err(root->fs_info, "invalid chunk length %llu", length); @@ -6292,13 +6307,54 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, return -EIO; } if (~(BTRFS_BLOCK_GROUP_TYPE_MASK | BTRFS_BLOCK_GROUP_PROFILE_MASK) & - btrfs_chunk_type(leaf, chunk)) { + type) { btrfs_err(root->fs_info, "unrecognized chunk type: %llu", ~(BTRFS_BLOCK_GROUP_TYPE_MASK | BTRFS_BLOCK_GROUP_PROFILE_MASK) & btrfs_chunk_type(leaf, chunk)); return -EIO; } + if ((type & BTRFS_BLOCK_GROUP_RAID10 && sub_stripes != 2) || + (type & BTRFS_BLOCK_GROUP_RAID1 && num_stripes < 1) || + (type & BTRFS_BLOCK_GROUP_RAID5 && num_stripes < 2) || + (type & BTRFS_BLOCK_GROUP_RAID6 && num_stripes < 3) || + (type & BTRFS_BLOCK_GROUP_DUP && num_stripes > 2) || + ((type & BTRFS_BLOCK_GROUP_PROFILE_MASK) == 0 && + num_stripes != 1)) { + btrfs_err(root->fs_info, + "invalid num_stripes:sub_stripes %u:%u for profile %llu", + num_stripes, sub_stripes, + type & BTRFS_BLOCK_GROUP_PROFILE_MASK); + return -EIO; + } + + return 0; +} + +static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, + struct extent_buffer *leaf, + struct btrfs_chunk *chunk) +{ + struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree; + struct map_lookup *map; + struct extent_map *em; + u64 logical; + u64 length; + u64 stripe_len; + u64 devid; + u8 uuid[BTRFS_UUID_SIZE]; + int num_stripes; + int ret; + int i; + + logical = key->offset; + length = btrfs_chunk_length(leaf, chunk); + stripe_len = btrfs_chunk_stripe_len(leaf, chunk); + num_stripes = btrfs_chunk_num_stripes(leaf, chunk); + + ret = btrfs_check_chunk_valid(root, leaf, chunk, logical); + if (ret) + return ret; read_lock(&map_tree->map_tree.lock); em = lookup_extent_mapping(&map_tree->map_tree, logical, 1); @@ -6546,6 +6602,7 @@ int btrfs_read_sys_array(struct btrfs_root *root) u32 array_size; u32 len = 0; u32 cur_offset; + u64 type; struct btrfs_key key; ASSERT(BTRFS_SUPER_INFO_SIZE <= root->nodesize); @@ -6555,8 +6612,8 @@ int btrfs_read_sys_array(struct btrfs_root *root) * overallocate but we can keep it as-is, only the first page is used. */ sb = btrfs_find_create_tree_block(root, BTRFS_SUPER_INFO_OFFSET); - if (!sb) - return -ENOMEM; + if (IS_ERR(sb)) + return PTR_ERR(sb); set_extent_buffer_uptodate(sb); btrfs_set_buffer_lockdep_class(root->root_key.objectid, sb, 0); /* @@ -6612,6 +6669,15 @@ int btrfs_read_sys_array(struct btrfs_root *root) break; } + type = btrfs_chunk_type(sb, chunk); + if ((type & BTRFS_BLOCK_GROUP_SYSTEM) == 0) { + btrfs_err(root->fs_info, + "invalid chunk type %llu in sys_array at offset %u", + type, cur_offset); + ret = -EIO; + break; + } + len = btrfs_chunk_item_size(num_stripes); if (cur_offset + len > array_size) goto out_short_read; @@ -6630,12 +6696,14 @@ int btrfs_read_sys_array(struct btrfs_root *root) sb_array_offset += len; cur_offset += len; } + clear_extent_buffer_uptodate(sb); free_extent_buffer_stale(sb); return ret; out_short_read: printk(KERN_ERR "BTRFS: sys_array too short to read %u bytes at offset %u\n", len, cur_offset); + clear_extent_buffer_uptodate(sb); free_extent_buffer_stale(sb); return -EIO; } @@ -6648,6 +6716,7 @@ int btrfs_read_chunk_tree(struct btrfs_root *root) struct btrfs_key found_key; int ret; int slot; + u64 total_dev = 0; root = root->fs_info->chunk_root; @@ -6689,6 +6758,7 @@ int btrfs_read_chunk_tree(struct btrfs_root *root) ret = read_one_dev(root, leaf, dev_item); if (ret) goto error; + total_dev++; } else if (found_key.type == BTRFS_CHUNK_ITEM_KEY) { struct btrfs_chunk *chunk; chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk); @@ -6698,6 +6768,28 @@ int btrfs_read_chunk_tree(struct btrfs_root *root) } path->slots[0]++; } + + /* + * After loading chunk tree, we've got all device information, + * do another round of validation checks. + */ + if (total_dev != root->fs_info->fs_devices->total_devices) { + btrfs_err(root->fs_info, + "super_num_devices %llu mismatch with num_devices %llu found here", + btrfs_super_num_devices(root->fs_info->super_copy), + total_dev); + ret = -EINVAL; + goto error; + } + if (btrfs_super_total_bytes(root->fs_info->super_copy) < + root->fs_info->fs_devices->total_rw_bytes) { + btrfs_err(root->fs_info, + "super_total_bytes %llu mismatch with fs_devices total_rw_bytes %llu", + btrfs_super_total_bytes(root->fs_info->super_copy), + root->fs_info->fs_devices->total_rw_bytes); + ret = -EINVAL; + goto error; + } ret = 0; error: unlock_chunks(root); diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 0ac90f8d85bd..6613e6335ca2 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -375,10 +375,10 @@ int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start, u64 end, u64 *length); void btrfs_get_bbio(struct btrfs_bio *bbio); void btrfs_put_bbio(struct btrfs_bio *bbio); -int btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, +int btrfs_map_block(struct btrfs_fs_info *fs_info, int op, u64 logical, u64 *length, struct btrfs_bio **bbio_ret, int mirror_num); -int btrfs_map_sblock(struct btrfs_fs_info *fs_info, int rw, +int btrfs_map_sblock(struct btrfs_fs_info *fs_info, int op, u64 logical, u64 *length, struct btrfs_bio **bbio_ret, int mirror_num, int need_raid_map); @@ -391,7 +391,7 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root, u64 type); void btrfs_mapping_init(struct btrfs_mapping_tree *tree); void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree); -int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, +int btrfs_map_bio(struct btrfs_root *root, struct bio *bio, int mirror_num, int async_submit); int btrfs_open_devices(struct btrfs_fs_devices *fs_devices, fmode_t flags, void *holder); diff --git a/fs/buffer.c b/fs/buffer.c index 754813a6962b..b9fa1be75e69 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -45,7 +45,7 @@ #include <trace/events/block.h> static int fsync_buffers_list(spinlock_t *lock, struct list_head *list); -static int submit_bh_wbc(int rw, struct buffer_head *bh, +static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh, unsigned long bio_flags, struct writeback_control *wbc); @@ -153,7 +153,7 @@ static void __end_buffer_read_notouch(struct buffer_head *bh, int uptodate) if (uptodate) { set_buffer_uptodate(bh); } else { - /* This happens, due to failed READA attempts. */ + /* This happens, due to failed read-ahead attempts. */ clear_buffer_uptodate(bh); } unlock_buffer(bh); @@ -588,7 +588,7 @@ void write_boundary_block(struct block_device *bdev, struct buffer_head *bh = __find_get_block(bdev, bblock + 1, blocksize); if (bh) { if (buffer_dirty(bh)) - ll_rw_block(WRITE, 1, &bh); + ll_rw_block(REQ_OP_WRITE, 0, 1, &bh); put_bh(bh); } } @@ -1225,7 +1225,7 @@ static struct buffer_head *__bread_slow(struct buffer_head *bh) } else { get_bh(bh); bh->b_end_io = end_buffer_read_sync; - submit_bh(READ, bh); + submit_bh(REQ_OP_READ, 0, bh); wait_on_buffer(bh); if (buffer_uptodate(bh)) return bh; @@ -1395,7 +1395,7 @@ void __breadahead(struct block_device *bdev, sector_t block, unsigned size) { struct buffer_head *bh = __getblk(bdev, block, size); if (likely(bh)) { - ll_rw_block(READA, 1, &bh); + ll_rw_block(REQ_OP_READ, REQ_RAHEAD, 1, &bh); brelse(bh); } } @@ -1687,7 +1687,7 @@ static struct buffer_head *create_page_buffers(struct page *page, struct inode * * WB_SYNC_ALL, the writes are posted using WRITE_SYNC; this * causes the writes to be flagged as synchronous writes. */ -static int __block_write_full_page(struct inode *inode, struct page *page, +int __block_write_full_page(struct inode *inode, struct page *page, get_block_t *get_block, struct writeback_control *wbc, bh_end_io_t *handler) { @@ -1697,7 +1697,7 @@ static int __block_write_full_page(struct inode *inode, struct page *page, struct buffer_head *bh, *head; unsigned int blocksize, bbits; int nr_underway = 0; - int write_op = (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE); + int write_flags = (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : 0); head = create_page_buffers(page, inode, (1 << BH_Dirty)|(1 << BH_Uptodate)); @@ -1786,7 +1786,7 @@ static int __block_write_full_page(struct inode *inode, struct page *page, do { struct buffer_head *next = bh->b_this_page; if (buffer_async_write(bh)) { - submit_bh_wbc(write_op, bh, 0, wbc); + submit_bh_wbc(REQ_OP_WRITE, write_flags, bh, 0, wbc); nr_underway++; } bh = next; @@ -1840,7 +1840,7 @@ recover: struct buffer_head *next = bh->b_this_page; if (buffer_async_write(bh)) { clear_buffer_dirty(bh); - submit_bh_wbc(write_op, bh, 0, wbc); + submit_bh_wbc(REQ_OP_WRITE, write_flags, bh, 0, wbc); nr_underway++; } bh = next; @@ -1848,6 +1848,7 @@ recover: unlock_page(page); goto done; } +EXPORT_SYMBOL(__block_write_full_page); /* * If a page has any new buffers, zero them out here, and mark them uptodate @@ -1955,7 +1956,7 @@ int __block_write_begin(struct page *page, loff_t pos, unsigned len, if (!buffer_uptodate(bh) && !buffer_delay(bh) && !buffer_unwritten(bh) && (block_start < from || block_end > to)) { - ll_rw_block(READ, 1, &bh); + ll_rw_block(REQ_OP_READ, 0, 1, &bh); *wait_bh++=bh; } } @@ -2248,7 +2249,7 @@ int block_read_full_page(struct page *page, get_block_t *get_block) if (buffer_uptodate(bh)) end_buffer_async_read(bh, 1); else - submit_bh(READ, bh); + submit_bh(REQ_OP_READ, 0, bh); } return 0; } @@ -2582,7 +2583,7 @@ int nobh_write_begin(struct address_space *mapping, if (block_start < from || block_end > to) { lock_buffer(bh); bh->b_end_io = end_buffer_read_nobh; - submit_bh(READ, bh); + submit_bh(REQ_OP_READ, 0, bh); nr_reads++; } } @@ -2852,7 +2853,7 @@ int block_truncate_page(struct address_space *mapping, if (!buffer_uptodate(bh) && !buffer_delay(bh) && !buffer_unwritten(bh)) { err = -EIO; - ll_rw_block(READ, 1, &bh); + ll_rw_block(REQ_OP_READ, 0, 1, &bh); wait_on_buffer(bh); /* Uhhuh. Read error. Complain and punt. */ if (!buffer_uptodate(bh)) @@ -2949,7 +2950,7 @@ static void end_bio_bh_io_sync(struct bio *bio) * errors, this only handles the "we need to be able to * do IO at the final sector" case. */ -void guard_bio_eod(int rw, struct bio *bio) +void guard_bio_eod(int op, struct bio *bio) { sector_t maxsector; struct bio_vec *bvec = &bio->bi_io_vec[bio->bi_vcnt - 1]; @@ -2979,13 +2980,13 @@ void guard_bio_eod(int rw, struct bio *bio) bvec->bv_len -= truncated_bytes; /* ..and clear the end of the buffer for reads */ - if ((rw & RW_MASK) == READ) { + if (op == REQ_OP_READ) { zero_user(bvec->bv_page, bvec->bv_offset + bvec->bv_len, truncated_bytes); } } -static int submit_bh_wbc(int rw, struct buffer_head *bh, +static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh, unsigned long bio_flags, struct writeback_control *wbc) { struct bio *bio; @@ -2999,7 +3000,7 @@ static int submit_bh_wbc(int rw, struct buffer_head *bh, /* * Only clear out a write error when rewriting */ - if (test_set_buffer_req(bh) && (rw & WRITE)) + if (test_set_buffer_req(bh) && (op == REQ_OP_WRITE)) clear_buffer_write_io_error(bh); /* @@ -3024,39 +3025,42 @@ static int submit_bh_wbc(int rw, struct buffer_head *bh, bio->bi_flags |= bio_flags; /* Take care of bh's that straddle the end of the device */ - guard_bio_eod(rw, bio); + guard_bio_eod(op, bio); if (buffer_meta(bh)) - rw |= REQ_META; + op_flags |= REQ_META; if (buffer_prio(bh)) - rw |= REQ_PRIO; + op_flags |= REQ_PRIO; + bio_set_op_attrs(bio, op, op_flags); - submit_bio(rw, bio); + submit_bio(bio); return 0; } -int _submit_bh(int rw, struct buffer_head *bh, unsigned long bio_flags) +int _submit_bh(int op, int op_flags, struct buffer_head *bh, + unsigned long bio_flags) { - return submit_bh_wbc(rw, bh, bio_flags, NULL); + return submit_bh_wbc(op, op_flags, bh, bio_flags, NULL); } EXPORT_SYMBOL_GPL(_submit_bh); -int submit_bh(int rw, struct buffer_head *bh) +int submit_bh(int op, int op_flags, struct buffer_head *bh) { - return submit_bh_wbc(rw, bh, 0, NULL); + return submit_bh_wbc(op, op_flags, bh, 0, NULL); } EXPORT_SYMBOL(submit_bh); /** * ll_rw_block: low-level access to block devices (DEPRECATED) - * @rw: whether to %READ or %WRITE or maybe %READA (readahead) + * @op: whether to %READ or %WRITE + * @op_flags: rq_flag_bits * @nr: number of &struct buffer_heads in the array * @bhs: array of pointers to &struct buffer_head * * ll_rw_block() takes an array of pointers to &struct buffer_heads, and - * requests an I/O operation on them, either a %READ or a %WRITE. The third - * %READA option is described in the documentation for generic_make_request() - * which ll_rw_block() calls. + * requests an I/O operation on them, either a %REQ_OP_READ or a %REQ_OP_WRITE. + * @op_flags contains flags modifying the detailed I/O behavior, most notably + * %REQ_RAHEAD. * * This function drops any buffer that it cannot get a lock on (with the * BH_Lock state bit), any buffer that appears to be clean when doing a write @@ -3072,7 +3076,7 @@ EXPORT_SYMBOL(submit_bh); * All of the buffers must be for the same device, and must also be a * multiple of the current approved size for the device. */ -void ll_rw_block(int rw, int nr, struct buffer_head *bhs[]) +void ll_rw_block(int op, int op_flags, int nr, struct buffer_head *bhs[]) { int i; @@ -3081,18 +3085,18 @@ void ll_rw_block(int rw, int nr, struct buffer_head *bhs[]) if (!trylock_buffer(bh)) continue; - if (rw == WRITE) { + if (op == WRITE) { if (test_clear_buffer_dirty(bh)) { bh->b_end_io = end_buffer_write_sync; get_bh(bh); - submit_bh(WRITE, bh); + submit_bh(op, op_flags, bh); continue; } } else { if (!buffer_uptodate(bh)) { bh->b_end_io = end_buffer_read_sync; get_bh(bh); - submit_bh(rw, bh); + submit_bh(op, op_flags, bh); continue; } } @@ -3101,7 +3105,7 @@ void ll_rw_block(int rw, int nr, struct buffer_head *bhs[]) } EXPORT_SYMBOL(ll_rw_block); -void write_dirty_buffer(struct buffer_head *bh, int rw) +void write_dirty_buffer(struct buffer_head *bh, int op_flags) { lock_buffer(bh); if (!test_clear_buffer_dirty(bh)) { @@ -3110,7 +3114,7 @@ void write_dirty_buffer(struct buffer_head *bh, int rw) } bh->b_end_io = end_buffer_write_sync; get_bh(bh); - submit_bh(rw, bh); + submit_bh(REQ_OP_WRITE, op_flags, bh); } EXPORT_SYMBOL(write_dirty_buffer); @@ -3119,7 +3123,7 @@ EXPORT_SYMBOL(write_dirty_buffer); * and then start new I/O and then wait upon it. The caller must have a ref on * the buffer_head. */ -int __sync_dirty_buffer(struct buffer_head *bh, int rw) +int __sync_dirty_buffer(struct buffer_head *bh, int op_flags) { int ret = 0; @@ -3128,7 +3132,7 @@ int __sync_dirty_buffer(struct buffer_head *bh, int rw) if (test_clear_buffer_dirty(bh)) { get_bh(bh); bh->b_end_io = end_buffer_write_sync; - ret = submit_bh(rw, bh); + ret = submit_bh(REQ_OP_WRITE, op_flags, bh); wait_on_buffer(bh); if (!ret && !buffer_uptodate(bh)) ret = -EIO; @@ -3391,7 +3395,7 @@ int bh_submit_read(struct buffer_head *bh) get_bh(bh); bh->b_end_io = end_buffer_read_sync; - submit_bh(READ, bh); + submit_bh(REQ_OP_READ, 0, bh); wait_on_buffer(bh); if (buffer_uptodate(bh)) return 0; diff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c index 861d611b8c05..ce5f345d70f5 100644 --- a/fs/cachefiles/interface.c +++ b/fs/cachefiles/interface.c @@ -380,7 +380,7 @@ static void cachefiles_sync_cache(struct fscache_cache *_cache) * check if the backing cache is updated to FS-Cache * - called by FS-Cache when evaluates if need to invalidate the cache */ -static bool cachefiles_check_consistency(struct fscache_operation *op) +static int cachefiles_check_consistency(struct fscache_operation *op) { struct cachefiles_object *object; struct cachefiles_cache *cache; diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index eeb71e5de27a..26a9d10d75e9 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -276,8 +276,10 @@ static void finish_read(struct ceph_osd_request *req) for (i = 0; i < num_pages; i++) { struct page *page = osd_data->pages[i]; - if (rc < 0 && rc != -ENOENT) + if (rc < 0 && rc != -ENOENT) { + ceph_fscache_readpage_cancel(inode, page); goto unlock; + } if (bytes < (int)PAGE_SIZE) { /* zero (remainder of) page */ int s = bytes < 0 ? 0 : bytes; @@ -535,8 +537,6 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) CONGESTION_ON_THRESH(fsc->mount_options->congestion_kb)) set_bdi_congested(&fsc->backing_dev_info, BLK_RW_ASYNC); - ceph_readpage_to_fscache(inode, page); - set_page_writeback(page); err = ceph_osdc_writepages(osdc, ceph_vino(inode), &ci->i_layout, snapc, diff --git a/fs/ceph/cache.c b/fs/ceph/cache.c index c052b5bf219b..238c55b01723 100644 --- a/fs/ceph/cache.c +++ b/fs/ceph/cache.c @@ -25,6 +25,7 @@ #include "cache.h" struct ceph_aux_inode { + u64 version; struct timespec mtime; loff_t size; }; @@ -69,15 +70,8 @@ int ceph_fscache_register_fs(struct ceph_fs_client* fsc) fsc->fscache = fscache_acquire_cookie(ceph_cache_netfs.primary_index, &ceph_fscache_fsid_object_def, fsc, true); - - if (fsc->fscache == NULL) { + if (!fsc->fscache) pr_err("Unable to resgister fsid: %p fscache cookie", fsc); - return 0; - } - - fsc->revalidate_wq = alloc_workqueue("ceph-revalidate", 0, 1); - if (fsc->revalidate_wq == NULL) - return -ENOMEM; return 0; } @@ -105,6 +99,7 @@ static uint16_t ceph_fscache_inode_get_aux(const void *cookie_netfs_data, const struct inode* inode = &ci->vfs_inode; memset(&aux, 0, sizeof(aux)); + aux.version = ci->i_version; aux.mtime = inode->i_mtime; aux.size = i_size_read(inode); @@ -131,6 +126,7 @@ static enum fscache_checkaux ceph_fscache_inode_check_aux( return FSCACHE_CHECKAUX_OBSOLETE; memset(&aux, 0, sizeof(aux)); + aux.version = ci->i_version; aux.mtime = inode->i_mtime; aux.size = i_size_read(inode); @@ -181,32 +177,26 @@ static const struct fscache_cookie_def ceph_fscache_inode_object_def = { .now_uncached = ceph_fscache_inode_now_uncached, }; -void ceph_fscache_register_inode_cookie(struct ceph_fs_client* fsc, - struct ceph_inode_info* ci) +void ceph_fscache_register_inode_cookie(struct inode *inode) { - struct inode* inode = &ci->vfs_inode; + struct ceph_inode_info *ci = ceph_inode(inode); + struct ceph_fs_client *fsc = ceph_inode_to_client(inode); /* No caching for filesystem */ if (fsc->fscache == NULL) return; /* Only cache for regular files that are read only */ - if ((ci->vfs_inode.i_mode & S_IFREG) == 0) + if (!S_ISREG(inode->i_mode)) return; - /* Avoid multiple racing open requests */ - inode_lock(inode); - - if (ci->fscache) - goto done; - - ci->fscache = fscache_acquire_cookie(fsc->fscache, - &ceph_fscache_inode_object_def, - ci, true); - fscache_check_consistency(ci->fscache); -done: + inode_lock_nested(inode, I_MUTEX_CHILD); + if (!ci->fscache) { + ci->fscache = fscache_acquire_cookie(fsc->fscache, + &ceph_fscache_inode_object_def, + ci, false); + } inode_unlock(inode); - } void ceph_fscache_unregister_inode_cookie(struct ceph_inode_info* ci) @@ -222,6 +212,34 @@ void ceph_fscache_unregister_inode_cookie(struct ceph_inode_info* ci) fscache_relinquish_cookie(cookie, 0); } +static bool ceph_fscache_can_enable(void *data) +{ + struct inode *inode = data; + return !inode_is_open_for_write(inode); +} + +void ceph_fscache_file_set_cookie(struct inode *inode, struct file *filp) +{ + struct ceph_inode_info *ci = ceph_inode(inode); + + if (!fscache_cookie_valid(ci->fscache)) + return; + + if (inode_is_open_for_write(inode)) { + dout("fscache_file_set_cookie %p %p disabling cache\n", + inode, filp); + fscache_disable_cookie(ci->fscache, false); + fscache_uncache_all_inode_pages(ci->fscache, inode); + } else { + fscache_enable_cookie(ci->fscache, ceph_fscache_can_enable, + inode); + if (fscache_cookie_enabled(ci->fscache)) { + dout("fscache_file_set_cookie %p %p enabing cache\n", + inode, filp); + } + } +} + static void ceph_vfs_readpage_complete(struct page *page, void *data, int error) { if (!error) @@ -238,8 +256,7 @@ static void ceph_vfs_readpage_complete_unlock(struct page *page, void *data, int static inline bool cache_valid(struct ceph_inode_info *ci) { - return ((ceph_caps_issued(ci) & CEPH_CAP_FILE_CACHE) && - (ci->i_fscache_gen == ci->i_rdcache_gen)); + return ci->i_fscache_gen == ci->i_rdcache_gen; } @@ -332,69 +349,27 @@ void ceph_invalidate_fscache_page(struct inode* inode, struct page *page) void ceph_fscache_unregister_fs(struct ceph_fs_client* fsc) { - if (fsc->revalidate_wq) - destroy_workqueue(fsc->revalidate_wq); - fscache_relinquish_cookie(fsc->fscache, 0); fsc->fscache = NULL; } -static void ceph_revalidate_work(struct work_struct *work) -{ - int issued; - u32 orig_gen; - struct ceph_inode_info *ci = container_of(work, struct ceph_inode_info, - i_revalidate_work); - struct inode *inode = &ci->vfs_inode; - - spin_lock(&ci->i_ceph_lock); - issued = __ceph_caps_issued(ci, NULL); - orig_gen = ci->i_rdcache_gen; - spin_unlock(&ci->i_ceph_lock); - - if (!(issued & CEPH_CAP_FILE_CACHE)) { - dout("revalidate_work lost cache before validation %p\n", - inode); - goto out; - } - - if (!fscache_check_consistency(ci->fscache)) - fscache_invalidate(ci->fscache); - - spin_lock(&ci->i_ceph_lock); - /* Update the new valid generation (backwards sanity check too) */ - if (orig_gen > ci->i_fscache_gen) { - ci->i_fscache_gen = orig_gen; - } - spin_unlock(&ci->i_ceph_lock); - -out: - iput(&ci->vfs_inode); -} - -void ceph_queue_revalidate(struct inode *inode) +/* + * caller should hold CEPH_CAP_FILE_{RD,CACHE} + */ +void ceph_fscache_revalidate_cookie(struct ceph_inode_info *ci) { - struct ceph_fs_client *fsc = ceph_sb_to_client(inode->i_sb); - struct ceph_inode_info *ci = ceph_inode(inode); - - if (fsc->revalidate_wq == NULL || ci->fscache == NULL) + if (cache_valid(ci)) return; - ihold(inode); - - if (queue_work(ceph_sb_to_client(inode->i_sb)->revalidate_wq, - &ci->i_revalidate_work)) { - dout("ceph_queue_revalidate %p\n", inode); - } else { - dout("ceph_queue_revalidate %p failed\n)", inode); - iput(inode); + /* resue i_truncate_mutex. There should be no pending + * truncate while the caller holds CEPH_CAP_FILE_RD */ + mutex_lock(&ci->i_truncate_mutex); + if (!cache_valid(ci)) { + if (fscache_check_consistency(ci->fscache)) + fscache_invalidate(ci->fscache); + spin_lock(&ci->i_ceph_lock); + ci->i_fscache_gen = ci->i_rdcache_gen; + spin_unlock(&ci->i_ceph_lock); } -} - -void ceph_fscache_inode_init(struct ceph_inode_info *ci) -{ - ci->fscache = NULL; - /* The first load is verifed cookie open time */ - ci->i_fscache_gen = 1; - INIT_WORK(&ci->i_revalidate_work, ceph_revalidate_work); + mutex_unlock(&ci->i_truncate_mutex); } diff --git a/fs/ceph/cache.h b/fs/ceph/cache.h index 5ac591bd012b..7e72c7594f0c 100644 --- a/fs/ceph/cache.h +++ b/fs/ceph/cache.h @@ -34,10 +34,10 @@ void ceph_fscache_unregister(void); int ceph_fscache_register_fs(struct ceph_fs_client* fsc); void ceph_fscache_unregister_fs(struct ceph_fs_client* fsc); -void ceph_fscache_inode_init(struct ceph_inode_info *ci); -void ceph_fscache_register_inode_cookie(struct ceph_fs_client* fsc, - struct ceph_inode_info* ci); +void ceph_fscache_register_inode_cookie(struct inode *inode); void ceph_fscache_unregister_inode_cookie(struct ceph_inode_info* ci); +void ceph_fscache_file_set_cookie(struct inode *inode, struct file *filp); +void ceph_fscache_revalidate_cookie(struct ceph_inode_info *ci); int ceph_readpage_from_fscache(struct inode *inode, struct page *page); int ceph_readpages_from_fscache(struct inode *inode, @@ -46,12 +46,11 @@ int ceph_readpages_from_fscache(struct inode *inode, unsigned *nr_pages); void ceph_readpage_to_fscache(struct inode *inode, struct page *page); void ceph_invalidate_fscache_page(struct inode* inode, struct page *page); -void ceph_queue_revalidate(struct inode *inode); -static inline void ceph_fscache_update_objectsize(struct inode *inode) +static inline void ceph_fscache_inode_init(struct ceph_inode_info *ci) { - struct ceph_inode_info *ci = ceph_inode(inode); - fscache_attr_changed(ci->fscache); + ci->fscache = NULL; + ci->i_fscache_gen = 0; } static inline void ceph_fscache_invalidate(struct inode *inode) @@ -88,6 +87,11 @@ static inline void ceph_fscache_readpages_cancel(struct inode *inode, return fscache_readpages_cancel(ci->fscache, pages); } +static inline void ceph_disable_fscache_readpage(struct ceph_inode_info *ci) +{ + ci->i_fscache_gen = ci->i_rdcache_gen - 1; +} + #else static inline int ceph_fscache_register(void) @@ -112,8 +116,20 @@ static inline void ceph_fscache_inode_init(struct ceph_inode_info *ci) { } -static inline void ceph_fscache_register_inode_cookie(struct ceph_fs_client* parent_fsc, - struct ceph_inode_info* ci) +static inline void ceph_fscache_register_inode_cookie(struct inode *inode) +{ +} + +static inline void ceph_fscache_unregister_inode_cookie(struct ceph_inode_info* ci) +{ +} + +static inline void ceph_fscache_file_set_cookie(struct inode *inode, + struct file *filp) +{ +} + +static inline void ceph_fscache_revalidate_cookie(struct ceph_inode_info *ci) { } @@ -141,10 +157,6 @@ static inline void ceph_readpage_to_fscache(struct inode *inode, { } -static inline void ceph_fscache_update_objectsize(struct inode *inode) -{ -} - static inline void ceph_fscache_invalidate(struct inode *inode) { } @@ -154,10 +166,6 @@ static inline void ceph_invalidate_fscache_page(struct inode *inode, { } -static inline void ceph_fscache_unregister_inode_cookie(struct ceph_inode_info* ci) -{ -} - static inline int ceph_release_fscache_page(struct page *page, gfp_t gfp) { return 1; @@ -173,7 +181,7 @@ static inline void ceph_fscache_readpages_cancel(struct inode *inode, { } -static inline void ceph_queue_revalidate(struct inode *inode) +static inline void ceph_disable_fscache_readpage(struct ceph_inode_info *ci) { } diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index c17b5d76d75e..6f60d0a3d0f9 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -2393,6 +2393,9 @@ again: snap_rwsem_locked = true; } *got = need | (have & want); + if ((need & CEPH_CAP_FILE_RD) && + !(*got & CEPH_CAP_FILE_CACHE)) + ceph_disable_fscache_readpage(ci); __take_cap_refs(ci, *got, true); ret = 1; } @@ -2554,6 +2557,9 @@ int ceph_get_caps(struct ceph_inode_info *ci, int need, int want, break; } + if ((_got & CEPH_CAP_FILE_RD) && (_got & CEPH_CAP_FILE_CACHE)) + ceph_fscache_revalidate_cookie(ci); + *got = _got; return 0; } @@ -2795,7 +2801,6 @@ static void handle_cap_grant(struct ceph_mds_client *mdsc, bool writeback = false; bool queue_trunc = false; bool queue_invalidate = false; - bool queue_revalidate = false; bool deleted_inode = false; bool fill_inline = false; @@ -2837,8 +2842,6 @@ static void handle_cap_grant(struct ceph_mds_client *mdsc, ci->i_rdcache_revoking = ci->i_rdcache_gen; } } - - ceph_fscache_invalidate(inode); } /* side effects now are allowed */ @@ -2880,11 +2883,6 @@ static void handle_cap_grant(struct ceph_mds_client *mdsc, } } - /* Do we need to revalidate our fscache cookie. Don't bother on the - * first cache cap as we already validate at cookie creation time. */ - if ((issued & CEPH_CAP_FILE_CACHE) && ci->i_rdcache_gen > 1) - queue_revalidate = true; - if (newcaps & CEPH_CAP_ANY_RD) { /* ctime/mtime/atime? */ ceph_decode_timespec(&mtime, &grant->mtime); @@ -2993,11 +2991,8 @@ static void handle_cap_grant(struct ceph_mds_client *mdsc, if (fill_inline) ceph_fill_inline_data(inode, NULL, inline_data, inline_len); - if (queue_trunc) { + if (queue_trunc) ceph_queue_vmtruncate(inode); - ceph_queue_revalidate(inode); - } else if (queue_revalidate) - ceph_queue_revalidate(inode); if (writeback) /* @@ -3199,10 +3194,8 @@ static void handle_cap_trunc(struct inode *inode, truncate_seq, truncate_size, size); spin_unlock(&ci->i_ceph_lock); - if (queue_trunc) { + if (queue_trunc) ceph_queue_vmtruncate(inode); - ceph_fscache_invalidate(inode); - } } /* diff --git a/fs/ceph/export.c b/fs/ceph/export.c index 6e72c98162d5..1780218a48f0 100644 --- a/fs/ceph/export.c +++ b/fs/ceph/export.c @@ -95,10 +95,8 @@ static struct dentry *__fh_to_dentry(struct super_block *sb, u64 ino) } dentry = d_obtain_alias(inode); - if (IS_ERR(dentry)) { - iput(inode); + if (IS_ERR(dentry)) return dentry; - } err = ceph_init_dentry(dentry); if (err < 0) { dput(dentry); @@ -167,10 +165,8 @@ static struct dentry *__get_parent(struct super_block *sb, return ERR_PTR(-ENOENT); dentry = d_obtain_alias(inode); - if (IS_ERR(dentry)) { - iput(inode); + if (IS_ERR(dentry)) return dentry; - } err = ceph_init_dentry(dentry); if (err < 0) { dput(dentry); @@ -210,7 +206,7 @@ static struct dentry *ceph_fh_to_parent(struct super_block *sb, dout("fh_to_parent %llx\n", cfh->parent_ino); dentry = __get_parent(sb, NULL, cfh->ino); - if (IS_ERR(dentry) && PTR_ERR(dentry) == -ENOENT) + if (unlikely(dentry == ERR_PTR(-ENOENT))) dentry = __fh_to_dentry(sb, cfh->parent_ino); return dentry; } diff --git a/fs/ceph/file.c b/fs/ceph/file.c index a888df6f2d71..0daaf7ceedc5 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -137,23 +137,11 @@ static int ceph_init_file(struct inode *inode, struct file *file, int fmode) { struct ceph_file_info *cf; int ret = 0; - struct ceph_inode_info *ci = ceph_inode(inode); - struct ceph_fs_client *fsc = ceph_sb_to_client(inode->i_sb); - struct ceph_mds_client *mdsc = fsc->mdsc; switch (inode->i_mode & S_IFMT) { case S_IFREG: - /* First file open request creates the cookie, we want to keep - * this cookie around for the filetime of the inode as not to - * have to worry about fscache register / revoke / operation - * races. - * - * Also, if we know the operation is going to invalidate data - * (non readonly) just nuke the cache right away. - */ - ceph_fscache_register_inode_cookie(mdsc->fsc, ci); - if ((fmode & CEPH_FILE_MODE_WR)) - ceph_fscache_invalidate(inode); + ceph_fscache_register_inode_cookie(inode); + ceph_fscache_file_set_cookie(inode, file); case S_IFDIR: dout("init_file %p %p 0%o (regular)\n", inode, file, inode->i_mode); @@ -406,7 +394,7 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry, if ((flags & O_CREAT) && !req->r_reply_info.head->is_dentry) err = ceph_handle_notrace_create(dir, dentry); - if (d_unhashed(dentry)) { + if (d_in_lookup(dentry)) { dn = ceph_finish_lookup(req, dentry, err); if (IS_ERR(dn)) err = PTR_ERR(dn); @@ -1349,7 +1337,7 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from) } retry_snap: - if (ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_FULL)) { + if (ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL)) { err = -ENOSPC; goto out; } @@ -1407,7 +1395,6 @@ retry_snap: iov_iter_advance(from, written); ceph_put_snap_context(snapc); } else { - loff_t old_size = i_size_read(inode); /* * No need to acquire the i_truncate_mutex. Because * the MDS revokes Fwb caps before sending truncate @@ -1418,8 +1405,6 @@ retry_snap: written = generic_perform_write(file, from, pos); if (likely(written >= 0)) iocb->ki_pos = pos + written; - if (i_size_read(inode) > old_size) - ceph_fscache_update_objectsize(inode); inode_unlock(inode); } @@ -1440,7 +1425,7 @@ retry_snap: ceph_put_cap_refs(ci, got); if (written >= 0) { - if (ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_NEARFULL)) + if (ceph_osdmap_flag(osdc, CEPH_OSDMAP_NEARFULL)) iocb->ki_flags |= IOCB_DSYNC; written = generic_write_sync(iocb, written); @@ -1672,8 +1657,8 @@ static long ceph_fallocate(struct file *file, int mode, goto unlock; } - if (ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_FULL) && - !(mode & FALLOC_FL_PUNCH_HOLE)) { + if (ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL) && + !(mode & FALLOC_FL_PUNCH_HOLE)) { ret = -ENOSPC; goto unlock; } diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 0130a8592191..0168b49fb6ad 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -103,7 +103,6 @@ struct ceph_fs_client { #ifdef CONFIG_CEPH_FSCACHE struct fscache_cookie *fscache; - struct workqueue_struct *revalidate_wq; #endif }; @@ -360,8 +359,7 @@ struct ceph_inode_info { #ifdef CONFIG_CEPH_FSCACHE struct fscache_cookie *fscache; - u32 i_fscache_gen; /* sequence, for delayed fscache validate */ - struct work_struct i_revalidate_work; + u32 i_fscache_gen; #endif struct inode vfs_inode; /* at end */ }; diff --git a/fs/char_dev.c b/fs/char_dev.c index 687471dc04a0..6edd825231c5 100644 --- a/fs/char_dev.c +++ b/fs/char_dev.c @@ -92,7 +92,7 @@ __register_chrdev_region(unsigned int major, unsigned int baseminor, } if (i < CHRDEV_MAJOR_DYN_END) - pr_warn("CHRDEV \"%s\" major number %d goes below the dynamic allocation range", + pr_warn("CHRDEV \"%s\" major number %d goes below the dynamic allocation range\n", name, i); if (i == 0) { diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c index 5a53ac6b1e02..02b071bf3732 100644 --- a/fs/cifs/cifs_unicode.c +++ b/fs/cifs/cifs_unicode.c @@ -101,6 +101,12 @@ convert_sfm_char(const __u16 src_char, char *target) case SFM_SLASH: *target = '\\'; break; + case SFM_SPACE: + *target = ' '; + break; + case SFM_PERIOD: + *target = '.'; + break; default: return false; } @@ -404,7 +410,7 @@ static __le16 convert_to_sfu_char(char src_char) return dest_char; } -static __le16 convert_to_sfm_char(char src_char) +static __le16 convert_to_sfm_char(char src_char, bool end_of_string) { __le16 dest_char; @@ -427,6 +433,18 @@ static __le16 convert_to_sfm_char(char src_char) case '|': dest_char = cpu_to_le16(SFM_PIPE); break; + case '.': + if (end_of_string) + dest_char = cpu_to_le16(SFM_PERIOD); + else + dest_char = 0; + break; + case ' ': + if (end_of_string) + dest_char = cpu_to_le16(SFM_SPACE); + else + dest_char = 0; + break; default: dest_char = 0; } @@ -469,9 +487,16 @@ cifsConvertToUTF16(__le16 *target, const char *source, int srclen, /* see if we must remap this char */ if (map_chars == SFU_MAP_UNI_RSVD) dst_char = convert_to_sfu_char(src_char); - else if (map_chars == SFM_MAP_UNI_RSVD) - dst_char = convert_to_sfm_char(src_char); - else + else if (map_chars == SFM_MAP_UNI_RSVD) { + bool end_of_string; + + if (i == srclen - 1) + end_of_string = true; + else + end_of_string = false; + + dst_char = convert_to_sfm_char(src_char, end_of_string); + } else dst_char = 0; /* * FIXME: We can not handle remapping backslash (UNI_SLASH) diff --git a/fs/cifs/cifs_unicode.h b/fs/cifs/cifs_unicode.h index bdc52cb9a676..479bc0a941f3 100644 --- a/fs/cifs/cifs_unicode.h +++ b/fs/cifs/cifs_unicode.h @@ -64,6 +64,8 @@ #define SFM_LESSTHAN ((__u16) 0xF023) #define SFM_PIPE ((__u16) 0xF027) #define SFM_SLASH ((__u16) 0xF026) +#define SFM_PERIOD ((__u16) 0xF028) +#define SFM_SPACE ((__u16) 0xF029) /* * Mapping mechanism to use when one of the seven reserved characters is diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 5d8b7edf8a8f..5d841f39c4b7 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -87,6 +87,7 @@ extern mempool_t *cifs_req_poolp; extern mempool_t *cifs_mid_poolp; struct workqueue_struct *cifsiod_wq; +__u32 cifs_lock_secret; /* * Bumps refcount for cifs super block. @@ -1266,6 +1267,8 @@ init_cifs(void) spin_lock_init(&cifs_file_list_lock); spin_lock_init(&GlobalMid_Lock); + get_random_bytes(&cifs_lock_secret, sizeof(cifs_lock_secret)); + if (cifs_max_pending < 2) { cifs_max_pending = 2; cifs_dbg(FYI, "cifs_max_pending set to min of 2\n"); diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index bba106cdc43c..8f1d8c1e72be 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -1619,6 +1619,7 @@ void cifs_oplock_break(struct work_struct *work); extern const struct slow_work_ops cifs_oplock_break_ops; extern struct workqueue_struct *cifsiod_wq; +extern __u32 cifs_lock_secret; extern mempool_t *cifs_mid_poolp; diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 66736f57b5ab..7d2b15c06090 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -428,7 +428,9 @@ cifs_echo_request(struct work_struct *work) * server->ops->need_neg() == true. Also, no need to ping if * we got a response recently. */ - if (!server->ops->need_neg || server->ops->need_neg(server) || + + if (server->tcpStatus == CifsNeedReconnect || + server->tcpStatus == CifsExiting || server->tcpStatus == CifsNew || (server->ops->can_echo && !server->ops->can_echo(server)) || time_before(jiffies, server->lstrp + echo_interval - HZ)) goto requeue_echo; diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index c3eb998a99bd..fb0903fffc22 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -445,7 +445,7 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry, * Check for hashed negative dentry. We have already revalidated * the dentry and it is fine. No need to perform another lookup. */ - if (!d_unhashed(direntry)) + if (!d_in_lookup(direntry)) return -ENOENT; res = cifs_lookup(inode, direntry, 0); diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 9793ae0bcaa2..d4890b6dc22d 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -1112,6 +1112,12 @@ cifs_push_mandatory_locks(struct cifsFileInfo *cfile) return rc; } +static __u32 +hash_lockowner(fl_owner_t owner) +{ + return cifs_lock_secret ^ hash32_ptr((const void *)owner); +} + struct lock_to_push { struct list_head llist; __u64 offset; @@ -1178,7 +1184,7 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile) else type = CIFS_WRLCK; lck = list_entry(el, struct lock_to_push, llist); - lck->pid = flock->fl_pid; + lck->pid = hash_lockowner(flock->fl_owner); lck->netfid = cfile->fid.netfid; lck->length = length; lck->type = type; @@ -1305,7 +1311,8 @@ cifs_getlk(struct file *file, struct file_lock *flock, __u32 type, posix_lock_type = CIFS_RDLCK; else posix_lock_type = CIFS_WRLCK; - rc = CIFSSMBPosixLock(xid, tcon, netfid, current->tgid, + rc = CIFSSMBPosixLock(xid, tcon, netfid, + hash_lockowner(flock->fl_owner), flock->fl_start, length, flock, posix_lock_type, wait_flag); return rc; @@ -1505,7 +1512,8 @@ cifs_setlk(struct file *file, struct file_lock *flock, __u32 type, posix_lock_type = CIFS_UNLCK; rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid, - current->tgid, flock->fl_start, length, + hash_lockowner(flock->fl_owner), + flock->fl_start, length, NULL, posix_lock_type, wait_flag); goto out; } diff --git a/fs/cifs/ntlmssp.h b/fs/cifs/ntlmssp.h index 848249fa120f..3079b38f0afb 100644 --- a/fs/cifs/ntlmssp.h +++ b/fs/cifs/ntlmssp.h @@ -133,6 +133,6 @@ typedef struct _AUTHENTICATE_MESSAGE { int decode_ntlmssp_challenge(char *bcc_ptr, int blob_len, struct cifs_ses *ses); void build_ntlmssp_negotiate_blob(unsigned char *pbuffer, struct cifs_ses *ses); -int build_ntlmssp_auth_blob(unsigned char *pbuffer, u16 *buflen, +int build_ntlmssp_auth_blob(unsigned char **pbuffer, u16 *buflen, struct cifs_ses *ses, const struct nls_table *nls_cp); diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index af0ec2d5ad0e..538d9b55699a 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -364,19 +364,43 @@ void build_ntlmssp_negotiate_blob(unsigned char *pbuffer, sec_blob->DomainName.MaximumLength = 0; } -/* We do not malloc the blob, it is passed in pbuffer, because its - maximum possible size is fixed and small, making this approach cleaner. - This function returns the length of the data in the blob */ -int build_ntlmssp_auth_blob(unsigned char *pbuffer, +static int size_of_ntlmssp_blob(struct cifs_ses *ses) +{ + int sz = sizeof(AUTHENTICATE_MESSAGE) + ses->auth_key.len + - CIFS_SESS_KEY_SIZE + CIFS_CPHTXT_SIZE + 2; + + if (ses->domainName) + sz += 2 * strnlen(ses->domainName, CIFS_MAX_DOMAINNAME_LEN); + else + sz += 2; + + if (ses->user_name) + sz += 2 * strnlen(ses->user_name, CIFS_MAX_USERNAME_LEN); + else + sz += 2; + + return sz; +} + +int build_ntlmssp_auth_blob(unsigned char **pbuffer, u16 *buflen, struct cifs_ses *ses, const struct nls_table *nls_cp) { int rc; - AUTHENTICATE_MESSAGE *sec_blob = (AUTHENTICATE_MESSAGE *)pbuffer; + AUTHENTICATE_MESSAGE *sec_blob; __u32 flags; unsigned char *tmp; + rc = setup_ntlmv2_rsp(ses, nls_cp); + if (rc) { + cifs_dbg(VFS, "Error %d during NTLMSSP authentication\n", rc); + *buflen = 0; + goto setup_ntlmv2_ret; + } + *pbuffer = kmalloc(size_of_ntlmssp_blob(ses), GFP_KERNEL); + sec_blob = (AUTHENTICATE_MESSAGE *)*pbuffer; + memcpy(sec_blob->Signature, NTLMSSP_SIGNATURE, 8); sec_blob->MessageType = NtLmAuthenticate; @@ -391,7 +415,7 @@ int build_ntlmssp_auth_blob(unsigned char *pbuffer, flags |= NTLMSSP_NEGOTIATE_KEY_XCH; } - tmp = pbuffer + sizeof(AUTHENTICATE_MESSAGE); + tmp = *pbuffer + sizeof(AUTHENTICATE_MESSAGE); sec_blob->NegotiateFlags = cpu_to_le32(flags); sec_blob->LmChallengeResponse.BufferOffset = @@ -399,13 +423,9 @@ int build_ntlmssp_auth_blob(unsigned char *pbuffer, sec_blob->LmChallengeResponse.Length = 0; sec_blob->LmChallengeResponse.MaximumLength = 0; - sec_blob->NtChallengeResponse.BufferOffset = cpu_to_le32(tmp - pbuffer); + sec_blob->NtChallengeResponse.BufferOffset = + cpu_to_le32(tmp - *pbuffer); if (ses->user_name != NULL) { - rc = setup_ntlmv2_rsp(ses, nls_cp); - if (rc) { - cifs_dbg(VFS, "Error %d during NTLMSSP authentication\n", rc); - goto setup_ntlmv2_ret; - } memcpy(tmp, ses->auth_key.response + CIFS_SESS_KEY_SIZE, ses->auth_key.len - CIFS_SESS_KEY_SIZE); tmp += ses->auth_key.len - CIFS_SESS_KEY_SIZE; @@ -423,23 +443,23 @@ int build_ntlmssp_auth_blob(unsigned char *pbuffer, } if (ses->domainName == NULL) { - sec_blob->DomainName.BufferOffset = cpu_to_le32(tmp - pbuffer); + sec_blob->DomainName.BufferOffset = cpu_to_le32(tmp - *pbuffer); sec_blob->DomainName.Length = 0; sec_blob->DomainName.MaximumLength = 0; tmp += 2; } else { int len; len = cifs_strtoUTF16((__le16 *)tmp, ses->domainName, - CIFS_MAX_USERNAME_LEN, nls_cp); + CIFS_MAX_DOMAINNAME_LEN, nls_cp); len *= 2; /* unicode is 2 bytes each */ - sec_blob->DomainName.BufferOffset = cpu_to_le32(tmp - pbuffer); + sec_blob->DomainName.BufferOffset = cpu_to_le32(tmp - *pbuffer); sec_blob->DomainName.Length = cpu_to_le16(len); sec_blob->DomainName.MaximumLength = cpu_to_le16(len); tmp += len; } if (ses->user_name == NULL) { - sec_blob->UserName.BufferOffset = cpu_to_le32(tmp - pbuffer); + sec_blob->UserName.BufferOffset = cpu_to_le32(tmp - *pbuffer); sec_blob->UserName.Length = 0; sec_blob->UserName.MaximumLength = 0; tmp += 2; @@ -448,13 +468,13 @@ int build_ntlmssp_auth_blob(unsigned char *pbuffer, len = cifs_strtoUTF16((__le16 *)tmp, ses->user_name, CIFS_MAX_USERNAME_LEN, nls_cp); len *= 2; /* unicode is 2 bytes each */ - sec_blob->UserName.BufferOffset = cpu_to_le32(tmp - pbuffer); + sec_blob->UserName.BufferOffset = cpu_to_le32(tmp - *pbuffer); sec_blob->UserName.Length = cpu_to_le16(len); sec_blob->UserName.MaximumLength = cpu_to_le16(len); tmp += len; } - sec_blob->WorkstationName.BufferOffset = cpu_to_le32(tmp - pbuffer); + sec_blob->WorkstationName.BufferOffset = cpu_to_le32(tmp - *pbuffer); sec_blob->WorkstationName.Length = 0; sec_blob->WorkstationName.MaximumLength = 0; tmp += 2; @@ -463,19 +483,19 @@ int build_ntlmssp_auth_blob(unsigned char *pbuffer, (ses->ntlmssp->server_flags & NTLMSSP_NEGOTIATE_EXTENDED_SEC)) && !calc_seckey(ses)) { memcpy(tmp, ses->ntlmssp->ciphertext, CIFS_CPHTXT_SIZE); - sec_blob->SessionKey.BufferOffset = cpu_to_le32(tmp - pbuffer); + sec_blob->SessionKey.BufferOffset = cpu_to_le32(tmp - *pbuffer); sec_blob->SessionKey.Length = cpu_to_le16(CIFS_CPHTXT_SIZE); sec_blob->SessionKey.MaximumLength = cpu_to_le16(CIFS_CPHTXT_SIZE); tmp += CIFS_CPHTXT_SIZE; } else { - sec_blob->SessionKey.BufferOffset = cpu_to_le32(tmp - pbuffer); + sec_blob->SessionKey.BufferOffset = cpu_to_le32(tmp - *pbuffer); sec_blob->SessionKey.Length = 0; sec_blob->SessionKey.MaximumLength = 0; } + *buflen = tmp - *pbuffer; setup_ntlmv2_ret: - *buflen = tmp - pbuffer; return rc; } @@ -690,6 +710,8 @@ sess_auth_lanman(struct sess_data *sess_data) rc = calc_lanman_hash(ses->password, ses->server->cryptkey, ses->server->sec_mode & SECMODE_PW_ENCRYPT ? true : false, lnm_session_key); + if (rc) + goto out; memcpy(bcc_ptr, (char *)lnm_session_key, CIFS_AUTH_RESP_SIZE); bcc_ptr += CIFS_AUTH_RESP_SIZE; @@ -1266,7 +1288,7 @@ sess_auth_rawntlmssp_authenticate(struct sess_data *sess_data) struct cifs_ses *ses = sess_data->ses; __u16 bytes_remaining; char *bcc_ptr; - char *ntlmsspblob = NULL; + unsigned char *ntlmsspblob = NULL; u16 blob_len; cifs_dbg(FYI, "rawntlmssp session setup authenticate phase\n"); @@ -1279,19 +1301,7 @@ sess_auth_rawntlmssp_authenticate(struct sess_data *sess_data) /* Build security blob before we assemble the request */ pSMB = (SESSION_SETUP_ANDX *)sess_data->iov[0].iov_base; smb_buf = (struct smb_hdr *)pSMB; - /* - * 5 is an empirical value, large enough to hold - * authenticate message plus max 10 of av paris, - * domain, user, workstation names, flags, etc. - */ - ntlmsspblob = kzalloc(5*sizeof(struct _AUTHENTICATE_MESSAGE), - GFP_KERNEL); - if (!ntlmsspblob) { - rc = -ENOMEM; - goto out; - } - - rc = build_ntlmssp_auth_blob(ntlmsspblob, + rc = build_ntlmssp_auth_blob(&ntlmsspblob, &blob_len, ses, sess_data->nls_cp); if (rc) goto out_free_ntlmsspblob; diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 8f38e33d365b..29e06db5f187 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -588,7 +588,7 @@ SMB2_sess_setup(const unsigned int xid, struct cifs_ses *ses, u16 blob_length = 0; struct key *spnego_key = NULL; char *security_blob = NULL; - char *ntlmssp_blob = NULL; + unsigned char *ntlmssp_blob = NULL; bool use_spnego = false; /* else use raw ntlmssp */ cifs_dbg(FYI, "Session Setup\n"); @@ -713,13 +713,7 @@ ssetup_ntlmssp_authenticate: iov[1].iov_len = blob_length; } else if (phase == NtLmAuthenticate) { req->hdr.SessionId = ses->Suid; - ntlmssp_blob = kzalloc(sizeof(struct _NEGOTIATE_MESSAGE) + 500, - GFP_KERNEL); - if (ntlmssp_blob == NULL) { - rc = -ENOMEM; - goto ssetup_exit; - } - rc = build_ntlmssp_auth_blob(ntlmssp_blob, &blob_length, ses, + rc = build_ntlmssp_auth_blob(&ntlmssp_blob, &blob_length, ses, nls_cp); if (rc) { cifs_dbg(FYI, "build_ntlmssp_auth_blob failed %d\n", @@ -1818,6 +1812,33 @@ SMB2_echo(struct TCP_Server_Info *server) cifs_dbg(FYI, "In echo request\n"); + if (server->tcpStatus == CifsNeedNegotiate) { + struct list_head *tmp, *tmp2; + struct cifs_ses *ses; + struct cifs_tcon *tcon; + + cifs_dbg(FYI, "Need negotiate, reconnecting tcons\n"); + spin_lock(&cifs_tcp_ses_lock); + list_for_each(tmp, &server->smb_ses_list) { + ses = list_entry(tmp, struct cifs_ses, smb_ses_list); + list_for_each(tmp2, &ses->tcon_list) { + tcon = list_entry(tmp2, struct cifs_tcon, + tcon_list); + /* add check for persistent handle reconnect */ + if (tcon && tcon->need_reconnect) { + spin_unlock(&cifs_tcp_ses_lock); + rc = smb2_reconnect(SMB2_ECHO, tcon); + spin_lock(&cifs_tcp_ses_lock); + } + } + } + spin_unlock(&cifs_tcp_ses_lock); + } + + /* if no session, renegotiate failed above */ + if (server->tcpStatus == CifsNeedNegotiate) + return -EIO; + rc = small_smb2_init(SMB2_ECHO, NULL, (void **)&req); if (rc) return rc; diff --git a/fs/configfs/file.c b/fs/configfs/file.c index 33b7ee34eda5..bbc1252a59f5 100644 --- a/fs/configfs/file.c +++ b/fs/configfs/file.c @@ -357,8 +357,6 @@ configfs_write_bin_file(struct file *file, const char __user *buf, len = simple_write_to_buffer(buffer->bin_buffer, buffer->bin_buffer_size, ppos, buf, count); - if (len > 0) - *ppos += len; out: mutex_unlock(&buffer->mutex); return len; diff --git a/fs/coredump.c b/fs/coredump.c index 38a7ab87e10a..281b768000e6 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -794,6 +794,7 @@ int dump_emit(struct coredump_params *cprm, const void *addr, int nr) return 0; file->f_pos = pos; cprm->written += n; + cprm->pos += n; nr -= n; } return 1; @@ -808,6 +809,7 @@ int dump_skip(struct coredump_params *cprm, size_t nr) if (dump_interrupted() || file->f_op->llseek(file, nr, SEEK_CUR) < 0) return 0; + cprm->pos += nr; return 1; } else { while (nr > PAGE_SIZE) { @@ -822,7 +824,7 @@ EXPORT_SYMBOL(dump_skip); int dump_align(struct coredump_params *cprm, int align) { - unsigned mod = cprm->file->f_pos & (align - 1); + unsigned mod = cprm->pos & (align - 1); if (align & (align - 1)) return 0; return mod ? dump_skip(cprm, align - mod) : 1; diff --git a/fs/crypto/crypto.c b/fs/crypto/crypto.c index 2fc8c43ce531..c502c116924c 100644 --- a/fs/crypto/crypto.c +++ b/fs/crypto/crypto.c @@ -318,6 +318,7 @@ int fscrypt_zeroout_range(struct inode *inode, pgoff_t lblk, bio->bi_bdev = inode->i_sb->s_bdev; bio->bi_iter.bi_sector = pblk << (inode->i_sb->s_blocksize_bits - 9); + bio_set_op_attrs(bio, REQ_OP_WRITE, 0); ret = bio_add_page(bio, ciphertext_page, inode->i_sb->s_blocksize, 0); if (ret != inode->i_sb->s_blocksize) { @@ -327,7 +328,7 @@ int fscrypt_zeroout_range(struct inode *inode, pgoff_t lblk, err = -EIO; goto errout; } - err = submit_bio_wait(WRITE, bio); + err = submit_bio_wait(bio); if ((err == 0) && bio->bi_error) err = -EIO; bio_put(bio); @@ -208,7 +208,12 @@ static ssize_t dax_io(struct inode *inode, struct iov_iter *iter, dax.addr += first; size = map_len - first; } - max = min(pos + size, end); + /* + * pos + size is one past the last offset for IO, + * so pos + size can overflow loff_t at extreme offsets. + * Cast to u64 to catch this and get the true minimum. + */ + max = min_t(u64, pos + size, end); } if (iov_iter_rw(iter) == WRITE) { diff --git a/fs/dcache.c b/fs/dcache.c index ad4a542e9bab..d6847d7b123d 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -507,6 +507,44 @@ void d_drop(struct dentry *dentry) } EXPORT_SYMBOL(d_drop); +static inline void dentry_unlist(struct dentry *dentry, struct dentry *parent) +{ + struct dentry *next; + /* + * Inform d_walk() and shrink_dentry_list() that we are no longer + * attached to the dentry tree + */ + dentry->d_flags |= DCACHE_DENTRY_KILLED; + if (unlikely(list_empty(&dentry->d_child))) + return; + __list_del_entry(&dentry->d_child); + /* + * Cursors can move around the list of children. While we'd been + * a normal list member, it didn't matter - ->d_child.next would've + * been updated. However, from now on it won't be and for the + * things like d_walk() it might end up with a nasty surprise. + * Normally d_walk() doesn't care about cursors moving around - + * ->d_lock on parent prevents that and since a cursor has no children + * of its own, we get through it without ever unlocking the parent. + * There is one exception, though - if we ascend from a child that + * gets killed as soon as we unlock it, the next sibling is found + * using the value left in its ->d_child.next. And if _that_ + * pointed to a cursor, and cursor got moved (e.g. by lseek()) + * before d_walk() regains parent->d_lock, we'll end up skipping + * everything the cursor had been moved past. + * + * Solution: make sure that the pointer left behind in ->d_child.next + * points to something that won't be moving around. I.e. skip the + * cursors. + */ + while (dentry->d_child.next != &parent->d_subdirs) { + next = list_entry(dentry->d_child.next, struct dentry, d_child); + if (likely(!(next->d_flags & DCACHE_DENTRY_CURSOR))) + break; + dentry->d_child.next = next->d_child.next; + } +} + static void __dentry_kill(struct dentry *dentry) { struct dentry *parent = NULL; @@ -532,12 +570,7 @@ static void __dentry_kill(struct dentry *dentry) } /* if it was on the hash then remove it */ __d_drop(dentry); - __list_del_entry(&dentry->d_child); - /* - * Inform d_walk() that we are no longer attached to the - * dentry tree - */ - dentry->d_flags |= DCACHE_DENTRY_KILLED; + dentry_unlist(dentry, parent); if (parent) spin_unlock(&parent->d_lock); dentry_iput(dentry); @@ -1203,6 +1236,9 @@ resume: struct dentry *dentry = list_entry(tmp, struct dentry, d_child); next = tmp->next; + if (unlikely(dentry->d_flags & DCACHE_DENTRY_CURSOR)) + continue; + spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); ret = enter(data, dentry); @@ -1636,7 +1672,7 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name) struct dentry *dentry = __d_alloc(parent->d_sb, name); if (!dentry) return NULL; - + dentry->d_flags |= DCACHE_RCUACCESS; spin_lock(&parent->d_lock); /* * don't need child lock because it is not subject @@ -1651,6 +1687,16 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name) } EXPORT_SYMBOL(d_alloc); +struct dentry *d_alloc_cursor(struct dentry * parent) +{ + struct dentry *dentry = __d_alloc(parent->d_sb, NULL); + if (dentry) { + dentry->d_flags |= DCACHE_RCUACCESS | DCACHE_DENTRY_CURSOR; + dentry->d_parent = dget(parent); + } + return dentry; +} + /** * d_alloc_pseudo - allocate a dentry (for lookup-less filesystems) * @sb: the superblock @@ -2358,7 +2404,6 @@ static void __d_rehash(struct dentry * entry, struct hlist_bl_head *b) { BUG_ON(!d_unhashed(entry)); hlist_bl_lock(b); - entry->d_flags |= DCACHE_RCUACCESS; hlist_bl_add_head_rcu(&entry->d_hash, b); hlist_bl_unlock(b); } @@ -2458,7 +2503,6 @@ retry: rcu_read_unlock(); goto retry; } - rcu_read_unlock(); /* * No changes for the parent since the beginning of d_lookup(). * Since all removals from the chain happen with hlist_bl_lock(), @@ -2471,8 +2515,6 @@ retry: continue; if (dentry->d_parent != parent) continue; - if (d_unhashed(dentry)) - continue; if (parent->d_flags & DCACHE_OP_COMPARE) { int tlen = dentry->d_name.len; const char *tname = dentry->d_name.name; @@ -2484,9 +2526,18 @@ retry: if (dentry_cmp(dentry, str, len)) continue; } - dget(dentry); hlist_bl_unlock(b); - /* somebody is doing lookup for it right now; wait for it */ + /* now we can try to grab a reference */ + if (!lockref_get_not_dead(&dentry->d_lockref)) { + rcu_read_unlock(); + goto retry; + } + + rcu_read_unlock(); + /* + * somebody is likely to be still doing lookup for it; + * wait for them to finish + */ spin_lock(&dentry->d_lock); d_wait_lookup(dentry); /* @@ -2517,6 +2568,7 @@ retry: dput(new); return dentry; } + rcu_read_unlock(); /* we can't take ->d_lock here; it's OK, though. */ new->d_flags |= DCACHE_PAR_LOOKUP; new->d_wait = wq; @@ -2843,6 +2895,7 @@ static void __d_move(struct dentry *dentry, struct dentry *target, /* ... and switch them in the tree */ if (IS_ROOT(dentry)) { /* splicing a tree */ + dentry->d_flags |= DCACHE_RCUACCESS; dentry->d_parent = target->d_parent; target->d_parent = target; list_del_init(&target->d_child); diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c index 9c1c9a01b7e5..592059f88e04 100644 --- a/fs/debugfs/file.c +++ b/fs/debugfs/file.c @@ -127,7 +127,6 @@ static int open_proxy_open(struct inode *inode, struct file *filp) r = real_fops->open(inode, filp); out: - fops_put(real_fops); debugfs_use_file_finish(srcu_idx); return r; } @@ -262,8 +261,10 @@ static int full_proxy_open(struct inode *inode, struct file *filp) if (real_fops->open) { r = real_fops->open(inode, filp); - - if (filp->f_op != proxy_fops) { + if (r) { + replace_fops(filp, d_inode(dentry)->i_fop); + goto free_proxy; + } else if (filp->f_op != proxy_fops) { /* No protection against file removal anymore. */ WARN(1, "debugfs file owner replaced proxy fops: %pd", dentry); diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index 0b2954d7172d..37c134a132c7 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c @@ -95,8 +95,6 @@ static struct ctl_table pty_root_table[] = { static DEFINE_MUTEX(allocated_ptys_lock); -static struct vfsmount *devpts_mnt; - struct pts_mount_opts { int setuid; int setgid; @@ -104,7 +102,7 @@ struct pts_mount_opts { kgid_t gid; umode_t mode; umode_t ptmxmode; - int newinstance; + int reserve; int max; }; @@ -117,11 +115,9 @@ static const match_table_t tokens = { {Opt_uid, "uid=%u"}, {Opt_gid, "gid=%u"}, {Opt_mode, "mode=%o"}, -#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES {Opt_ptmxmode, "ptmxmode=%o"}, {Opt_newinstance, "newinstance"}, {Opt_max, "max=%d"}, -#endif {Opt_err, NULL} }; @@ -137,15 +133,48 @@ static inline struct pts_fs_info *DEVPTS_SB(struct super_block *sb) return sb->s_fs_info; } -static inline struct super_block *pts_sb_from_inode(struct inode *inode) +struct pts_fs_info *devpts_acquire(struct file *filp) { -#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES - if (inode->i_sb->s_magic == DEVPTS_SUPER_MAGIC) - return inode->i_sb; -#endif - if (!devpts_mnt) - return NULL; - return devpts_mnt->mnt_sb; + struct pts_fs_info *result; + struct path path; + struct super_block *sb; + int err; + + path = filp->f_path; + path_get(&path); + + /* Has the devpts filesystem already been found? */ + sb = path.mnt->mnt_sb; + if (sb->s_magic != DEVPTS_SUPER_MAGIC) { + /* Is a devpts filesystem at "pts" in the same directory? */ + err = path_pts(&path); + if (err) { + result = ERR_PTR(err); + goto out; + } + + /* Is the path the root of a devpts filesystem? */ + result = ERR_PTR(-ENODEV); + sb = path.mnt->mnt_sb; + if ((sb->s_magic != DEVPTS_SUPER_MAGIC) || + (path.mnt->mnt_root != sb->s_root)) + goto out; + } + + /* + * pty code needs to hold extra references in case of last /dev/tty close + */ + atomic_inc(&sb->s_active); + result = DEVPTS_SB(sb); + +out: + path_put(&path); + return result; +} + +void devpts_release(struct pts_fs_info *fsi) +{ + deactivate_super(fsi->sb); } #define PARSE_MOUNT 0 @@ -154,9 +183,7 @@ static inline struct super_block *pts_sb_from_inode(struct inode *inode) /* * parse_mount_options(): * Set @opts to mount options specified in @data. If an option is not - * specified in @data, set it to its default value. The exception is - * 'newinstance' option which can only be set/cleared on a mount (i.e. - * cannot be changed during remount). + * specified in @data, set it to its default value. * * Note: @data may be NULL (in which case all options are set to default). */ @@ -174,9 +201,12 @@ static int parse_mount_options(char *data, int op, struct pts_mount_opts *opts) opts->ptmxmode = DEVPTS_DEFAULT_PTMX_MODE; opts->max = NR_UNIX98_PTY_MAX; - /* newinstance makes sense only on initial mount */ + /* Only allow instances mounted from the initial mount + * namespace to tap the reserve pool of ptys. + */ if (op == PARSE_MOUNT) - opts->newinstance = 0; + opts->reserve = + (current->nsproxy->mnt_ns == init_task.nsproxy->mnt_ns); while ((p = strsep(&data, ",")) != NULL) { substring_t args[MAX_OPT_ARGS]; @@ -211,16 +241,12 @@ static int parse_mount_options(char *data, int op, struct pts_mount_opts *opts) return -EINVAL; opts->mode = option & S_IALLUGO; break; -#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES case Opt_ptmxmode: if (match_octal(&args[0], &option)) return -EINVAL; opts->ptmxmode = option & S_IALLUGO; break; case Opt_newinstance: - /* newinstance makes sense only on initial mount */ - if (op == PARSE_MOUNT) - opts->newinstance = 1; break; case Opt_max: if (match_int(&args[0], &option) || @@ -228,7 +254,6 @@ static int parse_mount_options(char *data, int op, struct pts_mount_opts *opts) return -EINVAL; opts->max = option; break; -#endif default: pr_err("called with bogus options\n"); return -EINVAL; @@ -238,7 +263,6 @@ static int parse_mount_options(char *data, int op, struct pts_mount_opts *opts) return 0; } -#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES static int mknod_ptmx(struct super_block *sb) { int mode; @@ -305,12 +329,6 @@ static void update_ptmx_mode(struct pts_fs_info *fsi) inode->i_mode = S_IFCHR|fsi->mount_opts.ptmxmode; } } -#else -static inline void update_ptmx_mode(struct pts_fs_info *fsi) -{ - return; -} -#endif static int devpts_remount(struct super_block *sb, int *flags, char *data) { @@ -344,11 +362,9 @@ static int devpts_show_options(struct seq_file *seq, struct dentry *root) seq_printf(seq, ",gid=%u", from_kgid_munged(&init_user_ns, opts->gid)); seq_printf(seq, ",mode=%03o", opts->mode); -#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES seq_printf(seq, ",ptmxmode=%03o", opts->ptmxmode); if (opts->max < NR_UNIX98_PTY_MAX) seq_printf(seq, ",max=%d", opts->max); -#endif return 0; } @@ -410,40 +426,11 @@ fail: return -ENOMEM; } -#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES -static int compare_init_pts_sb(struct super_block *s, void *p) -{ - if (devpts_mnt) - return devpts_mnt->mnt_sb == s; - return 0; -} - /* * devpts_mount() * - * If the '-o newinstance' mount option was specified, mount a new - * (private) instance of devpts. PTYs created in this instance are - * independent of the PTYs in other devpts instances. - * - * If the '-o newinstance' option was not specified, mount/remount the - * initial kernel mount of devpts. This type of mount gives the - * legacy, single-instance semantics. - * - * The 'newinstance' option is needed to support multiple namespace - * semantics in devpts while preserving backward compatibility of the - * current 'single-namespace' semantics. i.e all mounts of devpts - * without the 'newinstance' mount option should bind to the initial - * kernel mount, like mount_single(). - * - * Mounts with 'newinstance' option create a new, private namespace. - * - * NOTE: - * - * For single-mount semantics, devpts cannot use mount_single(), - * because mount_single()/sget() find and use the super-block from - * the most recent mount of devpts. But that recent mount may be a - * 'newinstance' mount and mount_single() would pick the newinstance - * super-block instead of the initial super-block. + * Mount a new (private) instance of devpts. PTYs created in this + * instance are independent of the PTYs in other devpts instances. */ static struct dentry *devpts_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) @@ -456,18 +443,7 @@ static struct dentry *devpts_mount(struct file_system_type *fs_type, if (error) return ERR_PTR(error); - /* Require newinstance for all user namespace mounts to ensure - * the mount options are not changed. - */ - if ((current_user_ns() != &init_user_ns) && !opts.newinstance) - return ERR_PTR(-EINVAL); - - if (opts.newinstance) - s = sget(fs_type, NULL, set_anon_super, flags, NULL); - else - s = sget(fs_type, compare_init_pts_sb, set_anon_super, flags, - NULL); - + s = sget(fs_type, NULL, set_anon_super, flags, NULL); if (IS_ERR(s)) return ERR_CAST(s); @@ -491,18 +467,6 @@ out_undo_sget: return ERR_PTR(error); } -#else -/* - * This supports only the legacy single-instance semantics (no - * multiple-instance semantics) - */ -static struct dentry *devpts_mount(struct file_system_type *fs_type, int flags, - const char *dev_name, void *data) -{ - return mount_single(fs_type, flags, data, devpts_fill_super); -} -#endif - static void devpts_kill_sb(struct super_block *sb) { struct pts_fs_info *fsi = DEVPTS_SB(sb); @@ -516,9 +480,7 @@ static struct file_system_type devpts_fs_type = { .name = "devpts", .mount = devpts_mount, .kill_sb = devpts_kill_sb, -#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES .fs_flags = FS_USERNS_MOUNT | FS_USERNS_DEV_MOUNT, -#endif }; /* @@ -531,16 +493,13 @@ int devpts_new_index(struct pts_fs_info *fsi) int index; int ida_ret; - if (!fsi) - return -ENODEV; - retry: if (!ida_pre_get(&fsi->allocated_ptys, GFP_KERNEL)) return -ENOMEM; mutex_lock(&allocated_ptys_lock); - if (pty_count >= pty_limit - - (fsi->mount_opts.newinstance ? pty_reserve : 0)) { + if (pty_count >= (pty_limit - + (fsi->mount_opts.reserve ? 0 : pty_reserve))) { mutex_unlock(&allocated_ptys_lock); return -ENOSPC; } @@ -571,30 +530,6 @@ void devpts_kill_index(struct pts_fs_info *fsi, int idx) mutex_unlock(&allocated_ptys_lock); } -/* - * pty code needs to hold extra references in case of last /dev/tty close - */ -struct pts_fs_info *devpts_get_ref(struct inode *ptmx_inode, struct file *file) -{ - struct super_block *sb; - struct pts_fs_info *fsi; - - sb = pts_sb_from_inode(ptmx_inode); - if (!sb) - return NULL; - fsi = DEVPTS_SB(sb); - if (!fsi) - return NULL; - - atomic_inc(&sb->s_active); - return fsi; -} - -void devpts_put_ref(struct pts_fs_info *fsi) -{ - deactivate_super(fsi->sb); -} - /** * devpts_pty_new -- create a new inode in /dev/pts/ * @ptmx_inode: inode of the master @@ -607,16 +542,12 @@ void devpts_put_ref(struct pts_fs_info *fsi) struct dentry *devpts_pty_new(struct pts_fs_info *fsi, int index, void *priv) { struct dentry *dentry; - struct super_block *sb; + struct super_block *sb = fsi->sb; struct inode *inode; struct dentry *root; struct pts_mount_opts *opts; char s[12]; - if (!fsi) - return ERR_PTR(-ENODEV); - - sb = fsi->sb; root = sb->s_root; opts = &fsi->mount_opts; @@ -676,20 +607,8 @@ void devpts_pty_kill(struct dentry *dentry) static int __init init_devpts_fs(void) { int err = register_filesystem(&devpts_fs_type); - struct ctl_table_header *table; - if (!err) { - struct vfsmount *mnt; - - table = register_sysctl_table(pty_root_table); - mnt = kern_mount(&devpts_fs_type); - if (IS_ERR(mnt)) { - err = PTR_ERR(mnt); - unregister_filesystem(&devpts_fs_type); - unregister_sysctl_table(table); - } else { - devpts_mnt = mnt; - } + register_sysctl_table(pty_root_table); } return err; } diff --git a/fs/direct-io.c b/fs/direct-io.c index f3b4408be590..7c3ce73cb617 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -108,7 +108,8 @@ struct dio_submit { /* dio_state communicated between submission path and end_io */ struct dio { int flags; /* doesn't change */ - int rw; + int op; + int op_flags; blk_qc_t bio_cookie; struct block_device *bio_bdev; struct inode *inode; @@ -163,7 +164,7 @@ static inline int dio_refill_pages(struct dio *dio, struct dio_submit *sdio) ret = iov_iter_get_pages(sdio->iter, dio->pages, LONG_MAX, DIO_PAGES, &sdio->from); - if (ret < 0 && sdio->blocks_available && (dio->rw & WRITE)) { + if (ret < 0 && sdio->blocks_available && (dio->op == REQ_OP_WRITE)) { struct page *page = ZERO_PAGE(0); /* * A memory fault, but the filesystem has some outstanding @@ -242,7 +243,8 @@ static ssize_t dio_complete(struct dio *dio, ssize_t ret, bool is_async) transferred = dio->result; /* Check for short read case */ - if ((dio->rw == READ) && ((offset + transferred) > dio->i_size)) + if ((dio->op == REQ_OP_READ) && + ((offset + transferred) > dio->i_size)) transferred = dio->i_size - offset; } @@ -273,7 +275,7 @@ static ssize_t dio_complete(struct dio *dio, ssize_t ret, bool is_async) */ dio->iocb->ki_pos += transferred; - if (dio->rw & WRITE) + if (dio->op == REQ_OP_WRITE) ret = generic_write_sync(dio->iocb, transferred); dio->iocb->ki_complete(dio->iocb, ret, 0); } @@ -375,6 +377,7 @@ dio_bio_alloc(struct dio *dio, struct dio_submit *sdio, bio->bi_bdev = bdev; bio->bi_iter.bi_sector = first_sector; + bio_set_op_attrs(bio, dio->op, dio->op_flags); if (dio->is_async) bio->bi_end_io = dio_bio_end_aio; else @@ -402,17 +405,16 @@ static inline void dio_bio_submit(struct dio *dio, struct dio_submit *sdio) dio->refcount++; spin_unlock_irqrestore(&dio->bio_lock, flags); - if (dio->is_async && dio->rw == READ && dio->should_dirty) + if (dio->is_async && dio->op == REQ_OP_READ && dio->should_dirty) bio_set_pages_dirty(bio); dio->bio_bdev = bio->bi_bdev; if (sdio->submit_io) { - sdio->submit_io(dio->rw, bio, dio->inode, - sdio->logical_offset_in_bio); + sdio->submit_io(bio, dio->inode, sdio->logical_offset_in_bio); dio->bio_cookie = BLK_QC_T_NONE; } else - dio->bio_cookie = submit_bio(dio->rw, bio); + dio->bio_cookie = submit_bio(bio); sdio->bio = NULL; sdio->boundary = 0; @@ -478,14 +480,14 @@ static int dio_bio_complete(struct dio *dio, struct bio *bio) if (bio->bi_error) dio->io_error = -EIO; - if (dio->is_async && dio->rw == READ && dio->should_dirty) { + if (dio->is_async && dio->op == REQ_OP_READ && dio->should_dirty) { err = bio->bi_error; bio_check_pages_dirty(bio); /* transfers ownership */ } else { bio_for_each_segment_all(bvec, bio, i) { struct page *page = bvec->bv_page; - if (dio->rw == READ && !PageCompound(page) && + if (dio->op == REQ_OP_READ && !PageCompound(page) && dio->should_dirty) set_page_dirty_lock(page); put_page(page); @@ -638,7 +640,7 @@ static int get_more_blocks(struct dio *dio, struct dio_submit *sdio, * which may decide to handle it or also return an unmapped * buffer head. */ - create = dio->rw & WRITE; + create = dio->op == REQ_OP_WRITE; if (dio->flags & DIO_SKIP_HOLES) { if (fs_startblk <= ((i_size_read(dio->inode) - 1) >> i_blkbits)) @@ -788,7 +790,7 @@ submit_page_section(struct dio *dio, struct dio_submit *sdio, struct page *page, { int ret = 0; - if (dio->rw & WRITE) { + if (dio->op == REQ_OP_WRITE) { /* * Read accounting is performed in submit_bio() */ @@ -988,7 +990,7 @@ do_holes: loff_t i_size_aligned; /* AKPM: eargh, -ENOTBLK is a hack */ - if (dio->rw & WRITE) { + if (dio->op == REQ_OP_WRITE) { put_page(page); return -ENOTBLK; } @@ -1202,7 +1204,12 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode, dio->is_async = true; dio->inode = inode; - dio->rw = iov_iter_rw(iter) == WRITE ? WRITE_ODIRECT : READ; + if (iov_iter_rw(iter) == WRITE) { + dio->op = REQ_OP_WRITE; + dio->op_flags = WRITE_ODIRECT; + } else { + dio->op = REQ_OP_READ; + } /* * For AIO O_(D)SYNC writes we need to defer completions to a workqueue diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c index 0d8eb3455b34..e5e29f8c920b 100644 --- a/fs/ecryptfs/crypto.c +++ b/fs/ecryptfs/crypto.c @@ -45,7 +45,7 @@ * ecryptfs_to_hex * @dst: Buffer to take hex character representation of contents of * src; must be at least of size (src_size * 2) - * @src: Buffer to be converted to a hex string respresentation + * @src: Buffer to be converted to a hex string representation * @src_size: number of bytes to convert */ void ecryptfs_to_hex(char *dst, char *src, size_t src_size) @@ -60,7 +60,7 @@ void ecryptfs_to_hex(char *dst, char *src, size_t src_size) * ecryptfs_from_hex * @dst: Buffer to take the bytes from src hex; must be at least of * size (src_size / 2) - * @src: Buffer to be converted from a hex string respresentation to raw value + * @src: Buffer to be converted from a hex string representation to raw value * @dst_size: size of dst buffer, or number of hex characters pairs to convert */ void ecryptfs_from_hex(char *dst, char *src, int dst_size) @@ -953,7 +953,7 @@ struct ecryptfs_cipher_code_str_map_elem { }; /* Add support for additional ciphers by adding elements here. The - * cipher_code is whatever OpenPGP applicatoins use to identify the + * cipher_code is whatever OpenPGP applications use to identify the * ciphers. List in order of probability. */ static struct ecryptfs_cipher_code_str_map_elem ecryptfs_cipher_code_str_map[] = { @@ -1410,7 +1410,7 @@ int ecryptfs_read_and_validate_xattr_region(struct dentry *dentry, * * Common entry point for reading file metadata. From here, we could * retrieve the header information from the header region of the file, - * the xattr region of the file, or some other repostory that is + * the xattr region of the file, or some other repository that is * stored separately from the file itself. The current implementation * supports retrieving the metadata information from the file contents * and from the xattr region. diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c index 7000b96b783e..ca4e83750214 100644 --- a/fs/ecryptfs/file.c +++ b/fs/ecryptfs/file.c @@ -169,9 +169,22 @@ out: return rc; } +static int ecryptfs_mmap(struct file *file, struct vm_area_struct *vma) +{ + struct file *lower_file = ecryptfs_file_to_lower(file); + /* + * Don't allow mmap on top of file systems that don't support it + * natively. If FILESYSTEM_MAX_STACK_DEPTH > 2 or ecryptfs + * allows recursive mounting, this will need to be extended. + */ + if (!lower_file->f_op->mmap) + return -ENODEV; + return generic_file_mmap(file, vma); +} + /** * ecryptfs_open - * @inode: inode speciying file to open + * @inode: inode specifying file to open * @file: Structure to return filled in * * Opens the file specified by inode. @@ -240,7 +253,7 @@ out: /** * ecryptfs_dir_open - * @inode: inode speciying file to open + * @inode: inode specifying file to open * @file: Structure to return filled in * * Opens the file specified by inode. @@ -403,7 +416,7 @@ const struct file_operations ecryptfs_main_fops = { #ifdef CONFIG_COMPAT .compat_ioctl = ecryptfs_compat_ioctl, #endif - .mmap = generic_file_mmap, + .mmap = ecryptfs_mmap, .open = ecryptfs_open, .flush = ecryptfs_flush, .release = ecryptfs_release, diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index 1698132d0e57..612004495141 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ -738,8 +738,7 @@ static void ecryptfs_free_kmem_caches(void) struct ecryptfs_cache_info *info; info = &ecryptfs_cache_infos[i]; - if (*(info->cache)) - kmem_cache_destroy(*(info->cache)); + kmem_cache_destroy(*(info->cache)); } } diff --git a/fs/exofs/ore.c b/fs/exofs/ore.c index 7bd8ac8dfb28..8bb72807e70d 100644 --- a/fs/exofs/ore.c +++ b/fs/exofs/ore.c @@ -878,7 +878,7 @@ static int _write_mirror(struct ore_io_state *ios, int cur_comp) } else { bio = master_dev->bio; /* FIXME: bio_set_dir() */ - bio->bi_rw |= REQ_WRITE; + bio_set_op_attrs(bio, REQ_OP_WRITE, 0); } osd_req_write(or, _ios_obj(ios, cur_comp), diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 799a92bdf577..e04ec868e37e 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -473,7 +473,7 @@ ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group) trace_ext4_read_block_bitmap_load(sb, block_group); bh->b_end_io = ext4_end_bitmap_read; get_bh(bh); - submit_bh(READ | REQ_META | REQ_PRIO, bh); + submit_bh(REQ_OP_READ, REQ_META | REQ_PRIO, bh); return bh; verify: err = ext4_validate_block_bitmap(sb, desc, block_group, bh); diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 35f351895b89..9e66cd1d7b78 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -214,7 +214,7 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) trace_ext4_load_inode_bitmap(sb, block_group); bh->b_end_io = ext4_end_bitmap_read; get_bh(bh); - submit_bh(READ | REQ_META | REQ_PRIO, bh); + submit_bh(REQ_OP_READ, REQ_META | REQ_PRIO, bh); wait_on_buffer(bh); if (!buffer_uptodate(bh)) { put_bh(bh); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 5a6277d80f7c..3131747199e1 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -987,7 +987,7 @@ struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode, return bh; if (!bh || buffer_uptodate(bh)) return bh; - ll_rw_block(READ | REQ_META | REQ_PRIO, 1, &bh); + ll_rw_block(REQ_OP_READ, REQ_META | REQ_PRIO, 1, &bh); wait_on_buffer(bh); if (buffer_uptodate(bh)) return bh; @@ -1141,7 +1141,7 @@ static int ext4_block_write_begin(struct page *page, loff_t pos, unsigned len, if (!buffer_uptodate(bh) && !buffer_delay(bh) && !buffer_unwritten(bh) && (block_start < from || block_end > to)) { - ll_rw_block(READ, 1, &bh); + ll_rw_block(REQ_OP_READ, 0, 1, &bh); *wait_bh++ = bh; decrypt = ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode); @@ -3727,7 +3727,7 @@ static int __ext4_block_zero_page_range(handle_t *handle, if (!buffer_uptodate(bh)) { err = -EIO; - ll_rw_block(READ, 1, &bh); + ll_rw_block(REQ_OP_READ, 0, 1, &bh); wait_on_buffer(bh); /* Uhhuh. Read error. Complain and punt. */ if (!buffer_uptodate(bh)) @@ -4310,7 +4310,7 @@ make_io: trace_ext4_load_inode(inode); get_bh(bh); bh->b_end_io = end_buffer_read_sync; - submit_bh(READ | REQ_META | REQ_PRIO, bh); + submit_bh(REQ_OP_READ, REQ_META | REQ_PRIO, bh); wait_on_buffer(bh); if (!buffer_uptodate(bh)) { EXT4_ERROR_INODE_BLOCK(inode, block, diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c index 23d436d6f8b8..d89754ef1aab 100644 --- a/fs/ext4/mmp.c +++ b/fs/ext4/mmp.c @@ -52,7 +52,7 @@ static int write_mmp_block(struct super_block *sb, struct buffer_head *bh) lock_buffer(bh); bh->b_end_io = end_buffer_write_sync; get_bh(bh); - submit_bh(WRITE_SYNC | REQ_META | REQ_PRIO, bh); + submit_bh(REQ_OP_WRITE, WRITE_SYNC | REQ_META | REQ_PRIO, bh); wait_on_buffer(bh); sb_end_write(sb); if (unlikely(!buffer_uptodate(bh))) @@ -88,7 +88,7 @@ static int read_mmp_block(struct super_block *sb, struct buffer_head **bh, get_bh(*bh); lock_buffer(*bh); (*bh)->b_end_io = end_buffer_read_sync; - submit_bh(READ_SYNC | REQ_META | REQ_PRIO, *bh); + submit_bh(REQ_OP_READ, READ_SYNC | REQ_META | REQ_PRIO, *bh); wait_on_buffer(*bh); if (!buffer_uptodate(*bh)) { ret = -EIO; diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 4637c439ca54..34c0142caf6a 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1447,7 +1447,8 @@ restart: } bh_use[ra_max] = bh; if (bh) - ll_rw_block(READ | REQ_META | REQ_PRIO, + ll_rw_block(REQ_OP_READ, + REQ_META | REQ_PRIO, 1, &bh); } } diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index 5ad05af51dd8..a6132a730967 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -339,9 +339,10 @@ void ext4_io_submit(struct ext4_io_submit *io) struct bio *bio = io->io_bio; if (bio) { - int io_op = io->io_wbc->sync_mode == WB_SYNC_ALL ? - WRITE_SYNC : WRITE; - submit_bio(io_op, io->io_bio); + int io_op_flags = io->io_wbc->sync_mode == WB_SYNC_ALL ? + WRITE_SYNC : 0; + bio_set_op_attrs(io->io_bio, REQ_OP_WRITE, io_op_flags); + submit_bio(io->io_bio); } io->io_bio = NULL; } diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c index 18b2cf23d40f..bfc7f4d30643 100644 --- a/fs/ext4/readpage.c +++ b/fs/ext4/readpage.c @@ -235,7 +235,7 @@ int ext4_mpage_readpages(struct address_space *mapping, */ if (bio && (last_block_in_bio != blocks[0] - 1)) { submit_and_realloc: - submit_bio(READ, bio); + submit_bio(bio); bio = NULL; } if (bio == NULL) { @@ -258,6 +258,7 @@ int ext4_mpage_readpages(struct address_space *mapping, bio->bi_iter.bi_sector = blocks[0] << (blkbits - 9); bio->bi_end_io = mpage_end_io; bio->bi_private = ctx; + bio_set_op_attrs(bio, REQ_OP_READ, 0); } length = first_hole << blkbits; @@ -267,14 +268,14 @@ int ext4_mpage_readpages(struct address_space *mapping, if (((map.m_flags & EXT4_MAP_BOUNDARY) && (relative_block == map.m_len)) || (first_hole != blocks_per_page)) { - submit_bio(READ, bio); + submit_bio(bio); bio = NULL; } else last_block_in_bio = blocks[blocks_per_page - 1]; goto next_page; confused: if (bio) { - submit_bio(READ, bio); + submit_bio(bio); bio = NULL; } if (!PageUptodate(page)) @@ -287,6 +288,6 @@ int ext4_mpage_readpages(struct address_space *mapping, } BUG_ON(pages && !list_empty(pages)); if (bio) - submit_bio(READ, bio); + submit_bio(bio); return 0; } diff --git a/fs/ext4/super.c b/fs/ext4/super.c index c13a4e464738..1c593aa0218e 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -4307,7 +4307,7 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb, goto out_bdev; } journal->j_private = sb; - ll_rw_block(READ | REQ_META | REQ_PRIO, 1, &journal->j_sb_buffer); + ll_rw_block(REQ_OP_READ, REQ_META | REQ_PRIO, 1, &journal->j_sb_buffer); wait_on_buffer(journal->j_sb_buffer); if (!buffer_uptodate(journal->j_sb_buffer)) { ext4_msg(sb, KERN_ERR, "I/O error on journal device"); diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 389160049993..124b4a3017b5 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -63,14 +63,15 @@ static struct page *__get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index, struct f2fs_io_info fio = { .sbi = sbi, .type = META, - .rw = READ_SYNC | REQ_META | REQ_PRIO, + .op = REQ_OP_READ, + .op_flags = READ_SYNC | REQ_META | REQ_PRIO, .old_blkaddr = index, .new_blkaddr = index, .encrypted_page = NULL, }; if (unlikely(!is_meta)) - fio.rw &= ~REQ_META; + fio.op_flags &= ~REQ_META; repeat: page = f2fs_grab_cache_page(mapping, index, false); if (!page) { @@ -157,13 +158,14 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, struct f2fs_io_info fio = { .sbi = sbi, .type = META, - .rw = sync ? (READ_SYNC | REQ_META | REQ_PRIO) : READA, + .op = REQ_OP_READ, + .op_flags = sync ? (READ_SYNC | REQ_META | REQ_PRIO) : REQ_RAHEAD, .encrypted_page = NULL, }; struct blk_plug plug; if (unlikely(type == META_POR)) - fio.rw &= ~REQ_META; + fio.op_flags &= ~REQ_META; blk_start_plug(&plug); for (; nrpages-- > 0; blkno++) { diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 9a8bbc1fb1fa..8769e8349dff 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -97,12 +97,11 @@ static struct bio *__bio_alloc(struct f2fs_sb_info *sbi, block_t blk_addr, return bio; } -static inline void __submit_bio(struct f2fs_sb_info *sbi, int rw, - struct bio *bio) +static inline void __submit_bio(struct f2fs_sb_info *sbi, struct bio *bio) { - if (!is_read_io(rw)) + if (!is_read_io(bio_op(bio))) atomic_inc(&sbi->nr_wb_bios); - submit_bio(rw, bio); + submit_bio(bio); } static void __submit_merged_bio(struct f2fs_bio_info *io) @@ -112,12 +111,14 @@ static void __submit_merged_bio(struct f2fs_bio_info *io) if (!io->bio) return; - if (is_read_io(fio->rw)) + if (is_read_io(fio->op)) trace_f2fs_submit_read_bio(io->sbi->sb, fio, io->bio); else trace_f2fs_submit_write_bio(io->sbi->sb, fio, io->bio); - __submit_bio(io->sbi, fio->rw, io->bio); + bio_set_op_attrs(io->bio, fio->op, fio->op_flags); + + __submit_bio(io->sbi, io->bio); io->bio = NULL; } @@ -183,10 +184,12 @@ static void __f2fs_submit_merged_bio(struct f2fs_sb_info *sbi, /* change META to META_FLUSH in the checkpoint procedure */ if (type >= META_FLUSH) { io->fio.type = META_FLUSH; + io->fio.op = REQ_OP_WRITE; if (test_opt(sbi, NOBARRIER)) - io->fio.rw = WRITE_FLUSH | REQ_META | REQ_PRIO; + io->fio.op_flags = WRITE_FLUSH | REQ_META | REQ_PRIO; else - io->fio.rw = WRITE_FLUSH_FUA | REQ_META | REQ_PRIO; + io->fio.op_flags = WRITE_FLUSH_FUA | REQ_META | + REQ_PRIO; } __submit_merged_bio(io); out: @@ -228,14 +231,16 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio) f2fs_trace_ios(fio, 0); /* Allocate a new bio */ - bio = __bio_alloc(fio->sbi, fio->new_blkaddr, 1, is_read_io(fio->rw)); + bio = __bio_alloc(fio->sbi, fio->new_blkaddr, 1, is_read_io(fio->op)); if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) { bio_put(bio); return -EFAULT; } + bio->bi_rw = fio->op_flags; + bio_set_op_attrs(bio, fio->op, fio->op_flags); - __submit_bio(fio->sbi, fio->rw, bio); + __submit_bio(fio->sbi, bio); return 0; } @@ -244,7 +249,7 @@ void f2fs_submit_page_mbio(struct f2fs_io_info *fio) struct f2fs_sb_info *sbi = fio->sbi; enum page_type btype = PAGE_TYPE_OF_BIO(fio->type); struct f2fs_bio_info *io; - bool is_read = is_read_io(fio->rw); + bool is_read = is_read_io(fio->op); struct page *bio_page; io = is_read ? &sbi->read_io : &sbi->write_io[btype]; @@ -256,7 +261,7 @@ void f2fs_submit_page_mbio(struct f2fs_io_info *fio) down_write(&io->io_rwsem); if (io->bio && (io->last_block_in_bio != fio->new_blkaddr - 1 || - io->fio.rw != fio->rw)) + (io->fio.op != fio->op || io->fio.op_flags != fio->op_flags))) __submit_merged_bio(io); alloc_new: if (io->bio == NULL) { @@ -390,7 +395,7 @@ int f2fs_get_block(struct dnode_of_data *dn, pgoff_t index) } struct page *get_read_data_page(struct inode *inode, pgoff_t index, - int rw, bool for_write) + int op_flags, bool for_write) { struct address_space *mapping = inode->i_mapping; struct dnode_of_data dn; @@ -400,7 +405,8 @@ struct page *get_read_data_page(struct inode *inode, pgoff_t index, struct f2fs_io_info fio = { .sbi = F2FS_I_SB(inode), .type = DATA, - .rw = rw, + .op = REQ_OP_READ, + .op_flags = op_flags, .encrypted_page = NULL, }; @@ -1051,7 +1057,7 @@ got_it: */ if (bio && (last_block_in_bio != block_nr - 1)) { submit_and_realloc: - __submit_bio(F2FS_I_SB(inode), READ, bio); + __submit_bio(F2FS_I_SB(inode), bio); bio = NULL; } if (bio == NULL) { @@ -1080,6 +1086,7 @@ submit_and_realloc: bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(block_nr); bio->bi_end_io = f2fs_read_end_io; bio->bi_private = ctx; + bio_set_op_attrs(bio, REQ_OP_READ, 0); } if (bio_add_page(bio, page, blocksize, 0) < blocksize) @@ -1094,7 +1101,7 @@ set_error_page: goto next_page; confused: if (bio) { - __submit_bio(F2FS_I_SB(inode), READ, bio); + __submit_bio(F2FS_I_SB(inode), bio); bio = NULL; } unlock_page(page); @@ -1104,7 +1111,7 @@ next_page: } BUG_ON(pages && !list_empty(pages)); if (bio) - __submit_bio(F2FS_I_SB(inode), READ, bio); + __submit_bio(F2FS_I_SB(inode), bio); return 0; } @@ -1221,7 +1228,8 @@ static int f2fs_write_data_page(struct page *page, struct f2fs_io_info fio = { .sbi = sbi, .type = DATA, - .rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE, + .op = REQ_OP_WRITE, + .op_flags = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : 0, .page = page, .encrypted_page = NULL, }; @@ -1662,7 +1670,8 @@ repeat: struct f2fs_io_info fio = { .sbi = sbi, .type = DATA, - .rw = READ_SYNC, + .op = REQ_OP_READ, + .op_flags = READ_SYNC, .old_blkaddr = blkaddr, .new_blkaddr = blkaddr, .page = page, diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 916e7c238e3d..23ae6a81ccd6 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -686,14 +686,15 @@ enum page_type { struct f2fs_io_info { struct f2fs_sb_info *sbi; /* f2fs_sb_info pointer */ enum page_type type; /* contains DATA/NODE/META/META_FLUSH */ - int rw; /* contains R/RS/W/WS with REQ_META/REQ_PRIO */ + int op; /* contains REQ_OP_ */ + int op_flags; /* rq_flag_bits */ block_t new_blkaddr; /* new block address to be written */ block_t old_blkaddr; /* old block address before Cow */ struct page *page; /* page to be written */ struct page *encrypted_page; /* encrypted page */ }; -#define is_read_io(rw) (((rw) & 1) == READ) +#define is_read_io(rw) (rw == READ) struct f2fs_bio_info { struct f2fs_sb_info *sbi; /* f2fs superblock */ struct bio *bio; /* bios to merge */ diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 38d56f678912..f06ed73adf99 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -538,7 +538,8 @@ static void move_encrypted_block(struct inode *inode, block_t bidx) struct f2fs_io_info fio = { .sbi = F2FS_I_SB(inode), .type = DATA, - .rw = READ_SYNC, + .op = REQ_OP_READ, + .op_flags = READ_SYNC, .encrypted_page = NULL, }; struct dnode_of_data dn; @@ -612,7 +613,8 @@ static void move_encrypted_block(struct inode *inode, block_t bidx) /* allocate block address */ f2fs_wait_on_page_writeback(dn.node_page, NODE, true); - fio.rw = WRITE_SYNC; + fio.op = REQ_OP_WRITE; + fio.op_flags = WRITE_SYNC; fio.new_blkaddr = newaddr; f2fs_submit_page_mbio(&fio); @@ -649,7 +651,8 @@ static void move_data_page(struct inode *inode, block_t bidx, int gc_type) struct f2fs_io_info fio = { .sbi = F2FS_I_SB(inode), .type = DATA, - .rw = WRITE_SYNC, + .op = REQ_OP_WRITE, + .op_flags = WRITE_SYNC, .page = page, .encrypted_page = NULL, }; @@ -730,7 +733,8 @@ next_step: start_bidx = start_bidx_of_node(nofs, inode); data_page = get_read_data_page(inode, - start_bidx + ofs_in_node, READA, true); + start_bidx + ofs_in_node, REQ_RAHEAD, + true); if (IS_ERR(data_page)) { iput(inode); continue; diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index a4bb155dd00a..c15e53c1d794 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -108,7 +108,8 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page) struct f2fs_io_info fio = { .sbi = F2FS_I_SB(dn->inode), .type = DATA, - .rw = WRITE_SYNC | REQ_PRIO, + .op = REQ_OP_WRITE, + .op_flags = WRITE_SYNC | REQ_PRIO, .page = page, .encrypted_page = NULL, }; diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 1f21aae80c40..d1867698e601 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1070,14 +1070,15 @@ fail: * 0: f2fs_put_page(page, 0) * LOCKED_PAGE or error: f2fs_put_page(page, 1) */ -static int read_node_page(struct page *page, int rw) +static int read_node_page(struct page *page, int op_flags) { struct f2fs_sb_info *sbi = F2FS_P_SB(page); struct node_info ni; struct f2fs_io_info fio = { .sbi = sbi, .type = NODE, - .rw = rw, + .op = REQ_OP_READ, + .op_flags = op_flags, .page = page, .encrypted_page = NULL, }; @@ -1118,7 +1119,7 @@ void ra_node_page(struct f2fs_sb_info *sbi, nid_t nid) if (!apage) return; - err = read_node_page(apage, READA); + err = read_node_page(apage, REQ_RAHEAD); f2fs_put_page(apage, err ? 1 : 0); } @@ -1568,7 +1569,8 @@ static int f2fs_write_node_page(struct page *page, struct f2fs_io_info fio = { .sbi = sbi, .type = NODE, - .rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE, + .op = REQ_OP_WRITE, + .op_flags = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : 0, .page = page, .encrypted_page = NULL, }; diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 2e6f537a0e7d..4c2d1fa1e0e2 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -257,7 +257,8 @@ static int __commit_inmem_pages(struct inode *inode, struct f2fs_io_info fio = { .sbi = sbi, .type = DATA, - .rw = WRITE_SYNC | REQ_PRIO, + .op = REQ_OP_WRITE, + .op_flags = WRITE_SYNC | REQ_PRIO, .encrypted_page = NULL, }; bool submit_bio = false; @@ -406,7 +407,8 @@ repeat: fcc->dispatch_list = llist_reverse_order(fcc->dispatch_list); bio->bi_bdev = sbi->sb->s_bdev; - ret = submit_bio_wait(WRITE_FLUSH, bio); + bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_FLUSH); + ret = submit_bio_wait(bio); llist_for_each_entry_safe(cmd, next, fcc->dispatch_list, llnode) { @@ -438,7 +440,8 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi) int ret; bio->bi_bdev = sbi->sb->s_bdev; - ret = submit_bio_wait(WRITE_FLUSH, bio); + bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_FLUSH); + ret = submit_bio_wait(bio); bio_put(bio); return ret; } @@ -1401,7 +1404,8 @@ void write_meta_page(struct f2fs_sb_info *sbi, struct page *page) struct f2fs_io_info fio = { .sbi = sbi, .type = META, - .rw = WRITE_SYNC | REQ_META | REQ_PRIO, + .op = REQ_OP_WRITE, + .op_flags = WRITE_SYNC | REQ_META | REQ_PRIO, .old_blkaddr = page->index, .new_blkaddr = page->index, .page = page, @@ -1409,7 +1413,7 @@ void write_meta_page(struct f2fs_sb_info *sbi, struct page *page) }; if (unlikely(page->index >= MAIN_BLKADDR(sbi))) - fio.rw &= ~REQ_META; + fio.op_flags &= ~REQ_META; set_page_writeback(page); f2fs_submit_page_mbio(&fio); diff --git a/fs/f2fs/trace.c b/fs/f2fs/trace.c index 562ce0821559..73b4e1d1912a 100644 --- a/fs/f2fs/trace.c +++ b/fs/f2fs/trace.c @@ -25,11 +25,11 @@ static inline void __print_last_io(void) if (!last_io.len) return; - trace_printk("%3x:%3x %4x %-16s %2x %5x %12x %4x\n", + trace_printk("%3x:%3x %4x %-16s %2x %5x %5x %12x %4x\n", last_io.major, last_io.minor, last_io.pid, "----------------", last_io.type, - last_io.fio.rw, + last_io.fio.op, last_io.fio.op_flags, last_io.fio.new_blkaddr, last_io.len); memset(&last_io, 0, sizeof(last_io)); @@ -101,7 +101,8 @@ void f2fs_trace_ios(struct f2fs_io_info *fio, int flush) if (last_io.major == major && last_io.minor == minor && last_io.pid == pid && last_io.type == __file_type(inode, pid) && - last_io.fio.rw == fio->rw && + last_io.fio.op == fio->op && + last_io.fio.op_flags == fio->op_flags && last_io.fio.new_blkaddr + last_io.len == fio->new_blkaddr) { last_io.len++; diff --git a/fs/fat/misc.c b/fs/fat/misc.c index c4589e981760..8a8698119ff7 100644 --- a/fs/fat/misc.c +++ b/fs/fat/misc.c @@ -267,7 +267,7 @@ int fat_sync_bhs(struct buffer_head **bhs, int nr_bhs) int i, err = 0; for (i = 0; i < nr_bhs; i++) - write_dirty_buffer(bhs[i], WRITE); + write_dirty_buffer(bhs[i], 0); for (i = 0; i < nr_bhs; i++) { wait_on_buffer(bhs[i]); diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 989a2cef6b76..fe7e83a45eff 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -483,9 +483,9 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id) goto out_free; } inode->i_state |= I_WB_SWITCH; + __iget(inode); spin_unlock(&inode->i_lock); - ihold(inode); isw->inode = inode; atomic_inc(&isw_nr_in_flight); diff --git a/fs/fscache/page.c b/fs/fscache/page.c index 3078b679fcd1..c8c4f79c7ce1 100644 --- a/fs/fscache/page.c +++ b/fs/fscache/page.c @@ -887,6 +887,8 @@ void fscache_invalidate_writes(struct fscache_cookie *cookie) put_page(results[i]); } + wake_up_bit(&cookie->flags, 0); + _leave(""); } diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index ccd4971cc6c1..cca7b048c07b 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -341,8 +341,10 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry, struct dentry *newent; bool outarg_valid = true; + fuse_lock_inode(dir); err = fuse_lookup_name(dir->i_sb, get_node_id(dir), &entry->d_name, &outarg, &inode); + fuse_unlock_inode(dir); if (err == -ENOENT) { outarg_valid = false; err = 0; @@ -478,7 +480,7 @@ static int fuse_atomic_open(struct inode *dir, struct dentry *entry, struct fuse_conn *fc = get_fuse_conn(dir); struct dentry *res = NULL; - if (d_unhashed(entry)) { + if (d_in_lookup(entry)) { res = fuse_lookup(dir, entry, 0); if (IS_ERR(res)) return PTR_ERR(res); @@ -1341,7 +1343,9 @@ static int fuse_readdir(struct file *file, struct dir_context *ctx) fuse_read_fill(req, file, ctx->pos, PAGE_SIZE, FUSE_READDIR); } + fuse_lock_inode(inode); fuse_request_send(fc, req); + fuse_unlock_inode(inode); nbytes = req->out.args[0].size; err = req->out.h.error; fuse_put_request(fc, req); diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index eddbe02c4028..929c383432b0 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -110,6 +110,9 @@ struct fuse_inode { /** Miscellaneous bits describing inode state */ unsigned long state; + + /** Lock for serializing lookup and readdir for back compatibility*/ + struct mutex mutex; }; /** FUSE inode state bits */ @@ -540,6 +543,9 @@ struct fuse_conn { /** write-back cache policy (default is write-through) */ unsigned writeback_cache:1; + /** allow parallel lookups and readdir (default is serialized) */ + unsigned parallel_dirops:1; + /* * The following bitfields are only for optimization purposes * and hence races in setting them will not cause malfunction @@ -956,4 +962,7 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr, void fuse_set_initialized(struct fuse_conn *fc); +void fuse_unlock_inode(struct inode *inode); +void fuse_lock_inode(struct inode *inode); + #endif /* _FS_FUSE_I_H */ diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 1ce67668a8e1..9961d8432ce3 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -97,6 +97,7 @@ static struct inode *fuse_alloc_inode(struct super_block *sb) INIT_LIST_HEAD(&fi->queued_writes); INIT_LIST_HEAD(&fi->writepages); init_waitqueue_head(&fi->page_waitq); + mutex_init(&fi->mutex); fi->forget = fuse_alloc_forget(); if (!fi->forget) { kmem_cache_free(fuse_inode_cachep, inode); @@ -117,6 +118,7 @@ static void fuse_destroy_inode(struct inode *inode) struct fuse_inode *fi = get_fuse_inode(inode); BUG_ON(!list_empty(&fi->write_files)); BUG_ON(!list_empty(&fi->queued_writes)); + mutex_destroy(&fi->mutex); kfree(fi->forget); call_rcu(&inode->i_rcu, fuse_i_callback); } @@ -351,6 +353,18 @@ int fuse_reverse_inval_inode(struct super_block *sb, u64 nodeid, return 0; } +void fuse_lock_inode(struct inode *inode) +{ + if (!get_fuse_conn(inode)->parallel_dirops) + mutex_lock(&get_fuse_inode(inode)->mutex); +} + +void fuse_unlock_inode(struct inode *inode) +{ + if (!get_fuse_conn(inode)->parallel_dirops) + mutex_unlock(&get_fuse_inode(inode)->mutex); +} + static void fuse_umount_begin(struct super_block *sb) { fuse_abort_conn(get_fuse_conn_super(sb)); @@ -898,6 +912,8 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) fc->async_dio = 1; if (arg->flags & FUSE_WRITEBACK_CACHE) fc->writeback_cache = 1; + if (arg->flags & FUSE_PARALLEL_DIROPS) + fc->parallel_dirops = 1; if (arg->time_gran && arg->time_gran <= 1000000000) fc->sb->s_time_gran = arg->time_gran; } else { @@ -928,7 +944,8 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req) FUSE_SPLICE_WRITE | FUSE_SPLICE_MOVE | FUSE_SPLICE_READ | FUSE_FLOCK_LOCKS | FUSE_IOCTL_DIR | FUSE_AUTO_INVAL_DATA | FUSE_DO_READDIRPLUS | FUSE_READDIRPLUS_AUTO | FUSE_ASYNC_DIO | - FUSE_WRITEBACK_CACHE | FUSE_NO_OPEN_SUPPORT; + FUSE_WRITEBACK_CACHE | FUSE_NO_OPEN_SUPPORT | + FUSE_PARALLEL_DIROPS; req->in.h.opcode = FUSE_INIT; req->in.numargs = 1; req->in.args[0].size = sizeof(*arg); diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index 37b7bc14c8da..82df36886938 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -140,6 +140,32 @@ static int gfs2_writepage(struct page *page, struct writeback_control *wbc) return nobh_writepage(page, gfs2_get_block_noalloc, wbc); } +/* This is the same as calling block_write_full_page, but it also + * writes pages outside of i_size + */ +int gfs2_write_full_page(struct page *page, get_block_t *get_block, + struct writeback_control *wbc) +{ + struct inode * const inode = page->mapping->host; + loff_t i_size = i_size_read(inode); + const pgoff_t end_index = i_size >> PAGE_SHIFT; + unsigned offset; + + /* + * The page straddles i_size. It must be zeroed out on each and every + * writepage invocation because it may be mmapped. "A file is mapped + * in multiples of the page size. For a file that is not a multiple of + * the page size, the remaining memory is zeroed when mapped, and + * writes to that region are not written out to the file." + */ + offset = i_size & (PAGE_SIZE-1); + if (page->index == end_index && offset) + zero_user_segment(page, offset, PAGE_SIZE); + + return __block_write_full_page(inode, page, get_block, wbc, + end_buffer_async_write); +} + /** * __gfs2_jdata_writepage - The core of jdata writepage * @page: The page to write @@ -165,7 +191,7 @@ static int __gfs2_jdata_writepage(struct page *page, struct writeback_control *w } gfs2_page_add_databufs(ip, page, 0, sdp->sd_vfs->s_blocksize-1); } - return block_write_full_page(page, gfs2_get_block_noalloc, wbc); + return gfs2_write_full_page(page, gfs2_get_block_noalloc, wbc); } /** @@ -180,27 +206,20 @@ static int __gfs2_jdata_writepage(struct page *page, struct writeback_control *w static int gfs2_jdata_writepage(struct page *page, struct writeback_control *wbc) { struct inode *inode = page->mapping->host; + struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); int ret; - int done_trans = 0; - if (PageChecked(page)) { - if (wbc->sync_mode != WB_SYNC_ALL) - goto out_ignore; - ret = gfs2_trans_begin(sdp, RES_DINODE + 1, 0); - if (ret) - goto out_ignore; - done_trans = 1; - } - ret = gfs2_writepage_common(page, wbc); - if (ret > 0) - ret = __gfs2_jdata_writepage(page, wbc); - if (done_trans) - gfs2_trans_end(sdp); + if (gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(ip->i_gl))) + goto out; + if (PageChecked(page) || current->journal_info) + goto out_ignore; + ret = __gfs2_jdata_writepage(page, wbc); return ret; out_ignore: redirty_page_for_writepage(wbc, page); +out: unlock_page(page); return 0; } diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 24ce1cdd434a..6e2bec1cd289 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -285,7 +285,8 @@ static void gfs2_metapath_ra(struct gfs2_glock *gl, if (trylock_buffer(rabh)) { if (!buffer_uptodate(rabh)) { rabh->b_end_io = end_buffer_read_sync; - submit_bh(READA | REQ_META, rabh); + submit_bh(REQ_OP_READ, REQ_RAHEAD | REQ_META, + rabh); continue; } unlock_buffer(rabh); @@ -974,7 +975,7 @@ static int gfs2_block_truncate_page(struct address_space *mapping, loff_t from) if (!buffer_uptodate(bh)) { err = -EIO; - ll_rw_block(READ, 1, &bh); + ll_rw_block(REQ_OP_READ, 0, 1, &bh); wait_on_buffer(bh); /* Uhhuh. Read error. Complain and punt. */ if (!buffer_uptodate(bh)) diff --git a/fs/gfs2/dentry.c b/fs/gfs2/dentry.c index 30822b148f3e..5173b98ca036 100644 --- a/fs/gfs2/dentry.c +++ b/fs/gfs2/dentry.c @@ -117,7 +117,7 @@ static int gfs2_dentry_delete(const struct dentry *dentry) return 0; ginode = GFS2_I(d_inode(dentry)); - if (!ginode->i_iopen_gh.gh_gl) + if (!gfs2_holder_initialized(&ginode->i_iopen_gh)) return 0; if (test_bit(GLF_DEMOTE, &ginode->i_iopen_gh.gh_gl->gl_flags)) diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index 271d93905bac..fcb59b23f1e3 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c @@ -1513,7 +1513,7 @@ static void gfs2_dir_readahead(struct inode *inode, unsigned hsize, u32 index, continue; } bh->b_end_io = end_buffer_read_sync; - submit_bh(READA | REQ_META, bh); + submit_bh(REQ_OP_READ, REQ_RAHEAD | REQ_META, bh); continue; } brelse(bh); @@ -1663,7 +1663,8 @@ struct inode *gfs2_dir_search(struct inode *dir, const struct qstr *name, brelse(bh); if (fail_on_exist) return ERR_PTR(-EEXIST); - inode = gfs2_inode_lookup(dir->i_sb, dtype, addr, formal_ino); + inode = gfs2_inode_lookup(dir->i_sb, dtype, addr, formal_ino, + GFS2_BLKST_FREE /* ignore */); if (!IS_ERR(inode)) GFS2_I(inode)->i_rahead = rahead; return inode; diff --git a/fs/gfs2/export.c b/fs/gfs2/export.c index d5bda8513457..a332f3cd925e 100644 --- a/fs/gfs2/export.c +++ b/fs/gfs2/export.c @@ -137,21 +137,10 @@ static struct dentry *gfs2_get_dentry(struct super_block *sb, struct gfs2_sbd *sdp = sb->s_fs_info; struct inode *inode; - inode = gfs2_ilookup(sb, inum->no_addr); - if (inode) { - if (GFS2_I(inode)->i_no_formal_ino != inum->no_formal_ino) { - iput(inode); - return ERR_PTR(-ESTALE); - } - goto out_inode; - } - inode = gfs2_lookup_by_inum(sdp, inum->no_addr, &inum->no_formal_ino, GFS2_BLKST_DINODE); if (IS_ERR(inode)) return ERR_CAST(inode); - -out_inode: return d_obtain_alias(inode); } diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index e0f98e483aec..320e65e61938 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -1098,7 +1098,7 @@ static void do_unflock(struct file *file, struct file_lock *fl) mutex_lock(&fp->f_fl_mutex); locks_lock_file_wait(file, fl); - if (fl_gh->gh_gl) { + if (gfs2_holder_initialized(fl_gh)) { gfs2_glock_dq(fl_gh); gfs2_holder_uninit(fl_gh); } diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 706fd9352f36..3a90b2b5b9bb 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -575,7 +575,6 @@ static void delete_work_func(struct work_struct *work) { struct gfs2_glock *gl = container_of(work, struct gfs2_glock, gl_delete); struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; - struct gfs2_inode *ip; struct inode *inode; u64 no_addr = gl->gl_name.ln_number; @@ -585,13 +584,7 @@ static void delete_work_func(struct work_struct *work) if (test_bit(GLF_INODE_CREATING, &gl->gl_flags)) goto out; - ip = gl->gl_object; - /* Note: Unsafe to dereference ip as we don't hold right refs/locks */ - - if (ip) - inode = gfs2_ilookup(sdp->sd_vfs, no_addr); - else - inode = gfs2_lookup_by_inum(sdp, no_addr, NULL, GFS2_BLKST_UNLINKED); + inode = gfs2_lookup_by_inum(sdp, no_addr, NULL, GFS2_BLKST_UNLINKED); if (inode && !IS_ERR(inode)) { d_prune_aliases(inode); iput(inode); @@ -808,7 +801,7 @@ void gfs2_holder_uninit(struct gfs2_holder *gh) { put_pid(gh->gh_owner_pid); gfs2_glock_put(gh->gh_gl); - gh->gh_gl = NULL; + gfs2_holder_mark_uninitialized(gh); gh->gh_ip = 0; } diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h index 46ab67fc16da..ab1ef322f7a5 100644 --- a/fs/gfs2/glock.h +++ b/fs/gfs2/glock.h @@ -247,4 +247,14 @@ extern void gfs2_unregister_debugfs(void); extern const struct lm_lockops gfs2_dlm_ops; +static inline void gfs2_holder_mark_uninitialized(struct gfs2_holder *gh) +{ + gh->gh_gl = NULL; +} + +static inline bool gfs2_holder_initialized(struct gfs2_holder *gh) +{ + return gh->gh_gl; +} + #endif /* __GLOCK_DOT_H__ */ diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 21dc784f66c2..e0621cacf134 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -37,9 +37,35 @@ #include "super.h" #include "glops.h" -struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr) +static int iget_test(struct inode *inode, void *opaque) { - return ilookup(sb, (unsigned long)no_addr); + u64 no_addr = *(u64 *)opaque; + + return GFS2_I(inode)->i_no_addr == no_addr; +} + +static int iget_set(struct inode *inode, void *opaque) +{ + u64 no_addr = *(u64 *)opaque; + + GFS2_I(inode)->i_no_addr = no_addr; + inode->i_ino = no_addr; + return 0; +} + +static struct inode *gfs2_iget(struct super_block *sb, u64 no_addr) +{ + struct inode *inode; + +repeat: + inode = iget5_locked(sb, no_addr, iget_test, iget_set, &no_addr); + if (!inode) + return inode; + if (is_bad_inode(inode)) { + iput(inode); + goto repeat; + } + return inode; } /** @@ -78,26 +104,37 @@ static void gfs2_set_iop(struct inode *inode) /** * gfs2_inode_lookup - Lookup an inode * @sb: The super block - * @no_addr: The inode number * @type: The type of the inode + * @no_addr: The inode number + * @no_formal_ino: The inode generation number + * @blktype: Requested block type (GFS2_BLKST_DINODE or GFS2_BLKST_UNLINKED; + * GFS2_BLKST_FREE do indicate not to verify) + * + * If @type is DT_UNKNOWN, the inode type is fetched from disk. + * + * If @blktype is anything other than GFS2_BLKST_FREE (which is used as a + * placeholder because it doesn't otherwise make sense), the on-disk block type + * is verified to be @blktype. * * Returns: A VFS inode, or an error */ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type, - u64 no_addr, u64 no_formal_ino) + u64 no_addr, u64 no_formal_ino, + unsigned int blktype) { struct inode *inode; struct gfs2_inode *ip; struct gfs2_glock *io_gl = NULL; + struct gfs2_holder i_gh; int error; - inode = iget_locked(sb, (unsigned long)no_addr); + gfs2_holder_mark_uninitialized(&i_gh); + inode = gfs2_iget(sb, no_addr); if (!inode) return ERR_PTR(-ENOMEM); ip = GFS2_I(inode); - ip->i_no_addr = no_addr; if (inode->i_state & I_NEW) { struct gfs2_sbd *sdp = GFS2_SB(inode); @@ -112,10 +149,29 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type, if (unlikely(error)) goto fail_put; + if (type == DT_UNKNOWN || blktype != GFS2_BLKST_FREE) { + /* + * The GL_SKIP flag indicates to skip reading the inode + * block. We read the inode with gfs2_inode_refresh + * after possibly checking the block type. + */ + error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, + GL_SKIP, &i_gh); + if (error) + goto fail_put; + + if (blktype != GFS2_BLKST_FREE) { + error = gfs2_check_blk_type(sdp, no_addr, + blktype); + if (error) + goto fail_put; + } + } + set_bit(GIF_INVALID, &ip->i_flags); error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, &ip->i_iopen_gh); if (unlikely(error)) - goto fail_iopen; + goto fail_put; ip->i_iopen_gh.gh_gl->gl_object = ip; gfs2_glock_put(io_gl); @@ -134,6 +190,8 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type, unlock_new_inode(inode); } + if (gfs2_holder_initialized(&i_gh)) + gfs2_glock_dq_uninit(&i_gh); return inode; fail_refresh: @@ -141,10 +199,11 @@ fail_refresh: ip->i_iopen_gh.gh_gl->gl_object = NULL; gfs2_glock_dq_wait(&ip->i_iopen_gh); gfs2_holder_uninit(&ip->i_iopen_gh); -fail_iopen: +fail_put: if (io_gl) gfs2_glock_put(io_gl); -fail_put: + if (gfs2_holder_initialized(&i_gh)) + gfs2_glock_dq_uninit(&i_gh); ip->i_gl->gl_object = NULL; fail: iget_failed(inode); @@ -155,23 +214,12 @@ struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr, u64 *no_formal_ino, unsigned int blktype) { struct super_block *sb = sdp->sd_vfs; - struct gfs2_holder i_gh; - struct inode *inode = NULL; + struct inode *inode; int error; - /* Must not read in block until block type is verified */ - error = gfs2_glock_nq_num(sdp, no_addr, &gfs2_inode_glops, - LM_ST_EXCLUSIVE, GL_SKIP, &i_gh); - if (error) - return ERR_PTR(error); - - error = gfs2_check_blk_type(sdp, no_addr, blktype); - if (error) - goto fail; - - inode = gfs2_inode_lookup(sb, DT_UNKNOWN, no_addr, 0); + inode = gfs2_inode_lookup(sb, DT_UNKNOWN, no_addr, 0, blktype); if (IS_ERR(inode)) - goto fail; + return inode; /* Two extra checks for NFS only */ if (no_formal_ino) { @@ -182,16 +230,12 @@ struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr, error = -EIO; if (GFS2_I(inode)->i_diskflags & GFS2_DIF_SYSTEM) goto fail_iput; - - error = 0; } + return inode; -fail: - gfs2_glock_dq_uninit(&i_gh); - return error ? ERR_PTR(error) : inode; fail_iput: iput(inode); - goto fail; + return ERR_PTR(error); } @@ -236,8 +280,8 @@ struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name, struct gfs2_holder d_gh; int error = 0; struct inode *inode = NULL; - int unlock = 0; + gfs2_holder_mark_uninitialized(&d_gh); if (!name->len || name->len > GFS2_FNAMESIZE) return ERR_PTR(-ENAMETOOLONG); @@ -252,7 +296,6 @@ struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name, error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh); if (error) return ERR_PTR(error); - unlock = 1; } if (!is_root) { @@ -265,7 +308,7 @@ struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name, if (IS_ERR(inode)) error = PTR_ERR(inode); out: - if (unlock) + if (gfs2_holder_initialized(&d_gh)) gfs2_glock_dq_uninit(&d_gh); if (error == -ENOENT) return NULL; @@ -1189,7 +1232,7 @@ static int gfs2_atomic_open(struct inode *dir, struct dentry *dentry, struct dentry *d; bool excl = !!(flags & O_EXCL); - if (!d_unhashed(dentry)) + if (!d_in_lookup(dentry)) goto skip_lookup; d = __gfs2_lookup(dir, dentry, file, opened); @@ -1309,7 +1352,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, struct gfs2_inode *ip = GFS2_I(d_inode(odentry)); struct gfs2_inode *nip = NULL; struct gfs2_sbd *sdp = GFS2_SB(odir); - struct gfs2_holder ghs[5], r_gh = { .gh_gl = NULL, }; + struct gfs2_holder ghs[5], r_gh; struct gfs2_rgrpd *nrgd; unsigned int num_gh; int dir_rename = 0; @@ -1317,6 +1360,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, unsigned int x; int error; + gfs2_holder_mark_uninitialized(&r_gh); if (d_really_is_positive(ndentry)) { nip = GFS2_I(d_inode(ndentry)); if (ip == nip) @@ -1506,7 +1550,7 @@ out_gunlock: gfs2_holder_uninit(ghs + x); } out_gunlock_r: - if (r_gh.gh_gl) + if (gfs2_holder_initialized(&r_gh)) gfs2_glock_dq_uninit(&r_gh); out: return error; @@ -1532,13 +1576,14 @@ static int gfs2_exchange(struct inode *odir, struct dentry *odentry, struct gfs2_inode *oip = GFS2_I(odentry->d_inode); struct gfs2_inode *nip = GFS2_I(ndentry->d_inode); struct gfs2_sbd *sdp = GFS2_SB(odir); - struct gfs2_holder ghs[5], r_gh = { .gh_gl = NULL, }; + struct gfs2_holder ghs[5], r_gh; unsigned int num_gh; unsigned int x; umode_t old_mode = oip->i_inode.i_mode; umode_t new_mode = nip->i_inode.i_mode; int error; + gfs2_holder_mark_uninitialized(&r_gh); error = gfs2_rindex_update(sdp); if (error) return error; @@ -1646,7 +1691,7 @@ out_gunlock: gfs2_holder_uninit(ghs + x); } out_gunlock_r: - if (r_gh.gh_gl) + if (gfs2_holder_initialized(&r_gh)) gfs2_glock_dq_uninit(&r_gh); out: return error; @@ -1743,9 +1788,8 @@ int gfs2_permission(struct inode *inode, int mask) struct gfs2_inode *ip; struct gfs2_holder i_gh; int error; - int unlock = 0; - + gfs2_holder_mark_uninitialized(&i_gh); ip = GFS2_I(inode); if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) { if (mask & MAY_NOT_BLOCK) @@ -1753,14 +1797,13 @@ int gfs2_permission(struct inode *inode, int mask) error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh); if (error) return error; - unlock = 1; } if ((mask & MAY_WRITE) && IS_IMMUTABLE(inode)) error = -EACCES; else error = generic_permission(inode, mask); - if (unlock) + if (gfs2_holder_initialized(&i_gh)) gfs2_glock_dq_uninit(&i_gh); return error; @@ -1932,17 +1975,16 @@ static int gfs2_getattr(struct vfsmount *mnt, struct dentry *dentry, struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_holder gh; int error; - int unlock = 0; + gfs2_holder_mark_uninitialized(&gh); if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) { error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &gh); if (error) return error; - unlock = 1; } generic_fillattr(inode, stat); - if (unlock) + if (gfs2_holder_initialized(&gh)) gfs2_glock_dq_uninit(&gh); return 0; diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h index e1af0d4aa308..7710dfd3af35 100644 --- a/fs/gfs2/inode.h +++ b/fs/gfs2/inode.h @@ -94,11 +94,11 @@ err: } extern struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type, - u64 no_addr, u64 no_formal_ino); + u64 no_addr, u64 no_formal_ino, + unsigned int blktype); extern struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr, u64 *no_formal_ino, unsigned int blktype); -extern struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr); extern int gfs2_inode_refresh(struct gfs2_inode *ip); diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 0ff028c15199..e58ccef09c91 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -657,7 +657,7 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags) struct gfs2_log_header *lh; unsigned int tail; u32 hash; - int rw = WRITE_FLUSH_FUA | REQ_META; + int op_flags = WRITE_FLUSH_FUA | REQ_META; struct page *page = mempool_alloc(gfs2_page_pool, GFP_NOIO); enum gfs2_freeze_state state = atomic_read(&sdp->sd_freeze_state); lh = page_address(page); @@ -682,12 +682,12 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags) if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags)) { gfs2_ordered_wait(sdp); log_flush_wait(sdp); - rw = WRITE_SYNC | REQ_META | REQ_PRIO; + op_flags = WRITE_SYNC | REQ_META | REQ_PRIO; } sdp->sd_log_idle = (tail == sdp->sd_log_flush_head); gfs2_log_write_page(sdp, page); - gfs2_log_flush_bio(sdp, rw); + gfs2_log_flush_bio(sdp, REQ_OP_WRITE, op_flags); log_flush_wait(sdp); if (sdp->sd_log_tail != tail) @@ -738,7 +738,7 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, gfs2_ordered_write(sdp); lops_before_commit(sdp, tr); - gfs2_log_flush_bio(sdp, WRITE); + gfs2_log_flush_bio(sdp, REQ_OP_WRITE, 0); if (sdp->sd_log_head != sdp->sd_log_flush_head) { log_flush_wait(sdp); diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index d5369a109781..49d5a1b61b06 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -230,17 +230,19 @@ static void gfs2_end_log_write(struct bio *bio) /** * gfs2_log_flush_bio - Submit any pending log bio * @sdp: The superblock - * @rw: The rw flags + * @op: REQ_OP + * @op_flags: rq_flag_bits * * Submit any pending part-built or full bio to the block device. If * there is no pending bio, then this is a no-op. */ -void gfs2_log_flush_bio(struct gfs2_sbd *sdp, int rw) +void gfs2_log_flush_bio(struct gfs2_sbd *sdp, int op, int op_flags) { if (sdp->sd_log_bio) { atomic_inc(&sdp->sd_log_in_flight); - submit_bio(rw, sdp->sd_log_bio); + bio_set_op_attrs(sdp->sd_log_bio, op, op_flags); + submit_bio(sdp->sd_log_bio); sdp->sd_log_bio = NULL; } } @@ -299,7 +301,7 @@ static struct bio *gfs2_log_get_bio(struct gfs2_sbd *sdp, u64 blkno) nblk >>= sdp->sd_fsb2bb_shift; if (blkno == nblk) return bio; - gfs2_log_flush_bio(sdp, WRITE); + gfs2_log_flush_bio(sdp, REQ_OP_WRITE, 0); } return gfs2_log_alloc_bio(sdp, blkno); @@ -328,7 +330,7 @@ static void gfs2_log_write(struct gfs2_sbd *sdp, struct page *page, bio = gfs2_log_get_bio(sdp, blkno); ret = bio_add_page(bio, page, size, offset); if (ret == 0) { - gfs2_log_flush_bio(sdp, WRITE); + gfs2_log_flush_bio(sdp, REQ_OP_WRITE, 0); bio = gfs2_log_alloc_bio(sdp, blkno); ret = bio_add_page(bio, page, size, offset); WARN_ON(ret == 0); @@ -535,9 +537,9 @@ static int buf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start, if (pass != 1 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_METADATA) return 0; - gfs2_replay_incr_blk(sdp, &start); + gfs2_replay_incr_blk(jd, &start); - for (; blks; gfs2_replay_incr_blk(sdp, &start), blks--) { + for (; blks; gfs2_replay_incr_blk(jd, &start), blks--) { blkno = be64_to_cpu(*ptr++); jd->jd_found_blocks++; @@ -693,7 +695,7 @@ static int revoke_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start, offset = sizeof(struct gfs2_log_descriptor); - for (; blks; gfs2_replay_incr_blk(sdp, &start), blks--) { + for (; blks; gfs2_replay_incr_blk(jd, &start), blks--) { error = gfs2_replay_read_block(jd, start, &bh); if (error) return error; @@ -762,7 +764,6 @@ static int databuf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start, __be64 *ptr, int pass) { struct gfs2_inode *ip = GFS2_I(jd->jd_inode); - struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode); struct gfs2_glock *gl = ip->i_gl; unsigned int blks = be32_to_cpu(ld->ld_data1); struct buffer_head *bh_log, *bh_ip; @@ -773,8 +774,8 @@ static int databuf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start, if (pass != 1 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_JDATA) return 0; - gfs2_replay_incr_blk(sdp, &start); - for (; blks; gfs2_replay_incr_blk(sdp, &start), blks--) { + gfs2_replay_incr_blk(jd, &start); + for (; blks; gfs2_replay_incr_blk(jd, &start), blks--) { blkno = be64_to_cpu(*ptr++); esc = be64_to_cpu(*ptr++); diff --git a/fs/gfs2/lops.h b/fs/gfs2/lops.h index a65a7ba32ffd..e529f536c117 100644 --- a/fs/gfs2/lops.h +++ b/fs/gfs2/lops.h @@ -27,7 +27,7 @@ extern const struct gfs2_log_operations gfs2_databuf_lops; extern const struct gfs2_log_operations *gfs2_log_ops[]; extern void gfs2_log_write_page(struct gfs2_sbd *sdp, struct page *page); -extern void gfs2_log_flush_bio(struct gfs2_sbd *sdp, int rw); +extern void gfs2_log_flush_bio(struct gfs2_sbd *sdp, int op, int op_flags); extern void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh); static inline unsigned int buf_limit(struct gfs2_sbd *sdp) diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c index f99f8e94de3f..74fd0139e6c2 100644 --- a/fs/gfs2/main.c +++ b/fs/gfs2/main.c @@ -45,6 +45,7 @@ static void gfs2_init_inode_once(void *foo) memset(&ip->i_res, 0, sizeof(ip->i_res)); RB_CLEAR_NODE(&ip->i_res.rs_node); ip->i_hash_cache = NULL; + gfs2_holder_mark_uninitialized(&ip->i_iopen_gh); } static void gfs2_init_glock_once(void *foo) diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c index 8eaadabbc771..950b8be68e41 100644 --- a/fs/gfs2/meta_io.c +++ b/fs/gfs2/meta_io.c @@ -37,8 +37,8 @@ static int gfs2_aspace_writepage(struct page *page, struct writeback_control *wb { struct buffer_head *bh, *head; int nr_underway = 0; - int write_op = REQ_META | REQ_PRIO | - (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE); + int write_flags = REQ_META | REQ_PRIO | + (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : 0); BUG_ON(!PageLocked(page)); BUG_ON(!page_has_buffers(page)); @@ -79,7 +79,7 @@ static int gfs2_aspace_writepage(struct page *page, struct writeback_control *wb do { struct buffer_head *next = bh->b_this_page; if (buffer_async_write(bh)) { - submit_bh(write_op, bh); + submit_bh(REQ_OP_WRITE, write_flags, bh); nr_underway++; } bh = next; @@ -213,7 +213,8 @@ static void gfs2_meta_read_endio(struct bio *bio) * Submit several consecutive buffer head I/O requests as a single bio I/O * request. (See submit_bh_wbc.) */ -static void gfs2_submit_bhs(int rw, struct buffer_head *bhs[], int num) +static void gfs2_submit_bhs(int op, int op_flags, struct buffer_head *bhs[], + int num) { struct buffer_head *bh = bhs[0]; struct bio *bio; @@ -230,7 +231,8 @@ static void gfs2_submit_bhs(int rw, struct buffer_head *bhs[], int num) bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh)); } bio->bi_end_io = gfs2_meta_read_endio; - submit_bio(rw, bio); + bio_set_op_attrs(bio, op, op_flags); + submit_bio(bio); } /** @@ -280,7 +282,7 @@ int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags, } } - gfs2_submit_bhs(READ_SYNC | REQ_META | REQ_PRIO, bhs, num); + gfs2_submit_bhs(REQ_OP_READ, READ_SYNC | REQ_META | REQ_PRIO, bhs, num); if (!(flags & DIO_WAIT)) return 0; @@ -448,7 +450,7 @@ struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen) if (buffer_uptodate(first_bh)) goto out; if (!buffer_locked(first_bh)) - ll_rw_block(READ_SYNC | REQ_META, 1, &first_bh); + ll_rw_block(REQ_OP_READ, READ_SYNC | REQ_META, 1, &first_bh); dblock++; extlen--; @@ -457,7 +459,7 @@ struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen) bh = gfs2_getbuf(gl, dblock, CREATE); if (!buffer_uptodate(bh) && !buffer_locked(bh)) - ll_rw_block(READA | REQ_META, 1, &bh); + ll_rw_block(REQ_OP_READ, REQ_RAHEAD | REQ_META, 1, &bh); brelse(bh); dblock++; extlen--; diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 45463600fb81..ef1e1822977f 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -246,7 +246,8 @@ static int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector, int silent) bio->bi_end_io = end_bio_io_page; bio->bi_private = page; - submit_bio(READ_SYNC | REQ_META, bio); + bio_set_op_attrs(bio, REQ_OP_READ, READ_SYNC | REQ_META); + submit_bio(bio); wait_on_page_locked(page); bio_put(bio); if (!PageUptodate(page)) { @@ -454,7 +455,8 @@ static int gfs2_lookup_root(struct super_block *sb, struct dentry **dptr, struct dentry *dentry; struct inode *inode; - inode = gfs2_inode_lookup(sb, DT_DIR, no_addr, 0); + inode = gfs2_inode_lookup(sb, DT_DIR, no_addr, 0, + GFS2_BLKST_FREE /* ignore */); if (IS_ERR(inode)) { fs_err(sdp, "can't read in %s inode: %ld\n", name, PTR_ERR(inode)); return PTR_ERR(inode); diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index ce7d69a2fdc0..77930ca25303 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -730,7 +730,7 @@ static int gfs2_write_buf_to_page(struct gfs2_inode *ip, unsigned long index, if (PageUptodate(page)) set_buffer_uptodate(bh); if (!buffer_uptodate(bh)) { - ll_rw_block(READ | REQ_META, 1, &bh); + ll_rw_block(REQ_OP_READ, REQ_META, 1, &bh); wait_on_buffer(bh); if (!buffer_uptodate(bh)) goto unlock_out; @@ -883,7 +883,7 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda) gfs2_write_calc_reserv(ip, sizeof(struct gfs2_quota), &data_blocks, &ind_blocks); - ghs = kcalloc(num_qd, sizeof(struct gfs2_holder), GFP_NOFS); + ghs = kmalloc(num_qd * sizeof(struct gfs2_holder), GFP_NOFS); if (!ghs) return -ENOMEM; diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c index 1b645773c98e..113b6095a58d 100644 --- a/fs/gfs2/recovery.c +++ b/fs/gfs2/recovery.c @@ -338,7 +338,7 @@ static int foreach_descriptor(struct gfs2_jdesc *jd, unsigned int start, struct gfs2_log_header_host lh; error = get_log_header(jd, start, &lh); if (!error) { - gfs2_replay_incr_blk(sdp, &start); + gfs2_replay_incr_blk(jd, &start); brelse(bh); continue; } @@ -360,7 +360,7 @@ static int foreach_descriptor(struct gfs2_jdesc *jd, unsigned int start, } while (length--) - gfs2_replay_incr_blk(sdp, &start); + gfs2_replay_incr_blk(jd, &start); brelse(bh); } @@ -390,7 +390,7 @@ static int clean_journal(struct gfs2_jdesc *jd, struct gfs2_log_header_host *hea struct buffer_head bh_map = { .b_state = 0, .b_blocknr = 0 }; lblock = head->lh_blkno; - gfs2_replay_incr_blk(sdp, &lblock); + gfs2_replay_incr_blk(jd, &lblock); bh_map.b_size = 1 << ip->i_inode.i_blkbits; error = gfs2_block_map(&ip->i_inode, lblock, &bh_map, 0); if (error) diff --git a/fs/gfs2/recovery.h b/fs/gfs2/recovery.h index 6142836cce96..11fdfab4bf99 100644 --- a/fs/gfs2/recovery.h +++ b/fs/gfs2/recovery.h @@ -14,9 +14,9 @@ extern struct workqueue_struct *gfs_recovery_wq; -static inline void gfs2_replay_incr_blk(struct gfs2_sbd *sdp, unsigned int *blk) +static inline void gfs2_replay_incr_blk(struct gfs2_jdesc *jd, unsigned int *blk) { - if (++*blk == sdp->sd_jdesc->jd_blocks) + if (++*blk == jd->jd_blocks) *blk = 0; } diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 5bd216901e89..86ccc0159393 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -658,6 +658,7 @@ void gfs2_rs_deltree(struct gfs2_blkreserv *rs) if (rgd) { spin_lock(&rgd->rd_rsspin); __rs_deltree(rs); + BUG_ON(rs->rs_free); spin_unlock(&rgd->rd_rsspin); } } @@ -671,10 +672,8 @@ void gfs2_rs_deltree(struct gfs2_blkreserv *rs) void gfs2_rsqa_delete(struct gfs2_inode *ip, atomic_t *wcount) { down_write(&ip->i_rw_mutex); - if ((wcount == NULL) || (atomic_read(wcount) <= 1)) { + if ((wcount == NULL) || (atomic_read(wcount) <= 1)) gfs2_rs_deltree(&ip->i_res); - BUG_ON(ip->i_res.rs_free); - } up_write(&ip->i_rw_mutex); gfs2_qa_delete(ip, wcount); } @@ -722,6 +721,7 @@ void gfs2_clear_rgrpd(struct gfs2_sbd *sdp) gfs2_free_clones(rgd); kfree(rgd->rd_bits); + rgd->rd_bits = NULL; return_all_reservations(rgd); kmem_cache_free(gfs2_rgrpd_cachep, rgd); } @@ -916,9 +916,6 @@ static int read_rindex_entry(struct gfs2_inode *ip) if (error) goto fail; - rgd->rd_gl->gl_object = rgd; - rgd->rd_gl->gl_vm.start = (rgd->rd_addr * bsize) & PAGE_MASK; - rgd->rd_gl->gl_vm.end = PAGE_ALIGN((rgd->rd_addr + rgd->rd_length) * bsize) - 1; rgd->rd_rgl = (struct gfs2_rgrp_lvb *)rgd->rd_gl->gl_lksb.sb_lvbptr; rgd->rd_flags &= ~(GFS2_RDF_UPTODATE | GFS2_RDF_PREFERRED); if (rgd->rd_data > sdp->sd_max_rg_data) @@ -926,14 +923,20 @@ static int read_rindex_entry(struct gfs2_inode *ip) spin_lock(&sdp->sd_rindex_spin); error = rgd_insert(rgd); spin_unlock(&sdp->sd_rindex_spin); - if (!error) + if (!error) { + rgd->rd_gl->gl_object = rgd; + rgd->rd_gl->gl_vm.start = (rgd->rd_addr * bsize) & PAGE_MASK; + rgd->rd_gl->gl_vm.end = PAGE_ALIGN((rgd->rd_addr + + rgd->rd_length) * bsize) - 1; return 0; + } error = 0; /* someone else read in the rgrp; free it and ignore it */ gfs2_glock_put(rgd->rd_gl); fail: kfree(rgd->rd_bits); + rgd->rd_bits = NULL; kmem_cache_free(gfs2_rgrpd_cachep, rgd); return error; } @@ -2096,7 +2099,7 @@ void gfs2_inplace_release(struct gfs2_inode *ip) { struct gfs2_blkreserv *rs = &ip->i_res; - if (rs->rs_rgd_gh.gh_gl) + if (gfs2_holder_initialized(&rs->rs_rgd_gh)) gfs2_glock_dq_uninit(&rs->rs_rgd_gh); } @@ -2596,7 +2599,7 @@ void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state) { unsigned int x; - rlist->rl_ghs = kcalloc(rlist->rl_rgrps, sizeof(struct gfs2_holder), + rlist->rl_ghs = kmalloc(rlist->rl_rgrps * sizeof(struct gfs2_holder), GFP_NOFS | __GFP_NOFAIL); for (x = 0; x < rlist->rl_rgrps; x++) gfs2_holder_init(rlist->rl_rgd[x]->rd_gl, diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 9b2ff353e45f..3a7e60bb39f8 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -855,7 +855,7 @@ static int gfs2_make_fs_ro(struct gfs2_sbd *sdp) wait_event(sdp->sd_reserving_log_wait, atomic_read(&sdp->sd_reserving_log) == 0); gfs2_assert_warn(sdp, atomic_read(&sdp->sd_log_blks_free) == sdp->sd_jdesc->jd_blocks); - if (freeze_gh.gh_gl) + if (gfs2_holder_initialized(&freeze_gh)) gfs2_glock_dq_uninit(&freeze_gh); gfs2_quota_cleanup(sdp); @@ -1033,7 +1033,7 @@ static int gfs2_unfreeze(struct super_block *sb) mutex_lock(&sdp->sd_freeze_mutex); if (atomic_read(&sdp->sd_freeze_state) != SFS_FROZEN || - sdp->sd_freeze_gh.gh_gl == NULL) { + !gfs2_holder_initialized(&sdp->sd_freeze_gh)) { mutex_unlock(&sdp->sd_freeze_mutex); return 0; } @@ -1084,9 +1084,11 @@ static int gfs2_statfs_slow(struct gfs2_sbd *sdp, struct gfs2_statfs_change_host int error = 0, err; memset(sc, 0, sizeof(struct gfs2_statfs_change_host)); - gha = kcalloc(slots, sizeof(struct gfs2_holder), GFP_KERNEL); + gha = kmalloc(slots * sizeof(struct gfs2_holder), GFP_KERNEL); if (!gha) return -ENOMEM; + for (x = 0; x < slots; x++) + gfs2_holder_mark_uninitialized(gha + x); rgd_next = gfs2_rgrpd_get_first(sdp); @@ -1096,7 +1098,7 @@ static int gfs2_statfs_slow(struct gfs2_sbd *sdp, struct gfs2_statfs_change_host for (x = 0; x < slots; x++) { gh = gha + x; - if (gh->gh_gl && gfs2_glock_poll(gh)) { + if (gfs2_holder_initialized(gh) && gfs2_glock_poll(gh)) { err = gfs2_glock_wait(gh); if (err) { gfs2_holder_uninit(gh); @@ -1109,7 +1111,7 @@ static int gfs2_statfs_slow(struct gfs2_sbd *sdp, struct gfs2_statfs_change_host } } - if (gh->gh_gl) + if (gfs2_holder_initialized(gh)) done = 0; else if (rgd_next && !error) { error = gfs2_glock_nq_init(rgd_next->rd_gl, @@ -1304,9 +1306,11 @@ static int gfs2_drop_inode(struct inode *inode) { struct gfs2_inode *ip = GFS2_I(inode); - if (!test_bit(GIF_FREE_VFS_INODE, &ip->i_flags) && inode->i_nlink) { + if (!test_bit(GIF_FREE_VFS_INODE, &ip->i_flags) && + inode->i_nlink && + gfs2_holder_initialized(&ip->i_iopen_gh)) { struct gfs2_glock *gl = ip->i_iopen_gh.gh_gl; - if (gl && test_bit(GLF_DEMOTE, &gl->gl_flags)) + if (test_bit(GLF_DEMOTE, &gl->gl_flags)) clear_nlink(inode); } return generic_drop_inode(inode); @@ -1551,7 +1555,7 @@ static void gfs2_evict_inode(struct inode *inode) goto out_truncate; } - if (ip->i_iopen_gh.gh_gl && + if (gfs2_holder_initialized(&ip->i_iopen_gh) && test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags)) { ip->i_iopen_gh.gh_flags |= GL_NOCACHE; gfs2_glock_dq_wait(&ip->i_iopen_gh); @@ -1610,7 +1614,7 @@ out_unlock: if (gfs2_rs_active(&ip->i_res)) gfs2_rs_deltree(&ip->i_res); - if (ip->i_iopen_gh.gh_gl) { + if (gfs2_holder_initialized(&ip->i_iopen_gh)) { if (test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags)) { ip->i_iopen_gh.gh_flags |= GL_NOCACHE; gfs2_glock_dq_wait(&ip->i_iopen_gh); @@ -1632,7 +1636,7 @@ out: gfs2_glock_add_to_lru(ip->i_gl); gfs2_glock_put(ip->i_gl); ip->i_gl = NULL; - if (ip->i_iopen_gh.gh_gl) { + if (gfs2_holder_initialized(&ip->i_iopen_gh)) { ip->i_iopen_gh.gh_gl->gl_object = NULL; ip->i_iopen_gh.gh_flags |= GL_NOCACHE; gfs2_glock_dq_wait(&ip->i_iopen_gh); diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h index fdc3446d934a..047245bd2cd6 100644 --- a/fs/hfsplus/hfsplus_fs.h +++ b/fs/hfsplus/hfsplus_fs.h @@ -526,7 +526,7 @@ int hfsplus_compare_dentry(const struct dentry *parent, /* wrapper.c */ int hfsplus_submit_bio(struct super_block *sb, sector_t sector, void *buf, - void **data, int rw); + void **data, int op, int op_flags); int hfsplus_read_wrapper(struct super_block *sb); /* time macros */ diff --git a/fs/hfsplus/part_tbl.c b/fs/hfsplus/part_tbl.c index eb355d81e279..63164ebc52fa 100644 --- a/fs/hfsplus/part_tbl.c +++ b/fs/hfsplus/part_tbl.c @@ -112,7 +112,8 @@ static int hfs_parse_new_pmap(struct super_block *sb, void *buf, if ((u8 *)pm - (u8 *)buf >= buf_size) { res = hfsplus_submit_bio(sb, *part_start + HFS_PMAP_BLK + i, - buf, (void **)&pm, READ); + buf, (void **)&pm, REQ_OP_READ, + 0); if (res) return res; } @@ -136,7 +137,7 @@ int hfs_part_find(struct super_block *sb, return -ENOMEM; res = hfsplus_submit_bio(sb, *part_start + HFS_PMAP_BLK, - buf, &data, READ); + buf, &data, REQ_OP_READ, 0); if (res) goto out; diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index 755bf30ba1ce..11854dd84572 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -220,7 +220,8 @@ static int hfsplus_sync_fs(struct super_block *sb, int wait) error2 = hfsplus_submit_bio(sb, sbi->part_start + HFSPLUS_VOLHEAD_SECTOR, - sbi->s_vhdr_buf, NULL, WRITE_SYNC); + sbi->s_vhdr_buf, NULL, REQ_OP_WRITE, + WRITE_SYNC); if (!error) error = error2; if (!write_backup) @@ -228,7 +229,8 @@ static int hfsplus_sync_fs(struct super_block *sb, int wait) error2 = hfsplus_submit_bio(sb, sbi->part_start + sbi->sect_count - 2, - sbi->s_backup_vhdr_buf, NULL, WRITE_SYNC); + sbi->s_backup_vhdr_buf, NULL, REQ_OP_WRITE, + WRITE_SYNC); if (!error) error2 = error; out: diff --git a/fs/hfsplus/wrapper.c b/fs/hfsplus/wrapper.c index cc6235671437..ebb85e5f6549 100644 --- a/fs/hfsplus/wrapper.c +++ b/fs/hfsplus/wrapper.c @@ -30,7 +30,8 @@ struct hfsplus_wd { * @sector: block to read or write, for blocks of HFSPLUS_SECTOR_SIZE bytes * @buf: buffer for I/O * @data: output pointer for location of requested data - * @rw: direction of I/O + * @op: direction of I/O + * @op_flags: request op flags * * The unit of I/O is hfsplus_min_io_size(sb), which may be bigger than * HFSPLUS_SECTOR_SIZE, and @buf must be sized accordingly. On reads @@ -44,7 +45,7 @@ struct hfsplus_wd { * will work correctly. */ int hfsplus_submit_bio(struct super_block *sb, sector_t sector, - void *buf, void **data, int rw) + void *buf, void **data, int op, int op_flags) { struct bio *bio; int ret = 0; @@ -65,8 +66,9 @@ int hfsplus_submit_bio(struct super_block *sb, sector_t sector, bio = bio_alloc(GFP_NOIO, 1); bio->bi_iter.bi_sector = sector; bio->bi_bdev = sb->s_bdev; + bio_set_op_attrs(bio, op, op_flags); - if (!(rw & WRITE) && data) + if (op != WRITE && data) *data = (u8 *)buf + offset; while (io_size > 0) { @@ -83,7 +85,7 @@ int hfsplus_submit_bio(struct super_block *sb, sector_t sector, buf = (u8 *)buf + len; } - ret = submit_bio_wait(rw, bio); + ret = submit_bio_wait(bio); out: bio_put(bio); return ret < 0 ? ret : 0; @@ -181,7 +183,7 @@ int hfsplus_read_wrapper(struct super_block *sb) reread: error = hfsplus_submit_bio(sb, part_start + HFSPLUS_VOLHEAD_SECTOR, sbi->s_vhdr_buf, (void **)&sbi->s_vhdr, - READ); + REQ_OP_READ, 0); if (error) goto out_free_backup_vhdr; @@ -213,7 +215,8 @@ reread: error = hfsplus_submit_bio(sb, part_start + part_size - 2, sbi->s_backup_vhdr_buf, - (void **)&sbi->s_backup_vhdr, READ); + (void **)&sbi->s_backup_vhdr, REQ_OP_READ, + 0); if (error) goto out_free_backup_vhdr; diff --git a/fs/internal.h b/fs/internal.h index b71deeecea17..f57ced528cde 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -130,6 +130,7 @@ extern int invalidate_inodes(struct super_block *, bool); extern struct dentry *__d_alloc(struct super_block *, const struct qstr *); extern int d_set_mounted(struct dentry *dentry); extern long prune_dcache_sb(struct super_block *sb, struct shrink_control *sc); +extern struct dentry *d_alloc_cursor(struct dentry *); /* * read_write.c diff --git a/fs/isofs/compress.c b/fs/isofs/compress.c index 2e4e834d1a98..2ce5b75ee9a5 100644 --- a/fs/isofs/compress.c +++ b/fs/isofs/compress.c @@ -81,7 +81,7 @@ static loff_t zisofs_uncompress_block(struct inode *inode, loff_t block_start, blocknum = block_start >> bufshift; memset(bhs, 0, (needblocks + 1) * sizeof(struct buffer_head *)); haveblocks = isofs_get_blocks(inode, blocknum, bhs, needblocks); - ll_rw_block(READ, haveblocks, bhs); + ll_rw_block(REQ_OP_READ, 0, haveblocks, bhs); curbh = 0; curpage = 0; diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 78313adb3c95..5bb565f9989c 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -155,9 +155,9 @@ static int journal_submit_commit_record(journal_t *journal, if (journal->j_flags & JBD2_BARRIER && !jbd2_has_feature_async_commit(journal)) - ret = submit_bh(WRITE_SYNC | WRITE_FLUSH_FUA, bh); + ret = submit_bh(REQ_OP_WRITE, WRITE_SYNC | WRITE_FLUSH_FUA, bh); else - ret = submit_bh(WRITE_SYNC, bh); + ret = submit_bh(REQ_OP_WRITE, WRITE_SYNC, bh); *cbh = bh; return ret; @@ -718,7 +718,7 @@ start_journal_io: clear_buffer_dirty(bh); set_buffer_uptodate(bh); bh->b_end_io = journal_end_buffer_io_sync; - submit_bh(WRITE_SYNC, bh); + submit_bh(REQ_OP_WRITE, WRITE_SYNC, bh); } cond_resched(); stats.run.rs_blocks_logged += bufs; diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index fc1d7a39b082..46261a6f902d 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -1351,15 +1351,15 @@ static int journal_reset(journal_t *journal) return jbd2_journal_start_thread(journal); } -static int jbd2_write_superblock(journal_t *journal, int write_op) +static int jbd2_write_superblock(journal_t *journal, int write_flags) { struct buffer_head *bh = journal->j_sb_buffer; journal_superblock_t *sb = journal->j_superblock; int ret; - trace_jbd2_write_superblock(journal, write_op); + trace_jbd2_write_superblock(journal, write_flags); if (!(journal->j_flags & JBD2_BARRIER)) - write_op &= ~(REQ_FUA | REQ_FLUSH); + write_flags &= ~(REQ_FUA | REQ_PREFLUSH); lock_buffer(bh); if (buffer_write_io_error(bh)) { /* @@ -1379,7 +1379,7 @@ static int jbd2_write_superblock(journal_t *journal, int write_op) jbd2_superblock_csum_set(journal, sb); get_bh(bh); bh->b_end_io = end_buffer_write_sync; - ret = submit_bh(write_op, bh); + ret = submit_bh(REQ_OP_WRITE, write_flags, bh); wait_on_buffer(bh); if (buffer_write_io_error(bh)) { clear_buffer_write_io_error(bh); @@ -1503,7 +1503,7 @@ static int journal_get_superblock(journal_t *journal) J_ASSERT(bh != NULL); if (!buffer_uptodate(bh)) { - ll_rw_block(READ, 1, &bh); + ll_rw_block(REQ_OP_READ, 0, 1, &bh); wait_on_buffer(bh); if (!buffer_uptodate(bh)) { printk(KERN_ERR @@ -2334,18 +2334,10 @@ void *jbd2_alloc(size_t size, gfp_t flags) BUG_ON(size & (size-1)); /* Must be a power of 2 */ - flags |= __GFP_REPEAT; - if (size == PAGE_SIZE) - ptr = (void *)__get_free_pages(flags, 0); - else if (size > PAGE_SIZE) { - int order = get_order(size); - - if (order < 3) - ptr = (void *)__get_free_pages(flags, order); - else - ptr = vmalloc(size); - } else + if (size < PAGE_SIZE) ptr = kmem_cache_alloc(get_slab(size), flags); + else + ptr = (void *)__get_free_pages(flags, get_order(size)); /* Check alignment; SLUB has gotten this wrong in the past, * and this can lead to user data corruption! */ @@ -2356,20 +2348,10 @@ void *jbd2_alloc(size_t size, gfp_t flags) void jbd2_free(void *ptr, size_t size) { - if (size == PAGE_SIZE) { - free_pages((unsigned long)ptr, 0); - return; - } - if (size > PAGE_SIZE) { - int order = get_order(size); - - if (order < 3) - free_pages((unsigned long)ptr, order); - else - vfree(ptr); - return; - } - kmem_cache_free(get_slab(size), ptr); + if (size < PAGE_SIZE) + kmem_cache_free(get_slab(size), ptr); + else + free_pages((unsigned long)ptr, get_order(size)); }; /* diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c index 805bc6bcd8ab..02dd3360cb20 100644 --- a/fs/jbd2/recovery.c +++ b/fs/jbd2/recovery.c @@ -104,7 +104,7 @@ static int do_readahead(journal_t *journal, unsigned int start) if (!buffer_uptodate(bh) && !buffer_locked(bh)) { bufs[nbufs++] = bh; if (nbufs == MAXBUF) { - ll_rw_block(READ, nbufs, bufs); + ll_rw_block(REQ_OP_READ, 0, nbufs, bufs); journal_brelse_array(bufs, nbufs); nbufs = 0; } @@ -113,7 +113,7 @@ static int do_readahead(journal_t *journal, unsigned int start) } if (nbufs) - ll_rw_block(READ, nbufs, bufs); + ll_rw_block(REQ_OP_READ, 0, nbufs, bufs); err = 0; failed: diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c index 63759d723920..a74752146ec9 100644 --- a/fs/jfs/jfs_logmgr.c +++ b/fs/jfs/jfs_logmgr.c @@ -2002,12 +2002,13 @@ static int lbmRead(struct jfs_log * log, int pn, struct lbuf ** bpp) bio->bi_end_io = lbmIODone; bio->bi_private = bp; + bio_set_op_attrs(bio, REQ_OP_READ, READ_SYNC); /*check if journaling to disk has been disabled*/ if (log->no_integrity) { bio->bi_iter.bi_size = 0; lbmIODone(bio); } else { - submit_bio(READ_SYNC, bio); + submit_bio(bio); } wait_event(bp->l_ioevent, (bp->l_flag != lbmREAD)); @@ -2145,13 +2146,14 @@ static void lbmStartIO(struct lbuf * bp) bio->bi_end_io = lbmIODone; bio->bi_private = bp; + bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_SYNC); /* check if journaling to disk has been disabled */ if (log->no_integrity) { bio->bi_iter.bi_size = 0; lbmIODone(bio); } else { - submit_bio(WRITE_SYNC, bio); + submit_bio(bio); INCREMENT(lmStat.submitted); } } diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c index b60e015cc757..e7fa9e513040 100644 --- a/fs/jfs/jfs_metapage.c +++ b/fs/jfs/jfs_metapage.c @@ -411,7 +411,7 @@ static int metapage_writepage(struct page *page, struct writeback_control *wbc) inc_io(page); if (!bio->bi_iter.bi_size) goto dump_bio; - submit_bio(WRITE, bio); + submit_bio(bio); nr_underway++; bio = NULL; } else @@ -434,6 +434,7 @@ static int metapage_writepage(struct page *page, struct writeback_control *wbc) bio->bi_iter.bi_sector = pblock << (inode->i_blkbits - 9); bio->bi_end_io = metapage_write_end_io; bio->bi_private = page; + bio_set_op_attrs(bio, REQ_OP_WRITE, 0); /* Don't call bio_add_page yet, we may add to this vec */ bio_offset = offset; @@ -448,7 +449,7 @@ static int metapage_writepage(struct page *page, struct writeback_control *wbc) if (!bio->bi_iter.bi_size) goto dump_bio; - submit_bio(WRITE, bio); + submit_bio(bio); nr_underway++; } if (redirty) @@ -506,7 +507,7 @@ static int metapage_readpage(struct file *fp, struct page *page) insert_metapage(page, NULL); inc_io(page); if (bio) - submit_bio(READ, bio); + submit_bio(bio); bio = bio_alloc(GFP_NOFS, 1); bio->bi_bdev = inode->i_sb->s_bdev; @@ -514,6 +515,7 @@ static int metapage_readpage(struct file *fp, struct page *page) pblock << (inode->i_blkbits - 9); bio->bi_end_io = metapage_read_end_io; bio->bi_private = page; + bio_set_op_attrs(bio, REQ_OP_READ, 0); len = xlen << inode->i_blkbits; offset = block_offset << inode->i_blkbits; if (bio_add_page(bio, page, len, offset) < len) @@ -523,7 +525,7 @@ static int metapage_readpage(struct file *fp, struct page *page) block_offset++; } if (bio) - submit_bio(READ, bio); + submit_bio(bio); else unlock_page(page); diff --git a/fs/libfs.c b/fs/libfs.c index 3db2721144c2..74dc8b9e7f53 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -71,9 +71,7 @@ EXPORT_SYMBOL(simple_lookup); int dcache_dir_open(struct inode *inode, struct file *file) { - static struct qstr cursor_name = QSTR_INIT(".", 1); - - file->private_data = d_alloc(file->f_path.dentry, &cursor_name); + file->private_data = d_alloc_cursor(file->f_path.dentry); return file->private_data ? 0 : -ENOMEM; } @@ -86,6 +84,61 @@ int dcache_dir_close(struct inode *inode, struct file *file) } EXPORT_SYMBOL(dcache_dir_close); +/* parent is locked at least shared */ +static struct dentry *next_positive(struct dentry *parent, + struct list_head *from, + int count) +{ + unsigned *seq = &parent->d_inode->i_dir_seq, n; + struct dentry *res; + struct list_head *p; + bool skipped; + int i; + +retry: + i = count; + skipped = false; + n = smp_load_acquire(seq) & ~1; + res = NULL; + rcu_read_lock(); + for (p = from->next; p != &parent->d_subdirs; p = p->next) { + struct dentry *d = list_entry(p, struct dentry, d_child); + if (!simple_positive(d)) { + skipped = true; + } else if (!--i) { + res = d; + break; + } + } + rcu_read_unlock(); + if (skipped) { + smp_rmb(); + if (unlikely(*seq != n)) + goto retry; + } + return res; +} + +static void move_cursor(struct dentry *cursor, struct list_head *after) +{ + struct dentry *parent = cursor->d_parent; + unsigned n, *seq = &parent->d_inode->i_dir_seq; + spin_lock(&parent->d_lock); + for (;;) { + n = *seq; + if (!(n & 1) && cmpxchg(seq, n, n + 1) == n) + break; + cpu_relax(); + } + __list_del(cursor->d_child.prev, cursor->d_child.next); + if (after) + list_add(&cursor->d_child, after); + else + list_add_tail(&cursor->d_child, &parent->d_subdirs); + smp_store_release(seq, n + 2); + spin_unlock(&parent->d_lock); +} + loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence) { struct dentry *dentry = file->f_path.dentry; @@ -101,25 +154,14 @@ loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence) if (offset != file->f_pos) { file->f_pos = offset; if (file->f_pos >= 2) { - struct list_head *p; struct dentry *cursor = file->private_data; + struct dentry *to; loff_t n = file->f_pos - 2; - spin_lock(&dentry->d_lock); - /* d_lock not required for cursor */ - list_del(&cursor->d_child); - p = dentry->d_subdirs.next; - while (n && p != &dentry->d_subdirs) { - struct dentry *next; - next = list_entry(p, struct dentry, d_child); - spin_lock_nested(&next->d_lock, DENTRY_D_LOCK_NESTED); - if (simple_positive(next)) - n--; - spin_unlock(&next->d_lock); - p = p->next; - } - list_add_tail(&cursor->d_child, p); - spin_unlock(&dentry->d_lock); + inode_lock_shared(dentry->d_inode); + to = next_positive(dentry, &dentry->d_subdirs, n); + move_cursor(cursor, to ? &to->d_child : NULL); + inode_unlock_shared(dentry->d_inode); } } return offset; @@ -142,36 +184,25 @@ int dcache_readdir(struct file *file, struct dir_context *ctx) { struct dentry *dentry = file->f_path.dentry; struct dentry *cursor = file->private_data; - struct list_head *p, *q = &cursor->d_child; + struct list_head *p = &cursor->d_child; + struct dentry *next; + bool moved = false; if (!dir_emit_dots(file, ctx)) return 0; - spin_lock(&dentry->d_lock); - if (ctx->pos == 2) - list_move(q, &dentry->d_subdirs); - for (p = q->next; p != &dentry->d_subdirs; p = p->next) { - struct dentry *next = list_entry(p, struct dentry, d_child); - spin_lock_nested(&next->d_lock, DENTRY_D_LOCK_NESTED); - if (!simple_positive(next)) { - spin_unlock(&next->d_lock); - continue; - } - - spin_unlock(&next->d_lock); - spin_unlock(&dentry->d_lock); + if (ctx->pos == 2) + p = &dentry->d_subdirs; + while ((next = next_positive(dentry, p, 1)) != NULL) { if (!dir_emit(ctx, next->d_name.name, next->d_name.len, d_inode(next)->i_ino, dt_type(d_inode(next)))) - return 0; - spin_lock(&dentry->d_lock); - spin_lock_nested(&next->d_lock, DENTRY_D_LOCK_NESTED); - /* next is still alive */ - list_move(q, p); - spin_unlock(&next->d_lock); - p = q; + break; + moved = true; + p = &next->d_child; ctx->pos++; } - spin_unlock(&dentry->d_lock); + if (moved) + move_cursor(cursor, p); return 0; } EXPORT_SYMBOL(dcache_readdir); diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index 154a107cd376..fc4084ef4736 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -335,12 +335,17 @@ static struct notifier_block lockd_inet6addr_notifier = { }; #endif -static void lockd_svc_exit_thread(void) +static void lockd_unregister_notifiers(void) { unregister_inetaddr_notifier(&lockd_inetaddr_notifier); #if IS_ENABLED(CONFIG_IPV6) unregister_inet6addr_notifier(&lockd_inet6addr_notifier); #endif +} + +static void lockd_svc_exit_thread(void) +{ + lockd_unregister_notifiers(); svc_exit_thread(nlmsvc_rqst); } @@ -462,7 +467,7 @@ int lockd_up(struct net *net) * Note: svc_serv structures have an initial use count of 1, * so we exit through here on both success and failure. */ -err_net: +err_put: svc_destroy(serv); err_create: mutex_unlock(&nlmsvc_mutex); @@ -470,7 +475,9 @@ err_create: err_start: lockd_down_net(serv, net); - goto err_net; +err_net: + lockd_unregister_notifiers(); + goto err_put; } EXPORT_SYMBOL_GPL(lockd_up); diff --git a/fs/locks.c b/fs/locks.c index 7c5f91be9b65..ee1b15f6fc13 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -1628,7 +1628,7 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr { struct file_lock *fl, *my_fl = NULL, *lease; struct dentry *dentry = filp->f_path.dentry; - struct inode *inode = dentry->d_inode; + struct inode *inode = file_inode(filp); struct file_lock_context *ctx; bool is_deleg = (*flp)->fl_flags & FL_DELEG; int error; diff --git a/fs/logfs/dev_bdev.c b/fs/logfs/dev_bdev.c index cc26f8f215f5..a8329cc47dec 100644 --- a/fs/logfs/dev_bdev.c +++ b/fs/logfs/dev_bdev.c @@ -14,7 +14,7 @@ #define PAGE_OFS(ofs) ((ofs) & (PAGE_SIZE-1)) -static int sync_request(struct page *page, struct block_device *bdev, int rw) +static int sync_request(struct page *page, struct block_device *bdev, int op) { struct bio bio; struct bio_vec bio_vec; @@ -29,8 +29,9 @@ static int sync_request(struct page *page, struct block_device *bdev, int rw) bio.bi_bdev = bdev; bio.bi_iter.bi_sector = page->index * (PAGE_SIZE >> 9); bio.bi_iter.bi_size = PAGE_SIZE; + bio_set_op_attrs(&bio, op, 0); - return submit_bio_wait(rw, &bio); + return submit_bio_wait(&bio); } static int bdev_readpage(void *_sb, struct page *page) @@ -95,8 +96,9 @@ static int __bdev_writeseg(struct super_block *sb, u64 ofs, pgoff_t index, bio->bi_iter.bi_sector = ofs >> 9; bio->bi_private = sb; bio->bi_end_io = writeseg_end_io; + bio_set_op_attrs(bio, REQ_OP_WRITE, 0); atomic_inc(&super->s_pending_writes); - submit_bio(WRITE, bio); + submit_bio(bio); ofs += i * PAGE_SIZE; index += i; @@ -122,8 +124,9 @@ static int __bdev_writeseg(struct super_block *sb, u64 ofs, pgoff_t index, bio->bi_iter.bi_sector = ofs >> 9; bio->bi_private = sb; bio->bi_end_io = writeseg_end_io; + bio_set_op_attrs(bio, REQ_OP_WRITE, 0); atomic_inc(&super->s_pending_writes); - submit_bio(WRITE, bio); + submit_bio(bio); return 0; } @@ -185,8 +188,9 @@ static int do_erase(struct super_block *sb, u64 ofs, pgoff_t index, bio->bi_iter.bi_sector = ofs >> 9; bio->bi_private = sb; bio->bi_end_io = erase_end_io; + bio_set_op_attrs(bio, REQ_OP_WRITE, 0); atomic_inc(&super->s_pending_writes); - submit_bio(WRITE, bio); + submit_bio(bio); ofs += i * PAGE_SIZE; index += i; @@ -206,8 +210,9 @@ static int do_erase(struct super_block *sb, u64 ofs, pgoff_t index, bio->bi_iter.bi_sector = ofs >> 9; bio->bi_private = sb; bio->bi_end_io = erase_end_io; + bio_set_op_attrs(bio, REQ_OP_WRITE, 0); atomic_inc(&super->s_pending_writes); - submit_bio(WRITE, bio); + submit_bio(bio); return 0; } diff --git a/fs/mpage.c b/fs/mpage.c index eedc644b78d7..37b28280ad04 100644 --- a/fs/mpage.c +++ b/fs/mpage.c @@ -56,11 +56,12 @@ static void mpage_end_io(struct bio *bio) bio_put(bio); } -static struct bio *mpage_bio_submit(int rw, struct bio *bio) +static struct bio *mpage_bio_submit(int op, int op_flags, struct bio *bio) { bio->bi_end_io = mpage_end_io; - guard_bio_eod(rw, bio); - submit_bio(rw, bio); + bio_set_op_attrs(bio, op, op_flags); + guard_bio_eod(op, bio); + submit_bio(bio); return NULL; } @@ -269,7 +270,7 @@ do_mpage_readpage(struct bio *bio, struct page *page, unsigned nr_pages, * This page will go to BIO. Do we need to send this BIO off first? */ if (bio && (*last_block_in_bio != blocks[0] - 1)) - bio = mpage_bio_submit(READ, bio); + bio = mpage_bio_submit(REQ_OP_READ, 0, bio); alloc_new: if (bio == NULL) { @@ -286,7 +287,7 @@ alloc_new: length = first_hole << blkbits; if (bio_add_page(bio, page, length, 0) < length) { - bio = mpage_bio_submit(READ, bio); + bio = mpage_bio_submit(REQ_OP_READ, 0, bio); goto alloc_new; } @@ -294,7 +295,7 @@ alloc_new: nblocks = map_bh->b_size >> blkbits; if ((buffer_boundary(map_bh) && relative_block == nblocks) || (first_hole != blocks_per_page)) - bio = mpage_bio_submit(READ, bio); + bio = mpage_bio_submit(REQ_OP_READ, 0, bio); else *last_block_in_bio = blocks[blocks_per_page - 1]; out: @@ -302,7 +303,7 @@ out: confused: if (bio) - bio = mpage_bio_submit(READ, bio); + bio = mpage_bio_submit(REQ_OP_READ, 0, bio); if (!PageUptodate(page)) block_read_full_page(page, get_block); else @@ -384,7 +385,7 @@ mpage_readpages(struct address_space *mapping, struct list_head *pages, } BUG_ON(!list_empty(pages)); if (bio) - mpage_bio_submit(READ, bio); + mpage_bio_submit(REQ_OP_READ, 0, bio); return 0; } EXPORT_SYMBOL(mpage_readpages); @@ -405,7 +406,7 @@ int mpage_readpage(struct page *page, get_block_t get_block) bio = do_mpage_readpage(bio, page, 1, &last_block_in_bio, &map_bh, &first_logical_block, get_block, gfp); if (bio) - mpage_bio_submit(READ, bio); + mpage_bio_submit(REQ_OP_READ, 0, bio); return 0; } EXPORT_SYMBOL(mpage_readpage); @@ -486,7 +487,7 @@ static int __mpage_writepage(struct page *page, struct writeback_control *wbc, struct buffer_head map_bh; loff_t i_size = i_size_read(inode); int ret = 0; - int wr = (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE); + int op_flags = (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : 0); if (page_has_buffers(page)) { struct buffer_head *head = page_buffers(page); @@ -595,7 +596,7 @@ page_is_mapped: * This page will go to BIO. Do we need to send this BIO off first? */ if (bio && mpd->last_block_in_bio != blocks[0] - 1) - bio = mpage_bio_submit(wr, bio); + bio = mpage_bio_submit(REQ_OP_WRITE, op_flags, bio); alloc_new: if (bio == NULL) { @@ -622,7 +623,7 @@ alloc_new: wbc_account_io(wbc, page, PAGE_SIZE); length = first_unmapped << blkbits; if (bio_add_page(bio, page, length, 0) < length) { - bio = mpage_bio_submit(wr, bio); + bio = mpage_bio_submit(REQ_OP_WRITE, op_flags, bio); goto alloc_new; } @@ -632,7 +633,7 @@ alloc_new: set_page_writeback(page); unlock_page(page); if (boundary || (first_unmapped != blocks_per_page)) { - bio = mpage_bio_submit(wr, bio); + bio = mpage_bio_submit(REQ_OP_WRITE, op_flags, bio); if (boundary_block) { write_boundary_block(boundary_bdev, boundary_block, 1 << blkbits); @@ -644,7 +645,7 @@ alloc_new: confused: if (bio) - bio = mpage_bio_submit(wr, bio); + bio = mpage_bio_submit(REQ_OP_WRITE, op_flags, bio); if (mpd->use_writepage) { ret = mapping->a_ops->writepage(page, wbc); @@ -701,9 +702,9 @@ mpage_writepages(struct address_space *mapping, ret = write_cache_pages(mapping, wbc, __mpage_writepage, &mpd); if (mpd.bio) { - int wr = (wbc->sync_mode == WB_SYNC_ALL ? - WRITE_SYNC : WRITE); - mpage_bio_submit(wr, mpd.bio); + int op_flags = (wbc->sync_mode == WB_SYNC_ALL ? + WRITE_SYNC : 0); + mpage_bio_submit(REQ_OP_WRITE, op_flags, mpd.bio); } } blk_finish_plug(&plug); @@ -722,9 +723,9 @@ int mpage_writepage(struct page *page, get_block_t get_block, }; int ret = __mpage_writepage(page, wbc, &mpd); if (mpd.bio) { - int wr = (wbc->sync_mode == WB_SYNC_ALL ? - WRITE_SYNC : WRITE); - mpage_bio_submit(wr, mpd.bio); + int op_flags = (wbc->sync_mode == WB_SYNC_ALL ? + WRITE_SYNC : 0); + mpage_bio_submit(REQ_OP_WRITE, op_flags, mpd.bio); } return ret; } diff --git a/fs/namei.c b/fs/namei.c index 4c4f95ac8aa5..70580ab1445c 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1416,21 +1416,28 @@ static void follow_mount(struct path *path) } } +static int path_parent_directory(struct path *path) +{ + struct dentry *old = path->dentry; + /* rare case of legitimate dget_parent()... */ + path->dentry = dget_parent(path->dentry); + dput(old); + if (unlikely(!path_connected(path))) + return -ENOENT; + return 0; +} + static int follow_dotdot(struct nameidata *nd) { while(1) { - struct dentry *old = nd->path.dentry; - if (nd->path.dentry == nd->root.dentry && nd->path.mnt == nd->root.mnt) { break; } if (nd->path.dentry != nd->path.mnt->mnt_root) { - /* rare case of legitimate dget_parent()... */ - nd->path.dentry = dget_parent(nd->path.dentry); - dput(old); - if (unlikely(!path_connected(&nd->path))) - return -ENOENT; + int ret = path_parent_directory(&nd->path); + if (ret) + return ret; break; } if (!follow_up(&nd->path)) @@ -2514,6 +2521,34 @@ struct dentry *lookup_one_len_unlocked(const char *name, } EXPORT_SYMBOL(lookup_one_len_unlocked); +#ifdef CONFIG_UNIX98_PTYS +int path_pts(struct path *path) +{ + /* Find something mounted on "pts" in the same directory as + * the input path. + */ + struct dentry *child, *parent; + struct qstr this; + int ret; + + ret = path_parent_directory(path); + if (ret) + return ret; + + parent = path->dentry; + this.name = "pts"; + this.len = 3; + child = d_hash_and_lookup(parent, &this); + if (!child) + return -ENOENT; + + path->dentry = child; + dput(parent); + follow_mount(path); + return 0; +} +#endif + int user_path_at_empty(int dfd, const char __user *name, unsigned flags, struct path *path, int *empty) { @@ -2995,9 +3030,13 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry, } if (*opened & FILE_CREATED) fsnotify_create(dir, dentry); - path->dentry = dentry; - path->mnt = nd->path.mnt; - return 1; + if (unlikely(d_is_negative(dentry))) { + error = -ENOENT; + } else { + path->dentry = dentry; + path->mnt = nd->path.mnt; + return 1; + } } } dput(dentry); @@ -3166,9 +3205,7 @@ static int do_last(struct nameidata *nd, int acc_mode = op->acc_mode; unsigned seq; struct inode *inode; - struct path save_parent = { .dentry = NULL, .mnt = NULL }; struct path path; - bool retried = false; int error; nd->flags &= ~LOOKUP_PARENT; @@ -3211,7 +3248,6 @@ static int do_last(struct nameidata *nd, return -EISDIR; } -retry_lookup: if (open_flag & (O_CREAT | O_TRUNC | O_WRONLY | O_RDWR)) { error = mnt_want_write(nd->path.mnt); if (!error) @@ -3263,6 +3299,10 @@ retry_lookup: got_write = false; } + error = follow_managed(&path, nd); + if (unlikely(error < 0)) + return error; + if (unlikely(d_is_negative(path.dentry))) { path_to_nameidata(&path, nd); return -ENOENT; @@ -3278,10 +3318,6 @@ retry_lookup: return -EEXIST; } - error = follow_managed(&path, nd); - if (unlikely(error < 0)) - return error; - seq = 0; /* out of RCU mode, so the value doesn't matter */ inode = d_backing_inode(path.dentry); finish_lookup: @@ -3292,23 +3328,14 @@ finish_lookup: if (unlikely(error)) return error; - if ((nd->flags & LOOKUP_RCU) || nd->path.mnt != path.mnt) { - path_to_nameidata(&path, nd); - } else { - save_parent.dentry = nd->path.dentry; - save_parent.mnt = mntget(path.mnt); - nd->path.dentry = path.dentry; - - } + path_to_nameidata(&path, nd); nd->inode = inode; nd->seq = seq; /* Why this, you ask? _Now_ we might have grown LOOKUP_JUMPED... */ finish_open: error = complete_walk(nd); - if (error) { - path_put(&save_parent); + if (error) return error; - } audit_inode(nd->name, nd->path.dentry, 0); error = -EISDIR; if ((open_flag & O_CREAT) && d_is_dir(nd->path.dentry)) @@ -3331,13 +3358,9 @@ finish_open_created: goto out; BUG_ON(*opened & FILE_OPENED); /* once it's opened, it's opened */ error = vfs_open(&nd->path, file, current_cred()); - if (!error) { - *opened |= FILE_OPENED; - } else { - if (error == -EOPENSTALE) - goto stale_open; + if (error) goto out; - } + *opened |= FILE_OPENED; opened: error = open_check_o_direct(file); if (!error) @@ -3353,26 +3376,7 @@ out: } if (got_write) mnt_drop_write(nd->path.mnt); - path_put(&save_parent); return error; - -stale_open: - /* If no saved parent or already retried then can't retry */ - if (!save_parent.dentry || retried) - goto out; - - BUG_ON(save_parent.dentry != dir); - path_put(&nd->path); - nd->path = save_parent; - nd->inode = dir->d_inode; - save_parent.mnt = NULL; - save_parent.dentry = NULL; - if (got_write) { - mnt_drop_write(nd->path.mnt); - got_write = false; - } - retried = true; - goto retry_lookup; } static int do_tmpfile(struct nameidata *nd, unsigned flags, diff --git a/fs/namespace.c b/fs/namespace.c index 4fb1691b4355..419f746d851d 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1562,6 +1562,7 @@ void __detach_mounts(struct dentry *dentry) goto out_unlock; lock_mount_hash(); + event++; while (!hlist_empty(&mp->m_list)) { mnt = hlist_entry(mp->m_list.first, struct mount, mnt_mp_list); if (mnt->mnt.mnt_flags & MNT_UMOUNT) { @@ -2409,8 +2410,10 @@ static int do_new_mount(struct path *path, const char *fstype, int flags, mnt_flags |= MNT_NODEV | MNT_LOCK_NODEV; } if (type->fs_flags & FS_USERNS_VISIBLE) { - if (!fs_fully_visible(type, &mnt_flags)) + if (!fs_fully_visible(type, &mnt_flags)) { + put_filesystem(type); return -EPERM; + } } } @@ -3245,6 +3248,10 @@ static bool fs_fully_visible(struct file_system_type *type, int *new_mnt_flags) if (mnt->mnt.mnt_sb->s_iflags & SB_I_NOEXEC) mnt_flags &= ~(MNT_LOCK_NOSUID | MNT_LOCK_NOEXEC); + /* Don't miss readonly hidden in the superblock flags */ + if (mnt->mnt.mnt_sb->s_flags & MS_RDONLY) + mnt_flags |= MNT_LOCK_READONLY; + /* Verify the mount flags are equal to or more permissive * than the proposed new mount. */ @@ -3271,7 +3278,7 @@ static bool fs_fully_visible(struct file_system_type *type, int *new_mnt_flags) list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) { struct inode *inode = child->mnt_mountpoint->d_inode; /* Only worry about locked mounts */ - if (!(mnt_flags & MNT_LOCKED)) + if (!(child->mnt.mnt_flags & MNT_LOCKED)) continue; /* Is the directory permanetly empty? */ if (!is_empty_dir_inode(inode)) diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index 17a42e4eb872..f55a4e756047 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -102,14 +102,15 @@ static inline void put_parallel(struct parallel_io *p) } static struct bio * -bl_submit_bio(int rw, struct bio *bio) +bl_submit_bio(struct bio *bio) { if (bio) { get_parallel(bio->bi_private); dprintk("%s submitting %s bio %u@%llu\n", __func__, - rw == READ ? "read" : "write", bio->bi_iter.bi_size, + bio_op(bio) == READ ? "read" : "write", + bio->bi_iter.bi_size, (unsigned long long)bio->bi_iter.bi_sector); - submit_bio(rw, bio); + submit_bio(bio); } return NULL; } @@ -158,7 +159,7 @@ do_add_page_to_bio(struct bio *bio, int npg, int rw, sector_t isect, if (disk_addr < map->start || disk_addr >= map->start + map->len) { if (!dev->map(dev, disk_addr, map)) return ERR_PTR(-EIO); - bio = bl_submit_bio(rw, bio); + bio = bl_submit_bio(bio); } disk_addr += map->disk_offset; disk_addr -= map->start; @@ -174,9 +175,10 @@ retry: disk_addr >> SECTOR_SHIFT, end_io, par); if (!bio) return ERR_PTR(-ENOMEM); + bio_set_op_attrs(bio, rw, 0); } if (bio_add_page(bio, page, *len, offset) < *len) { - bio = bl_submit_bio(rw, bio); + bio = bl_submit_bio(bio); goto retry; } return bio; @@ -252,7 +254,7 @@ bl_read_pagelist(struct nfs_pgio_header *header) for (i = pg_index; i < header->page_array.npages; i++) { if (extent_length <= 0) { /* We've used up the previous extent */ - bio = bl_submit_bio(READ, bio); + bio = bl_submit_bio(bio); /* Get the next one */ if (!ext_tree_lookup(bl, isect, &be, false)) { @@ -273,7 +275,7 @@ bl_read_pagelist(struct nfs_pgio_header *header) } if (is_hole(&be)) { - bio = bl_submit_bio(READ, bio); + bio = bl_submit_bio(bio); /* Fill hole w/ zeroes w/o accessing device */ dprintk("%s Zeroing page for hole\n", __func__); zero_user_segment(pages[i], pg_offset, pg_len); @@ -306,7 +308,7 @@ bl_read_pagelist(struct nfs_pgio_header *header) header->res.count = (isect << SECTOR_SHIFT) - header->args.offset; } out: - bl_submit_bio(READ, bio); + bl_submit_bio(bio); blk_finish_plug(&plug); put_parallel(par); return PNFS_ATTEMPTED; @@ -398,7 +400,7 @@ bl_write_pagelist(struct nfs_pgio_header *header, int sync) for (i = pg_index; i < header->page_array.npages; i++) { if (extent_length <= 0) { /* We've used up the previous extent */ - bio = bl_submit_bio(WRITE, bio); + bio = bl_submit_bio(bio); /* Get the next one */ if (!ext_tree_lookup(bl, isect, &be, true)) { header->pnfs_error = -EINVAL; @@ -427,7 +429,7 @@ bl_write_pagelist(struct nfs_pgio_header *header, int sync) header->res.count = header->args.count; out: - bl_submit_bio(WRITE, bio); + bl_submit_bio(bio); blk_finish_plug(&plug); put_parallel(par); return PNFS_ATTEMPTED; diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index aaf7bd0cbae2..19d93d0cd400 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -424,12 +424,17 @@ static int xdr_decode(nfs_readdir_descriptor_t *desc, static int nfs_same_file(struct dentry *dentry, struct nfs_entry *entry) { + struct inode *inode; struct nfs_inode *nfsi; if (d_really_is_negative(dentry)) return 0; - nfsi = NFS_I(d_inode(dentry)); + inode = d_inode(dentry); + if (is_bad_inode(inode) || NFS_STALE(inode)) + return 0; + + nfsi = NFS_I(inode); if (entry->fattr->fileid == nfsi->fileid) return 1; if (nfs_compare_fh(entry->fh, &nfsi->fh) == 0) @@ -1363,7 +1368,6 @@ EXPORT_SYMBOL_GPL(nfs_dentry_operations); struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags) { struct dentry *res; - struct dentry *parent; struct inode *inode = NULL; struct nfs_fh *fhandle = NULL; struct nfs_fattr *fattr = NULL; @@ -1393,7 +1397,6 @@ struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned in if (IS_ERR(label)) goto out; - parent = dentry->d_parent; /* Protect against concurrent sillydeletes */ trace_nfs_lookup_enter(dir, dentry, flags); error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr, label); @@ -1482,11 +1485,13 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry, struct file *file, unsigned open_flags, umode_t mode, int *opened) { + DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); struct nfs_open_context *ctx; struct dentry *res; struct iattr attr = { .ia_valid = ATTR_OPEN }; struct inode *inode; unsigned int lookup_flags = 0; + bool switched = false; int err; /* Expect a negative dentry */ @@ -1501,7 +1506,7 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry, /* NFS only supports OPEN on regular files */ if ((open_flags & O_DIRECTORY)) { - if (!d_unhashed(dentry)) { + if (!d_in_lookup(dentry)) { /* * Hashed negative dentry with O_DIRECTORY: dentry was * revalidated and is fine, no need to perform lookup @@ -1525,6 +1530,17 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry, attr.ia_size = 0; } + if (!(open_flags & O_CREAT) && !d_in_lookup(dentry)) { + d_drop(dentry); + switched = true; + dentry = d_alloc_parallel(dentry->d_parent, + &dentry->d_name, &wq); + if (IS_ERR(dentry)) + return PTR_ERR(dentry); + if (unlikely(!d_in_lookup(dentry))) + return finish_no_open(file, dentry); + } + ctx = create_nfs_open_context(dentry, open_flags); err = PTR_ERR(ctx); if (IS_ERR(ctx)) @@ -1536,9 +1552,9 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry, err = PTR_ERR(inode); trace_nfs_atomic_open_exit(dir, ctx, open_flags, err); put_nfs_open_context(ctx); + d_drop(dentry); switch (err) { case -ENOENT: - d_drop(dentry); d_add(dentry, NULL); nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); break; @@ -1560,14 +1576,23 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry, trace_nfs_atomic_open_exit(dir, ctx, open_flags, err); put_nfs_open_context(ctx); out: + if (unlikely(switched)) { + d_lookup_done(dentry); + dput(dentry); + } return err; no_open: res = nfs_lookup(dir, dentry, lookup_flags); - err = PTR_ERR(res); + if (switched) { + d_lookup_done(dentry); + if (!res) + res = dentry; + else + dput(dentry); + } if (IS_ERR(res)) - goto out; - + return PTR_ERR(res); return finish_no_open(file, res); } EXPORT_SYMBOL_GPL(nfs_atomic_open); diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 979b3c4dee6a..c7326c2af2c3 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -353,10 +353,12 @@ static ssize_t nfs_direct_wait(struct nfs_direct_req *dreq) result = wait_for_completion_killable(&dreq->completion); + if (!result) { + result = dreq->count; + WARN_ON_ONCE(dreq->count < 0); + } if (!result) result = dreq->error; - if (!result) - result = dreq->count; out: return (ssize_t) result; @@ -386,8 +388,10 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq, bool write) if (dreq->iocb) { long res = (long) dreq->error; - if (!res) + if (dreq->count != 0) { res = (long) dreq->count; + WARN_ON_ONCE(dreq->count < 0); + } dreq->iocb->ki_complete(dreq->iocb, res, 0); } diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 52e7d6869e3b..dda689d7a8a7 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -282,6 +282,7 @@ nfs_init_locked(struct inode *inode, void *opaque) struct nfs_fattr *fattr = desc->fattr; set_nfs_fileid(inode, fattr->fileid); + inode->i_mode = fattr->mode; nfs_copy_fh(NFS_FH(inode), desc->fh); return 0; } diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index de97567795a5..ff416d0e24bc 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2882,12 +2882,11 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) call_close |= is_wronly; else if (is_wronly) calldata->arg.fmode |= FMODE_WRITE; + if (calldata->arg.fmode != (FMODE_READ|FMODE_WRITE)) + call_close |= is_rdwr; } else if (is_rdwr) calldata->arg.fmode |= FMODE_READ|FMODE_WRITE; - if (calldata->arg.fmode == 0) - call_close |= is_rdwr; - if (!nfs4_valid_open_stateid(state)) call_close = 0; spin_unlock(&state->owner->so_lock); @@ -7924,8 +7923,8 @@ nfs4_layoutget_handle_exception(struct rpc_task *task, break; } lo = NFS_I(inode)->layout; - if (lo && nfs4_stateid_match(&lgp->args.stateid, - &lo->plh_stateid)) { + if (lo && !test_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags) && + nfs4_stateid_match_other(&lgp->args.stateid, &lo->plh_stateid)) { LIST_HEAD(head); /* @@ -7936,10 +7935,10 @@ nfs4_layoutget_handle_exception(struct rpc_task *task, pnfs_mark_matching_lsegs_invalid(lo, &head, NULL, 0); spin_unlock(&inode->i_lock); pnfs_free_lseg_list(&head); + status = -EAGAIN; + goto out; } else spin_unlock(&inode->i_lock); - status = -EAGAIN; - goto out; } status = nfs4_handle_exception(server, status, exception); @@ -8036,7 +8035,10 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, long *timeout, gfp_t gfp_flags) .flags = RPC_TASK_ASYNC, }; struct pnfs_layout_segment *lseg = NULL; - struct nfs4_exception exception = { .timeout = *timeout }; + struct nfs4_exception exception = { + .inode = inode, + .timeout = *timeout, + }; int status = 0; dprintk("--> %s\n", __func__); diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 9679f4749364..834b875900d6 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1488,9 +1488,9 @@ restart: } spin_unlock(&state->state_lock); } - nfs4_put_open_state(state); clear_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags); + nfs4_put_open_state(state); spin_lock(&sp->so_lock); goto restart; } diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 0c7e0d45a4de..0fbe734cc38c 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -361,8 +361,10 @@ pnfs_layout_remove_lseg(struct pnfs_layout_hdr *lo, list_del_init(&lseg->pls_list); /* Matched by pnfs_get_layout_hdr in pnfs_layout_insert_lseg */ atomic_dec(&lo->plh_refcount); - if (list_empty(&lo->plh_segs)) + if (list_empty(&lo->plh_segs)) { + set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags); clear_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags); + } rpc_wake_up(&NFS_SERVER(inode)->roc_rpcwaitq); } @@ -1290,6 +1292,7 @@ alloc_init_layout_hdr(struct inode *ino, INIT_LIST_HEAD(&lo->plh_bulk_destroy); lo->plh_inode = ino; lo->plh_lc_cred = get_rpccred(ctx->cred); + lo->plh_flags |= 1 << NFS_LAYOUT_INVALID_STID; return lo; } @@ -1297,6 +1300,8 @@ static struct pnfs_layout_hdr * pnfs_find_alloc_layout(struct inode *ino, struct nfs_open_context *ctx, gfp_t gfp_flags) + __releases(&ino->i_lock) + __acquires(&ino->i_lock) { struct nfs_inode *nfsi = NFS_I(ino); struct pnfs_layout_hdr *new = NULL; @@ -1565,8 +1570,7 @@ lookup_again: * stateid, or it has been invalidated, then we must use the open * stateid. */ - if (lo->plh_stateid.seqid == 0 || - test_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags)) { + if (test_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags)) { /* * The first layoutget for the file. Need to serialize per diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index 0dfc476da3e1..b38e3c0dc790 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -247,7 +247,11 @@ void pnfs_fetch_commit_bucket_list(struct list_head *pages, } /* Helper function for pnfs_generic_commit_pagelist to catch an empty - * page list. This can happen when two commits race. */ + * page list. This can happen when two commits race. + * + * This must be called instead of nfs_init_commit - call one or the other, but + * not both! + */ static bool pnfs_generic_commit_cancel_empty_pagelist(struct list_head *pages, struct nfs_commit_data *data, @@ -256,7 +260,11 @@ pnfs_generic_commit_cancel_empty_pagelist(struct list_head *pages, if (list_empty(pages)) { if (atomic_dec_and_test(&cinfo->mds->rpcs_out)) wake_up_atomic_t(&cinfo->mds->rpcs_out); - nfs_commitdata_release(data); + /* don't call nfs_commitdata_release - it tries to put + * the open_context which is not acquired until nfs_init_commit + * which has not been called on @data */ + WARN_ON_ONCE(data->context); + nfs_commit_free(data); return true; } diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 6776d7a7839e..572e5b3b06f1 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -367,13 +367,13 @@ readpage_async_filler(void *data, struct page *page) nfs_list_remove_request(new); nfs_readpage_release(new); error = desc->pgio->pg_error; - goto out_unlock; + goto out; } return 0; out_error: error = PTR_ERR(new); -out_unlock: unlock_page(page); +out: return error; } diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c index e55b5242614d..31f3df193bdb 100644 --- a/fs/nfsd/blocklayout.c +++ b/fs/nfsd/blocklayout.c @@ -290,7 +290,7 @@ out_free_buf: return error; } -#define NFSD_MDS_PR_KEY 0x0100000000000000 +#define NFSD_MDS_PR_KEY 0x0100000000000000ULL /* * We use the client ID as a unique key for the reservations. diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c index 1580ea6fd64d..d08cd88155c7 100644 --- a/fs/nfsd/nfs2acl.c +++ b/fs/nfsd/nfs2acl.c @@ -104,22 +104,21 @@ static __be32 nfsacld_proc_setacl(struct svc_rqst * rqstp, goto out; inode = d_inode(fh->fh_dentry); - if (!IS_POSIXACL(inode) || !inode->i_op->set_acl) { - error = -EOPNOTSUPP; - goto out_errno; - } error = fh_want_write(fh); if (error) goto out_errno; - error = inode->i_op->set_acl(inode, argp->acl_access, ACL_TYPE_ACCESS); + fh_lock(fh); + + error = set_posix_acl(inode, ACL_TYPE_ACCESS, argp->acl_access); if (error) - goto out_drop_write; - error = inode->i_op->set_acl(inode, argp->acl_default, - ACL_TYPE_DEFAULT); + goto out_drop_lock; + error = set_posix_acl(inode, ACL_TYPE_DEFAULT, argp->acl_default); if (error) - goto out_drop_write; + goto out_drop_lock; + + fh_unlock(fh); fh_drop_write(fh); @@ -131,7 +130,8 @@ out: posix_acl_release(argp->acl_access); posix_acl_release(argp->acl_default); return nfserr; -out_drop_write: +out_drop_lock: + fh_unlock(fh); fh_drop_write(fh); out_errno: nfserr = nfserrno(error); diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c index 01df4cd7c753..0c890347cde3 100644 --- a/fs/nfsd/nfs3acl.c +++ b/fs/nfsd/nfs3acl.c @@ -95,22 +95,20 @@ static __be32 nfsd3_proc_setacl(struct svc_rqst * rqstp, goto out; inode = d_inode(fh->fh_dentry); - if (!IS_POSIXACL(inode) || !inode->i_op->set_acl) { - error = -EOPNOTSUPP; - goto out_errno; - } error = fh_want_write(fh); if (error) goto out_errno; - error = inode->i_op->set_acl(inode, argp->acl_access, ACL_TYPE_ACCESS); + fh_lock(fh); + + error = set_posix_acl(inode, ACL_TYPE_ACCESS, argp->acl_access); if (error) - goto out_drop_write; - error = inode->i_op->set_acl(inode, argp->acl_default, - ACL_TYPE_DEFAULT); + goto out_drop_lock; + error = set_posix_acl(inode, ACL_TYPE_DEFAULT, argp->acl_default); -out_drop_write: +out_drop_lock: + fh_unlock(fh); fh_drop_write(fh); out_errno: nfserr = nfserrno(error); diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c index 6adabd6049b7..71292a0d6f09 100644 --- a/fs/nfsd/nfs4acl.c +++ b/fs/nfsd/nfs4acl.c @@ -770,9 +770,6 @@ nfsd4_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp, dentry = fhp->fh_dentry; inode = d_inode(dentry); - if (!inode->i_op->set_acl || !IS_POSIXACL(inode)) - return nfserr_attrnotsupp; - if (S_ISDIR(inode->i_mode)) flags = NFS4_ACL_DIR; @@ -782,16 +779,19 @@ nfsd4_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp, if (host_error < 0) goto out_nfserr; - host_error = inode->i_op->set_acl(inode, pacl, ACL_TYPE_ACCESS); + fh_lock(fhp); + + host_error = set_posix_acl(inode, ACL_TYPE_ACCESS, pacl); if (host_error < 0) - goto out_release; + goto out_drop_lock; if (S_ISDIR(inode->i_mode)) { - host_error = inode->i_op->set_acl(inode, dpacl, - ACL_TYPE_DEFAULT); + host_error = set_posix_acl(inode, ACL_TYPE_DEFAULT, dpacl); } -out_release: +out_drop_lock: + fh_unlock(fhp); + posix_acl_release(pacl); posix_acl_release(dpacl); out_nfserr: diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 7389cb1d7409..04c68d900324 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -710,22 +710,6 @@ static struct rpc_cred *get_backchannel_cred(struct nfs4_client *clp, struct rpc } } -static struct rpc_clnt *create_backchannel_client(struct rpc_create_args *args) -{ - struct rpc_xprt *xprt; - - if (args->protocol != XPRT_TRANSPORT_BC_TCP) - return rpc_create(args); - - xprt = args->bc_xprt->xpt_bc_xprt; - if (xprt) { - xprt_get(xprt); - return rpc_create_xprt(args, xprt); - } - - return rpc_create(args); -} - static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *conn, struct nfsd4_session *ses) { int maxtime = max_cb_time(clp->net); @@ -768,7 +752,7 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c args.authflavor = ses->se_cb_sec.flavor; } /* Create RPC client */ - client = create_backchannel_client(&args); + client = rpc_create(&args); if (IS_ERR(client)) { dprintk("NFSD: couldn't create callback client: %ld\n", PTR_ERR(client)); diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index f5f82e145018..70d0b9b33031 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -3480,12 +3480,17 @@ alloc_init_open_stateowner(unsigned int strhashval, struct nfsd4_open *open, } static struct nfs4_ol_stateid * -init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp, - struct nfsd4_open *open) +init_open_stateid(struct nfs4_file *fp, struct nfsd4_open *open) { struct nfs4_openowner *oo = open->op_openowner; struct nfs4_ol_stateid *retstp = NULL; + struct nfs4_ol_stateid *stp; + + stp = open->op_stp; + /* We are moving these outside of the spinlocks to avoid the warnings */ + mutex_init(&stp->st_mutex); + mutex_lock(&stp->st_mutex); spin_lock(&oo->oo_owner.so_client->cl_lock); spin_lock(&fp->fi_lock); @@ -3493,6 +3498,8 @@ init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp, retstp = nfsd4_find_existing_open(fp, open); if (retstp) goto out_unlock; + + open->op_stp = NULL; atomic_inc(&stp->st_stid.sc_count); stp->st_stid.sc_type = NFS4_OPEN_STID; INIT_LIST_HEAD(&stp->st_locks); @@ -3502,14 +3509,19 @@ init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp, stp->st_access_bmap = 0; stp->st_deny_bmap = 0; stp->st_openstp = NULL; - init_rwsem(&stp->st_rwsem); list_add(&stp->st_perstateowner, &oo->oo_owner.so_stateids); list_add(&stp->st_perfile, &fp->fi_stateids); out_unlock: spin_unlock(&fp->fi_lock); spin_unlock(&oo->oo_owner.so_client->cl_lock); - return retstp; + if (retstp) { + mutex_lock(&retstp->st_mutex); + /* To keep mutex tracking happy */ + mutex_unlock(&stp->st_mutex); + stp = retstp; + } + return stp; } /* @@ -4305,7 +4317,6 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf struct nfs4_client *cl = open->op_openowner->oo_owner.so_client; struct nfs4_file *fp = NULL; struct nfs4_ol_stateid *stp = NULL; - struct nfs4_ol_stateid *swapstp = NULL; struct nfs4_delegation *dp = NULL; __be32 status; @@ -4335,32 +4346,28 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf */ if (stp) { /* Stateid was found, this is an OPEN upgrade */ - down_read(&stp->st_rwsem); + mutex_lock(&stp->st_mutex); status = nfs4_upgrade_open(rqstp, fp, current_fh, stp, open); if (status) { - up_read(&stp->st_rwsem); + mutex_unlock(&stp->st_mutex); goto out; } } else { - stp = open->op_stp; - open->op_stp = NULL; - swapstp = init_open_stateid(stp, fp, open); - if (swapstp) { - nfs4_put_stid(&stp->st_stid); - stp = swapstp; - down_read(&stp->st_rwsem); + /* stp is returned locked. */ + stp = init_open_stateid(fp, open); + /* See if we lost the race to some other thread */ + if (stp->st_access_bmap != 0) { status = nfs4_upgrade_open(rqstp, fp, current_fh, stp, open); if (status) { - up_read(&stp->st_rwsem); + mutex_unlock(&stp->st_mutex); goto out; } goto upgrade_out; } - down_read(&stp->st_rwsem); status = nfs4_get_vfs_file(rqstp, fp, current_fh, stp, open); if (status) { - up_read(&stp->st_rwsem); + mutex_unlock(&stp->st_mutex); release_open_stateid(stp); goto out; } @@ -4372,7 +4379,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf } upgrade_out: nfs4_inc_and_copy_stateid(&open->op_stateid, &stp->st_stid); - up_read(&stp->st_rwsem); + mutex_unlock(&stp->st_mutex); if (nfsd4_has_session(&resp->cstate)) { if (open->op_deleg_want & NFS4_SHARE_WANT_NO_DELEG) { @@ -4977,12 +4984,12 @@ static __be32 nfs4_seqid_op_checks(struct nfsd4_compound_state *cstate, stateid_ * revoked delegations are kept only for free_stateid. */ return nfserr_bad_stateid; - down_write(&stp->st_rwsem); + mutex_lock(&stp->st_mutex); status = check_stateid_generation(stateid, &stp->st_stid.sc_stateid, nfsd4_has_session(cstate)); if (status == nfs_ok) status = nfs4_check_fh(current_fh, &stp->st_stid); if (status != nfs_ok) - up_write(&stp->st_rwsem); + mutex_unlock(&stp->st_mutex); return status; } @@ -5030,7 +5037,7 @@ static __be32 nfs4_preprocess_confirmed_seqid_op(struct nfsd4_compound_state *cs return status; oo = openowner(stp->st_stateowner); if (!(oo->oo_flags & NFS4_OO_CONFIRMED)) { - up_write(&stp->st_rwsem); + mutex_unlock(&stp->st_mutex); nfs4_put_stid(&stp->st_stid); return nfserr_bad_stateid; } @@ -5062,12 +5069,12 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, oo = openowner(stp->st_stateowner); status = nfserr_bad_stateid; if (oo->oo_flags & NFS4_OO_CONFIRMED) { - up_write(&stp->st_rwsem); + mutex_unlock(&stp->st_mutex); goto put_stateid; } oo->oo_flags |= NFS4_OO_CONFIRMED; nfs4_inc_and_copy_stateid(&oc->oc_resp_stateid, &stp->st_stid); - up_write(&stp->st_rwsem); + mutex_unlock(&stp->st_mutex); dprintk("NFSD: %s: success, seqid=%d stateid=" STATEID_FMT "\n", __func__, oc->oc_seqid, STATEID_VAL(&stp->st_stid.sc_stateid)); @@ -5143,7 +5150,7 @@ nfsd4_open_downgrade(struct svc_rqst *rqstp, nfs4_inc_and_copy_stateid(&od->od_stateid, &stp->st_stid); status = nfs_ok; put_stateid: - up_write(&stp->st_rwsem); + mutex_unlock(&stp->st_mutex); nfs4_put_stid(&stp->st_stid); out: nfsd4_bump_seqid(cstate, status); @@ -5196,7 +5203,7 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (status) goto out; nfs4_inc_and_copy_stateid(&close->cl_stateid, &stp->st_stid); - up_write(&stp->st_rwsem); + mutex_unlock(&stp->st_mutex); nfsd4_close_open_stateid(stp); @@ -5422,7 +5429,7 @@ init_lock_stateid(struct nfs4_ol_stateid *stp, struct nfs4_lockowner *lo, stp->st_access_bmap = 0; stp->st_deny_bmap = open_stp->st_deny_bmap; stp->st_openstp = open_stp; - init_rwsem(&stp->st_rwsem); + mutex_init(&stp->st_mutex); list_add(&stp->st_locks, &open_stp->st_locks); list_add(&stp->st_perstateowner, &lo->lo_owner.so_stateids); spin_lock(&fp->fi_lock); @@ -5591,7 +5598,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, &open_stp, nn); if (status) goto out; - up_write(&open_stp->st_rwsem); + mutex_unlock(&open_stp->st_mutex); open_sop = openowner(open_stp->st_stateowner); status = nfserr_bad_stateid; if (!same_clid(&open_sop->oo_owner.so_client->cl_clientid, @@ -5600,7 +5607,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, status = lookup_or_create_lock_state(cstate, open_stp, lock, &lock_stp, &new); if (status == nfs_ok) - down_write(&lock_stp->st_rwsem); + mutex_lock(&lock_stp->st_mutex); } else { status = nfs4_preprocess_seqid_op(cstate, lock->lk_old_lock_seqid, @@ -5704,7 +5711,7 @@ out: seqid_mutating_err(ntohl(status))) lock_sop->lo_owner.so_seqid++; - up_write(&lock_stp->st_rwsem); + mutex_unlock(&lock_stp->st_mutex); /* * If this is a new, never-before-used stateid, and we are @@ -5874,7 +5881,7 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, fput: fput(filp); put_stateid: - up_write(&stp->st_rwsem); + mutex_unlock(&stp->st_mutex); nfs4_put_stid(&stp->st_stid); out: nfsd4_bump_seqid(cstate, status); diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 986e51e5ceac..64053eadeb81 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -535,7 +535,7 @@ struct nfs4_ol_stateid { unsigned char st_access_bmap; unsigned char st_deny_bmap; struct nfs4_ol_stateid *st_openstp; - struct rw_semaphore st_rwsem; + struct mutex st_mutex; }; static inline struct nfs4_ol_stateid *openlockstateid(struct nfs4_stid *s) diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c index 0576033699bc..4cca998ec7a0 100644 --- a/fs/nilfs2/btnode.c +++ b/fs/nilfs2/btnode.c @@ -62,7 +62,7 @@ nilfs_btnode_create_block(struct address_space *btnc, __u64 blocknr) } int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr, - sector_t pblocknr, int mode, + sector_t pblocknr, int mode, int mode_flags, struct buffer_head **pbh, sector_t *submit_ptr) { struct buffer_head *bh; @@ -95,7 +95,7 @@ int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr, } } - if (mode == READA) { + if (mode_flags & REQ_RAHEAD) { if (pblocknr != *submit_ptr + 1 || !trylock_buffer(bh)) { err = -EBUSY; /* internal code */ brelse(bh); @@ -114,7 +114,7 @@ int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr, bh->b_blocknr = pblocknr; /* set block address for read */ bh->b_end_io = end_buffer_read_sync; get_bh(bh); - submit_bh(mode, bh); + submit_bh(mode, mode_flags, bh); bh->b_blocknr = blocknr; /* set back to the given block address */ *submit_ptr = pblocknr; err = 0; diff --git a/fs/nilfs2/btnode.h b/fs/nilfs2/btnode.h index 2cc1b80e18f7..4e8aaa1aeb65 100644 --- a/fs/nilfs2/btnode.h +++ b/fs/nilfs2/btnode.h @@ -43,7 +43,7 @@ void nilfs_btnode_cache_clear(struct address_space *); struct buffer_head *nilfs_btnode_create_block(struct address_space *btnc, __u64 blocknr); int nilfs_btnode_submit_block(struct address_space *, __u64, sector_t, int, - struct buffer_head **, sector_t *); + int, struct buffer_head **, sector_t *); void nilfs_btnode_delete(struct buffer_head *); int nilfs_btnode_prepare_change_key(struct address_space *, struct nilfs_btnode_chkey_ctxt *); diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c index eccb1c89ccbb..982d1e3df3a5 100644 --- a/fs/nilfs2/btree.c +++ b/fs/nilfs2/btree.c @@ -476,7 +476,8 @@ static int __nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr, sector_t submit_ptr = 0; int ret; - ret = nilfs_btnode_submit_block(btnc, ptr, 0, READ, &bh, &submit_ptr); + ret = nilfs_btnode_submit_block(btnc, ptr, 0, REQ_OP_READ, 0, &bh, + &submit_ptr); if (ret) { if (ret != -EEXIST) return ret; @@ -492,7 +493,8 @@ static int __nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr, n > 0 && i < ra->ncmax; n--, i++) { ptr2 = nilfs_btree_node_get_ptr(ra->node, i, ra->ncmax); - ret = nilfs_btnode_submit_block(btnc, ptr2, 0, READA, + ret = nilfs_btnode_submit_block(btnc, ptr2, 0, + REQ_OP_READ, REQ_RAHEAD, &ra_bh, &submit_ptr); if (likely(!ret || ret == -EEXIST)) brelse(ra_bh); diff --git a/fs/nilfs2/gcinode.c b/fs/nilfs2/gcinode.c index 693aded72498..e9148f94d696 100644 --- a/fs/nilfs2/gcinode.c +++ b/fs/nilfs2/gcinode.c @@ -101,7 +101,7 @@ int nilfs_gccache_submit_read_data(struct inode *inode, sector_t blkoff, bh->b_blocknr = pbn; bh->b_end_io = end_buffer_read_sync; get_bh(bh); - submit_bh(READ, bh); + submit_bh(REQ_OP_READ, 0, bh); if (vbn) bh->b_blocknr = vbn; out: @@ -138,7 +138,8 @@ int nilfs_gccache_submit_read_node(struct inode *inode, sector_t pbn, int ret; ret = nilfs_btnode_submit_block(&NILFS_I(inode)->i_btnode_cache, - vbn ? : pbn, pbn, READ, out_bh, &pbn); + vbn ? : pbn, pbn, REQ_OP_READ, 0, + out_bh, &pbn); if (ret == -EEXIST) /* internal code (cache hit) */ ret = 0; return ret; diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c index 3417d859a03c..0d7b71fbeff8 100644 --- a/fs/nilfs2/mdt.c +++ b/fs/nilfs2/mdt.c @@ -121,7 +121,7 @@ static int nilfs_mdt_create_block(struct inode *inode, unsigned long block, static int nilfs_mdt_submit_block(struct inode *inode, unsigned long blkoff, - int mode, struct buffer_head **out_bh) + int mode, int mode_flags, struct buffer_head **out_bh) { struct buffer_head *bh; __u64 blknum = 0; @@ -135,7 +135,7 @@ nilfs_mdt_submit_block(struct inode *inode, unsigned long blkoff, if (buffer_uptodate(bh)) goto out; - if (mode == READA) { + if (mode_flags & REQ_RAHEAD) { if (!trylock_buffer(bh)) { ret = -EBUSY; goto failed_bh; @@ -157,7 +157,7 @@ nilfs_mdt_submit_block(struct inode *inode, unsigned long blkoff, bh->b_end_io = end_buffer_read_sync; get_bh(bh); - submit_bh(mode, bh); + submit_bh(mode, mode_flags, bh); ret = 0; trace_nilfs2_mdt_submit_block(inode, inode->i_ino, blkoff, mode); @@ -181,7 +181,7 @@ static int nilfs_mdt_read_block(struct inode *inode, unsigned long block, int i, nr_ra_blocks = NILFS_MDT_MAX_RA_BLOCKS; int err; - err = nilfs_mdt_submit_block(inode, block, READ, &first_bh); + err = nilfs_mdt_submit_block(inode, block, REQ_OP_READ, 0, &first_bh); if (err == -EEXIST) /* internal code */ goto out; @@ -191,7 +191,8 @@ static int nilfs_mdt_read_block(struct inode *inode, unsigned long block, if (readahead) { blkoff = block + 1; for (i = 0; i < nr_ra_blocks; i++, blkoff++) { - err = nilfs_mdt_submit_block(inode, blkoff, READA, &bh); + err = nilfs_mdt_submit_block(inode, blkoff, REQ_OP_READ, + REQ_RAHEAD, &bh); if (likely(!err || err == -EEXIST)) brelse(bh); else if (err != -EBUSY) diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c index bf36df10540b..a962d7d83447 100644 --- a/fs/nilfs2/segbuf.c +++ b/fs/nilfs2/segbuf.c @@ -346,7 +346,8 @@ static void nilfs_end_bio_write(struct bio *bio) } static int nilfs_segbuf_submit_bio(struct nilfs_segment_buffer *segbuf, - struct nilfs_write_info *wi, int mode) + struct nilfs_write_info *wi, int mode, + int mode_flags) { struct bio *bio = wi->bio; int err; @@ -364,7 +365,8 @@ static int nilfs_segbuf_submit_bio(struct nilfs_segment_buffer *segbuf, bio->bi_end_io = nilfs_end_bio_write; bio->bi_private = segbuf; - submit_bio(mode, bio); + bio_set_op_attrs(bio, mode, mode_flags); + submit_bio(bio); segbuf->sb_nbio++; wi->bio = NULL; @@ -437,7 +439,7 @@ static int nilfs_segbuf_submit_bh(struct nilfs_segment_buffer *segbuf, return 0; } /* bio is FULL */ - err = nilfs_segbuf_submit_bio(segbuf, wi, mode); + err = nilfs_segbuf_submit_bio(segbuf, wi, mode, 0); /* never submit current bh */ if (likely(!err)) goto repeat; @@ -461,19 +463,19 @@ static int nilfs_segbuf_write(struct nilfs_segment_buffer *segbuf, { struct nilfs_write_info wi; struct buffer_head *bh; - int res = 0, rw = WRITE; + int res = 0; wi.nilfs = nilfs; nilfs_segbuf_prepare_write(segbuf, &wi); list_for_each_entry(bh, &segbuf->sb_segsum_buffers, b_assoc_buffers) { - res = nilfs_segbuf_submit_bh(segbuf, &wi, bh, rw); + res = nilfs_segbuf_submit_bh(segbuf, &wi, bh, REQ_OP_WRITE); if (unlikely(res)) goto failed_bio; } list_for_each_entry(bh, &segbuf->sb_payload_buffers, b_assoc_buffers) { - res = nilfs_segbuf_submit_bh(segbuf, &wi, bh, rw); + res = nilfs_segbuf_submit_bh(segbuf, &wi, bh, REQ_OP_WRITE); if (unlikely(res)) goto failed_bio; } @@ -483,8 +485,8 @@ static int nilfs_segbuf_write(struct nilfs_segment_buffer *segbuf, * Last BIO is always sent through the following * submission. */ - rw |= REQ_SYNC; - res = nilfs_segbuf_submit_bio(segbuf, &wi, rw); + res = nilfs_segbuf_submit_bio(segbuf, &wi, REQ_OP_WRITE, + REQ_SYNC); } failed_bio: diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c index 809bd2de7ad0..e9fd241b9a0a 100644 --- a/fs/nilfs2/the_nilfs.c +++ b/fs/nilfs2/the_nilfs.c @@ -439,7 +439,7 @@ static int nilfs_valid_sb(struct nilfs_super_block *sbp) if (!sbp || le16_to_cpu(sbp->s_magic) != NILFS_SUPER_MAGIC) return 0; bytes = le16_to_cpu(sbp->s_bytes); - if (bytes > BLOCK_SIZE) + if (bytes < sumoff + 4 || bytes > BLOCK_SIZE) return 0; crc = crc32_le(le32_to_cpu(sbp->s_crc_seed), (unsigned char *)sbp, sumoff); diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c index 97768a1379f2..fe251f187ff8 100644 --- a/fs/ntfs/aops.c +++ b/fs/ntfs/aops.c @@ -362,7 +362,7 @@ handle_zblock: for (i = 0; i < nr; i++) { tbh = arr[i]; if (likely(!buffer_uptodate(tbh))) - submit_bh(READ, tbh); + submit_bh(REQ_OP_READ, 0, tbh); else ntfs_end_buffer_async_read(tbh, 1); } @@ -877,7 +877,7 @@ lock_retry_remap: do { struct buffer_head *next = bh->b_this_page; if (buffer_async_write(bh)) { - submit_bh(WRITE, bh); + submit_bh(REQ_OP_WRITE, 0, bh); need_end_writeback = false; } bh = next; @@ -1202,7 +1202,7 @@ lock_retry_remap: BUG_ON(!buffer_mapped(tbh)); get_bh(tbh); tbh->b_end_io = end_buffer_write_sync; - submit_bh(WRITE, tbh); + submit_bh(REQ_OP_WRITE, 0, tbh); } /* Synchronize the mft mirror now if not @sync. */ if (is_mft && !sync) diff --git a/fs/ntfs/compress.c b/fs/ntfs/compress.c index f2b5e746f49b..f8eb04387ca4 100644 --- a/fs/ntfs/compress.c +++ b/fs/ntfs/compress.c @@ -670,7 +670,7 @@ lock_retry_remap: } get_bh(tbh); tbh->b_end_io = end_buffer_read_sync; - submit_bh(READ, tbh); + submit_bh(REQ_OP_READ, 0, tbh); } /* Wait for io completion on all buffer heads. */ diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c index 5622ed5a201e..f548629dfaac 100644 --- a/fs/ntfs/file.c +++ b/fs/ntfs/file.c @@ -553,7 +553,7 @@ static inline int ntfs_submit_bh_for_read(struct buffer_head *bh) lock_buffer(bh); get_bh(bh); bh->b_end_io = end_buffer_read_sync; - return submit_bh(READ, bh); + return submit_bh(REQ_OP_READ, 0, bh); } /** diff --git a/fs/ntfs/logfile.c b/fs/ntfs/logfile.c index 9d71213ca81e..761f12f7f3ef 100644 --- a/fs/ntfs/logfile.c +++ b/fs/ntfs/logfile.c @@ -821,7 +821,7 @@ map_vcn: * completed ignore errors afterwards as we can assume * that if one buffer worked all of them will work. */ - submit_bh(WRITE, bh); + submit_bh(REQ_OP_WRITE, 0, bh); if (should_wait) { should_wait = false; wait_on_buffer(bh); diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c index 37b2501caaa4..d15d492ce47b 100644 --- a/fs/ntfs/mft.c +++ b/fs/ntfs/mft.c @@ -592,7 +592,7 @@ int ntfs_sync_mft_mirror(ntfs_volume *vol, const unsigned long mft_no, clear_buffer_dirty(tbh); get_bh(tbh); tbh->b_end_io = end_buffer_write_sync; - submit_bh(WRITE, tbh); + submit_bh(REQ_OP_WRITE, 0, tbh); } /* Wait on i/o completion of buffers. */ for (i_bhs = 0; i_bhs < nr_bhs; i_bhs++) { @@ -785,7 +785,7 @@ int write_mft_record_nolock(ntfs_inode *ni, MFT_RECORD *m, int sync) clear_buffer_dirty(tbh); get_bh(tbh); tbh->b_end_io = end_buffer_write_sync; - submit_bh(WRITE, tbh); + submit_bh(REQ_OP_WRITE, 0, tbh); } /* Synchronize the mft mirror now if not @sync. */ if (!sync && ni->mft_no < vol->mftmirr_size) diff --git a/fs/ocfs2/Makefile b/fs/ocfs2/Makefile index e27e6527912b..4342c7ee7d20 100644 --- a/fs/ocfs2/Makefile +++ b/fs/ocfs2/Makefile @@ -1,7 +1,5 @@ ccflags-y := -Ifs/ocfs2 -ccflags-y += -DCATCH_BH_JBD_RACES - obj-$(CONFIG_OCFS2_FS) += \ ocfs2.o \ ocfs2_stackglue.o diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index c034edf3ef38..e97a37179614 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -640,7 +640,7 @@ int ocfs2_map_page_blocks(struct page *page, u64 *p_blkno, !buffer_new(bh) && ocfs2_should_read_blk(inode, page, block_start) && (block_start < from || block_end > to)) { - ll_rw_block(READ, 1, &bh); + ll_rw_block(REQ_OP_READ, 0, 1, &bh); *wait_bh++=bh; } diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c index fe50ded1b4ce..8f040f88ade4 100644 --- a/fs/ocfs2/buffer_head_io.c +++ b/fs/ocfs2/buffer_head_io.c @@ -79,7 +79,7 @@ int ocfs2_write_block(struct ocfs2_super *osb, struct buffer_head *bh, get_bh(bh); /* for end_buffer_write_sync() */ bh->b_end_io = end_buffer_write_sync; - submit_bh(WRITE, bh); + submit_bh(REQ_OP_WRITE, 0, bh); wait_on_buffer(bh); @@ -139,17 +139,22 @@ int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block, lock_buffer(bh); if (buffer_jbd(bh)) { +#ifdef CATCH_BH_JBD_RACES mlog(ML_ERROR, "block %llu had the JBD bit set " "while I was in lock_buffer!", (unsigned long long)bh->b_blocknr); BUG(); +#else + unlock_buffer(bh); + continue; +#endif } clear_buffer_uptodate(bh); get_bh(bh); /* for end_buffer_read_sync() */ bh->b_end_io = end_buffer_read_sync; - submit_bh(READ, bh); + submit_bh(REQ_OP_READ, 0, bh); } for (i = nr; i > 0; i--) { @@ -305,7 +310,7 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr, if (validate) set_buffer_needs_validate(bh); bh->b_end_io = end_buffer_read_sync; - submit_bh(READ, bh); + submit_bh(REQ_OP_READ, 0, bh); continue; } } @@ -419,7 +424,7 @@ int ocfs2_write_super_or_backup(struct ocfs2_super *osb, get_bh(bh); /* for end_buffer_write_sync() */ bh->b_end_io = end_buffer_write_sync; ocfs2_compute_meta_ecc(osb->sb, bh->b_data, &di->i_check); - submit_bh(WRITE, bh); + submit_bh(REQ_OP_WRITE, 0, bh); wait_on_buffer(bh); diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 6aaf3e351391..636abcbd4650 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -530,7 +530,8 @@ static void o2hb_bio_end_io(struct bio *bio) static struct bio *o2hb_setup_one_bio(struct o2hb_region *reg, struct o2hb_bio_wait_ctxt *wc, unsigned int *current_slot, - unsigned int max_slots) + unsigned int max_slots, int op, + int op_flags) { int len, current_page; unsigned int vec_len, vec_start; @@ -556,6 +557,7 @@ static struct bio *o2hb_setup_one_bio(struct o2hb_region *reg, bio->bi_bdev = reg->hr_bdev; bio->bi_private = wc; bio->bi_end_io = o2hb_bio_end_io; + bio_set_op_attrs(bio, op, op_flags); vec_start = (cs << bits) % PAGE_SIZE; while(cs < max_slots) { @@ -591,7 +593,8 @@ static int o2hb_read_slots(struct o2hb_region *reg, o2hb_bio_wait_init(&wc); while(current_slot < max_slots) { - bio = o2hb_setup_one_bio(reg, &wc, ¤t_slot, max_slots); + bio = o2hb_setup_one_bio(reg, &wc, ¤t_slot, max_slots, + REQ_OP_READ, 0); if (IS_ERR(bio)) { status = PTR_ERR(bio); mlog_errno(status); @@ -599,7 +602,7 @@ static int o2hb_read_slots(struct o2hb_region *reg, } atomic_inc(&wc.wc_num_reqs); - submit_bio(READ, bio); + submit_bio(bio); } status = 0; @@ -623,7 +626,8 @@ static int o2hb_issue_node_write(struct o2hb_region *reg, slot = o2nm_this_node(); - bio = o2hb_setup_one_bio(reg, write_wc, &slot, slot+1); + bio = o2hb_setup_one_bio(reg, write_wc, &slot, slot+1, REQ_OP_WRITE, + WRITE_SYNC); if (IS_ERR(bio)) { status = PTR_ERR(bio); mlog_errno(status); @@ -631,7 +635,7 @@ static int o2hb_issue_node_write(struct o2hb_region *reg, } atomic_inc(&write_wc->wc_num_reqs); - submit_bio(WRITE_SYNC, bio); + submit_bio(bio); status = 0; bail: diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index d7cae3327de5..3971146228d3 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -1819,7 +1819,7 @@ static int ocfs2_get_sector(struct super_block *sb, if (!buffer_dirty(*bh)) clear_buffer_uptodate(*bh); unlock_buffer(*bh); - ll_rw_block(READ, 1, bh); + ll_rw_block(REQ_OP_READ, 0, 1, bh); wait_on_buffer(*bh); if (!buffer_uptodate(*bh)) { mlog_errno(-EIO); diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index 22f0253a3567..5c9d2d80ff70 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -405,12 +405,21 @@ static int ovl_create_or_link(struct dentry *dentry, int mode, dev_t rdev, err = ovl_create_upper(dentry, inode, &stat, link, hardlink); } else { const struct cred *old_cred; + struct cred *override_cred; old_cred = ovl_override_creds(dentry->d_sb); - err = ovl_create_over_whiteout(dentry, inode, &stat, link, - hardlink); + err = -ENOMEM; + override_cred = prepare_creds(); + if (override_cred) { + override_cred->fsuid = old_cred->fsuid; + override_cred->fsgid = old_cred->fsgid; + put_cred(override_creds(override_cred)); + put_cred(override_cred); + err = ovl_create_over_whiteout(dentry, inode, &stat, + link, hardlink); + } revert_creds(old_cred); } @@ -496,6 +505,7 @@ static int ovl_remove_and_whiteout(struct dentry *dentry, bool is_dir) struct dentry *upper; struct dentry *opaquedir = NULL; int err; + int flags = 0; if (WARN_ON(!workdir)) return -EROFS; @@ -525,46 +535,39 @@ static int ovl_remove_and_whiteout(struct dentry *dentry, bool is_dir) if (err) goto out_dput; - whiteout = ovl_whiteout(workdir, dentry); - err = PTR_ERR(whiteout); - if (IS_ERR(whiteout)) + upper = lookup_one_len(dentry->d_name.name, upperdir, + dentry->d_name.len); + err = PTR_ERR(upper); + if (IS_ERR(upper)) goto out_unlock; - upper = ovl_dentry_upper(dentry); - if (!upper) { - upper = lookup_one_len(dentry->d_name.name, upperdir, - dentry->d_name.len); - err = PTR_ERR(upper); - if (IS_ERR(upper)) - goto kill_whiteout; - - err = ovl_do_rename(wdir, whiteout, udir, upper, 0); - dput(upper); - if (err) - goto kill_whiteout; - } else { - int flags = 0; + err = -ESTALE; + if ((opaquedir && upper != opaquedir) || + (!opaquedir && ovl_dentry_upper(dentry) && + upper != ovl_dentry_upper(dentry))) { + goto out_dput_upper; + } - if (opaquedir) - upper = opaquedir; - err = -ESTALE; - if (upper->d_parent != upperdir) - goto kill_whiteout; + whiteout = ovl_whiteout(workdir, dentry); + err = PTR_ERR(whiteout); + if (IS_ERR(whiteout)) + goto out_dput_upper; - if (is_dir) - flags |= RENAME_EXCHANGE; + if (d_is_dir(upper)) + flags = RENAME_EXCHANGE; - err = ovl_do_rename(wdir, whiteout, udir, upper, flags); - if (err) - goto kill_whiteout; + err = ovl_do_rename(wdir, whiteout, udir, upper, flags); + if (err) + goto kill_whiteout; + if (flags) + ovl_cleanup(wdir, upper); - if (is_dir) - ovl_cleanup(wdir, upper); - } ovl_dentry_version_inc(dentry->d_parent); out_d_drop: d_drop(dentry); dput(whiteout); +out_dput_upper: + dput(upper); out_unlock: unlock_rename(workdir, upperdir); out_dput: diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index 0ed7c4012437..d1cdc60dd68f 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -59,16 +59,40 @@ int ovl_setattr(struct dentry *dentry, struct iattr *attr) if (err) goto out; + if (attr->ia_valid & ATTR_SIZE) { + struct inode *realinode = d_inode(ovl_dentry_real(dentry)); + + err = -ETXTBSY; + if (atomic_read(&realinode->i_writecount) < 0) + goto out_drop_write; + } + err = ovl_copy_up(dentry); if (!err) { + struct inode *winode = NULL; + upperdentry = ovl_dentry_upper(dentry); + if (attr->ia_valid & ATTR_SIZE) { + winode = d_inode(upperdentry); + err = get_write_access(winode); + if (err) + goto out_drop_write; + } + + if (attr->ia_valid & (ATTR_KILL_SUID|ATTR_KILL_SGID)) + attr->ia_valid &= ~ATTR_MODE; + inode_lock(upperdentry->d_inode); err = notify_change(upperdentry, attr, NULL); if (!err) ovl_copyattr(upperdentry->d_inode, dentry->d_inode); inode_unlock(upperdentry->d_inode); + + if (winode) + put_write_access(winode); } +out_drop_write: ovl_drop_write(dentry); out: return err; @@ -121,16 +145,18 @@ int ovl_permission(struct inode *inode, int mask) err = vfs_getattr(&realpath, &stat); if (err) - return err; + goto out_dput; + err = -ESTALE; if ((stat.mode ^ inode->i_mode) & S_IFMT) - return -ESTALE; + goto out_dput; inode->i_mode = stat.mode; inode->i_uid = stat.uid; inode->i_gid = stat.gid; - return generic_permission(inode, mask); + err = generic_permission(inode, mask); + goto out_dput; } /* Careful in RCU walk mode */ @@ -238,41 +264,27 @@ out: return err; } -static bool ovl_need_xattr_filter(struct dentry *dentry, - enum ovl_path_type type) -{ - if ((type & (__OVL_PATH_PURE | __OVL_PATH_UPPER)) == __OVL_PATH_UPPER) - return S_ISDIR(dentry->d_inode->i_mode); - else - return false; -} - ssize_t ovl_getxattr(struct dentry *dentry, struct inode *inode, const char *name, void *value, size_t size) { - struct path realpath; - enum ovl_path_type type = ovl_path_real(dentry, &realpath); + struct dentry *realdentry = ovl_dentry_real(dentry); - if (ovl_need_xattr_filter(dentry, type) && ovl_is_private_xattr(name)) + if (ovl_is_private_xattr(name)) return -ENODATA; - return vfs_getxattr(realpath.dentry, name, value, size); + return vfs_getxattr(realdentry, name, value, size); } ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size) { - struct path realpath; - enum ovl_path_type type = ovl_path_real(dentry, &realpath); + struct dentry *realdentry = ovl_dentry_real(dentry); ssize_t res; int off; - res = vfs_listxattr(realpath.dentry, list, size); + res = vfs_listxattr(realdentry, list, size); if (res <= 0 || size == 0) return res; - if (!ovl_need_xattr_filter(dentry, type)) - return res; - /* filter out private xattrs */ for (off = 0; off < res;) { char *s = list + off; @@ -302,7 +314,7 @@ int ovl_removexattr(struct dentry *dentry, const char *name) goto out; err = -ENODATA; - if (ovl_need_xattr_filter(dentry, type) && ovl_is_private_xattr(name)) + if (ovl_is_private_xattr(name)) goto out_drop_write; if (!OVL_TYPE_UPPER(type)) { @@ -401,12 +413,11 @@ struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, if (!inode) return NULL; - mode &= S_IFMT; - inode->i_ino = get_next_ino(); inode->i_mode = mode; inode->i_flags |= S_NOATIME | S_NOCMTIME; + mode &= S_IFMT; switch (mode) { case S_IFDIR: inode->i_private = oe; diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 4bd9b5ba8f42..cfbca53590d0 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -187,6 +187,7 @@ static inline void ovl_copyattr(struct inode *from, struct inode *to) { to->i_uid = from->i_uid; to->i_gid = from->i_gid; + to->i_mode = from->i_mode; } /* dir.c */ diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index ce02f46029da..9a7693d5f8ff 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -1082,11 +1082,13 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) if (err < 0) goto out_put_workdir; - if (!err) { - pr_err("overlayfs: upper fs needs to support d_type.\n"); - err = -EINVAL; - goto out_put_workdir; - } + /* + * We allowed this configuration and don't want to + * break users over kernel upgrade. So warn instead + * of erroring out. + */ + if (!err) + pr_warn("overlayfs: upper fs needs to support d_type.\n"); } } diff --git a/fs/posix_acl.c b/fs/posix_acl.c index 8a4a266beff3..edc452c2a563 100644 --- a/fs/posix_acl.c +++ b/fs/posix_acl.c @@ -820,39 +820,43 @@ posix_acl_xattr_get(const struct xattr_handler *handler, return error; } -static int -posix_acl_xattr_set(const struct xattr_handler *handler, - struct dentry *unused, struct inode *inode, - const char *name, const void *value, - size_t size, int flags) +int +set_posix_acl(struct inode *inode, int type, struct posix_acl *acl) { - struct posix_acl *acl = NULL; - int ret; - if (!IS_POSIXACL(inode)) return -EOPNOTSUPP; if (!inode->i_op->set_acl) return -EOPNOTSUPP; - if (handler->flags == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode)) - return value ? -EACCES : 0; + if (type == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode)) + return acl ? -EACCES : 0; if (!inode_owner_or_capable(inode)) return -EPERM; + if (acl) { + int ret = posix_acl_valid(acl); + if (ret) + return ret; + } + return inode->i_op->set_acl(inode, acl, type); +} +EXPORT_SYMBOL(set_posix_acl); + +static int +posix_acl_xattr_set(const struct xattr_handler *handler, + struct dentry *unused, struct inode *inode, + const char *name, const void *value, + size_t size, int flags) +{ + struct posix_acl *acl = NULL; + int ret; + if (value) { acl = posix_acl_from_xattr(&init_user_ns, value, size); if (IS_ERR(acl)) return PTR_ERR(acl); - - if (acl) { - ret = posix_acl_valid(acl); - if (ret) - goto out; - } } - - ret = inode->i_op->set_acl(inode, acl, handler->flags); -out: + ret = set_posix_acl(inode, handler->flags, acl); posix_acl_release(acl); return ret; } diff --git a/fs/proc/root.c b/fs/proc/root.c index 55bc7d6c8aac..06702783bf40 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -121,6 +121,13 @@ static struct dentry *proc_mount(struct file_system_type *fs_type, if (IS_ERR(sb)) return ERR_CAST(sb); + /* + * procfs isn't actually a stacking filesystem; however, there is + * too much magic going on inside it to permit stacking things on + * top of it + */ + sb->s_stack_depth = FILESYSTEM_MAX_STACK_DEPTH; + if (!proc_parse_options(options, ns)) { deactivate_locked_super(sb); return ERR_PTR(-EINVAL); diff --git a/fs/read_write.c b/fs/read_write.c index 933b53a375b4..66215a7b17cf 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -1168,6 +1168,15 @@ COMPAT_SYSCALL_DEFINE5(preadv, compat_ulong_t, fd, return do_compat_preadv64(fd, vec, vlen, pos, 0); } +#ifdef __ARCH_WANT_COMPAT_SYS_PREADV64V2 +COMPAT_SYSCALL_DEFINE5(preadv64v2, unsigned long, fd, + const struct compat_iovec __user *,vec, + unsigned long, vlen, loff_t, pos, int, flags) +{ + return do_compat_preadv64(fd, vec, vlen, pos, flags); +} +#endif + COMPAT_SYSCALL_DEFINE6(preadv2, compat_ulong_t, fd, const struct compat_iovec __user *,vec, compat_ulong_t, vlen, u32, pos_low, u32, pos_high, @@ -1265,6 +1274,15 @@ COMPAT_SYSCALL_DEFINE5(pwritev, compat_ulong_t, fd, return do_compat_pwritev64(fd, vec, vlen, pos, 0); } +#ifdef __ARCH_WANT_COMPAT_SYS_PWRITEV64V2 +COMPAT_SYSCALL_DEFINE5(pwritev64v2, unsigned long, fd, + const struct compat_iovec __user *,vec, + unsigned long, vlen, loff_t, pos, int, flags) +{ + return do_compat_pwritev64(fd, vec, vlen, pos, flags); +} +#endif + COMPAT_SYSCALL_DEFINE6(pwritev2, compat_ulong_t, fd, const struct compat_iovec __user *,vec, compat_ulong_t, vlen, u32, pos_low, u32, pos_high, int, flags) diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index 825455d3e4ba..c2c59f9ff04b 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -2668,7 +2668,7 @@ static int reiserfs_write_full_page(struct page *page, do { struct buffer_head *next = bh->b_this_page; if (buffer_async_write(bh)) { - submit_bh(WRITE, bh); + submit_bh(REQ_OP_WRITE, 0, bh); nr++; } put_bh(bh); @@ -2728,7 +2728,7 @@ fail: struct buffer_head *next = bh->b_this_page; if (buffer_async_write(bh)) { clear_buffer_dirty(bh); - submit_bh(WRITE, bh); + submit_bh(REQ_OP_WRITE, 0, bh); nr++; } put_bh(bh); diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index 2ace90e981f0..bc2dde2423c2 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -652,7 +652,7 @@ static void submit_logged_buffer(struct buffer_head *bh) BUG(); if (!buffer_uptodate(bh)) BUG(); - submit_bh(WRITE, bh); + submit_bh(REQ_OP_WRITE, 0, bh); } static void submit_ordered_buffer(struct buffer_head *bh) @@ -662,7 +662,7 @@ static void submit_ordered_buffer(struct buffer_head *bh) clear_buffer_dirty(bh); if (!buffer_uptodate(bh)) BUG(); - submit_bh(WRITE, bh); + submit_bh(REQ_OP_WRITE, 0, bh); } #define CHUNK_SIZE 32 @@ -870,7 +870,7 @@ loop_next: */ if (buffer_dirty(bh) && unlikely(bh->b_page->mapping == NULL)) { spin_unlock(lock); - ll_rw_block(WRITE, 1, &bh); + ll_rw_block(REQ_OP_WRITE, 0, 1, &bh); spin_lock(lock); } put_bh(bh); @@ -1057,7 +1057,7 @@ static int flush_commit_list(struct super_block *s, if (tbh) { if (buffer_dirty(tbh)) { depth = reiserfs_write_unlock_nested(s); - ll_rw_block(WRITE, 1, &tbh); + ll_rw_block(REQ_OP_WRITE, 0, 1, &tbh); reiserfs_write_lock_nested(s, depth); } put_bh(tbh) ; @@ -2244,7 +2244,7 @@ abort_replay: } } /* read in the log blocks, memcpy to the corresponding real block */ - ll_rw_block(READ, get_desc_trans_len(desc), log_blocks); + ll_rw_block(REQ_OP_READ, 0, get_desc_trans_len(desc), log_blocks); for (i = 0; i < get_desc_trans_len(desc); i++) { wait_on_buffer(log_blocks[i]); @@ -2269,7 +2269,7 @@ abort_replay: /* flush out the real blocks */ for (i = 0; i < get_desc_trans_len(desc); i++) { set_buffer_dirty(real_blocks[i]); - write_dirty_buffer(real_blocks[i], WRITE); + write_dirty_buffer(real_blocks[i], 0); } for (i = 0; i < get_desc_trans_len(desc); i++) { wait_on_buffer(real_blocks[i]); @@ -2346,7 +2346,7 @@ static struct buffer_head *reiserfs_breada(struct block_device *dev, } else bhlist[j++] = bh; } - ll_rw_block(READ, j, bhlist); + ll_rw_block(REQ_OP_READ, 0, j, bhlist); for (i = 1; i < j; i++) brelse(bhlist[i]); bh = bhlist[0]; diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c index 5feacd689241..4032d1e87c8f 100644 --- a/fs/reiserfs/stree.c +++ b/fs/reiserfs/stree.c @@ -551,7 +551,7 @@ static int search_by_key_reada(struct super_block *s, if (!buffer_uptodate(bh[j])) { if (depth == -1) depth = reiserfs_write_unlock_nested(s); - ll_rw_block(READA, 1, bh + j); + ll_rw_block(REQ_OP_READ, REQ_RAHEAD, 1, bh + j); } brelse(bh[j]); } @@ -660,7 +660,7 @@ int search_by_key(struct super_block *sb, const struct cpu_key *key, if (!buffer_uptodate(bh) && depth == -1) depth = reiserfs_write_unlock_nested(sb); - ll_rw_block(READ, 1, &bh); + ll_rw_block(REQ_OP_READ, 0, 1, &bh); wait_on_buffer(bh); if (depth != -1) diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index b8f2d1e8c645..7a4a85a6821e 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -1393,7 +1393,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) unsigned long safe_mask = 0; unsigned int commit_max_age = (unsigned int)-1; struct reiserfs_journal *journal = SB_JOURNAL(s); - char *new_opts = kstrdup(arg, GFP_KERNEL); + char *new_opts; int err; char *qf_names[REISERFS_MAXQUOTAS]; unsigned int qfmt = 0; @@ -1401,6 +1401,10 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) int i; #endif + new_opts = kstrdup(arg, GFP_KERNEL); + if (arg && !new_opts) + return -ENOMEM; + sync_filesystem(s); reiserfs_write_lock(s); @@ -1546,7 +1550,8 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) } out_ok_unlocked: - replace_mount_options(s, new_opts); + if (new_opts) + replace_mount_options(s, new_opts); return 0; out_err_unlock: @@ -1661,7 +1666,7 @@ static int read_super_block(struct super_block *s, int offset) /* after journal replay, reread all bitmap and super blocks */ static int reread_meta_blocks(struct super_block *s) { - ll_rw_block(READ, 1, &SB_BUFFER_WITH_SB(s)); + ll_rw_block(REQ_OP_READ, 0, 1, &SB_BUFFER_WITH_SB(s)); wait_on_buffer(SB_BUFFER_WITH_SB(s)); if (!buffer_uptodate(SB_BUFFER_WITH_SB(s))) { reiserfs_warning(s, "reiserfs-2504", "error reading the super"); diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c index 2c2618410d51..ce62a380314f 100644 --- a/fs/squashfs/block.c +++ b/fs/squashfs/block.c @@ -124,7 +124,7 @@ int squashfs_read_data(struct super_block *sb, u64 index, int length, goto block_release; bytes += msblk->devblksize; } - ll_rw_block(READ, b, bh); + ll_rw_block(REQ_OP_READ, 0, b, bh); } else { /* * Metadata block. @@ -156,7 +156,7 @@ int squashfs_read_data(struct super_block *sb, u64 index, int length, goto block_release; bytes += msblk->devblksize; } - ll_rw_block(READ, b - 1, bh + 1); + ll_rw_block(REQ_OP_READ, 0, b - 1, bh + 1); } for (i = 0; i < b; i++) { diff --git a/fs/timerfd.c b/fs/timerfd.c index 053818dd6c18..9ae4abb4110b 100644 --- a/fs/timerfd.c +++ b/fs/timerfd.c @@ -390,6 +390,11 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags) clockid != CLOCK_BOOTTIME_ALARM)) return -EINVAL; + if (!capable(CAP_WAKE_ALARM) && + (clockid == CLOCK_REALTIME_ALARM || + clockid == CLOCK_BOOTTIME_ALARM)) + return -EPERM; + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); if (!ctx) return -ENOMEM; @@ -433,6 +438,11 @@ static int do_timerfd_settime(int ufd, int flags, return ret; ctx = f.file->private_data; + if (!capable(CAP_WAKE_ALARM) && isalarm(ctx)) { + fdput(f); + return -EPERM; + } + timerfd_setup_cancel(ctx, flags); /* diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index 08316972ff93..7bbf420d1289 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -52,6 +52,7 @@ #include "ubifs.h" #include <linux/mount.h> #include <linux/slab.h> +#include <linux/migrate.h> static int read_block(struct inode *inode, void *addr, unsigned int block, struct ubifs_data_node *dn) @@ -1452,6 +1453,26 @@ static int ubifs_set_page_dirty(struct page *page) return ret; } +#ifdef CONFIG_MIGRATION +static int ubifs_migrate_page(struct address_space *mapping, + struct page *newpage, struct page *page, enum migrate_mode mode) +{ + int rc; + + rc = migrate_page_move_mapping(mapping, newpage, page, NULL, mode, 0); + if (rc != MIGRATEPAGE_SUCCESS) + return rc; + + if (PagePrivate(page)) { + ClearPagePrivate(page); + SetPagePrivate(newpage); + } + + migrate_page_copy(newpage, page); + return MIGRATEPAGE_SUCCESS; +} +#endif + static int ubifs_releasepage(struct page *page, gfp_t unused_gfp_flags) { /* @@ -1591,6 +1612,9 @@ const struct address_space_operations ubifs_file_address_operations = { .write_end = ubifs_write_end, .invalidatepage = ubifs_invalidatepage, .set_page_dirty = ubifs_set_page_dirty, +#ifdef CONFIG_MIGRATION + .migratepage = ubifs_migrate_page, +#endif .releasepage = ubifs_releasepage, }; diff --git a/fs/udf/dir.c b/fs/udf/dir.c index 4c5593abc553..aaec13c95253 100644 --- a/fs/udf/dir.c +++ b/fs/udf/dir.c @@ -113,7 +113,7 @@ static int udf_readdir(struct file *file, struct dir_context *ctx) brelse(tmp); } if (num) { - ll_rw_block(READA, num, bha); + ll_rw_block(REQ_OP_READ, REQ_RAHEAD, num, bha); for (i = 0; i < num; i++) brelse(bha[i]); } diff --git a/fs/udf/directory.c b/fs/udf/directory.c index c763fda257bf..988d5352bdb8 100644 --- a/fs/udf/directory.c +++ b/fs/udf/directory.c @@ -87,7 +87,7 @@ struct fileIdentDesc *udf_fileident_read(struct inode *dir, loff_t *nf_pos, brelse(tmp); } if (num) { - ll_rw_block(READA, num, bha); + ll_rw_block(REQ_OP_READ, REQ_RAHEAD, num, bha); for (i = 0; i < num; i++) brelse(bha[i]); } diff --git a/fs/udf/inode.c b/fs/udf/inode.c index f323aff740ef..55aa587bbc38 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -1199,7 +1199,7 @@ struct buffer_head *udf_bread(struct inode *inode, int block, if (buffer_uptodate(bh)) return bh; - ll_rw_block(READ, 1, &bh); + ll_rw_block(REQ_OP_READ, 0, 1, &bh); wait_on_buffer(bh); if (buffer_uptodate(bh)) diff --git a/fs/udf/partition.c b/fs/udf/partition.c index 5f861ed287c3..888c364b2fe9 100644 --- a/fs/udf/partition.c +++ b/fs/udf/partition.c @@ -295,7 +295,8 @@ static uint32_t udf_try_read_meta(struct inode *inode, uint32_t block, map = &UDF_SB(sb)->s_partmaps[partition]; /* map to sparable/physical partition desc */ phyblock = udf_get_pblock(sb, eloc.logicalBlockNum, - map->s_partition_num, ext_offset + offset); + map->s_type_specific.s_metadata.s_phys_partition_ref, + ext_offset + offset); } brelse(epos.bh); @@ -317,14 +318,18 @@ uint32_t udf_get_pblock_meta25(struct super_block *sb, uint32_t block, mdata = &map->s_type_specific.s_metadata; inode = mdata->s_metadata_fe ? : mdata->s_mirror_fe; - /* We shouldn't mount such media... */ - BUG_ON(!inode); + if (!inode) + return 0xFFFFFFFF; + retblk = udf_try_read_meta(inode, block, partition, offset); if (retblk == 0xFFFFFFFF && mdata->s_metadata_fe) { udf_warn(sb, "error reading from METADATA, trying to read from MIRROR\n"); if (!(mdata->s_flags & MF_MIRROR_FE_LOADED)) { mdata->s_mirror_fe = udf_find_metadata_inode_efe(sb, - mdata->s_mirror_file_loc, map->s_partition_num); + mdata->s_mirror_file_loc, + mdata->s_phys_partition_ref); + if (IS_ERR(mdata->s_mirror_fe)) + mdata->s_mirror_fe = NULL; mdata->s_flags |= MF_MIRROR_FE_LOADED; } diff --git a/fs/udf/super.c b/fs/udf/super.c index 5e2c8c814e1b..4942549e7dc8 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -951,13 +951,13 @@ out2: } struct inode *udf_find_metadata_inode_efe(struct super_block *sb, - u32 meta_file_loc, u32 partition_num) + u32 meta_file_loc, u32 partition_ref) { struct kernel_lb_addr addr; struct inode *metadata_fe; addr.logicalBlockNum = meta_file_loc; - addr.partitionReferenceNum = partition_num; + addr.partitionReferenceNum = partition_ref; metadata_fe = udf_iget_special(sb, &addr); @@ -974,7 +974,8 @@ struct inode *udf_find_metadata_inode_efe(struct super_block *sb, return metadata_fe; } -static int udf_load_metadata_files(struct super_block *sb, int partition) +static int udf_load_metadata_files(struct super_block *sb, int partition, + int type1_index) { struct udf_sb_info *sbi = UDF_SB(sb); struct udf_part_map *map; @@ -984,20 +985,21 @@ static int udf_load_metadata_files(struct super_block *sb, int partition) map = &sbi->s_partmaps[partition]; mdata = &map->s_type_specific.s_metadata; + mdata->s_phys_partition_ref = type1_index; /* metadata address */ udf_debug("Metadata file location: block = %d part = %d\n", - mdata->s_meta_file_loc, map->s_partition_num); + mdata->s_meta_file_loc, mdata->s_phys_partition_ref); fe = udf_find_metadata_inode_efe(sb, mdata->s_meta_file_loc, - map->s_partition_num); + mdata->s_phys_partition_ref); if (IS_ERR(fe)) { /* mirror file entry */ udf_debug("Mirror metadata file location: block = %d part = %d\n", - mdata->s_mirror_file_loc, map->s_partition_num); + mdata->s_mirror_file_loc, mdata->s_phys_partition_ref); fe = udf_find_metadata_inode_efe(sb, mdata->s_mirror_file_loc, - map->s_partition_num); + mdata->s_phys_partition_ref); if (IS_ERR(fe)) { udf_err(sb, "Both metadata and mirror metadata inode efe can not found\n"); @@ -1015,7 +1017,7 @@ static int udf_load_metadata_files(struct super_block *sb, int partition) */ if (mdata->s_bitmap_file_loc != 0xFFFFFFFF) { addr.logicalBlockNum = mdata->s_bitmap_file_loc; - addr.partitionReferenceNum = map->s_partition_num; + addr.partitionReferenceNum = mdata->s_phys_partition_ref; udf_debug("Bitmap file location: block = %d part = %d\n", addr.logicalBlockNum, addr.partitionReferenceNum); @@ -1283,7 +1285,7 @@ static int udf_load_partdesc(struct super_block *sb, sector_t block) p = (struct partitionDesc *)bh->b_data; partitionNumber = le16_to_cpu(p->partitionNumber); - /* First scan for TYPE1, SPARABLE and METADATA partitions */ + /* First scan for TYPE1 and SPARABLE partitions */ for (i = 0; i < sbi->s_partitions; i++) { map = &sbi->s_partmaps[i]; udf_debug("Searching map: (%d == %d)\n", @@ -1333,7 +1335,7 @@ static int udf_load_partdesc(struct super_block *sb, sector_t block) goto out_bh; if (map->s_partition_type == UDF_METADATA_MAP25) { - ret = udf_load_metadata_files(sb, i); + ret = udf_load_metadata_files(sb, i, type1_idx); if (ret < 0) { udf_err(sb, "error loading MetaData partition map %d\n", i); diff --git a/fs/udf/udf_sb.h b/fs/udf/udf_sb.h index 27b5335730c9..c13875d669c0 100644 --- a/fs/udf/udf_sb.h +++ b/fs/udf/udf_sb.h @@ -61,6 +61,11 @@ struct udf_meta_data { __u32 s_bitmap_file_loc; __u32 s_alloc_unit_size; __u16 s_align_unit_size; + /* + * Partition Reference Number of the associated physical / sparable + * partition + */ + __u16 s_phys_partition_ref; int s_flags; struct inode *s_metadata_fe; struct inode *s_mirror_fe; diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c index 0447b949c7f5..67e085d591d8 100644 --- a/fs/ufs/balloc.c +++ b/fs/ufs/balloc.c @@ -292,7 +292,7 @@ static void ufs_change_blocknr(struct inode *inode, sector_t beg, if (!buffer_mapped(bh)) map_bh(bh, inode->i_sb, oldb + pos); if (!buffer_uptodate(bh)) { - ll_rw_block(READ, 1, &bh); + ll_rw_block(REQ_OP_READ, 0, 1, &bh); wait_on_buffer(bh); if (!buffer_uptodate(bh)) { ufs_error(inode->i_sb, __func__, diff --git a/fs/ufs/util.c b/fs/ufs/util.c index a409e3e7827a..f41ad0a6106f 100644 --- a/fs/ufs/util.c +++ b/fs/ufs/util.c @@ -118,7 +118,7 @@ void ubh_sync_block(struct ufs_buffer_head *ubh) unsigned i; for (i = 0; i < ubh->count; i++) - write_dirty_buffer(ubh->bh[i], WRITE); + write_dirty_buffer(ubh->bh[i], 0); for (i = 0; i < ubh->count; i++) wait_on_buffer(ubh->bh[i]); diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 4c463b99fe57..87d2b215cbbd 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -438,7 +438,8 @@ xfs_submit_ioend( ioend->io_bio->bi_private = ioend; ioend->io_bio->bi_end_io = xfs_end_bio; - + bio_set_op_attrs(ioend->io_bio, REQ_OP_WRITE, + (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : 0); /* * If we are failing the IO now, just mark the ioend with an * error and finish it. This will run IO completion immediately @@ -451,8 +452,7 @@ xfs_submit_ioend( return status; } - submit_bio(wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE, - ioend->io_bio); + submit_bio(ioend->io_bio); return 0; } @@ -510,8 +510,9 @@ xfs_chain_bio( bio_chain(ioend->io_bio, new); bio_get(ioend->io_bio); /* for xfs_destroy_ioend */ - submit_bio(wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE, - ioend->io_bio); + bio_set_op_attrs(ioend->io_bio, REQ_OP_WRITE, + (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : 0); + submit_bio(ioend->io_bio); ioend->io_bio = new; } diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index e71cfbd5acb3..a87a0d5477bd 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -1127,7 +1127,8 @@ xfs_buf_ioapply_map( int map, int *buf_offset, int *count, - int rw) + int op, + int op_flags) { int page_index; int total_nr_pages = bp->b_page_count; @@ -1157,16 +1158,14 @@ xfs_buf_ioapply_map( next_chunk: atomic_inc(&bp->b_io_remaining); - nr_pages = BIO_MAX_SECTORS >> (PAGE_SHIFT - BBSHIFT); - if (nr_pages > total_nr_pages) - nr_pages = total_nr_pages; + nr_pages = min(total_nr_pages, BIO_MAX_PAGES); bio = bio_alloc(GFP_NOIO, nr_pages); bio->bi_bdev = bp->b_target->bt_bdev; bio->bi_iter.bi_sector = sector; bio->bi_end_io = xfs_buf_bio_end_io; bio->bi_private = bp; - + bio_set_op_attrs(bio, op, op_flags); for (; size && nr_pages; nr_pages--, page_index++) { int rbytes, nbytes = PAGE_SIZE - offset; @@ -1190,7 +1189,7 @@ next_chunk: flush_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp)); } - submit_bio(rw, bio); + submit_bio(bio); if (size) goto next_chunk; } else { @@ -1210,7 +1209,8 @@ _xfs_buf_ioapply( struct xfs_buf *bp) { struct blk_plug plug; - int rw; + int op; + int op_flags = 0; int offset; int size; int i; @@ -1229,14 +1229,13 @@ _xfs_buf_ioapply( bp->b_ioend_wq = bp->b_target->bt_mount->m_buf_workqueue; if (bp->b_flags & XBF_WRITE) { + op = REQ_OP_WRITE; if (bp->b_flags & XBF_SYNCIO) - rw = WRITE_SYNC; - else - rw = WRITE; + op_flags = WRITE_SYNC; if (bp->b_flags & XBF_FUA) - rw |= REQ_FUA; + op_flags |= REQ_FUA; if (bp->b_flags & XBF_FLUSH) - rw |= REQ_FLUSH; + op_flags |= REQ_PREFLUSH; /* * Run the write verifier callback function if it exists. If @@ -1266,13 +1265,14 @@ _xfs_buf_ioapply( } } } else if (bp->b_flags & XBF_READ_AHEAD) { - rw = READA; + op = REQ_OP_READ; + op_flags = REQ_RAHEAD; } else { - rw = READ; + op = REQ_OP_READ; } /* we only use the buffer cache for meta-data */ - rw |= REQ_META; + op_flags |= REQ_META; /* * Walk all the vectors issuing IO on them. Set up the initial offset @@ -1284,7 +1284,7 @@ _xfs_buf_ioapply( size = BBTOB(bp->b_io_length); blk_start_plug(&plug); for (i = 0; i < bp->b_map_count; i++) { - xfs_buf_ioapply_map(bp, i, &offset, &size, rw); + xfs_buf_ioapply_map(bp, i, &offset, &size, op, op_flags); if (bp->b_error) break; if (size <= 0) diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index dbca7375deef..63a6ff2cfc68 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -1575,6 +1575,12 @@ xfs_ioc_swapext( goto out_put_tmp_file; } + if (f.file->f_op != &xfs_file_operations || + tmp.file->f_op != &xfs_file_operations) { + error = -EINVAL; + goto out_put_tmp_file; + } + ip = XFS_I(file_inode(f.file)); tip = XFS_I(file_inode(tmp.file)); |