diff options
Diffstat (limited to 'fs')
54 files changed, 1620 insertions, 926 deletions
diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt index 270c48148f79..2d0cbbd14cfc 100644 --- a/fs/Kconfig.binfmt +++ b/fs/Kconfig.binfmt @@ -27,9 +27,6 @@ config COMPAT_BINFMT_ELF bool depends on COMPAT && BINFMT_ELF -config ARCH_BINFMT_ELF_RANDOMIZE_PIE - bool - config ARCH_BINFMT_ELF_STATE bool diff --git a/fs/Makefile b/fs/Makefile index a88ac4838c9e..cb92fd4c3172 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -118,6 +118,7 @@ obj-$(CONFIG_HOSTFS) += hostfs/ obj-$(CONFIG_HPPFS) += hppfs/ obj-$(CONFIG_CACHEFILES) += cachefiles/ obj-$(CONFIG_DEBUG_FS) += debugfs/ +obj-$(CONFIG_TRACING) += tracefs/ obj-$(CONFIG_OCFS2_FS) += ocfs2/ obj-$(CONFIG_BTRFS_FS) += btrfs/ obj-$(CONFIG_GFS2_FS) += gfs2/ @@ -310,11 +310,11 @@ static int aio_ring_mmap(struct file *file, struct vm_area_struct *vma) return 0; } -static void aio_ring_remap(struct file *file, struct vm_area_struct *vma) +static int aio_ring_remap(struct file *file, struct vm_area_struct *vma) { struct mm_struct *mm = vma->vm_mm; struct kioctx_table *table; - int i; + int i, res = -EINVAL; spin_lock(&mm->ioctx_lock); rcu_read_lock(); @@ -324,13 +324,17 @@ static void aio_ring_remap(struct file *file, struct vm_area_struct *vma) ctx = table->table[i]; if (ctx && ctx->aio_ring_file == file) { - ctx->user_id = ctx->mmap_base = vma->vm_start; + if (!atomic_read(&ctx->dead)) { + ctx->user_id = ctx->mmap_base = vma->vm_start; + res = 0; + } break; } } rcu_read_unlock(); spin_unlock(&mm->ioctx_lock); + return res; } static const struct file_operations aio_ring_fops = { @@ -760,6 +764,9 @@ static struct kioctx *ioctx_alloc(unsigned nr_events) err_cleanup: aio_nr_sub(ctx->max_reqs); err_ctx: + atomic_set(&ctx->dead, 1); + if (ctx->mmap_size) + vm_munmap(ctx->mmap_base, ctx->mmap_size); aio_free_ring(ctx); err: mutex_unlock(&ctx->ring_lock); @@ -781,11 +788,12 @@ static int kill_ioctx(struct mm_struct *mm, struct kioctx *ctx, { struct kioctx_table *table; - if (atomic_xchg(&ctx->dead, 1)) + spin_lock(&mm->ioctx_lock); + if 
(atomic_xchg(&ctx->dead, 1)) { + spin_unlock(&mm->ioctx_lock); return -EINVAL; + } - - spin_lock(&mm->ioctx_lock); table = rcu_dereference_raw(mm->ioctx_table); WARN_ON(ctx != table->table[ctx->id]); table->table[ctx->id] = NULL; @@ -1352,48 +1360,19 @@ typedef ssize_t (aio_rw_op)(struct kiocb *, const struct iovec *, unsigned long, loff_t); typedef ssize_t (rw_iter_op)(struct kiocb *, struct iov_iter *); -static ssize_t aio_setup_vectored_rw(struct kiocb *kiocb, - int rw, char __user *buf, - unsigned long *nr_segs, - size_t *len, - struct iovec **iovec, - bool compat) +static int aio_setup_vectored_rw(int rw, char __user *buf, size_t len, + struct iovec **iovec, + bool compat, + struct iov_iter *iter) { - ssize_t ret; - - *nr_segs = *len; - #ifdef CONFIG_COMPAT if (compat) - ret = compat_rw_copy_check_uvector(rw, + return compat_import_iovec(rw, (struct compat_iovec __user *)buf, - *nr_segs, UIO_FASTIOV, *iovec, iovec); - else + len, UIO_FASTIOV, iovec, iter); #endif - ret = rw_copy_check_uvector(rw, - (struct iovec __user *)buf, - *nr_segs, UIO_FASTIOV, *iovec, iovec); - if (ret < 0) - return ret; - - /* len now reflect bytes instead of segs */ - *len = ret; - return 0; -} - -static ssize_t aio_setup_single_vector(struct kiocb *kiocb, - int rw, char __user *buf, - unsigned long *nr_segs, - size_t len, - struct iovec *iovec) -{ - if (unlikely(!access_ok(!rw, buf, len))) - return -EFAULT; - - iovec->iov_base = buf; - iovec->iov_len = len; - *nr_segs = 1; - return 0; + return import_iovec(rw, (struct iovec __user *)buf, + len, UIO_FASTIOV, iovec, iter); } /* @@ -1405,7 +1384,6 @@ static ssize_t aio_run_iocb(struct kiocb *req, unsigned opcode, { struct file *file = req->ki_filp; ssize_t ret; - unsigned long nr_segs; int rw; fmode_t mode; aio_rw_op *rw_op; @@ -1437,16 +1415,17 @@ rw_common: return -EINVAL; if (opcode == IOCB_CMD_PREADV || opcode == IOCB_CMD_PWRITEV) - ret = aio_setup_vectored_rw(req, rw, buf, &nr_segs, - &len, &iovec, compat); - else - ret = 
aio_setup_single_vector(req, rw, buf, &nr_segs, - len, iovec); + ret = aio_setup_vectored_rw(rw, buf, len, + &iovec, compat, &iter); + else { + ret = import_single_range(rw, buf, len, iovec, &iter); + iovec = NULL; + } if (!ret) - ret = rw_verify_area(rw, file, &req->ki_pos, len); + ret = rw_verify_area(rw, file, &req->ki_pos, + iov_iter_count(&iter)); if (ret < 0) { - if (iovec != inline_vecs) - kfree(iovec); + kfree(iovec); return ret; } @@ -1463,14 +1442,14 @@ rw_common: file_start_write(file); if (iter_op) { - iov_iter_init(&iter, rw, iovec, nr_segs, len); ret = iter_op(req, &iter); } else { - ret = rw_op(req, iovec, nr_segs, req->ki_pos); + ret = rw_op(req, iter.iov, iter.nr_segs, req->ki_pos); } if (rw == WRITE) file_end_write(file); + kfree(iovec); break; case IOCB_CMD_FDSYNC: @@ -1492,9 +1471,6 @@ rw_common: return -EINVAL; } - if (iovec != inline_vecs) - kfree(iovec); - if (ret != -EIOCBQUEUED) { /* * There's no easy way to restart the syscall since other AIO's diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 995986b8e36b..241ef68d2893 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -31,6 +31,7 @@ #include <linux/security.h> #include <linux/random.h> #include <linux/elf.h> +#include <linux/elf-randomize.h> #include <linux/utsname.h> #include <linux/coredump.h> #include <linux/sched.h> @@ -862,6 +863,7 @@ static int load_elf_binary(struct linux_binprm *bprm) i < loc->elf_ex.e_phnum; i++, elf_ppnt++) { int elf_prot = 0, elf_flags; unsigned long k, vaddr; + unsigned long total_size = 0; if (elf_ppnt->p_type != PT_LOAD) continue; @@ -909,25 +911,20 @@ static int load_elf_binary(struct linux_binprm *bprm) * default mmap base, as well as whatever program they * might try to exec. This is because the brk will * follow the loader, and is not movable. */ -#ifdef CONFIG_ARCH_BINFMT_ELF_RANDOMIZE_PIE - /* Memory randomization might have been switched off - * in runtime via sysctl or explicit setting of - * personality flags. 
- * If that is the case, retain the original non-zero - * load_bias value in order to establish proper - * non-randomized mappings. - */ + load_bias = ELF_ET_DYN_BASE - vaddr; if (current->flags & PF_RANDOMIZE) - load_bias = 0; - else - load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr); -#else - load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr); -#endif + load_bias += arch_mmap_rnd(); + load_bias = ELF_PAGESTART(load_bias); + total_size = total_mapping_size(elf_phdata, + loc->elf_ex.e_phnum); + if (!total_size) { + error = -EINVAL; + goto out_free_dentry; + } } error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt, - elf_prot, elf_flags, 0); + elf_prot, elf_flags, total_size); if (BAD_ADDR(error)) { retval = IS_ERR((void *)error) ? PTR_ERR((void*)error) : -EINVAL; @@ -1053,15 +1050,13 @@ static int load_elf_binary(struct linux_binprm *bprm) current->mm->end_data = end_data; current->mm->start_stack = bprm->p; -#ifdef arch_randomize_brk if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) { current->mm->brk = current->mm->start_brk = arch_randomize_brk(current->mm); -#ifdef CONFIG_COMPAT_BRK +#ifdef compat_brk_randomized current->brk_randomized = 1; #endif } -#endif if (current->personality & MMAP_PAGE_ZERO) { /* Why this, you ask??? Well SVr4 maps page 0 as read-only, diff --git a/fs/buffer.c b/fs/buffer.c index 20805db2c987..c7a5602d01ee 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -3243,8 +3243,8 @@ int try_to_free_buffers(struct page *page) * to synchronise against __set_page_dirty_buffers and prevent the * dirty bit from being lost. 
*/ - if (ret) - cancel_dirty_page(page, PAGE_CACHE_SIZE); + if (ret && TestClearPageDirty(page)) + account_page_cleaned(page, mapping); spin_unlock(&mapping->private_lock); out: if (buffers_to_free) { diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 480cf9c81d50..f3bfe08e177b 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -773,8 +773,7 @@ static void clean_demultiplex_info(struct TCP_Server_Info *server) length = atomic_dec_return(&tcpSesAllocCount); if (length > 0) - mempool_resize(cifs_req_poolp, length + cifs_min_rcv, - GFP_KERNEL); + mempool_resize(cifs_req_poolp, length + cifs_min_rcv); } static int @@ -848,8 +847,7 @@ cifs_demultiplex_thread(void *p) length = atomic_inc_return(&tcpSesAllocCount); if (length > 1) - mempool_resize(cifs_req_poolp, length + cifs_min_rcv, - GFP_KERNEL); + mempool_resize(cifs_req_poolp, length + cifs_min_rcv); set_freezable(); while (server->tcpStatus != CifsExiting) { diff --git a/fs/dcache.c b/fs/dcache.c index c71e3732e53b..d99736a63e3c 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -2690,7 +2690,7 @@ static int __d_unalias(struct inode *inode, struct dentry *dentry, struct dentry *alias) { struct mutex *m1 = NULL, *m2 = NULL; - int ret = -EBUSY; + int ret = -ESTALE; /* If alias and dentry share a parent, then no extra locks required */ if (alias->d_parent == dentry->d_parent) diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index 96400ab42d13..61e72d44cf94 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -254,6 +254,9 @@ static struct dentry *start_creating(const char *name, struct dentry *parent) pr_debug("debugfs: creating file '%s'\n",name); + if (IS_ERR(parent)) + return parent; + error = simple_pin_fs(&debug_fs_type, &debugfs_mount, &debugfs_mount_count); if (error) diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c index 7b3143064af1..1be3b061c05c 100644 --- a/fs/gfs2/acl.c +++ b/fs/gfs2/acl.c @@ -110,11 +110,7 @@ int gfs2_set_acl(struct inode *inode, struct posix_acl *acl, int type) error = 
__gfs2_xattr_set(inode, name, data, len, 0, GFS2_EATYPE_SYS); if (error) goto out; - - if (acl) - set_cached_acl(inode, type, acl); - else - forget_cached_acl(inode, type); + set_cached_acl(inode, type, acl); out: kfree(data); return error; diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index fe6634d25d1d..a6e6990aea39 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -671,12 +671,12 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping, if (alloc_required) { struct gfs2_alloc_parms ap = { .aflags = 0, }; - error = gfs2_quota_lock_check(ip); + requested = data_blocks + ind_blocks; + ap.target = requested; + error = gfs2_quota_lock_check(ip, &ap); if (error) goto out_unlock; - requested = data_blocks + ind_blocks; - ap.target = requested; error = gfs2_inplace_reserve(ip, &ap); if (error) goto out_qunlock; diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index f0b945ab853e..61296ecbd0e2 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -1224,7 +1224,7 @@ static int do_grow(struct inode *inode, u64 size) if (gfs2_is_stuffed(ip) && (size > (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)))) { - error = gfs2_quota_lock_check(ip); + error = gfs2_quota_lock_check(ip, &ap); if (error) return error; diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index f6fc412b1100..8ec43ab5babf 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -428,11 +428,11 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) if (ret) goto out_unlock; - ret = gfs2_quota_lock_check(ip); - if (ret) - goto out_unlock; gfs2_write_calc_reserv(ip, PAGE_CACHE_SIZE, &data_blocks, &ind_blocks); ap.target = data_blocks + ind_blocks; + ret = gfs2_quota_lock_check(ip, &ap); + if (ret) + goto out_unlock; ret = gfs2_inplace_reserve(ip, &ap); if (ret) goto out_quota_unlock; @@ -764,22 +764,30 @@ out: brelse(dibh); return error; } - -static void calc_max_reserv(struct gfs2_inode *ip, loff_t max, loff_t *len, - unsigned int *data_blocks, unsigned int *ind_blocks) +/** 
+ * calc_max_reserv() - Reverse of write_calc_reserv. Given a number of + * blocks, determine how many bytes can be written. + * @ip: The inode in question. + * @len: Max cap of bytes. What we return in *len must be <= this. + * @data_blocks: Compute and return the number of data blocks needed + * @ind_blocks: Compute and return the number of indirect blocks needed + * @max_blocks: The total blocks available to work with. + * + * Returns: void, but @len, @data_blocks and @ind_blocks are filled in. + */ +static void calc_max_reserv(struct gfs2_inode *ip, loff_t *len, + unsigned int *data_blocks, unsigned int *ind_blocks, + unsigned int max_blocks) { + loff_t max = *len; const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); - unsigned int max_blocks = ip->i_rgd->rd_free_clone; unsigned int tmp, max_data = max_blocks - 3 * (sdp->sd_max_height - 1); for (tmp = max_data; tmp > sdp->sd_diptrs;) { tmp = DIV_ROUND_UP(tmp, sdp->sd_inptrs); max_data -= tmp; } - /* This calculation isn't the exact reverse of gfs2_write_calc_reserve, - so it might end up with fewer data blocks */ - if (max_data <= *data_blocks) - return; + *data_blocks = max_data; *ind_blocks = max_blocks - max_data; *len = ((loff_t)max_data - 3) << sdp->sd_sb.sb_bsize_shift; @@ -796,7 +804,7 @@ static long __gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_alloc_parms ap = { .aflags = 0, }; unsigned int data_blocks = 0, ind_blocks = 0, rblocks; - loff_t bytes, max_bytes; + loff_t bytes, max_bytes, max_blks = UINT_MAX; int error; const loff_t pos = offset; const loff_t count = len; @@ -818,6 +826,9 @@ static long __gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t gfs2_size_hint(file, offset, len); + gfs2_write_calc_reserv(ip, PAGE_SIZE, &data_blocks, &ind_blocks); + ap.min_target = data_blocks + ind_blocks; + while (len > 0) { if (len < bytes) bytes = len; @@ -826,27 +837,41 @@ static long __gfs2_fallocate(struct file *file, int 
mode, loff_t offset, loff_t offset += bytes; continue; } - error = gfs2_quota_lock_check(ip); + + /* We need to determine how many bytes we can actually + * fallocate without exceeding quota or going over the + * end of the fs. We start off optimistically by assuming + * we can write max_bytes */ + max_bytes = (len > max_chunk_size) ? max_chunk_size : len; + + /* Since max_bytes is most likely a theoretical max, we + * calculate a more realistic 'bytes' to serve as a good + * starting point for the number of bytes we may be able + * to write */ + gfs2_write_calc_reserv(ip, bytes, &data_blocks, &ind_blocks); + ap.target = data_blocks + ind_blocks; + + error = gfs2_quota_lock_check(ip, &ap); if (error) return error; -retry: - gfs2_write_calc_reserv(ip, bytes, &data_blocks, &ind_blocks); + /* ap.allowed tells us how many blocks quota will allow + * us to write. Check if this reduces max_blks */ + if (ap.allowed && ap.allowed < max_blks) + max_blks = ap.allowed; - ap.target = data_blocks + ind_blocks; error = gfs2_inplace_reserve(ip, &ap); - if (error) { - if (error == -ENOSPC && bytes > sdp->sd_sb.sb_bsize) { - bytes >>= 1; - bytes &= bsize_mask; - if (bytes == 0) - bytes = sdp->sd_sb.sb_bsize; - goto retry; - } + if (error) goto out_qunlock; - } - max_bytes = bytes; - calc_max_reserv(ip, (len > max_chunk_size)? max_chunk_size: len, - &max_bytes, &data_blocks, &ind_blocks); + + /* check if the selected rgrp limits our max_blks further */ + if (ap.allowed && ap.allowed < max_blks) + max_blks = ap.allowed; + + /* Almost done. Calculate bytes that can be written using + * max_blks. 
We also recompute max_bytes, data_blocks and + * ind_blocks */ + calc_max_reserv(ip, &max_bytes, &data_blocks, + &ind_blocks, max_blks); rblocks = RES_DINODE + ind_blocks + RES_STATFS + RES_QUOTA + RES_RG_HDR + gfs2_rg_blocks(ip, data_blocks + ind_blocks); @@ -930,6 +955,22 @@ out_uninit: return ret; } +static ssize_t gfs2_file_splice_write(struct pipe_inode_info *pipe, + struct file *out, loff_t *ppos, + size_t len, unsigned int flags) +{ + int error; + struct gfs2_inode *ip = GFS2_I(out->f_mapping->host); + + error = gfs2_rs_alloc(ip); + if (error) + return (ssize_t)error; + + gfs2_size_hint(out, *ppos, len); + + return iter_file_splice_write(pipe, out, ppos, len, flags); +} + #ifdef CONFIG_GFS2_FS_LOCKING_DLM /** @@ -1076,7 +1117,7 @@ const struct file_operations gfs2_file_fops = { .lock = gfs2_lock, .flock = gfs2_flock, .splice_read = generic_file_splice_read, - .splice_write = iter_file_splice_write, + .splice_write = gfs2_file_splice_write, .setlease = simple_nosetlease, .fallocate = gfs2_fallocate, }; @@ -1106,7 +1147,7 @@ const struct file_operations gfs2_file_fops_nolock = { .release = gfs2_release, .fsync = gfs2_fsync, .splice_read = generic_file_splice_read, - .splice_write = iter_file_splice_write, + .splice_write = gfs2_file_splice_write, .setlease = generic_setlease, .fallocate = gfs2_fallocate, }; diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index f42dffba056a..0fa8062f85a7 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -2047,34 +2047,41 @@ static const struct file_operations gfs2_sbstats_fops = { int gfs2_create_debugfs_file(struct gfs2_sbd *sdp) { - sdp->debugfs_dir = debugfs_create_dir(sdp->sd_table_name, gfs2_root); - if (!sdp->debugfs_dir) - return -ENOMEM; - sdp->debugfs_dentry_glocks = debugfs_create_file("glocks", - S_IFREG | S_IRUGO, - sdp->debugfs_dir, sdp, - &gfs2_glocks_fops); - if (!sdp->debugfs_dentry_glocks) + struct dentry *dent; + + dent = debugfs_create_dir(sdp->sd_table_name, gfs2_root); + if (IS_ERR_OR_NULL(dent)) + goto 
fail; + sdp->debugfs_dir = dent; + + dent = debugfs_create_file("glocks", + S_IFREG | S_IRUGO, + sdp->debugfs_dir, sdp, + &gfs2_glocks_fops); + if (IS_ERR_OR_NULL(dent)) goto fail; + sdp->debugfs_dentry_glocks = dent; - sdp->debugfs_dentry_glstats = debugfs_create_file("glstats", - S_IFREG | S_IRUGO, - sdp->debugfs_dir, sdp, - &gfs2_glstats_fops); - if (!sdp->debugfs_dentry_glstats) + dent = debugfs_create_file("glstats", + S_IFREG | S_IRUGO, + sdp->debugfs_dir, sdp, + &gfs2_glstats_fops); + if (IS_ERR_OR_NULL(dent)) goto fail; + sdp->debugfs_dentry_glstats = dent; - sdp->debugfs_dentry_sbstats = debugfs_create_file("sbstats", - S_IFREG | S_IRUGO, - sdp->debugfs_dir, sdp, - &gfs2_sbstats_fops); - if (!sdp->debugfs_dentry_sbstats) + dent = debugfs_create_file("sbstats", + S_IFREG | S_IRUGO, + sdp->debugfs_dir, sdp, + &gfs2_sbstats_fops); + if (IS_ERR_OR_NULL(dent)) goto fail; + sdp->debugfs_dentry_sbstats = dent; return 0; fail: gfs2_delete_debugfs_file(sdp); - return -ENOMEM; + return dent ? PTR_ERR(dent) : -ENOMEM; } void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp) @@ -2100,6 +2107,8 @@ void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp) int gfs2_register_debugfs(void) { gfs2_root = debugfs_create_dir("gfs2", NULL); + if (IS_ERR(gfs2_root)) + return PTR_ERR(gfs2_root); return gfs2_root ? 0 : -ENOMEM; } diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 7a2dbbc0d634..58b75abf6ab2 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -301,8 +301,10 @@ struct gfs2_blkreserv { * to the allocation code. 
*/ struct gfs2_alloc_parms { - u32 target; + u64 target; + u32 min_target; u32 aflags; + u64 allowed; }; enum { diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 73c72253faac..08bc84d7e768 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -382,7 +382,7 @@ static int alloc_dinode(struct gfs2_inode *ip, u32 flags, unsigned *dblocks) struct gfs2_alloc_parms ap = { .target = *dblocks, .aflags = flags, }; int error; - error = gfs2_quota_lock_check(ip); + error = gfs2_quota_lock_check(ip, &ap); if (error) goto out; @@ -525,7 +525,7 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name, int error; if (da->nr_blocks) { - error = gfs2_quota_lock_check(dip); + error = gfs2_quota_lock_check(dip, &ap); if (error) goto fail_quota_locks; @@ -953,7 +953,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir, if (da.nr_blocks) { struct gfs2_alloc_parms ap = { .target = da.nr_blocks, }; - error = gfs2_quota_lock_check(dip); + error = gfs2_quota_lock_check(dip, &ap); if (error) goto out_gunlock; @@ -1470,7 +1470,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, if (da.nr_blocks) { struct gfs2_alloc_parms ap = { .target = da.nr_blocks, }; - error = gfs2_quota_lock_check(ndip); + error = gfs2_quota_lock_check(ndip, &ap); if (error) goto out_gunlock; @@ -1669,6 +1669,7 @@ static int setattr_chown(struct inode *inode, struct iattr *attr) kuid_t ouid, nuid; kgid_t ogid, ngid; int error; + struct gfs2_alloc_parms ap; ouid = inode->i_uid; ogid = inode->i_gid; @@ -1696,9 +1697,11 @@ static int setattr_chown(struct inode *inode, struct iattr *attr) if (error) goto out; + ap.target = gfs2_get_inode_blocks(&ip->i_inode); + if (!uid_eq(ouid, NO_UID_QUOTA_CHANGE) || !gid_eq(ogid, NO_GID_QUOTA_CHANGE)) { - error = gfs2_quota_check(ip, nuid, ngid); + error = gfs2_quota_check(ip, nuid, ngid, &ap); if (error) goto out_gunlock_q; } @@ -1713,9 +1716,8 @@ static int setattr_chown(struct inode *inode, struct iattr *attr) if (!uid_eq(ouid, 
NO_UID_QUOTA_CHANGE) || !gid_eq(ogid, NO_GID_QUOTA_CHANGE)) { - u64 blocks = gfs2_get_inode_blocks(&ip->i_inode); - gfs2_quota_change(ip, -blocks, ouid, ogid); - gfs2_quota_change(ip, blocks, nuid, ngid); + gfs2_quota_change(ip, -ap.target, ouid, ogid); + gfs2_quota_change(ip, ap.target, nuid, ngid); } out_end_trans: diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 3aa17d4d1cfc..5c27e48aa76f 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -923,6 +923,9 @@ restart: if (error) return error; + if (test_and_clear_bit(QDF_REFRESH, &qd->qd_flags)) + force_refresh = FORCE; + qd->qd_qb = *(struct gfs2_quota_lvb *)qd->qd_gl->gl_lksb.sb_lvbptr; if (force_refresh || qd->qd_qb.qb_magic != cpu_to_be32(GFS2_MAGIC)) { @@ -974,11 +977,8 @@ int gfs2_quota_lock(struct gfs2_inode *ip, kuid_t uid, kgid_t gid) sizeof(struct gfs2_quota_data *), sort_qd, NULL); for (x = 0; x < ip->i_res->rs_qa_qd_num; x++) { - int force = NO_FORCE; qd = ip->i_res->rs_qa_qd[x]; - if (test_and_clear_bit(QDF_REFRESH, &qd->qd_flags)) - force = FORCE; - error = do_glock(qd, force, &ip->i_res->rs_qa_qd_ghs[x]); + error = do_glock(qd, NO_FORCE, &ip->i_res->rs_qa_qd_ghs[x]); if (error) break; } @@ -1094,14 +1094,33 @@ static int print_message(struct gfs2_quota_data *qd, char *type) return 0; } -int gfs2_quota_check(struct gfs2_inode *ip, kuid_t uid, kgid_t gid) +/** + * gfs2_quota_check - check if allocating new blocks will exceed quota + * @ip: The inode for which this check is being performed + * @uid: The uid to check against + * @gid: The gid to check against + * @ap: The allocation parameters. ap->target contains the requested + * blocks. ap->min_target, if set, contains the minimum blks + * requested. + * + * Returns: 0 on success. + * min_req = ap->min_target ? ap->min_target : ap->target; + * quota must allow atleast min_req blks for success and + * ap->allowed is set to the number of blocks allowed + * + * -EDQUOT otherwise, quota violation. 
ap->allowed is set to number + * of blocks available. + */ +int gfs2_quota_check(struct gfs2_inode *ip, kuid_t uid, kgid_t gid, + struct gfs2_alloc_parms *ap) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct gfs2_quota_data *qd; - s64 value; + s64 value, warn, limit; unsigned int x; int error = 0; + ap->allowed = UINT_MAX; /* Assume we are permitted a whole lot */ if (!test_bit(GIF_QD_LOCKED, &ip->i_flags)) return 0; @@ -1115,30 +1134,37 @@ int gfs2_quota_check(struct gfs2_inode *ip, kuid_t uid, kgid_t gid) qid_eq(qd->qd_id, make_kqid_gid(gid)))) continue; + warn = (s64)be64_to_cpu(qd->qd_qb.qb_warn); + limit = (s64)be64_to_cpu(qd->qd_qb.qb_limit); value = (s64)be64_to_cpu(qd->qd_qb.qb_value); spin_lock(&qd_lock); value += qd->qd_change; spin_unlock(&qd_lock); - if (be64_to_cpu(qd->qd_qb.qb_limit) && (s64)be64_to_cpu(qd->qd_qb.qb_limit) < value) { - print_message(qd, "exceeded"); - quota_send_warning(qd->qd_id, - sdp->sd_vfs->s_dev, QUOTA_NL_BHARDWARN); - - error = -EDQUOT; - break; - } else if (be64_to_cpu(qd->qd_qb.qb_warn) && - (s64)be64_to_cpu(qd->qd_qb.qb_warn) < value && + if (limit > 0 && (limit - value) < ap->allowed) + ap->allowed = limit - value; + /* If we can't meet the target */ + if (limit && limit < (value + (s64)ap->target)) { + /* If no min_target specified or we don't meet + * min_target, return -EDQUOT */ + if (!ap->min_target || ap->min_target > ap->allowed) { + print_message(qd, "exceeded"); + quota_send_warning(qd->qd_id, + sdp->sd_vfs->s_dev, + QUOTA_NL_BHARDWARN); + error = -EDQUOT; + break; + } + } else if (warn && warn < value && time_after_eq(jiffies, qd->qd_last_warn + - gfs2_tune_get(sdp, - gt_quota_warn_period) * HZ)) { + gfs2_tune_get(sdp, gt_quota_warn_period) + * HZ)) { quota_send_warning(qd->qd_id, sdp->sd_vfs->s_dev, QUOTA_NL_BSOFTWARN); error = print_message(qd, "warning"); qd->qd_last_warn = jiffies; } } - return error; } diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h index 55d506eb3c4a..ad04b3acae2b 100644 --- 
a/fs/gfs2/quota.h +++ b/fs/gfs2/quota.h @@ -24,7 +24,8 @@ extern void gfs2_quota_unhold(struct gfs2_inode *ip); extern int gfs2_quota_lock(struct gfs2_inode *ip, kuid_t uid, kgid_t gid); extern void gfs2_quota_unlock(struct gfs2_inode *ip); -extern int gfs2_quota_check(struct gfs2_inode *ip, kuid_t uid, kgid_t gid); +extern int gfs2_quota_check(struct gfs2_inode *ip, kuid_t uid, kgid_t gid, + struct gfs2_alloc_parms *ap); extern void gfs2_quota_change(struct gfs2_inode *ip, s64 change, kuid_t uid, kgid_t gid); @@ -37,7 +38,8 @@ extern int gfs2_quotad(void *data); extern void gfs2_wake_up_statfs(struct gfs2_sbd *sdp); -static inline int gfs2_quota_lock_check(struct gfs2_inode *ip) +static inline int gfs2_quota_lock_check(struct gfs2_inode *ip, + struct gfs2_alloc_parms *ap) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); int ret; @@ -48,7 +50,7 @@ static inline int gfs2_quota_lock_check(struct gfs2_inode *ip) return ret; if (sdp->sd_args.ar_quota != GFS2_QUOTA_ON) return 0; - ret = gfs2_quota_check(ip, ip->i_inode.i_uid, ip->i_inode.i_gid); + ret = gfs2_quota_check(ip, ip->i_inode.i_uid, ip->i_inode.i_gid, ap); if (ret) gfs2_quota_unlock(ip); return ret; diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 9150207f365c..6af2396a317c 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -1946,10 +1946,18 @@ static inline int fast_to_acquire(struct gfs2_rgrpd *rgd) * @ip: the inode to reserve space for * @ap: the allocation parameters * - * Returns: errno + * We try our best to find an rgrp that has at least ap->target blocks + * available. After a couple of passes (loops == 2), the prospects of finding + * such an rgrp diminish. At this stage, we return the first rgrp that has + * atleast ap->min_target blocks available. Either way, we set ap->allowed to + * the number of blocks available in the chosen rgrp. 
+ * + * Returns: 0 on success, + * -ENOMEM if a suitable rgrp can't be found + * errno otherwise */ -int gfs2_inplace_reserve(struct gfs2_inode *ip, const struct gfs2_alloc_parms *ap) +int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct gfs2_rgrpd *begin = NULL; @@ -2012,7 +2020,7 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, const struct gfs2_alloc_parms *a /* Skip unuseable resource groups */ if ((rs->rs_rbm.rgd->rd_flags & (GFS2_RGF_NOALLOC | GFS2_RDF_ERROR)) || - (ap->target > rs->rs_rbm.rgd->rd_extfail_pt)) + (loops == 0 && ap->target > rs->rs_rbm.rgd->rd_extfail_pt)) goto skip_rgrp; if (sdp->sd_args.ar_rgrplvb) @@ -2027,11 +2035,13 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, const struct gfs2_alloc_parms *a goto check_rgrp; /* If rgrp has enough free space, use it */ - if (rs->rs_rbm.rgd->rd_free_clone >= ap->target) { + if (rs->rs_rbm.rgd->rd_free_clone >= ap->target || + (loops == 2 && ap->min_target && + rs->rs_rbm.rgd->rd_free_clone >= ap->min_target)) { ip->i_rgd = rs->rs_rbm.rgd; + ap->allowed = ip->i_rgd->rd_free_clone; return 0; } - check_rgrp: /* Check for unlinked inodes which can be reclaimed */ if (rs->rs_rbm.rgd->rd_flags & GFS2_RDF_CHECK) diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h index b104f4af3afd..68972ecfbb01 100644 --- a/fs/gfs2/rgrp.h +++ b/fs/gfs2/rgrp.h @@ -41,7 +41,8 @@ extern void gfs2_rgrp_go_unlock(struct gfs2_holder *gh); extern struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip); #define GFS2_AF_ORLOV 1 -extern int gfs2_inplace_reserve(struct gfs2_inode *ip, const struct gfs2_alloc_parms *ap); +extern int gfs2_inplace_reserve(struct gfs2_inode *ip, + struct gfs2_alloc_parms *ap); extern void gfs2_inplace_release(struct gfs2_inode *ip); extern int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *n, diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c index 0b81f783f787..fd260ce8869a 100644 --- a/fs/gfs2/xattr.c +++ 
b/fs/gfs2/xattr.c @@ -732,7 +732,7 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er, if (error) return error; - error = gfs2_quota_lock_check(ip); + error = gfs2_quota_lock_check(ip, &ap); if (error) return error; diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index c274aca8e8dc..db76cec3ce21 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -319,7 +319,7 @@ static int hugetlbfs_write_end(struct file *file, struct address_space *mapping, static void truncate_huge_page(struct page *page) { - cancel_dirty_page(page, /* No IO accounting for huge pages? */0); + ClearPageDirty(page); ClearPageUptodate(page); delete_from_page_cache(page); } diff --git a/fs/jffs2/xattr.c b/fs/jffs2/xattr.c index d72817ac51f6..762c7a3cf43d 100644 --- a/fs/jffs2/xattr.c +++ b/fs/jffs2/xattr.c @@ -195,7 +195,7 @@ static int do_verify_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_dat /* unchecked xdatum is chained with c->xattr_unchecked */ list_del_init(&xd->xindex); - dbg_xattr("success on verfying xdatum (xid=%u, version=%u)\n", + dbg_xattr("success on verifying xdatum (xid=%u, version=%u)\n", xd->xid, xd->version); return 0; diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 5d30c56ae075..4cd9798f4948 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -102,7 +102,7 @@ void jfs_error(struct super_block *sb, const char *fmt, ...) vaf.fmt = fmt; vaf.va = &args; - pr_err("ERROR: (device %s): %pf: %pV\n", + pr_err("ERROR: (device %s): %ps: %pV\n", sb->s_id, __builtin_return_address(0), &vaf); va_end(args); diff --git a/fs/namei.c b/fs/namei.c index c83145af4bfc..76fb76a0818b 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -119,15 +119,14 @@ * PATH_MAX includes the nul terminator --RR. 
*/ -#define EMBEDDED_NAME_MAX (PATH_MAX - sizeof(struct filename)) +#define EMBEDDED_NAME_MAX (PATH_MAX - offsetof(struct filename, iname)) struct filename * getname_flags(const char __user *filename, int flags, int *empty) { - struct filename *result, *err; - int len; - long max; + struct filename *result; char *kname; + int len; result = audit_reusename(filename); if (result) @@ -136,22 +135,18 @@ getname_flags(const char __user *filename, int flags, int *empty) result = __getname(); if (unlikely(!result)) return ERR_PTR(-ENOMEM); - result->refcnt = 1; /* * First, try to embed the struct filename inside the names_cache * allocation */ - kname = (char *)result + sizeof(*result); + kname = (char *)result->iname; result->name = kname; - result->separate = false; - max = EMBEDDED_NAME_MAX; -recopy: - len = strncpy_from_user(kname, filename, max); + len = strncpy_from_user(kname, filename, EMBEDDED_NAME_MAX); if (unlikely(len < 0)) { - err = ERR_PTR(len); - goto error; + __putname(result); + return ERR_PTR(len); } /* @@ -160,43 +155,49 @@ recopy: * names_cache allocation for the pathname, and re-do the copy from * userland. */ - if (len == EMBEDDED_NAME_MAX && max == EMBEDDED_NAME_MAX) { + if (unlikely(len == EMBEDDED_NAME_MAX)) { + const size_t size = offsetof(struct filename, iname[1]); kname = (char *)result; - result = kzalloc(sizeof(*result), GFP_KERNEL); - if (!result) { - err = ERR_PTR(-ENOMEM); - result = (struct filename *)kname; - goto error; + /* + * size is chosen that way we to guarantee that + * result->iname[0] is within the same object and that + * kname can't be equal to result->iname, no matter what. 
+ */ + result = kzalloc(size, GFP_KERNEL); + if (unlikely(!result)) { + __putname(kname); + return ERR_PTR(-ENOMEM); } result->name = kname; - result->separate = true; - result->refcnt = 1; - max = PATH_MAX; - goto recopy; + len = strncpy_from_user(kname, filename, PATH_MAX); + if (unlikely(len < 0)) { + __putname(kname); + kfree(result); + return ERR_PTR(len); + } + if (unlikely(len == PATH_MAX)) { + __putname(kname); + kfree(result); + return ERR_PTR(-ENAMETOOLONG); + } } + result->refcnt = 1; /* The empty path is special. */ if (unlikely(!len)) { if (empty) *empty = 1; - err = ERR_PTR(-ENOENT); - if (!(flags & LOOKUP_EMPTY)) - goto error; + if (!(flags & LOOKUP_EMPTY)) { + putname(result); + return ERR_PTR(-ENOENT); + } } - err = ERR_PTR(-ENAMETOOLONG); - if (unlikely(len >= PATH_MAX)) - goto error; - result->uptr = filename; result->aname = NULL; audit_getname(result); return result; - -error: - putname(result); - return err; } struct filename * @@ -216,8 +217,7 @@ getname_kernel(const char * filename) return ERR_PTR(-ENOMEM); if (len <= EMBEDDED_NAME_MAX) { - result->name = (char *)(result) + sizeof(*result); - result->separate = false; + result->name = (char *)result->iname; } else if (len <= PATH_MAX) { struct filename *tmp; @@ -227,7 +227,6 @@ getname_kernel(const char * filename) return ERR_PTR(-ENOMEM); } tmp->name = (char *)result; - tmp->separate = true; result = tmp; } else { __putname(result); @@ -249,7 +248,7 @@ void putname(struct filename *name) if (--name->refcnt > 0) return; - if (name->separate) { + if (name->name != name->iname) { __putname(name->name); kfree(name); } else @@ -1851,10 +1850,11 @@ static int link_path_walk(const char *name, struct nameidata *nd) return err; } -static int path_init(int dfd, const char *name, unsigned int flags, +static int path_init(int dfd, const struct filename *name, unsigned int flags, struct nameidata *nd) { int retval = 0; + const char *s = name->name; nd->last_type = LAST_ROOT; /* if there are only 
slashes... */ nd->flags = flags | LOOKUP_JUMPED | LOOKUP_PARENT; @@ -1863,7 +1863,7 @@ static int path_init(int dfd, const char *name, unsigned int flags, if (flags & LOOKUP_ROOT) { struct dentry *root = nd->root.dentry; struct inode *inode = root->d_inode; - if (*name) { + if (*s) { if (!d_can_lookup(root)) return -ENOTDIR; retval = inode_permission(inode, MAY_EXEC); @@ -1885,7 +1885,7 @@ static int path_init(int dfd, const char *name, unsigned int flags, nd->root.mnt = NULL; nd->m_seq = read_seqbegin(&mount_lock); - if (*name=='/') { + if (*s == '/') { if (flags & LOOKUP_RCU) { rcu_read_lock(); nd->seq = set_root_rcu(nd); @@ -1919,7 +1919,7 @@ static int path_init(int dfd, const char *name, unsigned int flags, dentry = f.file->f_path.dentry; - if (*name) { + if (*s) { if (!d_can_lookup(dentry)) { fdput(f); return -ENOTDIR; @@ -1949,7 +1949,7 @@ static int path_init(int dfd, const char *name, unsigned int flags, return -ECHILD; done: current->total_link_count = 0; - return link_path_walk(name, nd); + return link_path_walk(s, nd); } static void path_cleanup(struct nameidata *nd) @@ -1972,7 +1972,7 @@ static inline int lookup_last(struct nameidata *nd, struct path *path) } /* Returns 0 and nd will be valid on success; Retuns error, otherwise. 
*/ -static int path_lookupat(int dfd, const char *name, +static int path_lookupat(int dfd, const struct filename *name, unsigned int flags, struct nameidata *nd) { struct path path; @@ -2027,31 +2027,17 @@ static int path_lookupat(int dfd, const char *name, static int filename_lookup(int dfd, struct filename *name, unsigned int flags, struct nameidata *nd) { - int retval = path_lookupat(dfd, name->name, flags | LOOKUP_RCU, nd); + int retval = path_lookupat(dfd, name, flags | LOOKUP_RCU, nd); if (unlikely(retval == -ECHILD)) - retval = path_lookupat(dfd, name->name, flags, nd); + retval = path_lookupat(dfd, name, flags, nd); if (unlikely(retval == -ESTALE)) - retval = path_lookupat(dfd, name->name, - flags | LOOKUP_REVAL, nd); + retval = path_lookupat(dfd, name, flags | LOOKUP_REVAL, nd); if (likely(!retval)) audit_inode(name, nd->path.dentry, flags & LOOKUP_PARENT); return retval; } -static int do_path_lookup(int dfd, const char *name, - unsigned int flags, struct nameidata *nd) -{ - struct filename *filename = getname_kernel(name); - int retval = PTR_ERR(filename); - - if (!IS_ERR(filename)) { - retval = filename_lookup(dfd, filename, flags, nd); - putname(filename); - } - return retval; -} - /* does lookup, returns the object with parent locked */ struct dentry *kern_path_locked(const char *name, struct path *path) { @@ -2089,9 +2075,15 @@ out: int kern_path(const char *name, unsigned int flags, struct path *path) { struct nameidata nd; - int res = do_path_lookup(AT_FDCWD, name, flags, &nd); - if (!res) - *path = nd.path; + struct filename *filename = getname_kernel(name); + int res = PTR_ERR(filename); + + if (!IS_ERR(filename)) { + res = filename_lookup(AT_FDCWD, filename, flags, &nd); + putname(filename); + if (!res) + *path = nd.path; + } return res; } EXPORT_SYMBOL(kern_path); @@ -2108,15 +2100,22 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt, const char *name, unsigned int flags, struct path *path) { - struct nameidata nd; - int err; - 
nd.root.dentry = dentry; - nd.root.mnt = mnt; + struct filename *filename = getname_kernel(name); + int err = PTR_ERR(filename); + BUG_ON(flags & LOOKUP_PARENT); - /* the first argument of do_path_lookup() is ignored with LOOKUP_ROOT */ - err = do_path_lookup(AT_FDCWD, name, flags | LOOKUP_ROOT, &nd); - if (!err) - *path = nd.path; + + /* the first argument of filename_lookup() is ignored with LOOKUP_ROOT */ + if (!IS_ERR(filename)) { + struct nameidata nd; + nd.root.dentry = dentry; + nd.root.mnt = mnt; + err = filename_lookup(AT_FDCWD, filename, + flags | LOOKUP_ROOT, &nd); + if (!err) + *path = nd.path; + putname(filename); + } return err; } EXPORT_SYMBOL(vfs_path_lookup); @@ -2138,9 +2137,7 @@ static struct dentry *lookup_hash(struct nameidata *nd) * @len: maximum length @len should be interpreted to * * Note that this routine is purely a helper for filesystem usage and should - * not be called by generic code. Also note that by using this function the - * nameidata argument is passed to the filesystem methods and a filesystem - * using this helper needs to be prepared for that. + * not be called by generic code. */ struct dentry *lookup_one_len(const char *name, struct dentry *base, int len) { @@ -2341,7 +2338,8 @@ out: * Returns 0 and "path" will be valid on success; Returns error otherwise. 
*/ static int -path_mountpoint(int dfd, const char *name, struct path *path, unsigned int flags) +path_mountpoint(int dfd, const struct filename *name, struct path *path, + unsigned int flags) { struct nameidata nd; int err; @@ -2370,20 +2368,20 @@ out: } static int -filename_mountpoint(int dfd, struct filename *s, struct path *path, +filename_mountpoint(int dfd, struct filename *name, struct path *path, unsigned int flags) { int error; - if (IS_ERR(s)) - return PTR_ERR(s); - error = path_mountpoint(dfd, s->name, path, flags | LOOKUP_RCU); + if (IS_ERR(name)) + return PTR_ERR(name); + error = path_mountpoint(dfd, name, path, flags | LOOKUP_RCU); if (unlikely(error == -ECHILD)) - error = path_mountpoint(dfd, s->name, path, flags); + error = path_mountpoint(dfd, name, path, flags); if (unlikely(error == -ESTALE)) - error = path_mountpoint(dfd, s->name, path, flags | LOOKUP_REVAL); + error = path_mountpoint(dfd, name, path, flags | LOOKUP_REVAL); if (likely(!error)) - audit_inode(s, path->dentry, 0); - putname(s); + audit_inode(name, path->dentry, 0); + putname(name); return error; } @@ -3156,7 +3154,7 @@ static int do_tmpfile(int dfd, struct filename *pathname, static const struct qstr name = QSTR_INIT("/", 1); struct dentry *dentry, *child; struct inode *dir; - int error = path_lookupat(dfd, pathname->name, + int error = path_lookupat(dfd, pathname, flags | LOOKUP_DIRECTORY, nd); if (unlikely(error)) return error; @@ -3229,7 +3227,7 @@ static struct file *path_openat(int dfd, struct filename *pathname, goto out; } - error = path_init(dfd, pathname->name, flags, nd); + error = path_init(dfd, pathname, flags, nd); if (unlikely(error)) goto out; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 849ed784d6ac..759931088094 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1876,11 +1876,6 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page) * request from the inode / page_private pointer and * release it */ nfs_inode_remove_request(req); - /* - * In case 
nfs_inode_remove_request has marked the - * page as being dirty - */ - cancel_dirty_page(page, PAGE_CACHE_SIZE); nfs_unlock_and_release_request(req); } diff --git a/fs/ntfs/Makefile b/fs/ntfs/Makefile index 36ae529511c4..2ff263e6d363 100644 --- a/fs/ntfs/Makefile +++ b/fs/ntfs/Makefile @@ -8,7 +8,7 @@ ntfs-y := aops.o attrib.o collate.o compress.o debug.o dir.o file.o \ ntfs-$(CONFIG_NTFS_RW) += bitmap.o lcnalloc.o logfile.o quota.o usnjrnl.o -ccflags-y := -DNTFS_VERSION=\"2.1.31\" +ccflags-y := -DNTFS_VERSION=\"2.1.32\" ccflags-$(CONFIG_NTFS_DEBUG) += -DDEBUG ccflags-$(CONFIG_NTFS_RW) += -DNTFS_RW diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c index f16f2d8401fe..c1da78dad1af 100644 --- a/fs/ntfs/file.c +++ b/fs/ntfs/file.c @@ -1,7 +1,7 @@ /* * file.c - NTFS kernel file operations. Part of the Linux-NTFS project. * - * Copyright (c) 2001-2014 Anton Altaparmakov and Tuxera Inc. + * Copyright (c) 2001-2015 Anton Altaparmakov and Tuxera Inc. * * This program/include file is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as published @@ -328,62 +328,168 @@ err_out: return err; } -/** - * ntfs_fault_in_pages_readable - - * - * Fault a number of userspace pages into pagetables. - * - * Unlike include/linux/pagemap.h::fault_in_pages_readable(), this one copes - * with more than two userspace pages as well as handling the single page case - * elegantly. - * - * If you find this difficult to understand, then think of the while loop being - * the following code, except that we do without the integer variable ret: - * - * do { - * ret = __get_user(c, uaddr); - * uaddr += PAGE_SIZE; - * } while (!ret && uaddr < end); - * - * Note, the final __get_user() may well run out-of-bounds of the user buffer, - * but _not_ out-of-bounds of the page the user buffer belongs to, and since - * this is only a read and not a write, and since it is still in the same page, - * it should not matter and this makes the code much simpler. 
- */ -static inline void ntfs_fault_in_pages_readable(const char __user *uaddr, - int bytes) +static ssize_t ntfs_prepare_file_for_write(struct file *file, loff_t *ppos, + size_t *count) { - const char __user *end; - volatile char c; - - /* Set @end to the first byte outside the last page we care about. */ - end = (const char __user*)PAGE_ALIGN((unsigned long)uaddr + bytes); - - while (!__get_user(c, uaddr) && (uaddr += PAGE_SIZE, uaddr < end)) - ; -} - -/** - * ntfs_fault_in_pages_readable_iovec - - * - * Same as ntfs_fault_in_pages_readable() but operates on an array of iovecs. - */ -static inline void ntfs_fault_in_pages_readable_iovec(const struct iovec *iov, - size_t iov_ofs, int bytes) -{ - do { - const char __user *buf; - unsigned len; + loff_t pos; + s64 end, ll; + ssize_t err; + unsigned long flags; + struct inode *vi = file_inode(file); + ntfs_inode *base_ni, *ni = NTFS_I(vi); + ntfs_volume *vol = ni->vol; - buf = iov->iov_base + iov_ofs; - len = iov->iov_len - iov_ofs; - if (len > bytes) - len = bytes; - ntfs_fault_in_pages_readable(buf, len); - bytes -= len; - iov++; - iov_ofs = 0; - } while (bytes); + ntfs_debug("Entering for i_ino 0x%lx, attribute type 0x%x, pos " + "0x%llx, count 0x%lx.", vi->i_ino, + (unsigned)le32_to_cpu(ni->type), + (unsigned long long)*ppos, (unsigned long)*count); + /* We can write back this queue in page reclaim. */ + current->backing_dev_info = inode_to_bdi(vi); + err = generic_write_checks(file, ppos, count, S_ISBLK(vi->i_mode)); + if (unlikely(err)) + goto out; + /* + * All checks have passed. Before we start doing any writing we want + * to abort any totally illegal writes. + */ + BUG_ON(NInoMstProtected(ni)); + BUG_ON(ni->type != AT_DATA); + /* If file is encrypted, deny access, just like NT4. */ + if (NInoEncrypted(ni)) { + /* Only $DATA attributes can be encrypted. */ + /* + * Reminder for later: Encrypted files are _always_ + * non-resident so that the content can always be encrypted. 
+ */ + ntfs_debug("Denying write access to encrypted file."); + err = -EACCES; + goto out; + } + if (NInoCompressed(ni)) { + /* Only unnamed $DATA attribute can be compressed. */ + BUG_ON(ni->name_len); + /* + * Reminder for later: If resident, the data is not actually + * compressed. Only on the switch to non-resident does + * compression kick in. This is in contrast to encrypted files + * (see above). + */ + ntfs_error(vi->i_sb, "Writing to compressed files is not " + "implemented yet. Sorry."); + err = -EOPNOTSUPP; + goto out; + } + if (*count == 0) + goto out; + base_ni = ni; + if (NInoAttr(ni)) + base_ni = ni->ext.base_ntfs_ino; + err = file_remove_suid(file); + if (unlikely(err)) + goto out; + /* + * Our ->update_time method always succeeds thus file_update_time() + * cannot fail either so there is no need to check the return code. + */ + file_update_time(file); + pos = *ppos; + /* The first byte after the last cluster being written to. */ + end = (pos + *count + vol->cluster_size_mask) & + ~(u64)vol->cluster_size_mask; + /* + * If the write goes beyond the allocated size, extend the allocation + * to cover the whole of the write, rounded up to the nearest cluster. + */ + read_lock_irqsave(&ni->size_lock, flags); + ll = ni->allocated_size; + read_unlock_irqrestore(&ni->size_lock, flags); + if (end > ll) { + /* + * Extend the allocation without changing the data size. + * + * Note we ensure the allocation is big enough to at least + * write some data but we do not require the allocation to be + * complete, i.e. it may be partial. + */ + ll = ntfs_attr_extend_allocation(ni, end, -1, pos); + if (likely(ll >= 0)) { + BUG_ON(pos >= ll); + /* If the extension was partial truncate the write. 
*/ + if (end > ll) { + ntfs_debug("Truncating write to inode 0x%lx, " + "attribute type 0x%x, because " + "the allocation was only " + "partially extended.", + vi->i_ino, (unsigned) + le32_to_cpu(ni->type)); + *count = ll - pos; + } + } else { + err = ll; + read_lock_irqsave(&ni->size_lock, flags); + ll = ni->allocated_size; + read_unlock_irqrestore(&ni->size_lock, flags); + /* Perform a partial write if possible or fail. */ + if (pos < ll) { + ntfs_debug("Truncating write to inode 0x%lx " + "attribute type 0x%x, because " + "extending the allocation " + "failed (error %d).", + vi->i_ino, (unsigned) + le32_to_cpu(ni->type), + (int)-err); + *count = ll - pos; + } else { + if (err != -ENOSPC) + ntfs_error(vi->i_sb, "Cannot perform " + "write to inode " + "0x%lx, attribute " + "type 0x%x, because " + "extending the " + "allocation failed " + "(error %ld).", + vi->i_ino, (unsigned) + le32_to_cpu(ni->type), + (long)-err); + else + ntfs_debug("Cannot perform write to " + "inode 0x%lx, " + "attribute type 0x%x, " + "because there is not " + "space left.", + vi->i_ino, (unsigned) + le32_to_cpu(ni->type)); + goto out; + } + } + } + /* + * If the write starts beyond the initialized size, extend it up to the + * beginning of the write and initialize all non-sparse space between + * the old initialized size and the new one. This automatically also + * increments the vfs inode->i_size to keep it above or equal to the + * initialized_size. + */ + read_lock_irqsave(&ni->size_lock, flags); + ll = ni->initialized_size; + read_unlock_irqrestore(&ni->size_lock, flags); + if (pos > ll) { + /* + * Wait for ongoing direct i/o to complete before proceeding. + * New direct i/o cannot start as we hold i_mutex. 
+ */ + inode_dio_wait(vi); + err = ntfs_attr_extend_initialized(ni, pos); + if (unlikely(err < 0)) + ntfs_error(vi->i_sb, "Cannot perform write to inode " + "0x%lx, attribute type 0x%x, because " + "extending the initialized size " + "failed (error %d).", vi->i_ino, + (unsigned)le32_to_cpu(ni->type), + (int)-err); + } +out: + return err; } /** @@ -420,8 +526,8 @@ static inline int __ntfs_grab_cache_pages(struct address_space *mapping, goto err_out; } } - err = add_to_page_cache_lru(*cached_page, mapping, index, - GFP_KERNEL); + err = add_to_page_cache_lru(*cached_page, mapping, + index, GFP_KERNEL); if (unlikely(err)) { if (err == -EEXIST) continue; @@ -1267,180 +1373,6 @@ rl_not_mapped_enoent: return err; } -/* - * Copy as much as we can into the pages and return the number of bytes which - * were successfully copied. If a fault is encountered then clear the pages - * out to (ofs + bytes) and return the number of bytes which were copied. - */ -static inline size_t ntfs_copy_from_user(struct page **pages, - unsigned nr_pages, unsigned ofs, const char __user *buf, - size_t bytes) -{ - struct page **last_page = pages + nr_pages; - char *addr; - size_t total = 0; - unsigned len; - int left; - - do { - len = PAGE_CACHE_SIZE - ofs; - if (len > bytes) - len = bytes; - addr = kmap_atomic(*pages); - left = __copy_from_user_inatomic(addr + ofs, buf, len); - kunmap_atomic(addr); - if (unlikely(left)) { - /* Do it the slow way. */ - addr = kmap(*pages); - left = __copy_from_user(addr + ofs, buf, len); - kunmap(*pages); - if (unlikely(left)) - goto err_out; - } - total += len; - bytes -= len; - if (!bytes) - break; - buf += len; - ofs = 0; - } while (++pages < last_page); -out: - return total; -err_out: - total += len - left; - /* Zero the rest of the target like __copy_from_user(). 
*/ - while (++pages < last_page) { - bytes -= len; - if (!bytes) - break; - len = PAGE_CACHE_SIZE; - if (len > bytes) - len = bytes; - zero_user(*pages, 0, len); - } - goto out; -} - -static size_t __ntfs_copy_from_user_iovec_inatomic(char *vaddr, - const struct iovec *iov, size_t iov_ofs, size_t bytes) -{ - size_t total = 0; - - while (1) { - const char __user *buf = iov->iov_base + iov_ofs; - unsigned len; - size_t left; - - len = iov->iov_len - iov_ofs; - if (len > bytes) - len = bytes; - left = __copy_from_user_inatomic(vaddr, buf, len); - total += len; - bytes -= len; - vaddr += len; - if (unlikely(left)) { - total -= left; - break; - } - if (!bytes) - break; - iov++; - iov_ofs = 0; - } - return total; -} - -static inline void ntfs_set_next_iovec(const struct iovec **iovp, - size_t *iov_ofsp, size_t bytes) -{ - const struct iovec *iov = *iovp; - size_t iov_ofs = *iov_ofsp; - - while (bytes) { - unsigned len; - - len = iov->iov_len - iov_ofs; - if (len > bytes) - len = bytes; - bytes -= len; - iov_ofs += len; - if (iov->iov_len == iov_ofs) { - iov++; - iov_ofs = 0; - } - } - *iovp = iov; - *iov_ofsp = iov_ofs; -} - -/* - * This has the same side-effects and return value as ntfs_copy_from_user(). - * The difference is that on a fault we need to memset the remainder of the - * pages (out to offset + bytes), to emulate ntfs_copy_from_user()'s - * single-segment behaviour. - * - * We call the same helper (__ntfs_copy_from_user_iovec_inatomic()) both when - * atomic and when not atomic. This is ok because it calls - * __copy_from_user_inatomic() and it is ok to call this when non-atomic. In - * fact, the only difference between __copy_from_user_inatomic() and - * __copy_from_user() is that the latter calls might_sleep() and the former - * should not zero the tail of the buffer on error. And on many architectures - * __copy_from_user_inatomic() is just defined to __copy_from_user() so it - * makes no difference at all on those architectures. 
- */ -static inline size_t ntfs_copy_from_user_iovec(struct page **pages, - unsigned nr_pages, unsigned ofs, const struct iovec **iov, - size_t *iov_ofs, size_t bytes) -{ - struct page **last_page = pages + nr_pages; - char *addr; - size_t copied, len, total = 0; - - do { - len = PAGE_CACHE_SIZE - ofs; - if (len > bytes) - len = bytes; - addr = kmap_atomic(*pages); - copied = __ntfs_copy_from_user_iovec_inatomic(addr + ofs, - *iov, *iov_ofs, len); - kunmap_atomic(addr); - if (unlikely(copied != len)) { - /* Do it the slow way. */ - addr = kmap(*pages); - copied = __ntfs_copy_from_user_iovec_inatomic(addr + - ofs, *iov, *iov_ofs, len); - if (unlikely(copied != len)) - goto err_out; - kunmap(*pages); - } - total += len; - ntfs_set_next_iovec(iov, iov_ofs, len); - bytes -= len; - if (!bytes) - break; - ofs = 0; - } while (++pages < last_page); -out: - return total; -err_out: - BUG_ON(copied > len); - /* Zero the rest of the target like __copy_from_user(). */ - memset(addr + ofs + copied, 0, len - copied); - kunmap(*pages); - total += copied; - ntfs_set_next_iovec(iov, iov_ofs, copied); - while (++pages < last_page) { - bytes -= len; - if (!bytes) - break; - len = PAGE_CACHE_SIZE; - if (len > bytes) - len = bytes; - zero_user(*pages, 0, len); - } - goto out; -} - static inline void ntfs_flush_dcache_pages(struct page **pages, unsigned nr_pages) { @@ -1761,86 +1693,83 @@ err_out: return err; } -static void ntfs_write_failed(struct address_space *mapping, loff_t to) +/* + * Copy as much as we can into the pages and return the number of bytes which + * were successfully copied. If a fault is encountered then clear the pages + * out to (ofs + bytes) and return the number of bytes which were copied. 
+ */ +static size_t ntfs_copy_from_user_iter(struct page **pages, unsigned nr_pages, + unsigned ofs, struct iov_iter *i, size_t bytes) { - struct inode *inode = mapping->host; + struct page **last_page = pages + nr_pages; + size_t total = 0; + struct iov_iter data = *i; + unsigned len, copied; - if (to > inode->i_size) { - truncate_pagecache(inode, inode->i_size); - ntfs_truncate_vfs(inode); - } + do { + len = PAGE_CACHE_SIZE - ofs; + if (len > bytes) + len = bytes; + copied = iov_iter_copy_from_user_atomic(*pages, &data, ofs, + len); + total += copied; + bytes -= copied; + if (!bytes) + break; + iov_iter_advance(&data, copied); + if (copied < len) + goto err; + ofs = 0; + } while (++pages < last_page); +out: + return total; +err: + /* Zero the rest of the target like __copy_from_user(). */ + len = PAGE_CACHE_SIZE - copied; + do { + if (len > bytes) + len = bytes; + zero_user(*pages, copied, len); + bytes -= len; + copied = 0; + len = PAGE_CACHE_SIZE; + } while (++pages < last_page); + goto out; } /** - * ntfs_file_buffered_write - - * - * Locking: The vfs is holding ->i_mutex on the inode. + * ntfs_perform_write - perform buffered write to a file + * @file: file to write to + * @i: iov_iter with data to write + * @pos: byte offset in file at which to begin writing to */ -static ssize_t ntfs_file_buffered_write(struct kiocb *iocb, - const struct iovec *iov, unsigned long nr_segs, - loff_t pos, loff_t *ppos, size_t count) +static ssize_t ntfs_perform_write(struct file *file, struct iov_iter *i, + loff_t pos) { - struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; struct inode *vi = mapping->host; ntfs_inode *ni = NTFS_I(vi); ntfs_volume *vol = ni->vol; struct page *pages[NTFS_MAX_PAGES_PER_CLUSTER]; struct page *cached_page = NULL; - char __user *buf = NULL; - s64 end, ll; VCN last_vcn; LCN lcn; - unsigned long flags; - size_t bytes, iov_ofs = 0; /* Offset in the current iovec. 
*/ - ssize_t status, written; + size_t bytes; + ssize_t status, written = 0; unsigned nr_pages; - int err; - ntfs_debug("Entering for i_ino 0x%lx, attribute type 0x%x, " - "pos 0x%llx, count 0x%lx.", - vi->i_ino, (unsigned)le32_to_cpu(ni->type), - (unsigned long long)pos, (unsigned long)count); - if (unlikely(!count)) - return 0; - BUG_ON(NInoMstProtected(ni)); - /* - * If the attribute is not an index root and it is encrypted or - * compressed, we cannot write to it yet. Note we need to check for - * AT_INDEX_ALLOCATION since this is the type of both directory and - * index inodes. - */ - if (ni->type != AT_INDEX_ALLOCATION) { - /* If file is encrypted, deny access, just like NT4. */ - if (NInoEncrypted(ni)) { - /* - * Reminder for later: Encrypted files are _always_ - * non-resident so that the content can always be - * encrypted. - */ - ntfs_debug("Denying write access to encrypted file."); - return -EACCES; - } - if (NInoCompressed(ni)) { - /* Only unnamed $DATA attribute can be compressed. */ - BUG_ON(ni->type != AT_DATA); - BUG_ON(ni->name_len); - /* - * Reminder for later: If resident, the data is not - * actually compressed. Only on the switch to non- - * resident does compression kick in. This is in - * contrast to encrypted files (see above). - */ - ntfs_error(vi->i_sb, "Writing to compressed files is " - "not implemented yet. Sorry."); - return -EOPNOTSUPP; - } - } + ntfs_debug("Entering for i_ino 0x%lx, attribute type 0x%x, pos " + "0x%llx, count 0x%lx.", vi->i_ino, + (unsigned)le32_to_cpu(ni->type), + (unsigned long long)pos, + (unsigned long)iov_iter_count(i)); /* * If a previous ntfs_truncate() failed, repeat it and abort if it * fails again. */ if (unlikely(NInoTruncateFailed(ni))) { + int err; + inode_dio_wait(vi); err = ntfs_truncate(vi); if (err || NInoTruncateFailed(ni)) { @@ -1854,81 +1783,6 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb, return err; } } - /* The first byte after the write. 
*/ - end = pos + count; - /* - * If the write goes beyond the allocated size, extend the allocation - * to cover the whole of the write, rounded up to the nearest cluster. - */ - read_lock_irqsave(&ni->size_lock, flags); - ll = ni->allocated_size; - read_unlock_irqrestore(&ni->size_lock, flags); - if (end > ll) { - /* Extend the allocation without changing the data size. */ - ll = ntfs_attr_extend_allocation(ni, end, -1, pos); - if (likely(ll >= 0)) { - BUG_ON(pos >= ll); - /* If the extension was partial truncate the write. */ - if (end > ll) { - ntfs_debug("Truncating write to inode 0x%lx, " - "attribute type 0x%x, because " - "the allocation was only " - "partially extended.", - vi->i_ino, (unsigned) - le32_to_cpu(ni->type)); - end = ll; - count = ll - pos; - } - } else { - err = ll; - read_lock_irqsave(&ni->size_lock, flags); - ll = ni->allocated_size; - read_unlock_irqrestore(&ni->size_lock, flags); - /* Perform a partial write if possible or fail. */ - if (pos < ll) { - ntfs_debug("Truncating write to inode 0x%lx, " - "attribute type 0x%x, because " - "extending the allocation " - "failed (error code %i).", - vi->i_ino, (unsigned) - le32_to_cpu(ni->type), err); - end = ll; - count = ll - pos; - } else { - ntfs_error(vol->sb, "Cannot perform write to " - "inode 0x%lx, attribute type " - "0x%x, because extending the " - "allocation failed (error " - "code %i).", vi->i_ino, - (unsigned) - le32_to_cpu(ni->type), err); - return err; - } - } - } - written = 0; - /* - * If the write starts beyond the initialized size, extend it up to the - * beginning of the write and initialize all non-sparse space between - * the old initialized size and the new one. This automatically also - * increments the vfs inode->i_size to keep it above or equal to the - * initialized_size. 
- */ - read_lock_irqsave(&ni->size_lock, flags); - ll = ni->initialized_size; - read_unlock_irqrestore(&ni->size_lock, flags); - if (pos > ll) { - err = ntfs_attr_extend_initialized(ni, pos); - if (err < 0) { - ntfs_error(vol->sb, "Cannot perform write to inode " - "0x%lx, attribute type 0x%x, because " - "extending the initialized size " - "failed (error code %i).", vi->i_ino, - (unsigned)le32_to_cpu(ni->type), err); - status = err; - goto err_out; - } - } /* * Determine the number of pages per cluster for non-resident * attributes. @@ -1936,10 +1790,7 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb, nr_pages = 1; if (vol->cluster_size > PAGE_CACHE_SIZE && NInoNonResident(ni)) nr_pages = vol->cluster_size >> PAGE_CACHE_SHIFT; - /* Finally, perform the actual write. */ last_vcn = -1; - if (likely(nr_segs == 1)) - buf = iov->iov_base; do { VCN vcn; pgoff_t idx, start_idx; @@ -1964,10 +1815,10 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb, vol->cluster_size_bits, false); up_read(&ni->runlist.lock); if (unlikely(lcn < LCN_HOLE)) { - status = -EIO; if (lcn == LCN_ENOMEM) status = -ENOMEM; - else + else { + status = -EIO; ntfs_error(vol->sb, "Cannot " "perform write to " "inode 0x%lx, " @@ -1976,6 +1827,7 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb, "is corrupt.", vi->i_ino, (unsigned) le32_to_cpu(ni->type)); + } break; } if (lcn == LCN_HOLE) { @@ -1988,8 +1840,9 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb, } } } - if (bytes > count) - bytes = count; + if (bytes > iov_iter_count(i)) + bytes = iov_iter_count(i); +again: /* * Bring in the user page(s) that we will copy from _first_. * Otherwise there is a nasty deadlock on copying from the same @@ -1998,10 +1851,10 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb, * pages being swapped out between us bringing them into memory * and doing the actual copying. 
*/ - if (likely(nr_segs == 1)) - ntfs_fault_in_pages_readable(buf, bytes); - else - ntfs_fault_in_pages_readable_iovec(iov, iov_ofs, bytes); + if (unlikely(iov_iter_fault_in_multipages_readable(i, bytes))) { + status = -EFAULT; + break; + } /* Get and lock @do_pages starting at index @start_idx. */ status = __ntfs_grab_cache_pages(mapping, start_idx, do_pages, pages, &cached_page); @@ -2017,56 +1870,57 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb, status = ntfs_prepare_pages_for_non_resident_write( pages, do_pages, pos, bytes); if (unlikely(status)) { - loff_t i_size; - do { unlock_page(pages[--do_pages]); page_cache_release(pages[do_pages]); } while (do_pages); - /* - * The write preparation may have instantiated - * allocated space outside i_size. Trim this - * off again. We can ignore any errors in this - * case as we will just be waisting a bit of - * allocated space, which is not a disaster. - */ - i_size = i_size_read(vi); - if (pos + bytes > i_size) { - ntfs_write_failed(mapping, pos + bytes); - } break; } } u = (pos >> PAGE_CACHE_SHIFT) - pages[0]->index; - if (likely(nr_segs == 1)) { - copied = ntfs_copy_from_user(pages + u, do_pages - u, - ofs, buf, bytes); - buf += copied; - } else - copied = ntfs_copy_from_user_iovec(pages + u, - do_pages - u, ofs, &iov, &iov_ofs, - bytes); + copied = ntfs_copy_from_user_iter(pages + u, do_pages - u, ofs, + i, bytes); ntfs_flush_dcache_pages(pages + u, do_pages - u); - status = ntfs_commit_pages_after_write(pages, do_pages, pos, - bytes); - if (likely(!status)) { - written += copied; - count -= copied; - pos += copied; - if (unlikely(copied != bytes)) - status = -EFAULT; + status = 0; + if (likely(copied == bytes)) { + status = ntfs_commit_pages_after_write(pages, do_pages, + pos, bytes); + if (!status) + status = bytes; } do { unlock_page(pages[--do_pages]); page_cache_release(pages[do_pages]); } while (do_pages); - if (unlikely(status)) + if (unlikely(status < 0)) break; - 
balance_dirty_pages_ratelimited(mapping); + copied = status; cond_resched(); - } while (count); -err_out: - *ppos = pos; + if (unlikely(!copied)) { + size_t sc; + + /* + * We failed to copy anything. Fall back to single + * segment length write. + * + * This is needed to avoid possible livelock in the + * case that all segments in the iov cannot be copied + * at once without a pagefault. + */ + sc = iov_iter_single_seg_count(i); + if (bytes > sc) + bytes = sc; + goto again; + } + iov_iter_advance(i, copied); + pos += copied; + written += copied; + balance_dirty_pages_ratelimited(mapping); + if (fatal_signal_pending(current)) { + status = -EINTR; + break; + } + } while (iov_iter_count(i)); if (cached_page) page_cache_release(cached_page); ntfs_debug("Done. Returning %s (written 0x%lx, status %li).", @@ -2076,59 +1930,56 @@ err_out: } /** - * ntfs_file_aio_write_nolock - + * ntfs_file_write_iter_nolock - write data to a file + * @iocb: IO state structure (file, offset, etc.) + * @from: iov_iter with data to write + * + * Basically the same as __generic_file_write_iter() except that it ends + * up calling ntfs_perform_write() instead of generic_perform_write() and that + * O_DIRECT is not implemented. */ -static ssize_t ntfs_file_aio_write_nolock(struct kiocb *iocb, - const struct iovec *iov, unsigned long nr_segs, loff_t *ppos) +static ssize_t ntfs_file_write_iter_nolock(struct kiocb *iocb, + struct iov_iter *from) { struct file *file = iocb->ki_filp; - struct address_space *mapping = file->f_mapping; - struct inode *inode = mapping->host; - loff_t pos; - size_t count; /* after file limit checks */ - ssize_t written, err; + loff_t pos = iocb->ki_pos; + ssize_t written = 0; + ssize_t err; + size_t count = iov_iter_count(from); - count = iov_length(iov, nr_segs); - pos = *ppos; - /* We can write back this queue in page reclaim. 
*/ - current->backing_dev_info = inode_to_bdi(inode); - written = 0; - err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); - if (err) - goto out; - if (!count) - goto out; - err = file_remove_suid(file); - if (err) - goto out; - err = file_update_time(file); - if (err) - goto out; - written = ntfs_file_buffered_write(iocb, iov, nr_segs, pos, ppos, - count); -out: + err = ntfs_prepare_file_for_write(file, &pos, &count); + if (count && !err) { + iov_iter_truncate(from, count); + written = ntfs_perform_write(file, from, pos); + if (likely(written >= 0)) + iocb->ki_pos = pos + written; + } current->backing_dev_info = NULL; return written ? written : err; } /** - * ntfs_file_aio_write - + * ntfs_file_write_iter - simple wrapper for ntfs_file_write_iter_nolock() + * @iocb: IO state structure + * @from: iov_iter with data to write + * + * Basically the same as generic_file_write_iter() except that it ends up + * calling ntfs_file_write_iter_nolock() instead of + * __generic_file_write_iter(). 
*/ -static ssize_t ntfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) +static ssize_t ntfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; - struct address_space *mapping = file->f_mapping; - struct inode *inode = mapping->host; + struct inode *vi = file_inode(file); ssize_t ret; - BUG_ON(iocb->ki_pos != pos); - - mutex_lock(&inode->i_mutex); - ret = ntfs_file_aio_write_nolock(iocb, iov, nr_segs, &iocb->ki_pos); - mutex_unlock(&inode->i_mutex); + mutex_lock(&vi->i_mutex); + ret = ntfs_file_write_iter_nolock(iocb, from); + mutex_unlock(&vi->i_mutex); if (ret > 0) { - int err = generic_write_sync(file, iocb->ki_pos - ret, ret); + ssize_t err; + + err = generic_write_sync(file, iocb->ki_pos - ret, ret); if (err < 0) ret = err; } @@ -2196,37 +2047,17 @@ static int ntfs_file_fsync(struct file *filp, loff_t start, loff_t end, #endif /* NTFS_RW */ const struct file_operations ntfs_file_ops = { - .llseek = generic_file_llseek, /* Seek inside file. */ - .read = new_sync_read, /* Read from file. */ - .read_iter = generic_file_read_iter, /* Async read from file. */ + .llseek = generic_file_llseek, + .read = new_sync_read, + .read_iter = generic_file_read_iter, #ifdef NTFS_RW - .write = do_sync_write, /* Write to file. */ - .aio_write = ntfs_file_aio_write, /* Async write to file. */ - /*.release = ,*/ /* Last file is closed. See - fs/ext2/file.c:: - ext2_release_file() for - how to use this to discard - preallocated space for - write opened files. */ - .fsync = ntfs_file_fsync, /* Sync a file to disk. */ - /*.aio_fsync = ,*/ /* Sync all outstanding async - i/o operations on a - kiocb. */ + .write = new_sync_write, + .write_iter = ntfs_file_write_iter, + .fsync = ntfs_file_fsync, #endif /* NTFS_RW */ - /*.ioctl = ,*/ /* Perform function on the - mounted filesystem. */ - .mmap = generic_file_mmap, /* Mmap file. */ - .open = ntfs_file_open, /* Open file. 
*/ - .splice_read = generic_file_splice_read /* Zero-copy data send with - the data source being on - the ntfs partition. We do - not need to care about the - data destination. */ - /*.sendpage = ,*/ /* Zero-copy data send with - the data destination being - on the ntfs partition. We - do not need to care about - the data source. */ + .mmap = generic_file_mmap, + .open = ntfs_file_open, + .splice_read = generic_file_splice_read, }; const struct inode_operations ntfs_file_inode_ops = { diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 044158bd22be..2d7f76e52c37 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -3370,7 +3370,7 @@ static int ocfs2_merge_rec_right(struct ocfs2_path *left_path, ret = ocfs2_get_right_path(et, left_path, &right_path); if (ret) { mlog_errno(ret); - goto out; + return ret; } right_el = path_leaf_el(right_path); @@ -3453,8 +3453,7 @@ static int ocfs2_merge_rec_right(struct ocfs2_path *left_path, subtree_index); } out: - if (right_path) - ocfs2_free_path(right_path); + ocfs2_free_path(right_path); return ret; } @@ -3536,7 +3535,7 @@ static int ocfs2_merge_rec_left(struct ocfs2_path *right_path, ret = ocfs2_get_left_path(et, right_path, &left_path); if (ret) { mlog_errno(ret); - goto out; + return ret; } left_el = path_leaf_el(left_path); @@ -3647,8 +3646,7 @@ static int ocfs2_merge_rec_left(struct ocfs2_path *right_path, right_path, subtree_index); } out: - if (left_path) - ocfs2_free_path(left_path); + ocfs2_free_path(left_path); return ret; } @@ -4334,17 +4332,17 @@ ocfs2_figure_merge_contig_type(struct ocfs2_extent_tree *et, } else if (path->p_tree_depth > 0) { status = ocfs2_find_cpos_for_left_leaf(sb, path, &left_cpos); if (status) - goto out; + goto exit; if (left_cpos != 0) { left_path = ocfs2_new_path_from_path(path); if (!left_path) - goto out; + goto exit; status = ocfs2_find_path(et->et_ci, left_path, left_cpos); if (status) - goto out; + goto free_left_path; new_el = path_leaf_el(left_path); @@ -4361,7 +4359,7 @@ 
ocfs2_figure_merge_contig_type(struct ocfs2_extent_tree *et, le16_to_cpu(new_el->l_next_free_rec), le16_to_cpu(new_el->l_count)); status = -EINVAL; - goto out; + goto free_left_path; } rec = &new_el->l_recs[ le16_to_cpu(new_el->l_next_free_rec) - 1]; @@ -4388,18 +4386,18 @@ ocfs2_figure_merge_contig_type(struct ocfs2_extent_tree *et, path->p_tree_depth > 0) { status = ocfs2_find_cpos_for_right_leaf(sb, path, &right_cpos); if (status) - goto out; + goto free_left_path; if (right_cpos == 0) - goto out; + goto free_left_path; right_path = ocfs2_new_path_from_path(path); if (!right_path) - goto out; + goto free_left_path; status = ocfs2_find_path(et->et_ci, right_path, right_cpos); if (status) - goto out; + goto free_right_path; new_el = path_leaf_el(right_path); rec = &new_el->l_recs[0]; @@ -4413,7 +4411,7 @@ ocfs2_figure_merge_contig_type(struct ocfs2_extent_tree *et, (unsigned long long)le64_to_cpu(eb->h_blkno), le16_to_cpu(new_el->l_next_free_rec)); status = -EINVAL; - goto out; + goto free_right_path; } rec = &new_el->l_recs[1]; } @@ -4430,12 +4428,11 @@ ocfs2_figure_merge_contig_type(struct ocfs2_extent_tree *et, ret = contig_type; } -out: - if (left_path) - ocfs2_free_path(left_path); - if (right_path) - ocfs2_free_path(right_path); - +free_right_path: + ocfs2_free_path(right_path); +free_left_path: + ocfs2_free_path(left_path); +exit: return ret; } @@ -6858,13 +6855,13 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode, if (pages == NULL) { ret = -ENOMEM; mlog_errno(ret); - goto out; + return ret; } ret = ocfs2_reserve_clusters(osb, 1, &data_ac); if (ret) { mlog_errno(ret); - goto out; + goto free_pages; } } @@ -6996,9 +6993,8 @@ out_commit: out: if (data_ac) ocfs2_free_alloc_context(data_ac); - if (pages) - kfree(pages); - +free_pages: + kfree(pages); return ret; } diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index e1bf18c5d25e..8d2bc840c288 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -664,6 +664,117 @@ static int 
ocfs2_is_overwrite(struct ocfs2_super *osb, return 0; } +static int ocfs2_direct_IO_zero_extend(struct ocfs2_super *osb, + struct inode *inode, loff_t offset, + u64 zero_len, int cluster_align) +{ + u32 p_cpos = 0; + u32 v_cpos = ocfs2_bytes_to_clusters(osb->sb, i_size_read(inode)); + unsigned int num_clusters = 0; + unsigned int ext_flags = 0; + int ret = 0; + + if (offset <= i_size_read(inode) || cluster_align) + return 0; + + ret = ocfs2_get_clusters(inode, v_cpos, &p_cpos, &num_clusters, + &ext_flags); + if (ret < 0) { + mlog_errno(ret); + return ret; + } + + if (p_cpos && !(ext_flags & OCFS2_EXT_UNWRITTEN)) { + u64 s = i_size_read(inode); + sector_t sector = (p_cpos << (osb->s_clustersize_bits - 9)) + + (do_div(s, osb->s_clustersize) >> 9); + + ret = blkdev_issue_zeroout(osb->sb->s_bdev, sector, + zero_len >> 9, GFP_NOFS, false); + if (ret < 0) + mlog_errno(ret); + } + + return ret; +} + +static int ocfs2_direct_IO_extend_no_holes(struct ocfs2_super *osb, + struct inode *inode, loff_t offset) +{ + u64 zero_start, zero_len, total_zero_len; + u32 p_cpos = 0, clusters_to_add; + u32 v_cpos = ocfs2_bytes_to_clusters(osb->sb, i_size_read(inode)); + unsigned int num_clusters = 0; + unsigned int ext_flags = 0; + u32 size_div, offset_div; + int ret = 0; + + { + u64 o = offset; + u64 s = i_size_read(inode); + + offset_div = do_div(o, osb->s_clustersize); + size_div = do_div(s, osb->s_clustersize); + } + + if (offset <= i_size_read(inode)) + return 0; + + clusters_to_add = ocfs2_bytes_to_clusters(inode->i_sb, offset) - + ocfs2_bytes_to_clusters(inode->i_sb, i_size_read(inode)); + total_zero_len = offset - i_size_read(inode); + if (clusters_to_add) + total_zero_len -= offset_div; + + /* Allocate clusters to fill out holes, and this is only needed + * when we add more than one clusters. 
Otherwise the cluster will + * be allocated during direct IO */ + if (clusters_to_add > 1) { + ret = ocfs2_extend_allocation(inode, + OCFS2_I(inode)->ip_clusters, + clusters_to_add - 1, 0); + if (ret) { + mlog_errno(ret); + goto out; + } + } + + while (total_zero_len) { + ret = ocfs2_get_clusters(inode, v_cpos, &p_cpos, &num_clusters, + &ext_flags); + if (ret < 0) { + mlog_errno(ret); + goto out; + } + + zero_start = ocfs2_clusters_to_bytes(osb->sb, p_cpos) + + size_div; + zero_len = ocfs2_clusters_to_bytes(osb->sb, num_clusters) - + size_div; + zero_len = min(total_zero_len, zero_len); + + if (p_cpos && !(ext_flags & OCFS2_EXT_UNWRITTEN)) { + ret = blkdev_issue_zeroout(osb->sb->s_bdev, + zero_start >> 9, zero_len >> 9, + GFP_NOFS, false); + if (ret < 0) { + mlog_errno(ret); + goto out; + } + } + + total_zero_len -= zero_len; + v_cpos += ocfs2_bytes_to_clusters(osb->sb, zero_len + size_div); + + /* Only at first iteration can be cluster not aligned. + * So set size_div to 0 for the rest */ + size_div = 0; + } + +out: + return ret; +} + static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb, struct iov_iter *iter, loff_t offset) @@ -678,8 +789,8 @@ static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb, struct buffer_head *di_bh = NULL; size_t count = iter->count; journal_t *journal = osb->journal->j_journal; - u32 zero_len; - int cluster_align; + u64 zero_len_head, zero_len_tail; + int cluster_align_head, cluster_align_tail; loff_t final_size = offset + count; int append_write = offset >= i_size_read(inode) ? 
1 : 0; unsigned int num_clusters = 0; @@ -687,9 +798,16 @@ static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb, { u64 o = offset; + u64 s = i_size_read(inode); + + zero_len_head = do_div(o, 1 << osb->s_clustersize_bits); + cluster_align_head = !zero_len_head; - zero_len = do_div(o, 1 << osb->s_clustersize_bits); - cluster_align = !zero_len; + zero_len_tail = osb->s_clustersize - + do_div(s, osb->s_clustersize); + if ((offset - i_size_read(inode)) < zero_len_tail) + zero_len_tail = offset - i_size_read(inode); + cluster_align_tail = !zero_len_tail; } /* @@ -707,21 +825,23 @@ static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb, } if (append_write) { - ret = ocfs2_inode_lock(inode, &di_bh, 1); + ret = ocfs2_inode_lock(inode, NULL, 1); if (ret < 0) { mlog_errno(ret); goto clean_orphan; } + /* zeroing out the previously allocated cluster tail + * that but not zeroed */ if (ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb))) - ret = ocfs2_zero_extend(inode, di_bh, offset); + ret = ocfs2_direct_IO_zero_extend(osb, inode, offset, + zero_len_tail, cluster_align_tail); else - ret = ocfs2_extend_no_holes(inode, di_bh, offset, + ret = ocfs2_direct_IO_extend_no_holes(osb, inode, offset); if (ret < 0) { mlog_errno(ret); ocfs2_inode_unlock(inode, 1); - brelse(di_bh); goto clean_orphan; } @@ -729,13 +849,10 @@ static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb, if (is_overwrite < 0) { mlog_errno(is_overwrite); ocfs2_inode_unlock(inode, 1); - brelse(di_bh); goto clean_orphan; } ocfs2_inode_unlock(inode, 1); - brelse(di_bh); - di_bh = NULL; } written = __blockdev_direct_IO(WRITE, iocb, inode, inode->i_sb->s_bdev, @@ -772,15 +889,23 @@ static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb, if (ret < 0) mlog_errno(ret); } - } else if (written < 0 && append_write && !is_overwrite && - !cluster_align) { + } else if (written > 0 && append_write && !is_overwrite && + !cluster_align_head) { + /* zeroing out the allocated cluster head */ u32 p_cpos = 0; u32 v_cpos = 
ocfs2_bytes_to_clusters(osb->sb, offset); + ret = ocfs2_inode_lock(inode, NULL, 0); + if (ret < 0) { + mlog_errno(ret); + goto clean_orphan; + } + ret = ocfs2_get_clusters(inode, v_cpos, &p_cpos, &num_clusters, &ext_flags); if (ret < 0) { mlog_errno(ret); + ocfs2_inode_unlock(inode, 0); goto clean_orphan; } @@ -788,9 +913,11 @@ static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb, ret = blkdev_issue_zeroout(osb->sb->s_bdev, p_cpos << (osb->s_clustersize_bits - 9), - zero_len >> 9, GFP_KERNEL, false); + zero_len_head >> 9, GFP_NOFS, false); if (ret < 0) mlog_errno(ret); + + ocfs2_inode_unlock(inode, 0); } clean_orphan: diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 16eff45727ee..8e19b9d7aba8 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -1312,7 +1312,9 @@ static int o2hb_debug_init(void) int ret = -ENOMEM; o2hb_debug_dir = debugfs_create_dir(O2HB_DEBUG_DIR, NULL); - if (!o2hb_debug_dir) { + if (IS_ERR_OR_NULL(o2hb_debug_dir)) { + ret = o2hb_debug_dir ? + PTR_ERR(o2hb_debug_dir) : -ENOMEM; mlog_errno(ret); goto bail; } @@ -1325,7 +1327,9 @@ static int o2hb_debug_init(void) sizeof(o2hb_live_node_bitmap), O2NM_MAX_NODES, o2hb_live_node_bitmap); - if (!o2hb_debug_livenodes) { + if (IS_ERR_OR_NULL(o2hb_debug_livenodes)) { + ret = o2hb_debug_livenodes ? + PTR_ERR(o2hb_debug_livenodes) : -ENOMEM; mlog_errno(ret); goto bail; } @@ -1338,7 +1342,9 @@ static int o2hb_debug_init(void) sizeof(o2hb_live_region_bitmap), O2NM_MAX_REGIONS, o2hb_live_region_bitmap); - if (!o2hb_debug_liveregions) { + if (IS_ERR_OR_NULL(o2hb_debug_liveregions)) { + ret = o2hb_debug_liveregions ? + PTR_ERR(o2hb_debug_liveregions) : -ENOMEM; mlog_errno(ret); goto bail; } @@ -1352,7 +1358,9 @@ static int o2hb_debug_init(void) sizeof(o2hb_quorum_region_bitmap), O2NM_MAX_REGIONS, o2hb_quorum_region_bitmap); - if (!o2hb_debug_quorumregions) { + if (IS_ERR_OR_NULL(o2hb_debug_quorumregions)) { + ret = o2hb_debug_quorumregions ? 
+ PTR_ERR(o2hb_debug_quorumregions) : -ENOMEM; mlog_errno(ret); goto bail; } @@ -1366,7 +1374,9 @@ static int o2hb_debug_init(void) sizeof(o2hb_failed_region_bitmap), O2NM_MAX_REGIONS, o2hb_failed_region_bitmap); - if (!o2hb_debug_failedregions) { + if (IS_ERR_OR_NULL(o2hb_debug_failedregions)) { + ret = o2hb_debug_failedregions ? + PTR_ERR(o2hb_debug_failedregions) : -ENOMEM; mlog_errno(ret); goto bail; } @@ -2000,7 +2010,8 @@ static int o2hb_debug_region_init(struct o2hb_region *reg, struct dentry *dir) reg->hr_debug_dir = debugfs_create_dir(config_item_name(®->hr_item), dir); - if (!reg->hr_debug_dir) { + if (IS_ERR_OR_NULL(reg->hr_debug_dir)) { + ret = reg->hr_debug_dir ? PTR_ERR(reg->hr_debug_dir) : -ENOMEM; mlog_errno(ret); goto bail; } @@ -2013,7 +2024,9 @@ static int o2hb_debug_region_init(struct o2hb_region *reg, struct dentry *dir) O2HB_DB_TYPE_REGION_LIVENODES, sizeof(reg->hr_live_node_bitmap), O2NM_MAX_NODES, reg); - if (!reg->hr_debug_livenodes) { + if (IS_ERR_OR_NULL(reg->hr_debug_livenodes)) { + ret = reg->hr_debug_livenodes ? + PTR_ERR(reg->hr_debug_livenodes) : -ENOMEM; mlog_errno(ret); goto bail; } @@ -2025,7 +2038,9 @@ static int o2hb_debug_region_init(struct o2hb_region *reg, struct dentry *dir) sizeof(*(reg->hr_db_regnum)), O2HB_DB_TYPE_REGION_NUMBER, 0, O2NM_MAX_NODES, reg); - if (!reg->hr_debug_regnum) { + if (IS_ERR_OR_NULL(reg->hr_debug_regnum)) { + ret = reg->hr_debug_regnum ? + PTR_ERR(reg->hr_debug_regnum) : -ENOMEM; mlog_errno(ret); goto bail; } @@ -2037,7 +2052,9 @@ static int o2hb_debug_region_init(struct o2hb_region *reg, struct dentry *dir) sizeof(*(reg->hr_db_elapsed_time)), O2HB_DB_TYPE_REGION_ELAPSED_TIME, 0, 0, reg); - if (!reg->hr_debug_elapsed_time) { + if (IS_ERR_OR_NULL(reg->hr_debug_elapsed_time)) { + ret = reg->hr_debug_elapsed_time ? 
+ PTR_ERR(reg->hr_debug_elapsed_time) : -ENOMEM; mlog_errno(ret); goto bail; } @@ -2049,13 +2066,16 @@ static int o2hb_debug_region_init(struct o2hb_region *reg, struct dentry *dir) sizeof(*(reg->hr_db_pinned)), O2HB_DB_TYPE_REGION_PINNED, 0, 0, reg); - if (!reg->hr_debug_pinned) { + if (IS_ERR_OR_NULL(reg->hr_debug_pinned)) { + ret = reg->hr_debug_pinned ? + PTR_ERR(reg->hr_debug_pinned) : -ENOMEM; mlog_errno(ret); goto bail; } - ret = 0; + return 0; bail: + debugfs_remove_recursive(reg->hr_debug_dir); return ret; } diff --git a/fs/ocfs2/cluster/masklog.h b/fs/ocfs2/cluster/masklog.h index 2260fb9e6508..7fdc25a4d8c0 100644 --- a/fs/ocfs2/cluster/masklog.h +++ b/fs/ocfs2/cluster/masklog.h @@ -196,13 +196,14 @@ extern struct mlog_bits mlog_and_bits, mlog_not_bits; } \ } while (0) -#define mlog_errno(st) do { \ +#define mlog_errno(st) ({ \ int _st = (st); \ if (_st != -ERESTARTSYS && _st != -EINTR && \ _st != AOP_TRUNCATED_PAGE && _st != -ENOSPC && \ _st != -EDQUOT) \ mlog(ML_ERROR, "status = %lld\n", (long long)_st); \ -} while (0) + _st; \ +}) #define mlog_bug_on_msg(cond, fmt, args...) 
do { \ if (cond) { \ diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index b08050bd3f2e..ccd4dcfc3645 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c @@ -18,7 +18,7 @@ * * linux/fs/minix/dir.c * - * Copyright (C) 1991, 1992 Linux Torvalds + * Copyright (C) 1991, 1992 Linus Torvalds * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public @@ -2047,22 +2047,19 @@ int ocfs2_check_dir_for_entry(struct inode *dir, const char *name, int namelen) { - int ret; + int ret = 0; struct ocfs2_dir_lookup_result lookup = { NULL, }; trace_ocfs2_check_dir_for_entry( (unsigned long long)OCFS2_I(dir)->ip_blkno, namelen, name); - ret = -EEXIST; - if (ocfs2_find_entry(name, namelen, dir, &lookup) == 0) - goto bail; + if (ocfs2_find_entry(name, namelen, dir, &lookup) == 0) { + ret = -EEXIST; + mlog_errno(ret); + } - ret = 0; -bail: ocfs2_free_dir_lookup_result(&lookup); - if (ret) - mlog_errno(ret); return ret; } diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 11849a44dc5a..956edf67be20 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -1391,6 +1391,11 @@ static int __ocfs2_cluster_lock(struct ocfs2_super *osb, int noqueue_attempted = 0; int dlm_locked = 0; + if (!(lockres->l_flags & OCFS2_LOCK_INITIALIZED)) { + mlog_errno(-EINVAL); + return -EINVAL; + } + ocfs2_init_mask_waiter(&mw); if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) @@ -2954,7 +2959,7 @@ static int ocfs2_dlm_init_debug(struct ocfs2_super *osb) osb->osb_debug_root, osb, &ocfs2_dlm_debug_fops); - if (!dlm_debug->d_locking_state) { + if (IS_ERR_OR_NULL(dlm_debug->d_locking_state)) { ret = -EINVAL; mlog(ML_ERROR, "Unable to create locking state debugfs file.\n"); diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c index 29651167190d..540dc4bdd042 100644 --- a/fs/ocfs2/export.c +++ b/fs/ocfs2/export.c @@ -82,7 +82,6 @@ static struct dentry *ocfs2_get_dentry(struct super_block *sb, } status = ocfs2_test_inode_bit(osb, blkno, &set); - 
trace_ocfs2_get_dentry_test_bit(status, set); if (status < 0) { if (status == -EINVAL) { /* @@ -96,6 +95,7 @@ static struct dentry *ocfs2_get_dentry(struct super_block *sb, goto unlock_nfs_sync; } + trace_ocfs2_get_dentry_test_bit(status, set); /* If the inode allocator bit is clear, this inode must be stale */ if (!set) { status = -ESTALE; diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 266845de2100..91f03ce98108 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -2392,7 +2392,6 @@ relock: /* * for completing the rest of the request. */ - *ppos += written; count -= written; written_buffered = generic_perform_write(file, from, *ppos); /* @@ -2407,7 +2406,6 @@ relock: goto out_dio; } - iocb->ki_pos = *ppos + written_buffered; /* We need to ensure that the page cache pages are written to * disk and invalidated to preserve the expected O_DIRECT * semantics. @@ -2416,6 +2414,7 @@ relock: ret = filemap_write_and_wait_range(file->f_mapping, *ppos, endbyte); if (ret == 0) { + iocb->ki_pos = *ppos + written_buffered; written += written_buffered; invalidate_mapping_pages(mapping, *ppos >> PAGE_CACHE_SHIFT, @@ -2438,10 +2437,14 @@ out_dio: /* buffered aio wouldn't have proper lock coverage today */ BUG_ON(ret == -EIOCBQUEUED && !(file->f_flags & O_DIRECT)); + if (unlikely(written <= 0)) + goto no_sync; + if (((file->f_flags & O_DSYNC) && !direct_io) || IS_SYNC(inode) || ((file->f_flags & O_DIRECT) && !direct_io)) { - ret = filemap_fdatawrite_range(file->f_mapping, *ppos, - *ppos + count - 1); + ret = filemap_fdatawrite_range(file->f_mapping, + iocb->ki_pos - written, + iocb->ki_pos - 1); if (ret < 0) written = ret; @@ -2452,10 +2455,12 @@ out_dio: } if (!ret) - ret = filemap_fdatawait_range(file->f_mapping, *ppos, - *ppos + count - 1); + ret = filemap_fdatawait_range(file->f_mapping, + iocb->ki_pos - written, + iocb->ki_pos - 1); } +no_sync: /* * deep in g_f_a_w_n()->ocfs2_direct_IO we pass in a ocfs2_dio_end_io * function pointer which is called when o_direct io 
completes so that diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index 3025c0da6b8a..be71ca0937f7 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c @@ -624,7 +624,7 @@ static int ocfs2_remove_inode(struct inode *inode, ocfs2_get_system_file_inode(osb, INODE_ALLOC_SYSTEM_INODE, le16_to_cpu(di->i_suballoc_slot)); if (!inode_alloc_inode) { - status = -EEXIST; + status = -ENOENT; mlog_errno(status); goto bail; } @@ -742,7 +742,7 @@ static int ocfs2_wipe_inode(struct inode *inode, ORPHAN_DIR_SYSTEM_INODE, orphaned_slot); if (!orphan_dir_inode) { - status = -EEXIST; + status = -ENOENT; mlog_errno(status); goto bail; } diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c index 044013455621..857bbbcd39f3 100644 --- a/fs/ocfs2/localalloc.c +++ b/fs/ocfs2/localalloc.c @@ -666,7 +666,7 @@ int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb, if (le32_to_cpu(alloc->id1.bitmap1.i_used) != ocfs2_local_alloc_count_bits(alloc)) { ocfs2_error(osb->sb, "local alloc inode %llu says it has " - "%u free bits, but a count shows %u", + "%u used bits, but a count shows %u", (unsigned long long)le64_to_cpu(alloc->i_blkno), le32_to_cpu(alloc->id1.bitmap1.i_used), ocfs2_local_alloc_count_bits(alloc)); @@ -839,7 +839,7 @@ static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb, u32 *numbits, struct ocfs2_alloc_reservation *resv) { - int numfound, bitoff, left, startoff, lastzero; + int numfound = 0, bitoff, left, startoff, lastzero; int local_resv = 0; struct ocfs2_alloc_reservation r; void *bitmap = NULL; diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index b5c3a5ea3ee6..09f90cbf0e24 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -2322,10 +2322,10 @@ int ocfs2_orphan_del(struct ocfs2_super *osb, trace_ocfs2_orphan_del( (unsigned long long)OCFS2_I(orphan_dir_inode)->ip_blkno, - name, namelen); + name, strlen(name)); /* find it's spot in the orphan directory */ - status = ocfs2_find_entry(name, namelen, orphan_dir_inode, + status = 
ocfs2_find_entry(name, strlen(name), orphan_dir_inode, &lookup); if (status) { mlog_errno(status); @@ -2808,7 +2808,7 @@ int ocfs2_mv_orphaned_inode_to_new(struct inode *dir, ORPHAN_DIR_SYSTEM_INODE, osb->slot_num); if (!orphan_dir_inode) { - status = -EEXIST; + status = -ENOENT; mlog_errno(status); goto leave; } diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index ee541f92dab4..df3a500789c7 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c @@ -4276,7 +4276,7 @@ static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir, error = posix_acl_create(dir, &mode, &default_acl, &acl); if (error) { mlog_errno(error); - goto out; + return error; } error = ocfs2_create_inode_in_orphan(dir, mode, diff --git a/fs/ocfs2/slot_map.c b/fs/ocfs2/slot_map.c index d5493e361a38..e78a203d44c8 100644 --- a/fs/ocfs2/slot_map.c +++ b/fs/ocfs2/slot_map.c @@ -427,7 +427,7 @@ int ocfs2_init_slot_info(struct ocfs2_super *osb) if (!si) { status = -ENOMEM; mlog_errno(status); - goto bail; + return status; } si->si_extended = ocfs2_uses_extended_slot_map(osb); @@ -452,7 +452,7 @@ int ocfs2_init_slot_info(struct ocfs2_super *osb) osb->slot_info = (struct ocfs2_slot_info *)si; bail: - if (status < 0 && si) + if (status < 0) __ocfs2_free_slot_info(si); return status; diff --git a/fs/ocfs2/stack_o2cb.c b/fs/ocfs2/stack_o2cb.c index 1724d43d3da1..220cae7bbdbc 100644 --- a/fs/ocfs2/stack_o2cb.c +++ b/fs/ocfs2/stack_o2cb.c @@ -295,7 +295,7 @@ static int o2cb_cluster_check(void) set_bit(node_num, netmap); if (!memcmp(hbmap, netmap, sizeof(hbmap))) return 0; - if (i < O2CB_MAP_STABILIZE_COUNT) + if (i < O2CB_MAP_STABILIZE_COUNT - 1) msleep(1000); } diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c index 720aa389e0ea..2768eb1da2b8 100644 --- a/fs/ocfs2/stack_user.c +++ b/fs/ocfs2/stack_user.c @@ -1004,10 +1004,8 @@ static int user_cluster_connect(struct ocfs2_cluster_connection *conn) BUG_ON(conn == NULL); lc = kzalloc(sizeof(struct 
ocfs2_live_connection), GFP_KERNEL); - if (!lc) { - rc = -ENOMEM; - goto out; - } + if (!lc) + return -ENOMEM; init_waitqueue_head(&lc->oc_wait); init_completion(&lc->oc_sync_wait); @@ -1063,7 +1061,7 @@ static int user_cluster_connect(struct ocfs2_cluster_connection *conn) } out: - if (rc && lc) + if (rc) kfree(lc); return rc; } diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index 0cb889a17ae1..4479029630bb 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -2499,6 +2499,8 @@ static int _ocfs2_free_suballoc_bits(handle_t *handle, alloc_bh, OCFS2_JOURNAL_ACCESS_WRITE); if (status < 0) { mlog_errno(status); + ocfs2_block_group_set_bits(handle, alloc_inode, group, group_bh, + start_bit, count); goto bail; } diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 26675185b886..837ddce4b659 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -1112,7 +1112,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) osb->osb_debug_root = debugfs_create_dir(osb->uuid_str, ocfs2_debugfs_root); - if (!osb->osb_debug_root) { + if (IS_ERR_OR_NULL(osb->osb_debug_root)) { status = -EINVAL; mlog(ML_ERROR, "Unable to create per-mount debugfs root.\n"); goto read_super_error; @@ -1122,7 +1122,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) osb->osb_debug_root, osb, &ocfs2_osb_debug_fops); - if (!osb->osb_ctxt) { + if (IS_ERR_OR_NULL(osb->osb_ctxt)) { status = -EINVAL; mlog_errno(status); goto read_super_error; @@ -1606,8 +1606,9 @@ static int __init ocfs2_init(void) } ocfs2_debugfs_root = debugfs_create_dir("ocfs2", NULL); - if (!ocfs2_debugfs_root) { - status = -ENOMEM; + if (IS_ERR_OR_NULL(ocfs2_debugfs_root)) { + status = ocfs2_debugfs_root ? 
+ PTR_ERR(ocfs2_debugfs_root) : -ENOMEM; mlog(ML_ERROR, "Unable to create ocfs2 debugfs root.\n"); goto out4; } @@ -2069,6 +2070,8 @@ static int ocfs2_initialize_super(struct super_block *sb, cbits = le32_to_cpu(di->id2.i_super.s_clustersize_bits); bbits = le32_to_cpu(di->id2.i_super.s_blocksize_bits); sb->s_maxbytes = ocfs2_max_file_offset(bbits, cbits); + memcpy(sb->s_uuid, di->id2.i_super.s_uuid, + sizeof(di->id2.i_super.s_uuid)); osb->osb_dx_mask = (1 << (cbits - bbits)) - 1; @@ -2333,7 +2336,7 @@ static int ocfs2_initialize_super(struct super_block *sb, mlog_errno(status); goto bail; } - cleancache_init_shared_fs((char *)&di->id2.i_super.s_uuid, sb); + cleancache_init_shared_fs(sb); bail: return status; @@ -2563,22 +2566,22 @@ static void ocfs2_handle_error(struct super_block *sb) ocfs2_set_ro_flag(osb, 0); } -static char error_buf[1024]; - -void __ocfs2_error(struct super_block *sb, - const char *function, - const char *fmt, ...) +void __ocfs2_error(struct super_block *sb, const char *function, + const char *fmt, ...) { + struct va_format vaf; va_list args; va_start(args, fmt); - vsnprintf(error_buf, sizeof(error_buf), fmt, args); - va_end(args); + vaf.fmt = fmt; + vaf.va = &args; /* Not using mlog here because we want to show the actual * function the error came from. */ - printk(KERN_CRIT "OCFS2: ERROR (device %s): %s: %s\n", - sb->s_id, function, error_buf); + printk(KERN_CRIT "OCFS2: ERROR (device %s): %s: %pV\n", + sb->s_id, function, &vaf); + + va_end(args); ocfs2_handle_error(sb); } @@ -2586,18 +2589,21 @@ void __ocfs2_error(struct super_block *sb, /* Handle critical errors. This is intentionally more drastic than * ocfs2_handle_error, so we only use for things like journal errors, * etc. */ -void __ocfs2_abort(struct super_block* sb, - const char *function, +void __ocfs2_abort(struct super_block *sb, const char *function, const char *fmt, ...) 
{ + struct va_format vaf; va_list args; va_start(args, fmt); - vsnprintf(error_buf, sizeof(error_buf), fmt, args); - va_end(args); - printk(KERN_CRIT "OCFS2: abort (device %s): %s: %s\n", - sb->s_id, function, error_buf); + vaf.fmt = fmt; + vaf.va = &args; + + printk(KERN_CRIT "OCFS2: abort (device %s): %s: %pV\n", + sb->s_id, function, &vaf); + + va_end(args); /* We don't have the cluster support yet to go straight to * hard readonly in here. Until then, we want to keep diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 85b190dc132f..4ca7533be479 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -1238,6 +1238,10 @@ static int ocfs2_xattr_block_get(struct inode *inode, i, &block_off, &name_offset); + if (ret) { + mlog_errno(ret); + goto cleanup; + } xs->base = bucket_block(xs->bucket, block_off); } if (ocfs2_xattr_is_local(xs->here)) { @@ -5665,6 +5669,10 @@ static int ocfs2_delete_xattr_in_bucket(struct inode *inode, ret = ocfs2_get_xattr_tree_value_root(inode->i_sb, bucket, i, &xv, NULL); + if (ret) { + mlog_errno(ret); + break; + } ret = ocfs2_lock_xattr_remove_allocators(inode, xv, args->ref_ci, diff --git a/fs/open.c b/fs/open.c index 33f9cbf2610b..6a83c47d5904 100644 --- a/fs/open.c +++ b/fs/open.c @@ -570,6 +570,7 @@ static int chown_common(struct path *path, uid_t user, gid_t group) uid = make_kuid(current_user_ns(), user); gid = make_kgid(current_user_ns(), group); +retry_deleg: newattrs.ia_valid = ATTR_CTIME; if (user != (uid_t) -1) { if (!uid_valid(uid)) @@ -586,7 +587,6 @@ static int chown_common(struct path *path, uid_t user, gid_t group) if (!S_ISDIR(inode->i_mode)) newattrs.ia_valid |= ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_KILL_PRIV; -retry_deleg: mutex_lock(&inode->i_mutex); error = security_path_chown(path, uid, gid); if (!error) @@ -988,9 +988,6 @@ struct file *file_open_root(struct dentry *dentry, struct vfsmount *mnt, return ERR_PTR(err); if (flags & O_CREAT) return ERR_PTR(-EINVAL); - if (!filename && (flags & O_DIRECTORY)) - if 
(!dentry->d_inode->i_op->lookup) - return ERR_PTR(-ENOTDIR); return do_file_open_root(dentry, mnt, filename, &op); } EXPORT_SYMBOL(file_open_root); diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c index 39d1373128e9..44a549beeafa 100644 --- a/fs/pstore/ram.c +++ b/fs/pstore/ram.c @@ -539,6 +539,9 @@ static int ramoops_probe(struct platform_device *pdev) mem_address = pdata->mem_address; record_size = pdata->record_size; dump_oops = pdata->dump_oops; + ramoops_console_size = pdata->console_size; + ramoops_pmsg_size = pdata->pmsg_size; + ramoops_ftrace_size = pdata->ftrace_size; pr_info("attached 0x%lx@0x%llx, ecc: %d/%d\n", cxt->size, (unsigned long long)cxt->phys_addr, diff --git a/fs/read_write.c b/fs/read_write.c index 99a6ef946d01..69128b378646 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -695,25 +695,23 @@ unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to) } EXPORT_SYMBOL(iov_shorten); -static ssize_t do_iter_readv_writev(struct file *filp, int rw, const struct iovec *iov, - unsigned long nr_segs, size_t len, loff_t *ppos, iter_fn_t fn) +static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter, + loff_t *ppos, iter_fn_t fn) { struct kiocb kiocb; - struct iov_iter iter; ssize_t ret; init_sync_kiocb(&kiocb, filp); kiocb.ki_pos = *ppos; - iov_iter_init(&iter, rw, iov, nr_segs, len); - ret = fn(&kiocb, &iter); + ret = fn(&kiocb, iter); BUG_ON(ret == -EIOCBQUEUED); *ppos = kiocb.ki_pos; return ret; } -static ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov, - unsigned long nr_segs, size_t len, loff_t *ppos, iov_fn_t fn) +static ssize_t do_sync_readv_writev(struct file *filp, struct iov_iter *iter, + loff_t *ppos, iov_fn_t fn) { struct kiocb kiocb; ssize_t ret; @@ -721,30 +719,23 @@ static ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov, init_sync_kiocb(&kiocb, filp); kiocb.ki_pos = *ppos; - ret = fn(&kiocb, iov, nr_segs, kiocb.ki_pos); + ret = fn(&kiocb, iter->iov, 
iter->nr_segs, kiocb.ki_pos); BUG_ON(ret == -EIOCBQUEUED); *ppos = kiocb.ki_pos; return ret; } /* Do it by hand, with file-ops */ -static ssize_t do_loop_readv_writev(struct file *filp, struct iovec *iov, - unsigned long nr_segs, loff_t *ppos, io_fn_t fn) +static ssize_t do_loop_readv_writev(struct file *filp, struct iov_iter *iter, + loff_t *ppos, io_fn_t fn) { - struct iovec *vector = iov; ssize_t ret = 0; - while (nr_segs > 0) { - void __user *base; - size_t len; + while (iov_iter_count(iter)) { + struct iovec iovec = iov_iter_iovec(iter); ssize_t nr; - base = vector->iov_base; - len = vector->iov_len; - vector++; - nr_segs--; - - nr = fn(filp, base, len, ppos); + nr = fn(filp, iovec.iov_base, iovec.iov_len, ppos); if (nr < 0) { if (!ret) @@ -752,8 +743,9 @@ static ssize_t do_loop_readv_writev(struct file *filp, struct iovec *iov, break; } ret += nr; - if (nr != len) + if (nr != iovec.iov_len) break; + iov_iter_advance(iter, nr); } return ret; @@ -844,17 +836,20 @@ static ssize_t do_readv_writev(int type, struct file *file, size_t tot_len; struct iovec iovstack[UIO_FASTIOV]; struct iovec *iov = iovstack; + struct iov_iter iter; ssize_t ret; io_fn_t fn; iov_fn_t fnv; iter_fn_t iter_fn; - ret = rw_copy_check_uvector(type, uvector, nr_segs, - ARRAY_SIZE(iovstack), iovstack, &iov); - if (ret <= 0) - goto out; + ret = import_iovec(type, uvector, nr_segs, + ARRAY_SIZE(iovstack), &iov, &iter); + if (ret < 0) + return ret; - tot_len = ret; + tot_len = iov_iter_count(&iter); + if (!tot_len) + goto out; ret = rw_verify_area(type, file, pos, tot_len); if (ret < 0) goto out; @@ -872,20 +867,17 @@ static ssize_t do_readv_writev(int type, struct file *file, } if (iter_fn) - ret = do_iter_readv_writev(file, type, iov, nr_segs, tot_len, - pos, iter_fn); + ret = do_iter_readv_writev(file, &iter, pos, iter_fn); else if (fnv) - ret = do_sync_readv_writev(file, iov, nr_segs, tot_len, - pos, fnv); + ret = do_sync_readv_writev(file, &iter, pos, fnv); else - ret = 
do_loop_readv_writev(file, iov, nr_segs, pos, fn); + ret = do_loop_readv_writev(file, &iter, pos, fn); if (type != READ) file_end_write(file); out: - if (iov != iovstack) - kfree(iov); + kfree(iov); if ((ret + (type == READ)) > 0) { if (type == READ) fsnotify_access(file); @@ -1024,17 +1016,20 @@ static ssize_t compat_do_readv_writev(int type, struct file *file, compat_ssize_t tot_len; struct iovec iovstack[UIO_FASTIOV]; struct iovec *iov = iovstack; + struct iov_iter iter; ssize_t ret; io_fn_t fn; iov_fn_t fnv; iter_fn_t iter_fn; - ret = compat_rw_copy_check_uvector(type, uvector, nr_segs, - UIO_FASTIOV, iovstack, &iov); - if (ret <= 0) - goto out; + ret = compat_import_iovec(type, uvector, nr_segs, + UIO_FASTIOV, &iov, &iter); + if (ret < 0) + return ret; - tot_len = ret; + tot_len = iov_iter_count(&iter); + if (!tot_len) + goto out; ret = rw_verify_area(type, file, pos, tot_len); if (ret < 0) goto out; @@ -1052,20 +1047,17 @@ static ssize_t compat_do_readv_writev(int type, struct file *file, } if (iter_fn) - ret = do_iter_readv_writev(file, type, iov, nr_segs, tot_len, - pos, iter_fn); + ret = do_iter_readv_writev(file, &iter, pos, iter_fn); else if (fnv) - ret = do_sync_readv_writev(file, iov, nr_segs, tot_len, - pos, fnv); + ret = do_sync_readv_writev(file, &iter, pos, fnv); else - ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn); + ret = do_loop_readv_writev(file, &iter, pos, fn); if (type != READ) file_end_write(file); out: - if (iov != iovstack) - kfree(iov); + kfree(iov); if ((ret + (type == READ)) > 0) { if (type == READ) fsnotify_access(file); diff --git a/fs/splice.c b/fs/splice.c index 4bbfa95b5bfe..41cbb16299e0 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -1533,34 +1533,29 @@ static long vmsplice_to_user(struct file *file, const struct iovec __user *uiov, struct iovec iovstack[UIO_FASTIOV]; struct iovec *iov = iovstack; struct iov_iter iter; - ssize_t count; pipe = get_pipe_info(file); if (!pipe) return -EBADF; - ret = 
rw_copy_check_uvector(READ, uiov, nr_segs, - ARRAY_SIZE(iovstack), iovstack, &iov); - if (ret <= 0) - goto out; - - count = ret; - iov_iter_init(&iter, READ, iov, nr_segs, count); + ret = import_iovec(READ, uiov, nr_segs, + ARRAY_SIZE(iovstack), &iov, &iter); + if (ret < 0) + return ret; + sd.total_len = iov_iter_count(&iter); sd.len = 0; - sd.total_len = count; sd.flags = flags; sd.u.data = &iter; sd.pos = 0; - pipe_lock(pipe); - ret = __splice_from_pipe(pipe, &sd, pipe_to_user); - pipe_unlock(pipe); - -out: - if (iov != iovstack) - kfree(iov); + if (sd.total_len) { + pipe_lock(pipe); + ret = __splice_from_pipe(pipe, &sd, pipe_to_user); + pipe_unlock(pipe); + } + kfree(iov); return ret; } diff --git a/fs/stat.c b/fs/stat.c index ae0c3cef9927..19636af5e75c 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -66,7 +66,7 @@ int vfs_getattr(struct path *path, struct kstat *stat) { int retval; - retval = security_inode_getattr(path->mnt, path->dentry); + retval = security_inode_getattr(path); if (retval) return retval; return vfs_getattr_nosec(path, stat); diff --git a/fs/super.c b/fs/super.c index 2b7dc90ccdbb..928c20f47af9 100644 --- a/fs/super.c +++ b/fs/super.c @@ -224,7 +224,7 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags) s->s_maxbytes = MAX_NON_LFS; s->s_op = &default_op; s->s_time_gran = 1000000000; - s->cleancache_poolid = -1; + s->cleancache_poolid = CLEANCACHE_NO_POOL; s->s_shrink.seeks = DEFAULT_SEEKS; s->s_shrink.scan_objects = super_cache_scan; diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c index 2554d8835b48..b400c04371f0 100644 --- a/fs/sysfs/group.c +++ b/fs/sysfs/group.c @@ -41,7 +41,7 @@ static int create_files(struct kernfs_node *parent, struct kobject *kobj, if (grp->attrs) { for (i = 0, attr = grp->attrs; *attr && !error; i++, attr++) { - umode_t mode = 0; + umode_t mode = (*attr)->mode; /* * In update mode, we're changing the permissions or @@ -55,9 +55,14 @@ static int create_files(struct kernfs_node *parent, struct 
kobject *kobj, if (!mode) continue; } + + WARN(mode & ~(SYSFS_PREALLOC | 0664), + "Attribute %s: Invalid permissions 0%o\n", + (*attr)->name, mode); + + mode &= SYSFS_PREALLOC | 0664; error = sysfs_add_file_mode_ns(parent, *attr, false, - (*attr)->mode | mode, - NULL); + mode, NULL); if (unlikely(error)) break; } diff --git a/fs/tracefs/Makefile b/fs/tracefs/Makefile new file mode 100644 index 000000000000..82fa35b656c4 --- /dev/null +++ b/fs/tracefs/Makefile @@ -0,0 +1,4 @@ +tracefs-objs := inode.o + +obj-$(CONFIG_TRACING) += tracefs.o + diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c new file mode 100644 index 000000000000..d92bdf3b079a --- /dev/null +++ b/fs/tracefs/inode.c @@ -0,0 +1,650 @@ +/* + * inode.c - part of tracefs, a pseudo file system for activating tracing + * + * Based on debugfs by: Greg Kroah-Hartman <greg@kroah.com> + * + * Copyright (C) 2014 Red Hat Inc, author: Steven Rostedt <srostedt@redhat.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + * + * tracefs is the file system that is used by the tracing infrastructure. 
+ * + */ + +#include <linux/module.h> +#include <linux/fs.h> +#include <linux/mount.h> +#include <linux/kobject.h> +#include <linux/namei.h> +#include <linux/tracefs.h> +#include <linux/fsnotify.h> +#include <linux/seq_file.h> +#include <linux/parser.h> +#include <linux/magic.h> +#include <linux/slab.h> + +#define TRACEFS_DEFAULT_MODE 0700 + +static struct vfsmount *tracefs_mount; +static int tracefs_mount_count; +static bool tracefs_registered; + +static ssize_t default_read_file(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + return 0; +} + +static ssize_t default_write_file(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + return count; +} + +static const struct file_operations tracefs_file_operations = { + .read = default_read_file, + .write = default_write_file, + .open = simple_open, + .llseek = noop_llseek, +}; + +static struct tracefs_dir_ops { + int (*mkdir)(const char *name); + int (*rmdir)(const char *name); +} tracefs_ops; + +static char *get_dname(struct dentry *dentry) +{ + const char *dname; + char *name; + int len = dentry->d_name.len; + + dname = dentry->d_name.name; + name = kmalloc(len + 1, GFP_KERNEL); + if (!name) + return NULL; + memcpy(name, dname, len); + name[len] = 0; + return name; +} + +static int tracefs_syscall_mkdir(struct inode *inode, struct dentry *dentry, umode_t mode) +{ + char *name; + int ret; + + name = get_dname(dentry); + if (!name) + return -ENOMEM; + + /* + * The mkdir call can call the generic functions that create + * the files within the tracefs system. It is up to the individual + * mkdir routine to handle races. 
+ */ + mutex_unlock(&inode->i_mutex); + ret = tracefs_ops.mkdir(name); + mutex_lock(&inode->i_mutex); + + kfree(name); + + return ret; +} + +static int tracefs_syscall_rmdir(struct inode *inode, struct dentry *dentry) +{ + char *name; + int ret; + + name = get_dname(dentry); + if (!name) + return -ENOMEM; + + /* + * The rmdir call can call the generic functions that create + * the files within the tracefs system. It is up to the individual + * rmdir routine to handle races. + * This time we need to unlock not only the parent (inode) but + * also the directory that is being deleted. + */ + mutex_unlock(&inode->i_mutex); + mutex_unlock(&dentry->d_inode->i_mutex); + + ret = tracefs_ops.rmdir(name); + + mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT); + mutex_lock(&dentry->d_inode->i_mutex); + + kfree(name); + + return ret; +} + +static const struct inode_operations tracefs_dir_inode_operations = { + .lookup = simple_lookup, + .mkdir = tracefs_syscall_mkdir, + .rmdir = tracefs_syscall_rmdir, +}; + +static struct inode *tracefs_get_inode(struct super_block *sb) +{ + struct inode *inode = new_inode(sb); + if (inode) { + inode->i_ino = get_next_ino(); + inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; + } + return inode; +} + +struct tracefs_mount_opts { + kuid_t uid; + kgid_t gid; + umode_t mode; +}; + +enum { + Opt_uid, + Opt_gid, + Opt_mode, + Opt_err +}; + +static const match_table_t tokens = { + {Opt_uid, "uid=%u"}, + {Opt_gid, "gid=%u"}, + {Opt_mode, "mode=%o"}, + {Opt_err, NULL} +}; + +struct tracefs_fs_info { + struct tracefs_mount_opts mount_opts; +}; + +static int tracefs_parse_options(char *data, struct tracefs_mount_opts *opts) +{ + substring_t args[MAX_OPT_ARGS]; + int option; + int token; + kuid_t uid; + kgid_t gid; + char *p; + + opts->mode = TRACEFS_DEFAULT_MODE; + + while ((p = strsep(&data, ",")) != NULL) { + if (!*p) + continue; + + token = match_token(p, tokens, args); + switch (token) { + case Opt_uid: + if (match_int(&args[0], 
&option)) + return -EINVAL; + uid = make_kuid(current_user_ns(), option); + if (!uid_valid(uid)) + return -EINVAL; + opts->uid = uid; + break; + case Opt_gid: + if (match_int(&args[0], &option)) + return -EINVAL; + gid = make_kgid(current_user_ns(), option); + if (!gid_valid(gid)) + return -EINVAL; + opts->gid = gid; + break; + case Opt_mode: + if (match_octal(&args[0], &option)) + return -EINVAL; + opts->mode = option & S_IALLUGO; + break; + /* + * We might like to report bad mount options here; + * but traditionally tracefs has ignored all mount options + */ + } + } + + return 0; +} + +static int tracefs_apply_options(struct super_block *sb) +{ + struct tracefs_fs_info *fsi = sb->s_fs_info; + struct inode *inode = sb->s_root->d_inode; + struct tracefs_mount_opts *opts = &fsi->mount_opts; + + inode->i_mode &= ~S_IALLUGO; + inode->i_mode |= opts->mode; + + inode->i_uid = opts->uid; + inode->i_gid = opts->gid; + + return 0; +} + +static int tracefs_remount(struct super_block *sb, int *flags, char *data) +{ + int err; + struct tracefs_fs_info *fsi = sb->s_fs_info; + + sync_filesystem(sb); + err = tracefs_parse_options(data, &fsi->mount_opts); + if (err) + goto fail; + + tracefs_apply_options(sb); + +fail: + return err; +} + +static int tracefs_show_options(struct seq_file *m, struct dentry *root) +{ + struct tracefs_fs_info *fsi = root->d_sb->s_fs_info; + struct tracefs_mount_opts *opts = &fsi->mount_opts; + + if (!uid_eq(opts->uid, GLOBAL_ROOT_UID)) + seq_printf(m, ",uid=%u", + from_kuid_munged(&init_user_ns, opts->uid)); + if (!gid_eq(opts->gid, GLOBAL_ROOT_GID)) + seq_printf(m, ",gid=%u", + from_kgid_munged(&init_user_ns, opts->gid)); + if (opts->mode != TRACEFS_DEFAULT_MODE) + seq_printf(m, ",mode=%o", opts->mode); + + return 0; +} + +static const struct super_operations tracefs_super_operations = { + .statfs = simple_statfs, + .remount_fs = tracefs_remount, + .show_options = tracefs_show_options, +}; + +static int trace_fill_super(struct super_block *sb, void 
*data, int silent) +{ + static struct tree_descr trace_files[] = {{""}}; + struct tracefs_fs_info *fsi; + int err; + + save_mount_options(sb, data); + + fsi = kzalloc(sizeof(struct tracefs_fs_info), GFP_KERNEL); + sb->s_fs_info = fsi; + if (!fsi) { + err = -ENOMEM; + goto fail; + } + + err = tracefs_parse_options(data, &fsi->mount_opts); + if (err) + goto fail; + + err = simple_fill_super(sb, TRACEFS_MAGIC, trace_files); + if (err) + goto fail; + + sb->s_op = &tracefs_super_operations; + + tracefs_apply_options(sb); + + return 0; + +fail: + kfree(fsi); + sb->s_fs_info = NULL; + return err; +} + +static struct dentry *trace_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, + void *data) +{ + return mount_single(fs_type, flags, data, trace_fill_super); +} + +static struct file_system_type trace_fs_type = { + .owner = THIS_MODULE, + .name = "tracefs", + .mount = trace_mount, + .kill_sb = kill_litter_super, +}; +MODULE_ALIAS_FS("tracefs"); + +static struct dentry *start_creating(const char *name, struct dentry *parent) +{ + struct dentry *dentry; + int error; + + pr_debug("tracefs: creating file '%s'\n",name); + + error = simple_pin_fs(&trace_fs_type, &tracefs_mount, + &tracefs_mount_count); + if (error) + return ERR_PTR(error); + + /* If the parent is not specified, we create it in the root. + * We need the root dentry to do this, which is in the super + * block. A pointer to that is in the struct vfsmount that we + * have around. 
+ */ + if (!parent) + parent = tracefs_mount->mnt_root; + + mutex_lock(&parent->d_inode->i_mutex); + dentry = lookup_one_len(name, parent, strlen(name)); + if (!IS_ERR(dentry) && dentry->d_inode) { + dput(dentry); + dentry = ERR_PTR(-EEXIST); + } + if (IS_ERR(dentry)) + mutex_unlock(&parent->d_inode->i_mutex); + return dentry; +} + +static struct dentry *failed_creating(struct dentry *dentry) +{ + mutex_unlock(&dentry->d_parent->d_inode->i_mutex); + dput(dentry); + simple_release_fs(&tracefs_mount, &tracefs_mount_count); + return NULL; +} + +static struct dentry *end_creating(struct dentry *dentry) +{ + mutex_unlock(&dentry->d_parent->d_inode->i_mutex); + return dentry; +} + +/** + * tracefs_create_file - create a file in the tracefs filesystem + * @name: a pointer to a string containing the name of the file to create. + * @mode: the permission that the file should have. + * @parent: a pointer to the parent dentry for this file. This should be a + * directory dentry if set. If this parameter is NULL, then the + * file will be created in the root of the tracefs filesystem. + * @data: a pointer to something that the caller will want to get to later + * on. The inode.i_private pointer will point to this value on + * the open() call. + * @fops: a pointer to a struct file_operations that should be used for + * this file. + * + * This is the basic "create a file" function for tracefs. It allows for a + * wide range of flexibility in creating a file, or a directory (if you want + * to create a directory, the tracefs_create_dir() function is + * recommended to be used instead.) + * + * This function will return a pointer to a dentry if it succeeds. This + * pointer must be passed to the tracefs_remove() function when the file is + * to be removed (no automatic cleanup happens if your module is unloaded, + * you are responsible here.) If an error occurs, %NULL will be returned. + * + * If tracefs is not enabled in the kernel, the value -%ENODEV will be + * returned. 
+ */ +struct dentry *tracefs_create_file(const char *name, umode_t mode, + struct dentry *parent, void *data, + const struct file_operations *fops) +{ + struct dentry *dentry; + struct inode *inode; + + if (!(mode & S_IFMT)) + mode |= S_IFREG; + BUG_ON(!S_ISREG(mode)); + dentry = start_creating(name, parent); + + if (IS_ERR(dentry)) + return NULL; + + inode = tracefs_get_inode(dentry->d_sb); + if (unlikely(!inode)) + return failed_creating(dentry); + + inode->i_mode = mode; + inode->i_fop = fops ? fops : &tracefs_file_operations; + inode->i_private = data; + d_instantiate(dentry, inode); + fsnotify_create(dentry->d_parent->d_inode, dentry); + return end_creating(dentry); +} + +static struct dentry *__create_dir(const char *name, struct dentry *parent, + const struct inode_operations *ops) +{ + struct dentry *dentry = start_creating(name, parent); + struct inode *inode; + + if (IS_ERR(dentry)) + return NULL; + + inode = tracefs_get_inode(dentry->d_sb); + if (unlikely(!inode)) + return failed_creating(dentry); + + inode->i_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO; + inode->i_op = ops; + inode->i_fop = &simple_dir_operations; + + /* directory inodes start off with i_nlink == 2 (for "." entry) */ + inc_nlink(inode); + d_instantiate(dentry, inode); + inc_nlink(dentry->d_parent->d_inode); + fsnotify_mkdir(dentry->d_parent->d_inode, dentry); + return end_creating(dentry); +} + +/** + * tracefs_create_dir - create a directory in the tracefs filesystem + * @name: a pointer to a string containing the name of the directory to + * create. + * @parent: a pointer to the parent dentry for this file. This should be a + * directory dentry if set. If this parameter is NULL, then the + * directory will be created in the root of the tracefs filesystem. + * + * This function creates a directory in tracefs with the given name. + * + * This function will return a pointer to a dentry if it succeeds. 
This + * pointer must be passed to the tracefs_remove() function when the file is + * to be removed. If an error occurs, %NULL will be returned. + * + * If tracing is not enabled in the kernel, the value -%ENODEV will be + * returned. + */ +struct dentry *tracefs_create_dir(const char *name, struct dentry *parent) +{ + return __create_dir(name, parent, &simple_dir_inode_operations); +} + +/** + * tracefs_create_instance_dir - create the tracing instances directory + * @name: The name of the instances directory to create + * @parent: The parent directory that the instances directory will exist in + * @mkdir: The function to call when a mkdir is performed. + * @rmdir: The function to call when a rmdir is performed. + * + * Only one instances directory is allowed. + * + * The instances directory is special as it allows for mkdir and rmdir to + * be done by userspace. When a mkdir or rmdir is performed, the inode + * locks are released and the methods passed in (@mkdir and @rmdir) are + * called without locks and with the name of the directory being created + * within the instances directory. + * + * Returns the dentry of the instances directory. + */ +struct dentry *tracefs_create_instance_dir(const char *name, struct dentry *parent, + int (*mkdir)(const char *name), + int (*rmdir)(const char *name)) +{ + struct dentry *dentry; + + /* Only allow one instance of the instances directory. 
*/ + if (WARN_ON(tracefs_ops.mkdir || tracefs_ops.rmdir)) + return NULL; + + dentry = __create_dir(name, parent, &tracefs_dir_inode_operations); + if (!dentry) + return NULL; + + tracefs_ops.mkdir = mkdir; + tracefs_ops.rmdir = rmdir; + + return dentry; +} + +static inline int tracefs_positive(struct dentry *dentry) +{ + return dentry->d_inode && !d_unhashed(dentry); +} + +static int __tracefs_remove(struct dentry *dentry, struct dentry *parent) +{ + int ret = 0; + + if (tracefs_positive(dentry)) { + if (dentry->d_inode) { + dget(dentry); + switch (dentry->d_inode->i_mode & S_IFMT) { + case S_IFDIR: + ret = simple_rmdir(parent->d_inode, dentry); + break; + default: + simple_unlink(parent->d_inode, dentry); + break; + } + if (!ret) + d_delete(dentry); + dput(dentry); + } + } + return ret; +} + +/** + * tracefs_remove - removes a file or directory from the tracefs filesystem + * @dentry: a pointer to the dentry of the file or directory to be + * removed. + * + * This function removes a file or directory in tracefs that was previously + * created with a call to another tracefs function (like + * tracefs_create_file() or variants thereof.) + */ +void tracefs_remove(struct dentry *dentry) +{ + struct dentry *parent; + int ret; + + if (IS_ERR_OR_NULL(dentry)) + return; + + parent = dentry->d_parent; + if (!parent || !parent->d_inode) + return; + + mutex_lock(&parent->d_inode->i_mutex); + ret = __tracefs_remove(dentry, parent); + mutex_unlock(&parent->d_inode->i_mutex); + if (!ret) + simple_release_fs(&tracefs_mount, &tracefs_mount_count); +} + +/** + * tracefs_remove_recursive - recursively removes a directory + * @dentry: a pointer to the dentry of the directory to be removed. + * + * This function recursively removes a directory tree in tracefs that + * was previously created with a call to another tracefs function + * (like tracefs_create_file() or variants thereof.) 
+ */ +void tracefs_remove_recursive(struct dentry *dentry) +{ + struct dentry *child, *parent; + + if (IS_ERR_OR_NULL(dentry)) + return; + + parent = dentry->d_parent; + if (!parent || !parent->d_inode) + return; + + parent = dentry; + down: + mutex_lock(&parent->d_inode->i_mutex); + loop: + /* + * The parent->d_subdirs is protected by the d_lock. Outside that + * lock, the child can be unlinked and set to be freed which can + * use the d_u.d_child as the rcu head and corrupt this list. + */ + spin_lock(&parent->d_lock); + list_for_each_entry(child, &parent->d_subdirs, d_child) { + if (!tracefs_positive(child)) + continue; + + /* perhaps simple_empty(child) makes more sense */ + if (!list_empty(&child->d_subdirs)) { + spin_unlock(&parent->d_lock); + mutex_unlock(&parent->d_inode->i_mutex); + parent = child; + goto down; + } + + spin_unlock(&parent->d_lock); + + if (!__tracefs_remove(child, parent)) + simple_release_fs(&tracefs_mount, &tracefs_mount_count); + + /* + * The parent->d_lock protects against child from unlinking + * from d_subdirs. When releasing the parent->d_lock we can + * no longer trust that the next pointer is valid. + * Restart the loop. We'll skip this one with the + * tracefs_positive() check. 
+ */ + goto loop; + } + spin_unlock(&parent->d_lock); + + mutex_unlock(&parent->d_inode->i_mutex); + child = parent; + parent = parent->d_parent; + mutex_lock(&parent->d_inode->i_mutex); + + if (child != dentry) + /* go up */ + goto loop; + + if (!__tracefs_remove(child, parent)) + simple_release_fs(&tracefs_mount, &tracefs_mount_count); + mutex_unlock(&parent->d_inode->i_mutex); +} + +/** + * tracefs_initialized - Tells whether tracefs has been registered + */ +bool tracefs_initialized(void) +{ + return tracefs_registered; +} + +static struct kobject *trace_kobj; + +static int __init tracefs_init(void) +{ + int retval; + + trace_kobj = kobject_create_and_add("tracing", kernel_kobj); + if (!trace_kobj) + return -EINVAL; + + retval = register_filesystem(&trace_fs_type); + if (!retval) + tracefs_registered = true; + + return retval; +} +core_initcall(tracefs_init); |