summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/Kconfig.binfmt3
-rw-r--r--fs/Makefile1
-rw-r--r--fs/aio.c90
-rw-r--r--fs/binfmt_elf.c31
-rw-r--r--fs/buffer.c4
-rw-r--r--fs/cifs/connect.c6
-rw-r--r--fs/dcache.c2
-rw-r--r--fs/debugfs/inode.c3
-rw-r--r--fs/gfs2/acl.c6
-rw-r--r--fs/gfs2/aops.c6
-rw-r--r--fs/gfs2/bmap.c2
-rw-r--r--fs/gfs2/file.c101
-rw-r--r--fs/gfs2/glock.c47
-rw-r--r--fs/gfs2/incore.h4
-rw-r--r--fs/gfs2/inode.c18
-rw-r--r--fs/gfs2/quota.c62
-rw-r--r--fs/gfs2/quota.h8
-rw-r--r--fs/gfs2/rgrp.c20
-rw-r--r--fs/gfs2/rgrp.h3
-rw-r--r--fs/gfs2/xattr.c2
-rw-r--r--fs/hugetlbfs/inode.c2
-rw-r--r--fs/jffs2/xattr.c2
-rw-r--r--fs/jfs/super.c2
-rw-r--r--fs/namei.c168
-rw-r--r--fs/nfs/write.c5
-rw-r--r--fs/ntfs/Makefile2
-rw-r--r--fs/ntfs/file.c783
-rw-r--r--fs/ocfs2/alloc.c48
-rw-r--r--fs/ocfs2/aops.c155
-rw-r--r--fs/ocfs2/cluster/heartbeat.c42
-rw-r--r--fs/ocfs2/cluster/masklog.h5
-rw-r--r--fs/ocfs2/dir.c15
-rw-r--r--fs/ocfs2/dlmglue.c7
-rw-r--r--fs/ocfs2/export.c2
-rw-r--r--fs/ocfs2/file.c17
-rw-r--r--fs/ocfs2/inode.c4
-rw-r--r--fs/ocfs2/localalloc.c4
-rw-r--r--fs/ocfs2/namei.c6
-rw-r--r--fs/ocfs2/refcounttree.c2
-rw-r--r--fs/ocfs2/slot_map.c4
-rw-r--r--fs/ocfs2/stack_o2cb.c2
-rw-r--r--fs/ocfs2/stack_user.c8
-rw-r--r--fs/ocfs2/suballoc.c2
-rw-r--r--fs/ocfs2/super.c46
-rw-r--r--fs/ocfs2/xattr.c8
-rw-r--r--fs/open.c5
-rw-r--r--fs/pstore/ram.c3
-rw-r--r--fs/read_write.c82
-rw-r--r--fs/splice.c27
-rw-r--r--fs/stat.c2
-rw-r--r--fs/super.c2
-rw-r--r--fs/sysfs/group.c11
-rw-r--r--fs/tracefs/Makefile4
-rw-r--r--fs/tracefs/inode.c650
54 files changed, 1620 insertions, 926 deletions
diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt
index 270c48148f79..2d0cbbd14cfc 100644
--- a/fs/Kconfig.binfmt
+++ b/fs/Kconfig.binfmt
@@ -27,9 +27,6 @@ config COMPAT_BINFMT_ELF
bool
depends on COMPAT && BINFMT_ELF
-config ARCH_BINFMT_ELF_RANDOMIZE_PIE
- bool
-
config ARCH_BINFMT_ELF_STATE
bool
diff --git a/fs/Makefile b/fs/Makefile
index a88ac4838c9e..cb92fd4c3172 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -118,6 +118,7 @@ obj-$(CONFIG_HOSTFS) += hostfs/
obj-$(CONFIG_HPPFS) += hppfs/
obj-$(CONFIG_CACHEFILES) += cachefiles/
obj-$(CONFIG_DEBUG_FS) += debugfs/
+obj-$(CONFIG_TRACING) += tracefs/
obj-$(CONFIG_OCFS2_FS) += ocfs2/
obj-$(CONFIG_BTRFS_FS) += btrfs/
obj-$(CONFIG_GFS2_FS) += gfs2/
diff --git a/fs/aio.c b/fs/aio.c
index 435ca29eca31..1ab60010cf6c 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -310,11 +310,11 @@ static int aio_ring_mmap(struct file *file, struct vm_area_struct *vma)
return 0;
}
-static void aio_ring_remap(struct file *file, struct vm_area_struct *vma)
+static int aio_ring_remap(struct file *file, struct vm_area_struct *vma)
{
struct mm_struct *mm = vma->vm_mm;
struct kioctx_table *table;
- int i;
+ int i, res = -EINVAL;
spin_lock(&mm->ioctx_lock);
rcu_read_lock();
@@ -324,13 +324,17 @@ static void aio_ring_remap(struct file *file, struct vm_area_struct *vma)
ctx = table->table[i];
if (ctx && ctx->aio_ring_file == file) {
- ctx->user_id = ctx->mmap_base = vma->vm_start;
+ if (!atomic_read(&ctx->dead)) {
+ ctx->user_id = ctx->mmap_base = vma->vm_start;
+ res = 0;
+ }
break;
}
}
rcu_read_unlock();
spin_unlock(&mm->ioctx_lock);
+ return res;
}
static const struct file_operations aio_ring_fops = {
@@ -760,6 +764,9 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
err_cleanup:
aio_nr_sub(ctx->max_reqs);
err_ctx:
+ atomic_set(&ctx->dead, 1);
+ if (ctx->mmap_size)
+ vm_munmap(ctx->mmap_base, ctx->mmap_size);
aio_free_ring(ctx);
err:
mutex_unlock(&ctx->ring_lock);
@@ -781,11 +788,12 @@ static int kill_ioctx(struct mm_struct *mm, struct kioctx *ctx,
{
struct kioctx_table *table;
- if (atomic_xchg(&ctx->dead, 1))
+ spin_lock(&mm->ioctx_lock);
+ if (atomic_xchg(&ctx->dead, 1)) {
+ spin_unlock(&mm->ioctx_lock);
return -EINVAL;
+ }
-
- spin_lock(&mm->ioctx_lock);
table = rcu_dereference_raw(mm->ioctx_table);
WARN_ON(ctx != table->table[ctx->id]);
table->table[ctx->id] = NULL;
@@ -1352,48 +1360,19 @@ typedef ssize_t (aio_rw_op)(struct kiocb *, const struct iovec *,
unsigned long, loff_t);
typedef ssize_t (rw_iter_op)(struct kiocb *, struct iov_iter *);
-static ssize_t aio_setup_vectored_rw(struct kiocb *kiocb,
- int rw, char __user *buf,
- unsigned long *nr_segs,
- size_t *len,
- struct iovec **iovec,
- bool compat)
+static int aio_setup_vectored_rw(int rw, char __user *buf, size_t len,
+ struct iovec **iovec,
+ bool compat,
+ struct iov_iter *iter)
{
- ssize_t ret;
-
- *nr_segs = *len;
-
#ifdef CONFIG_COMPAT
if (compat)
- ret = compat_rw_copy_check_uvector(rw,
+ return compat_import_iovec(rw,
(struct compat_iovec __user *)buf,
- *nr_segs, UIO_FASTIOV, *iovec, iovec);
- else
+ len, UIO_FASTIOV, iovec, iter);
#endif
- ret = rw_copy_check_uvector(rw,
- (struct iovec __user *)buf,
- *nr_segs, UIO_FASTIOV, *iovec, iovec);
- if (ret < 0)
- return ret;
-
- /* len now reflect bytes instead of segs */
- *len = ret;
- return 0;
-}
-
-static ssize_t aio_setup_single_vector(struct kiocb *kiocb,
- int rw, char __user *buf,
- unsigned long *nr_segs,
- size_t len,
- struct iovec *iovec)
-{
- if (unlikely(!access_ok(!rw, buf, len)))
- return -EFAULT;
-
- iovec->iov_base = buf;
- iovec->iov_len = len;
- *nr_segs = 1;
- return 0;
+ return import_iovec(rw, (struct iovec __user *)buf,
+ len, UIO_FASTIOV, iovec, iter);
}
/*
@@ -1405,7 +1384,6 @@ static ssize_t aio_run_iocb(struct kiocb *req, unsigned opcode,
{
struct file *file = req->ki_filp;
ssize_t ret;
- unsigned long nr_segs;
int rw;
fmode_t mode;
aio_rw_op *rw_op;
@@ -1437,16 +1415,17 @@ rw_common:
return -EINVAL;
if (opcode == IOCB_CMD_PREADV || opcode == IOCB_CMD_PWRITEV)
- ret = aio_setup_vectored_rw(req, rw, buf, &nr_segs,
- &len, &iovec, compat);
- else
- ret = aio_setup_single_vector(req, rw, buf, &nr_segs,
- len, iovec);
+ ret = aio_setup_vectored_rw(rw, buf, len,
+ &iovec, compat, &iter);
+ else {
+ ret = import_single_range(rw, buf, len, iovec, &iter);
+ iovec = NULL;
+ }
if (!ret)
- ret = rw_verify_area(rw, file, &req->ki_pos, len);
+ ret = rw_verify_area(rw, file, &req->ki_pos,
+ iov_iter_count(&iter));
if (ret < 0) {
- if (iovec != inline_vecs)
- kfree(iovec);
+ kfree(iovec);
return ret;
}
@@ -1463,14 +1442,14 @@ rw_common:
file_start_write(file);
if (iter_op) {
- iov_iter_init(&iter, rw, iovec, nr_segs, len);
ret = iter_op(req, &iter);
} else {
- ret = rw_op(req, iovec, nr_segs, req->ki_pos);
+ ret = rw_op(req, iter.iov, iter.nr_segs, req->ki_pos);
}
if (rw == WRITE)
file_end_write(file);
+ kfree(iovec);
break;
case IOCB_CMD_FDSYNC:
@@ -1492,9 +1471,6 @@ rw_common:
return -EINVAL;
}
- if (iovec != inline_vecs)
- kfree(iovec);
-
if (ret != -EIOCBQUEUED) {
/*
* There's no easy way to restart the syscall since other AIO's
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 995986b8e36b..241ef68d2893 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -31,6 +31,7 @@
#include <linux/security.h>
#include <linux/random.h>
#include <linux/elf.h>
+#include <linux/elf-randomize.h>
#include <linux/utsname.h>
#include <linux/coredump.h>
#include <linux/sched.h>
@@ -862,6 +863,7 @@ static int load_elf_binary(struct linux_binprm *bprm)
i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
int elf_prot = 0, elf_flags;
unsigned long k, vaddr;
+ unsigned long total_size = 0;
if (elf_ppnt->p_type != PT_LOAD)
continue;
@@ -909,25 +911,20 @@ static int load_elf_binary(struct linux_binprm *bprm)
* default mmap base, as well as whatever program they
* might try to exec. This is because the brk will
* follow the loader, and is not movable. */
-#ifdef CONFIG_ARCH_BINFMT_ELF_RANDOMIZE_PIE
- /* Memory randomization might have been switched off
- * in runtime via sysctl or explicit setting of
- * personality flags.
- * If that is the case, retain the original non-zero
- * load_bias value in order to establish proper
- * non-randomized mappings.
- */
+ load_bias = ELF_ET_DYN_BASE - vaddr;
if (current->flags & PF_RANDOMIZE)
- load_bias = 0;
- else
- load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
-#else
- load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
-#endif
+ load_bias += arch_mmap_rnd();
+ load_bias = ELF_PAGESTART(load_bias);
+ total_size = total_mapping_size(elf_phdata,
+ loc->elf_ex.e_phnum);
+ if (!total_size) {
+ error = -EINVAL;
+ goto out_free_dentry;
+ }
}
error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
- elf_prot, elf_flags, 0);
+ elf_prot, elf_flags, total_size);
if (BAD_ADDR(error)) {
retval = IS_ERR((void *)error) ?
PTR_ERR((void*)error) : -EINVAL;
@@ -1053,15 +1050,13 @@ static int load_elf_binary(struct linux_binprm *bprm)
current->mm->end_data = end_data;
current->mm->start_stack = bprm->p;
-#ifdef arch_randomize_brk
if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
current->mm->brk = current->mm->start_brk =
arch_randomize_brk(current->mm);
-#ifdef CONFIG_COMPAT_BRK
+#ifdef compat_brk_randomized
current->brk_randomized = 1;
#endif
}
-#endif
if (current->personality & MMAP_PAGE_ZERO) {
/* Why this, you ask??? Well SVr4 maps page 0 as read-only,
diff --git a/fs/buffer.c b/fs/buffer.c
index 20805db2c987..c7a5602d01ee 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -3243,8 +3243,8 @@ int try_to_free_buffers(struct page *page)
* to synchronise against __set_page_dirty_buffers and prevent the
* dirty bit from being lost.
*/
- if (ret)
- cancel_dirty_page(page, PAGE_CACHE_SIZE);
+ if (ret && TestClearPageDirty(page))
+ account_page_cleaned(page, mapping);
spin_unlock(&mapping->private_lock);
out:
if (buffers_to_free) {
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 480cf9c81d50..f3bfe08e177b 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -773,8 +773,7 @@ static void clean_demultiplex_info(struct TCP_Server_Info *server)
length = atomic_dec_return(&tcpSesAllocCount);
if (length > 0)
- mempool_resize(cifs_req_poolp, length + cifs_min_rcv,
- GFP_KERNEL);
+ mempool_resize(cifs_req_poolp, length + cifs_min_rcv);
}
static int
@@ -848,8 +847,7 @@ cifs_demultiplex_thread(void *p)
length = atomic_inc_return(&tcpSesAllocCount);
if (length > 1)
- mempool_resize(cifs_req_poolp, length + cifs_min_rcv,
- GFP_KERNEL);
+ mempool_resize(cifs_req_poolp, length + cifs_min_rcv);
set_freezable();
while (server->tcpStatus != CifsExiting) {
diff --git a/fs/dcache.c b/fs/dcache.c
index c71e3732e53b..d99736a63e3c 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -2690,7 +2690,7 @@ static int __d_unalias(struct inode *inode,
struct dentry *dentry, struct dentry *alias)
{
struct mutex *m1 = NULL, *m2 = NULL;
- int ret = -EBUSY;
+ int ret = -ESTALE;
/* If alias and dentry share a parent, then no extra locks required */
if (alias->d_parent == dentry->d_parent)
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 96400ab42d13..61e72d44cf94 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -254,6 +254,9 @@ static struct dentry *start_creating(const char *name, struct dentry *parent)
pr_debug("debugfs: creating file '%s'\n",name);
+ if (IS_ERR(parent))
+ return parent;
+
error = simple_pin_fs(&debug_fs_type, &debugfs_mount,
&debugfs_mount_count);
if (error)
diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c
index 7b3143064af1..1be3b061c05c 100644
--- a/fs/gfs2/acl.c
+++ b/fs/gfs2/acl.c
@@ -110,11 +110,7 @@ int gfs2_set_acl(struct inode *inode, struct posix_acl *acl, int type)
error = __gfs2_xattr_set(inode, name, data, len, 0, GFS2_EATYPE_SYS);
if (error)
goto out;
-
- if (acl)
- set_cached_acl(inode, type, acl);
- else
- forget_cached_acl(inode, type);
+ set_cached_acl(inode, type, acl);
out:
kfree(data);
return error;
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index fe6634d25d1d..a6e6990aea39 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -671,12 +671,12 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping,
if (alloc_required) {
struct gfs2_alloc_parms ap = { .aflags = 0, };
- error = gfs2_quota_lock_check(ip);
+ requested = data_blocks + ind_blocks;
+ ap.target = requested;
+ error = gfs2_quota_lock_check(ip, &ap);
if (error)
goto out_unlock;
- requested = data_blocks + ind_blocks;
- ap.target = requested;
error = gfs2_inplace_reserve(ip, &ap);
if (error)
goto out_qunlock;
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index f0b945ab853e..61296ecbd0e2 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -1224,7 +1224,7 @@ static int do_grow(struct inode *inode, u64 size)
if (gfs2_is_stuffed(ip) &&
(size > (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)))) {
- error = gfs2_quota_lock_check(ip);
+ error = gfs2_quota_lock_check(ip, &ap);
if (error)
return error;
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index f6fc412b1100..8ec43ab5babf 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -428,11 +428,11 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
if (ret)
goto out_unlock;
- ret = gfs2_quota_lock_check(ip);
- if (ret)
- goto out_unlock;
gfs2_write_calc_reserv(ip, PAGE_CACHE_SIZE, &data_blocks, &ind_blocks);
ap.target = data_blocks + ind_blocks;
+ ret = gfs2_quota_lock_check(ip, &ap);
+ if (ret)
+ goto out_unlock;
ret = gfs2_inplace_reserve(ip, &ap);
if (ret)
goto out_quota_unlock;
@@ -764,22 +764,30 @@ out:
brelse(dibh);
return error;
}
-
-static void calc_max_reserv(struct gfs2_inode *ip, loff_t max, loff_t *len,
- unsigned int *data_blocks, unsigned int *ind_blocks)
+/**
+ * calc_max_reserv() - Reverse of write_calc_reserv. Given a number of
+ * blocks, determine how many bytes can be written.
+ * @ip: The inode in question.
+ * @len: Max cap of bytes. What we return in *len must be <= this.
+ * @data_blocks: Compute and return the number of data blocks needed
+ * @ind_blocks: Compute and return the number of indirect blocks needed
+ * @max_blocks: The total blocks available to work with.
+ *
+ * Returns: void, but @len, @data_blocks and @ind_blocks are filled in.
+ */
+static void calc_max_reserv(struct gfs2_inode *ip, loff_t *len,
+ unsigned int *data_blocks, unsigned int *ind_blocks,
+ unsigned int max_blocks)
{
+ loff_t max = *len;
const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
- unsigned int max_blocks = ip->i_rgd->rd_free_clone;
unsigned int tmp, max_data = max_blocks - 3 * (sdp->sd_max_height - 1);
for (tmp = max_data; tmp > sdp->sd_diptrs;) {
tmp = DIV_ROUND_UP(tmp, sdp->sd_inptrs);
max_data -= tmp;
}
- /* This calculation isn't the exact reverse of gfs2_write_calc_reserve,
- so it might end up with fewer data blocks */
- if (max_data <= *data_blocks)
- return;
+
*data_blocks = max_data;
*ind_blocks = max_blocks - max_data;
*len = ((loff_t)max_data - 3) << sdp->sd_sb.sb_bsize_shift;
@@ -796,7 +804,7 @@ static long __gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t
struct gfs2_inode *ip = GFS2_I(inode);
struct gfs2_alloc_parms ap = { .aflags = 0, };
unsigned int data_blocks = 0, ind_blocks = 0, rblocks;
- loff_t bytes, max_bytes;
+ loff_t bytes, max_bytes, max_blks = UINT_MAX;
int error;
const loff_t pos = offset;
const loff_t count = len;
@@ -818,6 +826,9 @@ static long __gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t
gfs2_size_hint(file, offset, len);
+ gfs2_write_calc_reserv(ip, PAGE_SIZE, &data_blocks, &ind_blocks);
+ ap.min_target = data_blocks + ind_blocks;
+
while (len > 0) {
if (len < bytes)
bytes = len;
@@ -826,27 +837,41 @@ static long __gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t
offset += bytes;
continue;
}
- error = gfs2_quota_lock_check(ip);
+
+ /* We need to determine how many bytes we can actually
+ * fallocate without exceeding quota or going over the
+ * end of the fs. We start off optimistically by assuming
+ * we can write max_bytes */
+ max_bytes = (len > max_chunk_size) ? max_chunk_size : len;
+
+ /* Since max_bytes is most likely a theoretical max, we
+ * calculate a more realistic 'bytes' to serve as a good
+ * starting point for the number of bytes we may be able
+ * to write */
+ gfs2_write_calc_reserv(ip, bytes, &data_blocks, &ind_blocks);
+ ap.target = data_blocks + ind_blocks;
+
+ error = gfs2_quota_lock_check(ip, &ap);
if (error)
return error;
-retry:
- gfs2_write_calc_reserv(ip, bytes, &data_blocks, &ind_blocks);
+ /* ap.allowed tells us how many blocks quota will allow
+ * us to write. Check if this reduces max_blks */
+ if (ap.allowed && ap.allowed < max_blks)
+ max_blks = ap.allowed;
- ap.target = data_blocks + ind_blocks;
error = gfs2_inplace_reserve(ip, &ap);
- if (error) {
- if (error == -ENOSPC && bytes > sdp->sd_sb.sb_bsize) {
- bytes >>= 1;
- bytes &= bsize_mask;
- if (bytes == 0)
- bytes = sdp->sd_sb.sb_bsize;
- goto retry;
- }
+ if (error)
goto out_qunlock;
- }
- max_bytes = bytes;
- calc_max_reserv(ip, (len > max_chunk_size)? max_chunk_size: len,
- &max_bytes, &data_blocks, &ind_blocks);
+
+ /* check if the selected rgrp limits our max_blks further */
+ if (ap.allowed && ap.allowed < max_blks)
+ max_blks = ap.allowed;
+
+ /* Almost done. Calculate bytes that can be written using
+ * max_blks. We also recompute max_bytes, data_blocks and
+ * ind_blocks */
+ calc_max_reserv(ip, &max_bytes, &data_blocks,
+ &ind_blocks, max_blks);
rblocks = RES_DINODE + ind_blocks + RES_STATFS + RES_QUOTA +
RES_RG_HDR + gfs2_rg_blocks(ip, data_blocks + ind_blocks);
@@ -930,6 +955,22 @@ out_uninit:
return ret;
}
+static ssize_t gfs2_file_splice_write(struct pipe_inode_info *pipe,
+ struct file *out, loff_t *ppos,
+ size_t len, unsigned int flags)
+{
+ int error;
+ struct gfs2_inode *ip = GFS2_I(out->f_mapping->host);
+
+ error = gfs2_rs_alloc(ip);
+ if (error)
+ return (ssize_t)error;
+
+ gfs2_size_hint(out, *ppos, len);
+
+ return iter_file_splice_write(pipe, out, ppos, len, flags);
+}
+
#ifdef CONFIG_GFS2_FS_LOCKING_DLM
/**
@@ -1076,7 +1117,7 @@ const struct file_operations gfs2_file_fops = {
.lock = gfs2_lock,
.flock = gfs2_flock,
.splice_read = generic_file_splice_read,
- .splice_write = iter_file_splice_write,
+ .splice_write = gfs2_file_splice_write,
.setlease = simple_nosetlease,
.fallocate = gfs2_fallocate,
};
@@ -1106,7 +1147,7 @@ const struct file_operations gfs2_file_fops_nolock = {
.release = gfs2_release,
.fsync = gfs2_fsync,
.splice_read = generic_file_splice_read,
- .splice_write = iter_file_splice_write,
+ .splice_write = gfs2_file_splice_write,
.setlease = generic_setlease,
.fallocate = gfs2_fallocate,
};
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index f42dffba056a..0fa8062f85a7 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -2047,34 +2047,41 @@ static const struct file_operations gfs2_sbstats_fops = {
int gfs2_create_debugfs_file(struct gfs2_sbd *sdp)
{
- sdp->debugfs_dir = debugfs_create_dir(sdp->sd_table_name, gfs2_root);
- if (!sdp->debugfs_dir)
- return -ENOMEM;
- sdp->debugfs_dentry_glocks = debugfs_create_file("glocks",
- S_IFREG | S_IRUGO,
- sdp->debugfs_dir, sdp,
- &gfs2_glocks_fops);
- if (!sdp->debugfs_dentry_glocks)
+ struct dentry *dent;
+
+ dent = debugfs_create_dir(sdp->sd_table_name, gfs2_root);
+ if (IS_ERR_OR_NULL(dent))
+ goto fail;
+ sdp->debugfs_dir = dent;
+
+ dent = debugfs_create_file("glocks",
+ S_IFREG | S_IRUGO,
+ sdp->debugfs_dir, sdp,
+ &gfs2_glocks_fops);
+ if (IS_ERR_OR_NULL(dent))
goto fail;
+ sdp->debugfs_dentry_glocks = dent;
- sdp->debugfs_dentry_glstats = debugfs_create_file("glstats",
- S_IFREG | S_IRUGO,
- sdp->debugfs_dir, sdp,
- &gfs2_glstats_fops);
- if (!sdp->debugfs_dentry_glstats)
+ dent = debugfs_create_file("glstats",
+ S_IFREG | S_IRUGO,
+ sdp->debugfs_dir, sdp,
+ &gfs2_glstats_fops);
+ if (IS_ERR_OR_NULL(dent))
goto fail;
+ sdp->debugfs_dentry_glstats = dent;
- sdp->debugfs_dentry_sbstats = debugfs_create_file("sbstats",
- S_IFREG | S_IRUGO,
- sdp->debugfs_dir, sdp,
- &gfs2_sbstats_fops);
- if (!sdp->debugfs_dentry_sbstats)
+ dent = debugfs_create_file("sbstats",
+ S_IFREG | S_IRUGO,
+ sdp->debugfs_dir, sdp,
+ &gfs2_sbstats_fops);
+ if (IS_ERR_OR_NULL(dent))
goto fail;
+ sdp->debugfs_dentry_sbstats = dent;
return 0;
fail:
gfs2_delete_debugfs_file(sdp);
- return -ENOMEM;
+ return dent ? PTR_ERR(dent) : -ENOMEM;
}
void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp)
@@ -2100,6 +2107,8 @@ void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp)
int gfs2_register_debugfs(void)
{
gfs2_root = debugfs_create_dir("gfs2", NULL);
+ if (IS_ERR(gfs2_root))
+ return PTR_ERR(gfs2_root);
return gfs2_root ? 0 : -ENOMEM;
}
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 7a2dbbc0d634..58b75abf6ab2 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -301,8 +301,10 @@ struct gfs2_blkreserv {
* to the allocation code.
*/
struct gfs2_alloc_parms {
- u32 target;
+ u64 target;
+ u32 min_target;
u32 aflags;
+ u64 allowed;
};
enum {
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 73c72253faac..08bc84d7e768 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -382,7 +382,7 @@ static int alloc_dinode(struct gfs2_inode *ip, u32 flags, unsigned *dblocks)
struct gfs2_alloc_parms ap = { .target = *dblocks, .aflags = flags, };
int error;
- error = gfs2_quota_lock_check(ip);
+ error = gfs2_quota_lock_check(ip, &ap);
if (error)
goto out;
@@ -525,7 +525,7 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name,
int error;
if (da->nr_blocks) {
- error = gfs2_quota_lock_check(dip);
+ error = gfs2_quota_lock_check(dip, &ap);
if (error)
goto fail_quota_locks;
@@ -953,7 +953,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
if (da.nr_blocks) {
struct gfs2_alloc_parms ap = { .target = da.nr_blocks, };
- error = gfs2_quota_lock_check(dip);
+ error = gfs2_quota_lock_check(dip, &ap);
if (error)
goto out_gunlock;
@@ -1470,7 +1470,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
if (da.nr_blocks) {
struct gfs2_alloc_parms ap = { .target = da.nr_blocks, };
- error = gfs2_quota_lock_check(ndip);
+ error = gfs2_quota_lock_check(ndip, &ap);
if (error)
goto out_gunlock;
@@ -1669,6 +1669,7 @@ static int setattr_chown(struct inode *inode, struct iattr *attr)
kuid_t ouid, nuid;
kgid_t ogid, ngid;
int error;
+ struct gfs2_alloc_parms ap;
ouid = inode->i_uid;
ogid = inode->i_gid;
@@ -1696,9 +1697,11 @@ static int setattr_chown(struct inode *inode, struct iattr *attr)
if (error)
goto out;
+ ap.target = gfs2_get_inode_blocks(&ip->i_inode);
+
if (!uid_eq(ouid, NO_UID_QUOTA_CHANGE) ||
!gid_eq(ogid, NO_GID_QUOTA_CHANGE)) {
- error = gfs2_quota_check(ip, nuid, ngid);
+ error = gfs2_quota_check(ip, nuid, ngid, &ap);
if (error)
goto out_gunlock_q;
}
@@ -1713,9 +1716,8 @@ static int setattr_chown(struct inode *inode, struct iattr *attr)
if (!uid_eq(ouid, NO_UID_QUOTA_CHANGE) ||
!gid_eq(ogid, NO_GID_QUOTA_CHANGE)) {
- u64 blocks = gfs2_get_inode_blocks(&ip->i_inode);
- gfs2_quota_change(ip, -blocks, ouid, ogid);
- gfs2_quota_change(ip, blocks, nuid, ngid);
+ gfs2_quota_change(ip, -ap.target, ouid, ogid);
+ gfs2_quota_change(ip, ap.target, nuid, ngid);
}
out_end_trans:
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index 3aa17d4d1cfc..5c27e48aa76f 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -923,6 +923,9 @@ restart:
if (error)
return error;
+ if (test_and_clear_bit(QDF_REFRESH, &qd->qd_flags))
+ force_refresh = FORCE;
+
qd->qd_qb = *(struct gfs2_quota_lvb *)qd->qd_gl->gl_lksb.sb_lvbptr;
if (force_refresh || qd->qd_qb.qb_magic != cpu_to_be32(GFS2_MAGIC)) {
@@ -974,11 +977,8 @@ int gfs2_quota_lock(struct gfs2_inode *ip, kuid_t uid, kgid_t gid)
sizeof(struct gfs2_quota_data *), sort_qd, NULL);
for (x = 0; x < ip->i_res->rs_qa_qd_num; x++) {
- int force = NO_FORCE;
qd = ip->i_res->rs_qa_qd[x];
- if (test_and_clear_bit(QDF_REFRESH, &qd->qd_flags))
- force = FORCE;
- error = do_glock(qd, force, &ip->i_res->rs_qa_qd_ghs[x]);
+ error = do_glock(qd, NO_FORCE, &ip->i_res->rs_qa_qd_ghs[x]);
if (error)
break;
}
@@ -1094,14 +1094,33 @@ static int print_message(struct gfs2_quota_data *qd, char *type)
return 0;
}
-int gfs2_quota_check(struct gfs2_inode *ip, kuid_t uid, kgid_t gid)
+/**
+ * gfs2_quota_check - check if allocating new blocks will exceed quota
+ * @ip: The inode for which this check is being performed
+ * @uid: The uid to check against
+ * @gid: The gid to check against
+ * @ap: The allocation parameters. ap->target contains the requested
+ * blocks. ap->min_target, if set, contains the minimum blks
+ * requested.
+ *
+ * Returns: 0 on success.
+ * min_req = ap->min_target ? ap->min_target : ap->target;
+ * quota must allow atleast min_req blks for success and
+ * ap->allowed is set to the number of blocks allowed
+ *
+ * -EDQUOT otherwise, quota violation. ap->allowed is set to number
+ * of blocks available.
+ */
+int gfs2_quota_check(struct gfs2_inode *ip, kuid_t uid, kgid_t gid,
+ struct gfs2_alloc_parms *ap)
{
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
struct gfs2_quota_data *qd;
- s64 value;
+ s64 value, warn, limit;
unsigned int x;
int error = 0;
+ ap->allowed = UINT_MAX; /* Assume we are permitted a whole lot */
if (!test_bit(GIF_QD_LOCKED, &ip->i_flags))
return 0;
@@ -1115,30 +1134,37 @@ int gfs2_quota_check(struct gfs2_inode *ip, kuid_t uid, kgid_t gid)
qid_eq(qd->qd_id, make_kqid_gid(gid))))
continue;
+ warn = (s64)be64_to_cpu(qd->qd_qb.qb_warn);
+ limit = (s64)be64_to_cpu(qd->qd_qb.qb_limit);
value = (s64)be64_to_cpu(qd->qd_qb.qb_value);
spin_lock(&qd_lock);
value += qd->qd_change;
spin_unlock(&qd_lock);
- if (be64_to_cpu(qd->qd_qb.qb_limit) && (s64)be64_to_cpu(qd->qd_qb.qb_limit) < value) {
- print_message(qd, "exceeded");
- quota_send_warning(qd->qd_id,
- sdp->sd_vfs->s_dev, QUOTA_NL_BHARDWARN);
-
- error = -EDQUOT;
- break;
- } else if (be64_to_cpu(qd->qd_qb.qb_warn) &&
- (s64)be64_to_cpu(qd->qd_qb.qb_warn) < value &&
+ if (limit > 0 && (limit - value) < ap->allowed)
+ ap->allowed = limit - value;
+ /* If we can't meet the target */
+ if (limit && limit < (value + (s64)ap->target)) {
+ /* If no min_target specified or we don't meet
+ * min_target, return -EDQUOT */
+ if (!ap->min_target || ap->min_target > ap->allowed) {
+ print_message(qd, "exceeded");
+ quota_send_warning(qd->qd_id,
+ sdp->sd_vfs->s_dev,
+ QUOTA_NL_BHARDWARN);
+ error = -EDQUOT;
+ break;
+ }
+ } else if (warn && warn < value &&
time_after_eq(jiffies, qd->qd_last_warn +
- gfs2_tune_get(sdp,
- gt_quota_warn_period) * HZ)) {
+ gfs2_tune_get(sdp, gt_quota_warn_period)
+ * HZ)) {
quota_send_warning(qd->qd_id,
sdp->sd_vfs->s_dev, QUOTA_NL_BSOFTWARN);
error = print_message(qd, "warning");
qd->qd_last_warn = jiffies;
}
}
-
return error;
}
diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h
index 55d506eb3c4a..ad04b3acae2b 100644
--- a/fs/gfs2/quota.h
+++ b/fs/gfs2/quota.h
@@ -24,7 +24,8 @@ extern void gfs2_quota_unhold(struct gfs2_inode *ip);
extern int gfs2_quota_lock(struct gfs2_inode *ip, kuid_t uid, kgid_t gid);
extern void gfs2_quota_unlock(struct gfs2_inode *ip);
-extern int gfs2_quota_check(struct gfs2_inode *ip, kuid_t uid, kgid_t gid);
+extern int gfs2_quota_check(struct gfs2_inode *ip, kuid_t uid, kgid_t gid,
+ struct gfs2_alloc_parms *ap);
extern void gfs2_quota_change(struct gfs2_inode *ip, s64 change,
kuid_t uid, kgid_t gid);
@@ -37,7 +38,8 @@ extern int gfs2_quotad(void *data);
extern void gfs2_wake_up_statfs(struct gfs2_sbd *sdp);
-static inline int gfs2_quota_lock_check(struct gfs2_inode *ip)
+static inline int gfs2_quota_lock_check(struct gfs2_inode *ip,
+ struct gfs2_alloc_parms *ap)
{
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
int ret;
@@ -48,7 +50,7 @@ static inline int gfs2_quota_lock_check(struct gfs2_inode *ip)
return ret;
if (sdp->sd_args.ar_quota != GFS2_QUOTA_ON)
return 0;
- ret = gfs2_quota_check(ip, ip->i_inode.i_uid, ip->i_inode.i_gid);
+ ret = gfs2_quota_check(ip, ip->i_inode.i_uid, ip->i_inode.i_gid, ap);
if (ret)
gfs2_quota_unlock(ip);
return ret;
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 9150207f365c..6af2396a317c 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -1946,10 +1946,18 @@ static inline int fast_to_acquire(struct gfs2_rgrpd *rgd)
* @ip: the inode to reserve space for
* @ap: the allocation parameters
*
- * Returns: errno
+ * We try our best to find an rgrp that has at least ap->target blocks
+ * available. After a couple of passes (loops == 2), the prospects of finding
+ * such an rgrp diminish. At this stage, we return the first rgrp that has
+ * atleast ap->min_target blocks available. Either way, we set ap->allowed to
+ * the number of blocks available in the chosen rgrp.
+ *
+ * Returns: 0 on success,
+ * -ENOMEM if a suitable rgrp can't be found
+ * errno otherwise
*/
-int gfs2_inplace_reserve(struct gfs2_inode *ip, const struct gfs2_alloc_parms *ap)
+int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap)
{
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
struct gfs2_rgrpd *begin = NULL;
@@ -2012,7 +2020,7 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, const struct gfs2_alloc_parms *a
/* Skip unuseable resource groups */
if ((rs->rs_rbm.rgd->rd_flags & (GFS2_RGF_NOALLOC |
GFS2_RDF_ERROR)) ||
- (ap->target > rs->rs_rbm.rgd->rd_extfail_pt))
+ (loops == 0 && ap->target > rs->rs_rbm.rgd->rd_extfail_pt))
goto skip_rgrp;
if (sdp->sd_args.ar_rgrplvb)
@@ -2027,11 +2035,13 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, const struct gfs2_alloc_parms *a
goto check_rgrp;
/* If rgrp has enough free space, use it */
- if (rs->rs_rbm.rgd->rd_free_clone >= ap->target) {
+ if (rs->rs_rbm.rgd->rd_free_clone >= ap->target ||
+ (loops == 2 && ap->min_target &&
+ rs->rs_rbm.rgd->rd_free_clone >= ap->min_target)) {
ip->i_rgd = rs->rs_rbm.rgd;
+ ap->allowed = ip->i_rgd->rd_free_clone;
return 0;
}
-
check_rgrp:
/* Check for unlinked inodes which can be reclaimed */
if (rs->rs_rbm.rgd->rd_flags & GFS2_RDF_CHECK)
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h
index b104f4af3afd..68972ecfbb01 100644
--- a/fs/gfs2/rgrp.h
+++ b/fs/gfs2/rgrp.h
@@ -41,7 +41,8 @@ extern void gfs2_rgrp_go_unlock(struct gfs2_holder *gh);
extern struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip);
#define GFS2_AF_ORLOV 1
-extern int gfs2_inplace_reserve(struct gfs2_inode *ip, const struct gfs2_alloc_parms *ap);
+extern int gfs2_inplace_reserve(struct gfs2_inode *ip,
+ struct gfs2_alloc_parms *ap);
extern void gfs2_inplace_release(struct gfs2_inode *ip);
extern int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *n,
diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c
index 0b81f783f787..fd260ce8869a 100644
--- a/fs/gfs2/xattr.c
+++ b/fs/gfs2/xattr.c
@@ -732,7 +732,7 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er,
if (error)
return error;
- error = gfs2_quota_lock_check(ip);
+ error = gfs2_quota_lock_check(ip, &ap);
if (error)
return error;
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index c274aca8e8dc..db76cec3ce21 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -319,7 +319,7 @@ static int hugetlbfs_write_end(struct file *file, struct address_space *mapping,
static void truncate_huge_page(struct page *page)
{
- cancel_dirty_page(page, /* No IO accounting for huge pages? */0);
+ ClearPageDirty(page);
ClearPageUptodate(page);
delete_from_page_cache(page);
}
diff --git a/fs/jffs2/xattr.c b/fs/jffs2/xattr.c
index d72817ac51f6..762c7a3cf43d 100644
--- a/fs/jffs2/xattr.c
+++ b/fs/jffs2/xattr.c
@@ -195,7 +195,7 @@ static int do_verify_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_dat
/* unchecked xdatum is chained with c->xattr_unchecked */
list_del_init(&xd->xindex);
- dbg_xattr("success on verfying xdatum (xid=%u, version=%u)\n",
+ dbg_xattr("success on verifying xdatum (xid=%u, version=%u)\n",
xd->xid, xd->version);
return 0;
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 5d30c56ae075..4cd9798f4948 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -102,7 +102,7 @@ void jfs_error(struct super_block *sb, const char *fmt, ...)
vaf.fmt = fmt;
vaf.va = &args;
- pr_err("ERROR: (device %s): %pf: %pV\n",
+ pr_err("ERROR: (device %s): %ps: %pV\n",
sb->s_id, __builtin_return_address(0), &vaf);
va_end(args);
diff --git a/fs/namei.c b/fs/namei.c
index c83145af4bfc..76fb76a0818b 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -119,15 +119,14 @@
* PATH_MAX includes the nul terminator --RR.
*/
-#define EMBEDDED_NAME_MAX (PATH_MAX - sizeof(struct filename))
+#define EMBEDDED_NAME_MAX (PATH_MAX - offsetof(struct filename, iname))
struct filename *
getname_flags(const char __user *filename, int flags, int *empty)
{
- struct filename *result, *err;
- int len;
- long max;
+ struct filename *result;
char *kname;
+ int len;
result = audit_reusename(filename);
if (result)
@@ -136,22 +135,18 @@ getname_flags(const char __user *filename, int flags, int *empty)
result = __getname();
if (unlikely(!result))
return ERR_PTR(-ENOMEM);
- result->refcnt = 1;
/*
* First, try to embed the struct filename inside the names_cache
* allocation
*/
- kname = (char *)result + sizeof(*result);
+ kname = (char *)result->iname;
result->name = kname;
- result->separate = false;
- max = EMBEDDED_NAME_MAX;
-recopy:
- len = strncpy_from_user(kname, filename, max);
+ len = strncpy_from_user(kname, filename, EMBEDDED_NAME_MAX);
if (unlikely(len < 0)) {
- err = ERR_PTR(len);
- goto error;
+ __putname(result);
+ return ERR_PTR(len);
}
/*
@@ -160,43 +155,49 @@ recopy:
* names_cache allocation for the pathname, and re-do the copy from
* userland.
*/
- if (len == EMBEDDED_NAME_MAX && max == EMBEDDED_NAME_MAX) {
+ if (unlikely(len == EMBEDDED_NAME_MAX)) {
+ const size_t size = offsetof(struct filename, iname[1]);
kname = (char *)result;
- result = kzalloc(sizeof(*result), GFP_KERNEL);
- if (!result) {
- err = ERR_PTR(-ENOMEM);
- result = (struct filename *)kname;
- goto error;
+ /*
+ * size is chosen that way we to guarantee that
+ * result->iname[0] is within the same object and that
+ * kname can't be equal to result->iname, no matter what.
+ */
+ result = kzalloc(size, GFP_KERNEL);
+ if (unlikely(!result)) {
+ __putname(kname);
+ return ERR_PTR(-ENOMEM);
}
result->name = kname;
- result->separate = true;
- result->refcnt = 1;
- max = PATH_MAX;
- goto recopy;
+ len = strncpy_from_user(kname, filename, PATH_MAX);
+ if (unlikely(len < 0)) {
+ __putname(kname);
+ kfree(result);
+ return ERR_PTR(len);
+ }
+ if (unlikely(len == PATH_MAX)) {
+ __putname(kname);
+ kfree(result);
+ return ERR_PTR(-ENAMETOOLONG);
+ }
}
+ result->refcnt = 1;
/* The empty path is special. */
if (unlikely(!len)) {
if (empty)
*empty = 1;
- err = ERR_PTR(-ENOENT);
- if (!(flags & LOOKUP_EMPTY))
- goto error;
+ if (!(flags & LOOKUP_EMPTY)) {
+ putname(result);
+ return ERR_PTR(-ENOENT);
+ }
}
- err = ERR_PTR(-ENAMETOOLONG);
- if (unlikely(len >= PATH_MAX))
- goto error;
-
result->uptr = filename;
result->aname = NULL;
audit_getname(result);
return result;
-
-error:
- putname(result);
- return err;
}
struct filename *
@@ -216,8 +217,7 @@ getname_kernel(const char * filename)
return ERR_PTR(-ENOMEM);
if (len <= EMBEDDED_NAME_MAX) {
- result->name = (char *)(result) + sizeof(*result);
- result->separate = false;
+ result->name = (char *)result->iname;
} else if (len <= PATH_MAX) {
struct filename *tmp;
@@ -227,7 +227,6 @@ getname_kernel(const char * filename)
return ERR_PTR(-ENOMEM);
}
tmp->name = (char *)result;
- tmp->separate = true;
result = tmp;
} else {
__putname(result);
@@ -249,7 +248,7 @@ void putname(struct filename *name)
if (--name->refcnt > 0)
return;
- if (name->separate) {
+ if (name->name != name->iname) {
__putname(name->name);
kfree(name);
} else
@@ -1851,10 +1850,11 @@ static int link_path_walk(const char *name, struct nameidata *nd)
return err;
}
-static int path_init(int dfd, const char *name, unsigned int flags,
+static int path_init(int dfd, const struct filename *name, unsigned int flags,
struct nameidata *nd)
{
int retval = 0;
+ const char *s = name->name;
nd->last_type = LAST_ROOT; /* if there are only slashes... */
nd->flags = flags | LOOKUP_JUMPED | LOOKUP_PARENT;
@@ -1863,7 +1863,7 @@ static int path_init(int dfd, const char *name, unsigned int flags,
if (flags & LOOKUP_ROOT) {
struct dentry *root = nd->root.dentry;
struct inode *inode = root->d_inode;
- if (*name) {
+ if (*s) {
if (!d_can_lookup(root))
return -ENOTDIR;
retval = inode_permission(inode, MAY_EXEC);
@@ -1885,7 +1885,7 @@ static int path_init(int dfd, const char *name, unsigned int flags,
nd->root.mnt = NULL;
nd->m_seq = read_seqbegin(&mount_lock);
- if (*name=='/') {
+ if (*s == '/') {
if (flags & LOOKUP_RCU) {
rcu_read_lock();
nd->seq = set_root_rcu(nd);
@@ -1919,7 +1919,7 @@ static int path_init(int dfd, const char *name, unsigned int flags,
dentry = f.file->f_path.dentry;
- if (*name) {
+ if (*s) {
if (!d_can_lookup(dentry)) {
fdput(f);
return -ENOTDIR;
@@ -1949,7 +1949,7 @@ static int path_init(int dfd, const char *name, unsigned int flags,
return -ECHILD;
done:
current->total_link_count = 0;
- return link_path_walk(name, nd);
+ return link_path_walk(s, nd);
}
static void path_cleanup(struct nameidata *nd)
@@ -1972,7 +1972,7 @@ static inline int lookup_last(struct nameidata *nd, struct path *path)
}
/* Returns 0 and nd will be valid on success; Retuns error, otherwise. */
-static int path_lookupat(int dfd, const char *name,
+static int path_lookupat(int dfd, const struct filename *name,
unsigned int flags, struct nameidata *nd)
{
struct path path;
@@ -2027,31 +2027,17 @@ static int path_lookupat(int dfd, const char *name,
static int filename_lookup(int dfd, struct filename *name,
unsigned int flags, struct nameidata *nd)
{
- int retval = path_lookupat(dfd, name->name, flags | LOOKUP_RCU, nd);
+ int retval = path_lookupat(dfd, name, flags | LOOKUP_RCU, nd);
if (unlikely(retval == -ECHILD))
- retval = path_lookupat(dfd, name->name, flags, nd);
+ retval = path_lookupat(dfd, name, flags, nd);
if (unlikely(retval == -ESTALE))
- retval = path_lookupat(dfd, name->name,
- flags | LOOKUP_REVAL, nd);
+ retval = path_lookupat(dfd, name, flags | LOOKUP_REVAL, nd);
if (likely(!retval))
audit_inode(name, nd->path.dentry, flags & LOOKUP_PARENT);
return retval;
}
-static int do_path_lookup(int dfd, const char *name,
- unsigned int flags, struct nameidata *nd)
-{
- struct filename *filename = getname_kernel(name);
- int retval = PTR_ERR(filename);
-
- if (!IS_ERR(filename)) {
- retval = filename_lookup(dfd, filename, flags, nd);
- putname(filename);
- }
- return retval;
-}
-
/* does lookup, returns the object with parent locked */
struct dentry *kern_path_locked(const char *name, struct path *path)
{
@@ -2089,9 +2075,15 @@ out:
int kern_path(const char *name, unsigned int flags, struct path *path)
{
struct nameidata nd;
- int res = do_path_lookup(AT_FDCWD, name, flags, &nd);
- if (!res)
- *path = nd.path;
+ struct filename *filename = getname_kernel(name);
+ int res = PTR_ERR(filename);
+
+ if (!IS_ERR(filename)) {
+ res = filename_lookup(AT_FDCWD, filename, flags, &nd);
+ putname(filename);
+ if (!res)
+ *path = nd.path;
+ }
return res;
}
EXPORT_SYMBOL(kern_path);
@@ -2108,15 +2100,22 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt,
const char *name, unsigned int flags,
struct path *path)
{
- struct nameidata nd;
- int err;
- nd.root.dentry = dentry;
- nd.root.mnt = mnt;
+ struct filename *filename = getname_kernel(name);
+ int err = PTR_ERR(filename);
+
BUG_ON(flags & LOOKUP_PARENT);
- /* the first argument of do_path_lookup() is ignored with LOOKUP_ROOT */
- err = do_path_lookup(AT_FDCWD, name, flags | LOOKUP_ROOT, &nd);
- if (!err)
- *path = nd.path;
+
+ /* the first argument of filename_lookup() is ignored with LOOKUP_ROOT */
+ if (!IS_ERR(filename)) {
+ struct nameidata nd;
+ nd.root.dentry = dentry;
+ nd.root.mnt = mnt;
+ err = filename_lookup(AT_FDCWD, filename,
+ flags | LOOKUP_ROOT, &nd);
+ if (!err)
+ *path = nd.path;
+ putname(filename);
+ }
return err;
}
EXPORT_SYMBOL(vfs_path_lookup);
@@ -2138,9 +2137,7 @@ static struct dentry *lookup_hash(struct nameidata *nd)
* @len: maximum length @len should be interpreted to
*
* Note that this routine is purely a helper for filesystem usage and should
- * not be called by generic code. Also note that by using this function the
- * nameidata argument is passed to the filesystem methods and a filesystem
- * using this helper needs to be prepared for that.
+ * not be called by generic code.
*/
struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
{
@@ -2341,7 +2338,8 @@ out:
* Returns 0 and "path" will be valid on success; Returns error otherwise.
*/
static int
-path_mountpoint(int dfd, const char *name, struct path *path, unsigned int flags)
+path_mountpoint(int dfd, const struct filename *name, struct path *path,
+ unsigned int flags)
{
struct nameidata nd;
int err;
@@ -2370,20 +2368,20 @@ out:
}
static int
-filename_mountpoint(int dfd, struct filename *s, struct path *path,
+filename_mountpoint(int dfd, struct filename *name, struct path *path,
unsigned int flags)
{
int error;
- if (IS_ERR(s))
- return PTR_ERR(s);
- error = path_mountpoint(dfd, s->name, path, flags | LOOKUP_RCU);
+ if (IS_ERR(name))
+ return PTR_ERR(name);
+ error = path_mountpoint(dfd, name, path, flags | LOOKUP_RCU);
if (unlikely(error == -ECHILD))
- error = path_mountpoint(dfd, s->name, path, flags);
+ error = path_mountpoint(dfd, name, path, flags);
if (unlikely(error == -ESTALE))
- error = path_mountpoint(dfd, s->name, path, flags | LOOKUP_REVAL);
+ error = path_mountpoint(dfd, name, path, flags | LOOKUP_REVAL);
if (likely(!error))
- audit_inode(s, path->dentry, 0);
- putname(s);
+ audit_inode(name, path->dentry, 0);
+ putname(name);
return error;
}
@@ -3156,7 +3154,7 @@ static int do_tmpfile(int dfd, struct filename *pathname,
static const struct qstr name = QSTR_INIT("/", 1);
struct dentry *dentry, *child;
struct inode *dir;
- int error = path_lookupat(dfd, pathname->name,
+ int error = path_lookupat(dfd, pathname,
flags | LOOKUP_DIRECTORY, nd);
if (unlikely(error))
return error;
@@ -3229,7 +3227,7 @@ static struct file *path_openat(int dfd, struct filename *pathname,
goto out;
}
- error = path_init(dfd, pathname->name, flags, nd);
+ error = path_init(dfd, pathname, flags, nd);
if (unlikely(error))
goto out;
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 849ed784d6ac..759931088094 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1876,11 +1876,6 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page)
* request from the inode / page_private pointer and
* release it */
nfs_inode_remove_request(req);
- /*
- * In case nfs_inode_remove_request has marked the
- * page as being dirty
- */
- cancel_dirty_page(page, PAGE_CACHE_SIZE);
nfs_unlock_and_release_request(req);
}
diff --git a/fs/ntfs/Makefile b/fs/ntfs/Makefile
index 36ae529511c4..2ff263e6d363 100644
--- a/fs/ntfs/Makefile
+++ b/fs/ntfs/Makefile
@@ -8,7 +8,7 @@ ntfs-y := aops.o attrib.o collate.o compress.o debug.o dir.o file.o \
ntfs-$(CONFIG_NTFS_RW) += bitmap.o lcnalloc.o logfile.o quota.o usnjrnl.o
-ccflags-y := -DNTFS_VERSION=\"2.1.31\"
+ccflags-y := -DNTFS_VERSION=\"2.1.32\"
ccflags-$(CONFIG_NTFS_DEBUG) += -DDEBUG
ccflags-$(CONFIG_NTFS_RW) += -DNTFS_RW
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index f16f2d8401fe..c1da78dad1af 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -1,7 +1,7 @@
/*
* file.c - NTFS kernel file operations. Part of the Linux-NTFS project.
*
- * Copyright (c) 2001-2014 Anton Altaparmakov and Tuxera Inc.
+ * Copyright (c) 2001-2015 Anton Altaparmakov and Tuxera Inc.
*
* This program/include file is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as published
@@ -328,62 +328,168 @@ err_out:
return err;
}
-/**
- * ntfs_fault_in_pages_readable -
- *
- * Fault a number of userspace pages into pagetables.
- *
- * Unlike include/linux/pagemap.h::fault_in_pages_readable(), this one copes
- * with more than two userspace pages as well as handling the single page case
- * elegantly.
- *
- * If you find this difficult to understand, then think of the while loop being
- * the following code, except that we do without the integer variable ret:
- *
- * do {
- * ret = __get_user(c, uaddr);
- * uaddr += PAGE_SIZE;
- * } while (!ret && uaddr < end);
- *
- * Note, the final __get_user() may well run out-of-bounds of the user buffer,
- * but _not_ out-of-bounds of the page the user buffer belongs to, and since
- * this is only a read and not a write, and since it is still in the same page,
- * it should not matter and this makes the code much simpler.
- */
-static inline void ntfs_fault_in_pages_readable(const char __user *uaddr,
- int bytes)
+static ssize_t ntfs_prepare_file_for_write(struct file *file, loff_t *ppos,
+ size_t *count)
{
- const char __user *end;
- volatile char c;
-
- /* Set @end to the first byte outside the last page we care about. */
- end = (const char __user*)PAGE_ALIGN((unsigned long)uaddr + bytes);
-
- while (!__get_user(c, uaddr) && (uaddr += PAGE_SIZE, uaddr < end))
- ;
-}
-
-/**
- * ntfs_fault_in_pages_readable_iovec -
- *
- * Same as ntfs_fault_in_pages_readable() but operates on an array of iovecs.
- */
-static inline void ntfs_fault_in_pages_readable_iovec(const struct iovec *iov,
- size_t iov_ofs, int bytes)
-{
- do {
- const char __user *buf;
- unsigned len;
+ loff_t pos;
+ s64 end, ll;
+ ssize_t err;
+ unsigned long flags;
+ struct inode *vi = file_inode(file);
+ ntfs_inode *base_ni, *ni = NTFS_I(vi);
+ ntfs_volume *vol = ni->vol;
- buf = iov->iov_base + iov_ofs;
- len = iov->iov_len - iov_ofs;
- if (len > bytes)
- len = bytes;
- ntfs_fault_in_pages_readable(buf, len);
- bytes -= len;
- iov++;
- iov_ofs = 0;
- } while (bytes);
+ ntfs_debug("Entering for i_ino 0x%lx, attribute type 0x%x, pos "
+ "0x%llx, count 0x%lx.", vi->i_ino,
+ (unsigned)le32_to_cpu(ni->type),
+ (unsigned long long)*ppos, (unsigned long)*count);
+ /* We can write back this queue in page reclaim. */
+ current->backing_dev_info = inode_to_bdi(vi);
+ err = generic_write_checks(file, ppos, count, S_ISBLK(vi->i_mode));
+ if (unlikely(err))
+ goto out;
+ /*
+ * All checks have passed. Before we start doing any writing we want
+ * to abort any totally illegal writes.
+ */
+ BUG_ON(NInoMstProtected(ni));
+ BUG_ON(ni->type != AT_DATA);
+ /* If file is encrypted, deny access, just like NT4. */
+ if (NInoEncrypted(ni)) {
+ /* Only $DATA attributes can be encrypted. */
+ /*
+ * Reminder for later: Encrypted files are _always_
+ * non-resident so that the content can always be encrypted.
+ */
+ ntfs_debug("Denying write access to encrypted file.");
+ err = -EACCES;
+ goto out;
+ }
+ if (NInoCompressed(ni)) {
+ /* Only unnamed $DATA attribute can be compressed. */
+ BUG_ON(ni->name_len);
+ /*
+ * Reminder for later: If resident, the data is not actually
+ * compressed. Only on the switch to non-resident does
+ * compression kick in. This is in contrast to encrypted files
+ * (see above).
+ */
+ ntfs_error(vi->i_sb, "Writing to compressed files is not "
+ "implemented yet. Sorry.");
+ err = -EOPNOTSUPP;
+ goto out;
+ }
+ if (*count == 0)
+ goto out;
+ base_ni = ni;
+ if (NInoAttr(ni))
+ base_ni = ni->ext.base_ntfs_ino;
+ err = file_remove_suid(file);
+ if (unlikely(err))
+ goto out;
+ /*
+ * Our ->update_time method always succeeds thus file_update_time()
+ * cannot fail either so there is no need to check the return code.
+ */
+ file_update_time(file);
+ pos = *ppos;
+ /* The first byte after the last cluster being written to. */
+ end = (pos + *count + vol->cluster_size_mask) &
+ ~(u64)vol->cluster_size_mask;
+ /*
+ * If the write goes beyond the allocated size, extend the allocation
+ * to cover the whole of the write, rounded up to the nearest cluster.
+ */
+ read_lock_irqsave(&ni->size_lock, flags);
+ ll = ni->allocated_size;
+ read_unlock_irqrestore(&ni->size_lock, flags);
+ if (end > ll) {
+ /*
+ * Extend the allocation without changing the data size.
+ *
+ * Note we ensure the allocation is big enough to at least
+ * write some data but we do not require the allocation to be
+ * complete, i.e. it may be partial.
+ */
+ ll = ntfs_attr_extend_allocation(ni, end, -1, pos);
+ if (likely(ll >= 0)) {
+ BUG_ON(pos >= ll);
+ /* If the extension was partial truncate the write. */
+ if (end > ll) {
+ ntfs_debug("Truncating write to inode 0x%lx, "
+ "attribute type 0x%x, because "
+ "the allocation was only "
+ "partially extended.",
+ vi->i_ino, (unsigned)
+ le32_to_cpu(ni->type));
+ *count = ll - pos;
+ }
+ } else {
+ err = ll;
+ read_lock_irqsave(&ni->size_lock, flags);
+ ll = ni->allocated_size;
+ read_unlock_irqrestore(&ni->size_lock, flags);
+ /* Perform a partial write if possible or fail. */
+ if (pos < ll) {
+ ntfs_debug("Truncating write to inode 0x%lx "
+ "attribute type 0x%x, because "
+ "extending the allocation "
+ "failed (error %d).",
+ vi->i_ino, (unsigned)
+ le32_to_cpu(ni->type),
+ (int)-err);
+ *count = ll - pos;
+ } else {
+ if (err != -ENOSPC)
+ ntfs_error(vi->i_sb, "Cannot perform "
+ "write to inode "
+ "0x%lx, attribute "
+ "type 0x%x, because "
+ "extending the "
+ "allocation failed "
+ "(error %ld).",
+ vi->i_ino, (unsigned)
+ le32_to_cpu(ni->type),
+ (long)-err);
+ else
+ ntfs_debug("Cannot perform write to "
+ "inode 0x%lx, "
+ "attribute type 0x%x, "
+ "because there is not "
+ "space left.",
+ vi->i_ino, (unsigned)
+ le32_to_cpu(ni->type));
+ goto out;
+ }
+ }
+ }
+ /*
+ * If the write starts beyond the initialized size, extend it up to the
+ * beginning of the write and initialize all non-sparse space between
+ * the old initialized size and the new one. This automatically also
+ * increments the vfs inode->i_size to keep it above or equal to the
+ * initialized_size.
+ */
+ read_lock_irqsave(&ni->size_lock, flags);
+ ll = ni->initialized_size;
+ read_unlock_irqrestore(&ni->size_lock, flags);
+ if (pos > ll) {
+ /*
+ * Wait for ongoing direct i/o to complete before proceeding.
+ * New direct i/o cannot start as we hold i_mutex.
+ */
+ inode_dio_wait(vi);
+ err = ntfs_attr_extend_initialized(ni, pos);
+ if (unlikely(err < 0))
+ ntfs_error(vi->i_sb, "Cannot perform write to inode "
+ "0x%lx, attribute type 0x%x, because "
+ "extending the initialized size "
+ "failed (error %d).", vi->i_ino,
+ (unsigned)le32_to_cpu(ni->type),
+ (int)-err);
+ }
+out:
+ return err;
}
/**
@@ -420,8 +526,8 @@ static inline int __ntfs_grab_cache_pages(struct address_space *mapping,
goto err_out;
}
}
- err = add_to_page_cache_lru(*cached_page, mapping, index,
- GFP_KERNEL);
+ err = add_to_page_cache_lru(*cached_page, mapping,
+ index, GFP_KERNEL);
if (unlikely(err)) {
if (err == -EEXIST)
continue;
@@ -1267,180 +1373,6 @@ rl_not_mapped_enoent:
return err;
}
-/*
- * Copy as much as we can into the pages and return the number of bytes which
- * were successfully copied. If a fault is encountered then clear the pages
- * out to (ofs + bytes) and return the number of bytes which were copied.
- */
-static inline size_t ntfs_copy_from_user(struct page **pages,
- unsigned nr_pages, unsigned ofs, const char __user *buf,
- size_t bytes)
-{
- struct page **last_page = pages + nr_pages;
- char *addr;
- size_t total = 0;
- unsigned len;
- int left;
-
- do {
- len = PAGE_CACHE_SIZE - ofs;
- if (len > bytes)
- len = bytes;
- addr = kmap_atomic(*pages);
- left = __copy_from_user_inatomic(addr + ofs, buf, len);
- kunmap_atomic(addr);
- if (unlikely(left)) {
- /* Do it the slow way. */
- addr = kmap(*pages);
- left = __copy_from_user(addr + ofs, buf, len);
- kunmap(*pages);
- if (unlikely(left))
- goto err_out;
- }
- total += len;
- bytes -= len;
- if (!bytes)
- break;
- buf += len;
- ofs = 0;
- } while (++pages < last_page);
-out:
- return total;
-err_out:
- total += len - left;
- /* Zero the rest of the target like __copy_from_user(). */
- while (++pages < last_page) {
- bytes -= len;
- if (!bytes)
- break;
- len = PAGE_CACHE_SIZE;
- if (len > bytes)
- len = bytes;
- zero_user(*pages, 0, len);
- }
- goto out;
-}
-
-static size_t __ntfs_copy_from_user_iovec_inatomic(char *vaddr,
- const struct iovec *iov, size_t iov_ofs, size_t bytes)
-{
- size_t total = 0;
-
- while (1) {
- const char __user *buf = iov->iov_base + iov_ofs;
- unsigned len;
- size_t left;
-
- len = iov->iov_len - iov_ofs;
- if (len > bytes)
- len = bytes;
- left = __copy_from_user_inatomic(vaddr, buf, len);
- total += len;
- bytes -= len;
- vaddr += len;
- if (unlikely(left)) {
- total -= left;
- break;
- }
- if (!bytes)
- break;
- iov++;
- iov_ofs = 0;
- }
- return total;
-}
-
-static inline void ntfs_set_next_iovec(const struct iovec **iovp,
- size_t *iov_ofsp, size_t bytes)
-{
- const struct iovec *iov = *iovp;
- size_t iov_ofs = *iov_ofsp;
-
- while (bytes) {
- unsigned len;
-
- len = iov->iov_len - iov_ofs;
- if (len > bytes)
- len = bytes;
- bytes -= len;
- iov_ofs += len;
- if (iov->iov_len == iov_ofs) {
- iov++;
- iov_ofs = 0;
- }
- }
- *iovp = iov;
- *iov_ofsp = iov_ofs;
-}
-
-/*
- * This has the same side-effects and return value as ntfs_copy_from_user().
- * The difference is that on a fault we need to memset the remainder of the
- * pages (out to offset + bytes), to emulate ntfs_copy_from_user()'s
- * single-segment behaviour.
- *
- * We call the same helper (__ntfs_copy_from_user_iovec_inatomic()) both when
- * atomic and when not atomic. This is ok because it calls
- * __copy_from_user_inatomic() and it is ok to call this when non-atomic. In
- * fact, the only difference between __copy_from_user_inatomic() and
- * __copy_from_user() is that the latter calls might_sleep() and the former
- * should not zero the tail of the buffer on error. And on many architectures
- * __copy_from_user_inatomic() is just defined to __copy_from_user() so it
- * makes no difference at all on those architectures.
- */
-static inline size_t ntfs_copy_from_user_iovec(struct page **pages,
- unsigned nr_pages, unsigned ofs, const struct iovec **iov,
- size_t *iov_ofs, size_t bytes)
-{
- struct page **last_page = pages + nr_pages;
- char *addr;
- size_t copied, len, total = 0;
-
- do {
- len = PAGE_CACHE_SIZE - ofs;
- if (len > bytes)
- len = bytes;
- addr = kmap_atomic(*pages);
- copied = __ntfs_copy_from_user_iovec_inatomic(addr + ofs,
- *iov, *iov_ofs, len);
- kunmap_atomic(addr);
- if (unlikely(copied != len)) {
- /* Do it the slow way. */
- addr = kmap(*pages);
- copied = __ntfs_copy_from_user_iovec_inatomic(addr +
- ofs, *iov, *iov_ofs, len);
- if (unlikely(copied != len))
- goto err_out;
- kunmap(*pages);
- }
- total += len;
- ntfs_set_next_iovec(iov, iov_ofs, len);
- bytes -= len;
- if (!bytes)
- break;
- ofs = 0;
- } while (++pages < last_page);
-out:
- return total;
-err_out:
- BUG_ON(copied > len);
- /* Zero the rest of the target like __copy_from_user(). */
- memset(addr + ofs + copied, 0, len - copied);
- kunmap(*pages);
- total += copied;
- ntfs_set_next_iovec(iov, iov_ofs, copied);
- while (++pages < last_page) {
- bytes -= len;
- if (!bytes)
- break;
- len = PAGE_CACHE_SIZE;
- if (len > bytes)
- len = bytes;
- zero_user(*pages, 0, len);
- }
- goto out;
-}
-
static inline void ntfs_flush_dcache_pages(struct page **pages,
unsigned nr_pages)
{
@@ -1761,86 +1693,83 @@ err_out:
return err;
}
-static void ntfs_write_failed(struct address_space *mapping, loff_t to)
+/*
+ * Copy as much as we can into the pages and return the number of bytes which
+ * were successfully copied. If a fault is encountered then clear the pages
+ * out to (ofs + bytes) and return the number of bytes which were copied.
+ */
+static size_t ntfs_copy_from_user_iter(struct page **pages, unsigned nr_pages,
+ unsigned ofs, struct iov_iter *i, size_t bytes)
{
- struct inode *inode = mapping->host;
+ struct page **last_page = pages + nr_pages;
+ size_t total = 0;
+ struct iov_iter data = *i;
+ unsigned len, copied;
- if (to > inode->i_size) {
- truncate_pagecache(inode, inode->i_size);
- ntfs_truncate_vfs(inode);
- }
+ do {
+ len = PAGE_CACHE_SIZE - ofs;
+ if (len > bytes)
+ len = bytes;
+ copied = iov_iter_copy_from_user_atomic(*pages, &data, ofs,
+ len);
+ total += copied;
+ bytes -= copied;
+ if (!bytes)
+ break;
+ iov_iter_advance(&data, copied);
+ if (copied < len)
+ goto err;
+ ofs = 0;
+ } while (++pages < last_page);
+out:
+ return total;
+err:
+ /* Zero the rest of the target like __copy_from_user(). */
+ len = PAGE_CACHE_SIZE - copied;
+ do {
+ if (len > bytes)
+ len = bytes;
+ zero_user(*pages, copied, len);
+ bytes -= len;
+ copied = 0;
+ len = PAGE_CACHE_SIZE;
+ } while (++pages < last_page);
+ goto out;
}
/**
- * ntfs_file_buffered_write -
- *
- * Locking: The vfs is holding ->i_mutex on the inode.
+ * ntfs_perform_write - perform buffered write to a file
+ * @file: file to write to
+ * @i: iov_iter with data to write
+ * @pos: byte offset in file at which to begin writing to
*/
-static ssize_t ntfs_file_buffered_write(struct kiocb *iocb,
- const struct iovec *iov, unsigned long nr_segs,
- loff_t pos, loff_t *ppos, size_t count)
+static ssize_t ntfs_perform_write(struct file *file, struct iov_iter *i,
+ loff_t pos)
{
- struct file *file = iocb->ki_filp;
struct address_space *mapping = file->f_mapping;
struct inode *vi = mapping->host;
ntfs_inode *ni = NTFS_I(vi);
ntfs_volume *vol = ni->vol;
struct page *pages[NTFS_MAX_PAGES_PER_CLUSTER];
struct page *cached_page = NULL;
- char __user *buf = NULL;
- s64 end, ll;
VCN last_vcn;
LCN lcn;
- unsigned long flags;
- size_t bytes, iov_ofs = 0; /* Offset in the current iovec. */
- ssize_t status, written;
+ size_t bytes;
+ ssize_t status, written = 0;
unsigned nr_pages;
- int err;
- ntfs_debug("Entering for i_ino 0x%lx, attribute type 0x%x, "
- "pos 0x%llx, count 0x%lx.",
- vi->i_ino, (unsigned)le32_to_cpu(ni->type),
- (unsigned long long)pos, (unsigned long)count);
- if (unlikely(!count))
- return 0;
- BUG_ON(NInoMstProtected(ni));
- /*
- * If the attribute is not an index root and it is encrypted or
- * compressed, we cannot write to it yet. Note we need to check for
- * AT_INDEX_ALLOCATION since this is the type of both directory and
- * index inodes.
- */
- if (ni->type != AT_INDEX_ALLOCATION) {
- /* If file is encrypted, deny access, just like NT4. */
- if (NInoEncrypted(ni)) {
- /*
- * Reminder for later: Encrypted files are _always_
- * non-resident so that the content can always be
- * encrypted.
- */
- ntfs_debug("Denying write access to encrypted file.");
- return -EACCES;
- }
- if (NInoCompressed(ni)) {
- /* Only unnamed $DATA attribute can be compressed. */
- BUG_ON(ni->type != AT_DATA);
- BUG_ON(ni->name_len);
- /*
- * Reminder for later: If resident, the data is not
- * actually compressed. Only on the switch to non-
- * resident does compression kick in. This is in
- * contrast to encrypted files (see above).
- */
- ntfs_error(vi->i_sb, "Writing to compressed files is "
- "not implemented yet. Sorry.");
- return -EOPNOTSUPP;
- }
- }
+ ntfs_debug("Entering for i_ino 0x%lx, attribute type 0x%x, pos "
+ "0x%llx, count 0x%lx.", vi->i_ino,
+ (unsigned)le32_to_cpu(ni->type),
+ (unsigned long long)pos,
+ (unsigned long)iov_iter_count(i));
/*
* If a previous ntfs_truncate() failed, repeat it and abort if it
* fails again.
*/
if (unlikely(NInoTruncateFailed(ni))) {
+ int err;
+
inode_dio_wait(vi);
err = ntfs_truncate(vi);
if (err || NInoTruncateFailed(ni)) {
@@ -1854,81 +1783,6 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb,
return err;
}
}
- /* The first byte after the write. */
- end = pos + count;
- /*
- * If the write goes beyond the allocated size, extend the allocation
- * to cover the whole of the write, rounded up to the nearest cluster.
- */
- read_lock_irqsave(&ni->size_lock, flags);
- ll = ni->allocated_size;
- read_unlock_irqrestore(&ni->size_lock, flags);
- if (end > ll) {
- /* Extend the allocation without changing the data size. */
- ll = ntfs_attr_extend_allocation(ni, end, -1, pos);
- if (likely(ll >= 0)) {
- BUG_ON(pos >= ll);
- /* If the extension was partial truncate the write. */
- if (end > ll) {
- ntfs_debug("Truncating write to inode 0x%lx, "
- "attribute type 0x%x, because "
- "the allocation was only "
- "partially extended.",
- vi->i_ino, (unsigned)
- le32_to_cpu(ni->type));
- end = ll;
- count = ll - pos;
- }
- } else {
- err = ll;
- read_lock_irqsave(&ni->size_lock, flags);
- ll = ni->allocated_size;
- read_unlock_irqrestore(&ni->size_lock, flags);
- /* Perform a partial write if possible or fail. */
- if (pos < ll) {
- ntfs_debug("Truncating write to inode 0x%lx, "
- "attribute type 0x%x, because "
- "extending the allocation "
- "failed (error code %i).",
- vi->i_ino, (unsigned)
- le32_to_cpu(ni->type), err);
- end = ll;
- count = ll - pos;
- } else {
- ntfs_error(vol->sb, "Cannot perform write to "
- "inode 0x%lx, attribute type "
- "0x%x, because extending the "
- "allocation failed (error "
- "code %i).", vi->i_ino,
- (unsigned)
- le32_to_cpu(ni->type), err);
- return err;
- }
- }
- }
- written = 0;
- /*
- * If the write starts beyond the initialized size, extend it up to the
- * beginning of the write and initialize all non-sparse space between
- * the old initialized size and the new one. This automatically also
- * increments the vfs inode->i_size to keep it above or equal to the
- * initialized_size.
- */
- read_lock_irqsave(&ni->size_lock, flags);
- ll = ni->initialized_size;
- read_unlock_irqrestore(&ni->size_lock, flags);
- if (pos > ll) {
- err = ntfs_attr_extend_initialized(ni, pos);
- if (err < 0) {
- ntfs_error(vol->sb, "Cannot perform write to inode "
- "0x%lx, attribute type 0x%x, because "
- "extending the initialized size "
- "failed (error code %i).", vi->i_ino,
- (unsigned)le32_to_cpu(ni->type), err);
- status = err;
- goto err_out;
- }
- }
/*
* Determine the number of pages per cluster for non-resident
* attributes.
@@ -1936,10 +1790,7 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb,
nr_pages = 1;
if (vol->cluster_size > PAGE_CACHE_SIZE && NInoNonResident(ni))
nr_pages = vol->cluster_size >> PAGE_CACHE_SHIFT;
- /* Finally, perform the actual write. */
last_vcn = -1;
- if (likely(nr_segs == 1))
- buf = iov->iov_base;
do {
VCN vcn;
pgoff_t idx, start_idx;
@@ -1964,10 +1815,10 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb,
vol->cluster_size_bits, false);
up_read(&ni->runlist.lock);
if (unlikely(lcn < LCN_HOLE)) {
- status = -EIO;
if (lcn == LCN_ENOMEM)
status = -ENOMEM;
- else
+ else {
+ status = -EIO;
ntfs_error(vol->sb, "Cannot "
"perform write to "
"inode 0x%lx, "
@@ -1976,6 +1827,7 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb,
"is corrupt.",
vi->i_ino, (unsigned)
le32_to_cpu(ni->type));
+ }
break;
}
if (lcn == LCN_HOLE) {
@@ -1988,8 +1840,9 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb,
}
}
}
- if (bytes > count)
- bytes = count;
+ if (bytes > iov_iter_count(i))
+ bytes = iov_iter_count(i);
+again:
/*
* Bring in the user page(s) that we will copy from _first_.
* Otherwise there is a nasty deadlock on copying from the same
@@ -1998,10 +1851,10 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb,
* pages being swapped out between us bringing them into memory
* and doing the actual copying.
*/
- if (likely(nr_segs == 1))
- ntfs_fault_in_pages_readable(buf, bytes);
- else
- ntfs_fault_in_pages_readable_iovec(iov, iov_ofs, bytes);
+ if (unlikely(iov_iter_fault_in_multipages_readable(i, bytes))) {
+ status = -EFAULT;
+ break;
+ }
/* Get and lock @do_pages starting at index @start_idx. */
status = __ntfs_grab_cache_pages(mapping, start_idx, do_pages,
pages, &cached_page);
@@ -2017,56 +1870,57 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb,
status = ntfs_prepare_pages_for_non_resident_write(
pages, do_pages, pos, bytes);
if (unlikely(status)) {
- loff_t i_size;
-
do {
unlock_page(pages[--do_pages]);
page_cache_release(pages[do_pages]);
} while (do_pages);
- /*
- * The write preparation may have instantiated
- * allocated space outside i_size. Trim this
- * off again. We can ignore any errors in this
- * case as we will just be waisting a bit of
- * allocated space, which is not a disaster.
- */
- i_size = i_size_read(vi);
- if (pos + bytes > i_size) {
- ntfs_write_failed(mapping, pos + bytes);
- }
break;
}
}
u = (pos >> PAGE_CACHE_SHIFT) - pages[0]->index;
- if (likely(nr_segs == 1)) {
- copied = ntfs_copy_from_user(pages + u, do_pages - u,
- ofs, buf, bytes);
- buf += copied;
- } else
- copied = ntfs_copy_from_user_iovec(pages + u,
- do_pages - u, ofs, &iov, &iov_ofs,
- bytes);
+ copied = ntfs_copy_from_user_iter(pages + u, do_pages - u, ofs,
+ i, bytes);
ntfs_flush_dcache_pages(pages + u, do_pages - u);
- status = ntfs_commit_pages_after_write(pages, do_pages, pos,
- bytes);
- if (likely(!status)) {
- written += copied;
- count -= copied;
- pos += copied;
- if (unlikely(copied != bytes))
- status = -EFAULT;
+ status = 0;
+ if (likely(copied == bytes)) {
+ status = ntfs_commit_pages_after_write(pages, do_pages,
+ pos, bytes);
+ if (!status)
+ status = bytes;
}
do {
unlock_page(pages[--do_pages]);
page_cache_release(pages[do_pages]);
} while (do_pages);
- if (unlikely(status))
+ if (unlikely(status < 0))
break;
- balance_dirty_pages_ratelimited(mapping);
+ copied = status;
cond_resched();
- } while (count);
-err_out:
- *ppos = pos;
+ if (unlikely(!copied)) {
+ size_t sc;
+
+ /*
+ * We failed to copy anything. Fall back to single
+ * segment length write.
+ *
+ * This is needed to avoid possible livelock in the
+ * case that all segments in the iov cannot be copied
+ * at once without a pagefault.
+ */
+ sc = iov_iter_single_seg_count(i);
+ if (bytes > sc)
+ bytes = sc;
+ goto again;
+ }
+ iov_iter_advance(i, copied);
+ pos += copied;
+ written += copied;
+ balance_dirty_pages_ratelimited(mapping);
+ if (fatal_signal_pending(current)) {
+ status = -EINTR;
+ break;
+ }
+ } while (iov_iter_count(i));
if (cached_page)
page_cache_release(cached_page);
ntfs_debug("Done. Returning %s (written 0x%lx, status %li).",
@@ -2076,59 +1930,56 @@ err_out:
}
/**
- * ntfs_file_aio_write_nolock -
+ * ntfs_file_write_iter_nolock - write data to a file
+ * @iocb: IO state structure (file, offset, etc.)
+ * @from: iov_iter with data to write
+ *
+ * Basically the same as __generic_file_write_iter() except that it ends
+ * up calling ntfs_perform_write() instead of generic_perform_write() and that
+ * O_DIRECT is not implemented.
*/
-static ssize_t ntfs_file_aio_write_nolock(struct kiocb *iocb,
- const struct iovec *iov, unsigned long nr_segs, loff_t *ppos)
+static ssize_t ntfs_file_write_iter_nolock(struct kiocb *iocb,
+ struct iov_iter *from)
{
struct file *file = iocb->ki_filp;
- struct address_space *mapping = file->f_mapping;
- struct inode *inode = mapping->host;
- loff_t pos;
- size_t count; /* after file limit checks */
- ssize_t written, err;
+ loff_t pos = iocb->ki_pos;
+ ssize_t written = 0;
+ ssize_t err;
+ size_t count = iov_iter_count(from);
- count = iov_length(iov, nr_segs);
- pos = *ppos;
- /* We can write back this queue in page reclaim. */
- current->backing_dev_info = inode_to_bdi(inode);
- written = 0;
- err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
- if (err)
- goto out;
- if (!count)
- goto out;
- err = file_remove_suid(file);
- if (err)
- goto out;
- err = file_update_time(file);
- if (err)
- goto out;
- written = ntfs_file_buffered_write(iocb, iov, nr_segs, pos, ppos,
- count);
-out:
+ err = ntfs_prepare_file_for_write(file, &pos, &count);
+ if (count && !err) {
+ iov_iter_truncate(from, count);
+ written = ntfs_perform_write(file, from, pos);
+ if (likely(written >= 0))
+ iocb->ki_pos = pos + written;
+ }
current->backing_dev_info = NULL;
return written ? written : err;
}
/**
- * ntfs_file_aio_write -
+ * ntfs_file_write_iter - simple wrapper for ntfs_file_write_iter_nolock()
+ * @iocb: IO state structure
+ * @from: iov_iter with data to write
+ *
+ * Basically the same as generic_file_write_iter() except that it ends up
+ * calling ntfs_file_write_iter_nolock() instead of
+ * __generic_file_write_iter().
*/
-static ssize_t ntfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
- unsigned long nr_segs, loff_t pos)
+static ssize_t ntfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
struct file *file = iocb->ki_filp;
- struct address_space *mapping = file->f_mapping;
- struct inode *inode = mapping->host;
+ struct inode *vi = file_inode(file);
ssize_t ret;
- BUG_ON(iocb->ki_pos != pos);
-
- mutex_lock(&inode->i_mutex);
- ret = ntfs_file_aio_write_nolock(iocb, iov, nr_segs, &iocb->ki_pos);
- mutex_unlock(&inode->i_mutex);
+ mutex_lock(&vi->i_mutex);
+ ret = ntfs_file_write_iter_nolock(iocb, from);
+ mutex_unlock(&vi->i_mutex);
if (ret > 0) {
- int err = generic_write_sync(file, iocb->ki_pos - ret, ret);
+ ssize_t err;
+
+ err = generic_write_sync(file, iocb->ki_pos - ret, ret);
if (err < 0)
ret = err;
}
@@ -2196,37 +2047,17 @@ static int ntfs_file_fsync(struct file *filp, loff_t start, loff_t end,
#endif /* NTFS_RW */
const struct file_operations ntfs_file_ops = {
- .llseek = generic_file_llseek, /* Seek inside file. */
- .read = new_sync_read, /* Read from file. */
- .read_iter = generic_file_read_iter, /* Async read from file. */
+ .llseek = generic_file_llseek,
+ .read = new_sync_read,
+ .read_iter = generic_file_read_iter,
#ifdef NTFS_RW
- .write = do_sync_write, /* Write to file. */
- .aio_write = ntfs_file_aio_write, /* Async write to file. */
- /*.release = ,*/ /* Last file is closed. See
- fs/ext2/file.c::
- ext2_release_file() for
- how to use this to discard
- preallocated space for
- write opened files. */
- .fsync = ntfs_file_fsync, /* Sync a file to disk. */
- /*.aio_fsync = ,*/ /* Sync all outstanding async
- i/o operations on a
- kiocb. */
+ .write = new_sync_write,
+ .write_iter = ntfs_file_write_iter,
+ .fsync = ntfs_file_fsync,
#endif /* NTFS_RW */
- /*.ioctl = ,*/ /* Perform function on the
- mounted filesystem. */
- .mmap = generic_file_mmap, /* Mmap file. */
- .open = ntfs_file_open, /* Open file. */
- .splice_read = generic_file_splice_read /* Zero-copy data send with
- the data source being on
- the ntfs partition. We do
- not need to care about the
- data destination. */
- /*.sendpage = ,*/ /* Zero-copy data send with
- the data destination being
- on the ntfs partition. We
- do not need to care about
- the data source. */
+ .mmap = generic_file_mmap,
+ .open = ntfs_file_open,
+ .splice_read = generic_file_splice_read,
};
const struct inode_operations ntfs_file_inode_ops = {
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 044158bd22be..2d7f76e52c37 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -3370,7 +3370,7 @@ static int ocfs2_merge_rec_right(struct ocfs2_path *left_path,
ret = ocfs2_get_right_path(et, left_path, &right_path);
if (ret) {
mlog_errno(ret);
- goto out;
+ return ret;
}
right_el = path_leaf_el(right_path);
@@ -3453,8 +3453,7 @@ static int ocfs2_merge_rec_right(struct ocfs2_path *left_path,
subtree_index);
}
out:
- if (right_path)
- ocfs2_free_path(right_path);
+ ocfs2_free_path(right_path);
return ret;
}
@@ -3536,7 +3535,7 @@ static int ocfs2_merge_rec_left(struct ocfs2_path *right_path,
ret = ocfs2_get_left_path(et, right_path, &left_path);
if (ret) {
mlog_errno(ret);
- goto out;
+ return ret;
}
left_el = path_leaf_el(left_path);
@@ -3647,8 +3646,7 @@ static int ocfs2_merge_rec_left(struct ocfs2_path *right_path,
right_path, subtree_index);
}
out:
- if (left_path)
- ocfs2_free_path(left_path);
+ ocfs2_free_path(left_path);
return ret;
}
@@ -4334,17 +4332,17 @@ ocfs2_figure_merge_contig_type(struct ocfs2_extent_tree *et,
} else if (path->p_tree_depth > 0) {
status = ocfs2_find_cpos_for_left_leaf(sb, path, &left_cpos);
if (status)
- goto out;
+ goto exit;
if (left_cpos != 0) {
left_path = ocfs2_new_path_from_path(path);
if (!left_path)
- goto out;
+ goto exit;
status = ocfs2_find_path(et->et_ci, left_path,
left_cpos);
if (status)
- goto out;
+ goto free_left_path;
new_el = path_leaf_el(left_path);
@@ -4361,7 +4359,7 @@ ocfs2_figure_merge_contig_type(struct ocfs2_extent_tree *et,
le16_to_cpu(new_el->l_next_free_rec),
le16_to_cpu(new_el->l_count));
status = -EINVAL;
- goto out;
+ goto free_left_path;
}
rec = &new_el->l_recs[
le16_to_cpu(new_el->l_next_free_rec) - 1];
@@ -4388,18 +4386,18 @@ ocfs2_figure_merge_contig_type(struct ocfs2_extent_tree *et,
path->p_tree_depth > 0) {
status = ocfs2_find_cpos_for_right_leaf(sb, path, &right_cpos);
if (status)
- goto out;
+ goto free_left_path;
if (right_cpos == 0)
- goto out;
+ goto free_left_path;
right_path = ocfs2_new_path_from_path(path);
if (!right_path)
- goto out;
+ goto free_left_path;
status = ocfs2_find_path(et->et_ci, right_path, right_cpos);
if (status)
- goto out;
+ goto free_right_path;
new_el = path_leaf_el(right_path);
rec = &new_el->l_recs[0];
@@ -4413,7 +4411,7 @@ ocfs2_figure_merge_contig_type(struct ocfs2_extent_tree *et,
(unsigned long long)le64_to_cpu(eb->h_blkno),
le16_to_cpu(new_el->l_next_free_rec));
status = -EINVAL;
- goto out;
+ goto free_right_path;
}
rec = &new_el->l_recs[1];
}
@@ -4430,12 +4428,11 @@ ocfs2_figure_merge_contig_type(struct ocfs2_extent_tree *et,
ret = contig_type;
}
-out:
- if (left_path)
- ocfs2_free_path(left_path);
- if (right_path)
- ocfs2_free_path(right_path);
-
+free_right_path:
+ ocfs2_free_path(right_path);
+free_left_path:
+ ocfs2_free_path(left_path);
+exit:
return ret;
}
@@ -6858,13 +6855,13 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
if (pages == NULL) {
ret = -ENOMEM;
mlog_errno(ret);
- goto out;
+ return ret;
}
ret = ocfs2_reserve_clusters(osb, 1, &data_ac);
if (ret) {
mlog_errno(ret);
- goto out;
+ goto free_pages;
}
}
@@ -6996,9 +6993,8 @@ out_commit:
out:
if (data_ac)
ocfs2_free_alloc_context(data_ac);
- if (pages)
- kfree(pages);
-
+free_pages:
+ kfree(pages);
return ret;
}
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index e1bf18c5d25e..8d2bc840c288 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -664,6 +664,117 @@ static int ocfs2_is_overwrite(struct ocfs2_super *osb,
return 0;
}
+static int ocfs2_direct_IO_zero_extend(struct ocfs2_super *osb,
+ struct inode *inode, loff_t offset,
+ u64 zero_len, int cluster_align)
+{
+ u32 p_cpos = 0;
+ u32 v_cpos = ocfs2_bytes_to_clusters(osb->sb, i_size_read(inode));
+ unsigned int num_clusters = 0;
+ unsigned int ext_flags = 0;
+ int ret = 0;
+
+ if (offset <= i_size_read(inode) || cluster_align)
+ return 0;
+
+ ret = ocfs2_get_clusters(inode, v_cpos, &p_cpos, &num_clusters,
+ &ext_flags);
+ if (ret < 0) {
+ mlog_errno(ret);
+ return ret;
+ }
+
+ if (p_cpos && !(ext_flags & OCFS2_EXT_UNWRITTEN)) {
+ u64 s = i_size_read(inode);
+ sector_t sector = (p_cpos << (osb->s_clustersize_bits - 9)) +
+ (do_div(s, osb->s_clustersize) >> 9);
+
+ ret = blkdev_issue_zeroout(osb->sb->s_bdev, sector,
+ zero_len >> 9, GFP_NOFS, false);
+ if (ret < 0)
+ mlog_errno(ret);
+ }
+
+ return ret;
+}
+
+static int ocfs2_direct_IO_extend_no_holes(struct ocfs2_super *osb,
+ struct inode *inode, loff_t offset)
+{
+ u64 zero_start, zero_len, total_zero_len;
+ u32 p_cpos = 0, clusters_to_add;
+ u32 v_cpos = ocfs2_bytes_to_clusters(osb->sb, i_size_read(inode));
+ unsigned int num_clusters = 0;
+ unsigned int ext_flags = 0;
+ u32 size_div, offset_div;
+ int ret = 0;
+
+ {
+ u64 o = offset;
+ u64 s = i_size_read(inode);
+
+ offset_div = do_div(o, osb->s_clustersize);
+ size_div = do_div(s, osb->s_clustersize);
+ }
+
+ if (offset <= i_size_read(inode))
+ return 0;
+
+ clusters_to_add = ocfs2_bytes_to_clusters(inode->i_sb, offset) -
+ ocfs2_bytes_to_clusters(inode->i_sb, i_size_read(inode));
+ total_zero_len = offset - i_size_read(inode);
+ if (clusters_to_add)
+ total_zero_len -= offset_div;
+
+ /* Allocate clusters to fill out holes, and this is only needed
+ * when we add more than one clusters. Otherwise the cluster will
+ * be allocated during direct IO */
+ if (clusters_to_add > 1) {
+ ret = ocfs2_extend_allocation(inode,
+ OCFS2_I(inode)->ip_clusters,
+ clusters_to_add - 1, 0);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+ }
+
+ while (total_zero_len) {
+ ret = ocfs2_get_clusters(inode, v_cpos, &p_cpos, &num_clusters,
+ &ext_flags);
+ if (ret < 0) {
+ mlog_errno(ret);
+ goto out;
+ }
+
+ zero_start = ocfs2_clusters_to_bytes(osb->sb, p_cpos) +
+ size_div;
+ zero_len = ocfs2_clusters_to_bytes(osb->sb, num_clusters) -
+ size_div;
+ zero_len = min(total_zero_len, zero_len);
+
+ if (p_cpos && !(ext_flags & OCFS2_EXT_UNWRITTEN)) {
+ ret = blkdev_issue_zeroout(osb->sb->s_bdev,
+ zero_start >> 9, zero_len >> 9,
+ GFP_NOFS, false);
+ if (ret < 0) {
+ mlog_errno(ret);
+ goto out;
+ }
+ }
+
+ total_zero_len -= zero_len;
+ v_cpos += ocfs2_bytes_to_clusters(osb->sb, zero_len + size_div);
+
+ /* Only at first iteration can be cluster not aligned.
+ * So set size_div to 0 for the rest */
+ size_div = 0;
+ }
+
+out:
+ return ret;
+}
+
static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb,
struct iov_iter *iter,
loff_t offset)
@@ -678,8 +789,8 @@ static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb,
struct buffer_head *di_bh = NULL;
size_t count = iter->count;
journal_t *journal = osb->journal->j_journal;
- u32 zero_len;
- int cluster_align;
+ u64 zero_len_head, zero_len_tail;
+ int cluster_align_head, cluster_align_tail;
loff_t final_size = offset + count;
int append_write = offset >= i_size_read(inode) ? 1 : 0;
unsigned int num_clusters = 0;
@@ -687,9 +798,16 @@ static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb,
{
u64 o = offset;
+ u64 s = i_size_read(inode);
+
+ zero_len_head = do_div(o, 1 << osb->s_clustersize_bits);
+ cluster_align_head = !zero_len_head;
- zero_len = do_div(o, 1 << osb->s_clustersize_bits);
- cluster_align = !zero_len;
+ zero_len_tail = osb->s_clustersize -
+ do_div(s, osb->s_clustersize);
+ if ((offset - i_size_read(inode)) < zero_len_tail)
+ zero_len_tail = offset - i_size_read(inode);
+ cluster_align_tail = !zero_len_tail;
}
/*
@@ -707,21 +825,23 @@ static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb,
}
if (append_write) {
- ret = ocfs2_inode_lock(inode, &di_bh, 1);
+ ret = ocfs2_inode_lock(inode, NULL, 1);
if (ret < 0) {
mlog_errno(ret);
goto clean_orphan;
}
+ /* zeroing out the previously allocated cluster tail
+ * that but not zeroed */
if (ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)))
- ret = ocfs2_zero_extend(inode, di_bh, offset);
+ ret = ocfs2_direct_IO_zero_extend(osb, inode, offset,
+ zero_len_tail, cluster_align_tail);
else
- ret = ocfs2_extend_no_holes(inode, di_bh, offset,
+ ret = ocfs2_direct_IO_extend_no_holes(osb, inode,
offset);
if (ret < 0) {
mlog_errno(ret);
ocfs2_inode_unlock(inode, 1);
- brelse(di_bh);
goto clean_orphan;
}
@@ -729,13 +849,10 @@ static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb,
if (is_overwrite < 0) {
mlog_errno(is_overwrite);
ocfs2_inode_unlock(inode, 1);
- brelse(di_bh);
goto clean_orphan;
}
ocfs2_inode_unlock(inode, 1);
- brelse(di_bh);
- di_bh = NULL;
}
written = __blockdev_direct_IO(WRITE, iocb, inode, inode->i_sb->s_bdev,
@@ -772,15 +889,23 @@ static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb,
if (ret < 0)
mlog_errno(ret);
}
- } else if (written < 0 && append_write && !is_overwrite &&
- !cluster_align) {
+ } else if (written > 0 && append_write && !is_overwrite &&
+ !cluster_align_head) {
+ /* zeroing out the allocated cluster head */
u32 p_cpos = 0;
u32 v_cpos = ocfs2_bytes_to_clusters(osb->sb, offset);
+ ret = ocfs2_inode_lock(inode, NULL, 0);
+ if (ret < 0) {
+ mlog_errno(ret);
+ goto clean_orphan;
+ }
+
ret = ocfs2_get_clusters(inode, v_cpos, &p_cpos,
&num_clusters, &ext_flags);
if (ret < 0) {
mlog_errno(ret);
+ ocfs2_inode_unlock(inode, 0);
goto clean_orphan;
}
@@ -788,9 +913,11 @@ static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb,
ret = blkdev_issue_zeroout(osb->sb->s_bdev,
p_cpos << (osb->s_clustersize_bits - 9),
- zero_len >> 9, GFP_KERNEL, false);
+ zero_len_head >> 9, GFP_NOFS, false);
if (ret < 0)
mlog_errno(ret);
+
+ ocfs2_inode_unlock(inode, 0);
}
clean_orphan:
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index 16eff45727ee..8e19b9d7aba8 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -1312,7 +1312,9 @@ static int o2hb_debug_init(void)
int ret = -ENOMEM;
o2hb_debug_dir = debugfs_create_dir(O2HB_DEBUG_DIR, NULL);
- if (!o2hb_debug_dir) {
+ if (IS_ERR_OR_NULL(o2hb_debug_dir)) {
+ ret = o2hb_debug_dir ?
+ PTR_ERR(o2hb_debug_dir) : -ENOMEM;
mlog_errno(ret);
goto bail;
}
@@ -1325,7 +1327,9 @@ static int o2hb_debug_init(void)
sizeof(o2hb_live_node_bitmap),
O2NM_MAX_NODES,
o2hb_live_node_bitmap);
- if (!o2hb_debug_livenodes) {
+ if (IS_ERR_OR_NULL(o2hb_debug_livenodes)) {
+ ret = o2hb_debug_livenodes ?
+ PTR_ERR(o2hb_debug_livenodes) : -ENOMEM;
mlog_errno(ret);
goto bail;
}
@@ -1338,7 +1342,9 @@ static int o2hb_debug_init(void)
sizeof(o2hb_live_region_bitmap),
O2NM_MAX_REGIONS,
o2hb_live_region_bitmap);
- if (!o2hb_debug_liveregions) {
+ if (IS_ERR_OR_NULL(o2hb_debug_liveregions)) {
+ ret = o2hb_debug_liveregions ?
+ PTR_ERR(o2hb_debug_liveregions) : -ENOMEM;
mlog_errno(ret);
goto bail;
}
@@ -1352,7 +1358,9 @@ static int o2hb_debug_init(void)
sizeof(o2hb_quorum_region_bitmap),
O2NM_MAX_REGIONS,
o2hb_quorum_region_bitmap);
- if (!o2hb_debug_quorumregions) {
+ if (IS_ERR_OR_NULL(o2hb_debug_quorumregions)) {
+ ret = o2hb_debug_quorumregions ?
+ PTR_ERR(o2hb_debug_quorumregions) : -ENOMEM;
mlog_errno(ret);
goto bail;
}
@@ -1366,7 +1374,9 @@ static int o2hb_debug_init(void)
sizeof(o2hb_failed_region_bitmap),
O2NM_MAX_REGIONS,
o2hb_failed_region_bitmap);
- if (!o2hb_debug_failedregions) {
+ if (IS_ERR_OR_NULL(o2hb_debug_failedregions)) {
+ ret = o2hb_debug_failedregions ?
+ PTR_ERR(o2hb_debug_failedregions) : -ENOMEM;
mlog_errno(ret);
goto bail;
}
@@ -2000,7 +2010,8 @@ static int o2hb_debug_region_init(struct o2hb_region *reg, struct dentry *dir)
reg->hr_debug_dir =
debugfs_create_dir(config_item_name(&reg->hr_item), dir);
- if (!reg->hr_debug_dir) {
+ if (IS_ERR_OR_NULL(reg->hr_debug_dir)) {
+ ret = reg->hr_debug_dir ? PTR_ERR(reg->hr_debug_dir) : -ENOMEM;
mlog_errno(ret);
goto bail;
}
@@ -2013,7 +2024,9 @@ static int o2hb_debug_region_init(struct o2hb_region *reg, struct dentry *dir)
O2HB_DB_TYPE_REGION_LIVENODES,
sizeof(reg->hr_live_node_bitmap),
O2NM_MAX_NODES, reg);
- if (!reg->hr_debug_livenodes) {
+ if (IS_ERR_OR_NULL(reg->hr_debug_livenodes)) {
+ ret = reg->hr_debug_livenodes ?
+ PTR_ERR(reg->hr_debug_livenodes) : -ENOMEM;
mlog_errno(ret);
goto bail;
}
@@ -2025,7 +2038,9 @@ static int o2hb_debug_region_init(struct o2hb_region *reg, struct dentry *dir)
sizeof(*(reg->hr_db_regnum)),
O2HB_DB_TYPE_REGION_NUMBER,
0, O2NM_MAX_NODES, reg);
- if (!reg->hr_debug_regnum) {
+ if (IS_ERR_OR_NULL(reg->hr_debug_regnum)) {
+ ret = reg->hr_debug_regnum ?
+ PTR_ERR(reg->hr_debug_regnum) : -ENOMEM;
mlog_errno(ret);
goto bail;
}
@@ -2037,7 +2052,9 @@ static int o2hb_debug_region_init(struct o2hb_region *reg, struct dentry *dir)
sizeof(*(reg->hr_db_elapsed_time)),
O2HB_DB_TYPE_REGION_ELAPSED_TIME,
0, 0, reg);
- if (!reg->hr_debug_elapsed_time) {
+ if (IS_ERR_OR_NULL(reg->hr_debug_elapsed_time)) {
+ ret = reg->hr_debug_elapsed_time ?
+ PTR_ERR(reg->hr_debug_elapsed_time) : -ENOMEM;
mlog_errno(ret);
goto bail;
}
@@ -2049,13 +2066,16 @@ static int o2hb_debug_region_init(struct o2hb_region *reg, struct dentry *dir)
sizeof(*(reg->hr_db_pinned)),
O2HB_DB_TYPE_REGION_PINNED,
0, 0, reg);
- if (!reg->hr_debug_pinned) {
+ if (IS_ERR_OR_NULL(reg->hr_debug_pinned)) {
+ ret = reg->hr_debug_pinned ?
+ PTR_ERR(reg->hr_debug_pinned) : -ENOMEM;
mlog_errno(ret);
goto bail;
}
- ret = 0;
+ return 0;
bail:
+ debugfs_remove_recursive(reg->hr_debug_dir);
return ret;
}
diff --git a/fs/ocfs2/cluster/masklog.h b/fs/ocfs2/cluster/masklog.h
index 2260fb9e6508..7fdc25a4d8c0 100644
--- a/fs/ocfs2/cluster/masklog.h
+++ b/fs/ocfs2/cluster/masklog.h
@@ -196,13 +196,14 @@ extern struct mlog_bits mlog_and_bits, mlog_not_bits;
} \
} while (0)
-#define mlog_errno(st) do { \
+#define mlog_errno(st) ({ \
int _st = (st); \
if (_st != -ERESTARTSYS && _st != -EINTR && \
_st != AOP_TRUNCATED_PAGE && _st != -ENOSPC && \
_st != -EDQUOT) \
mlog(ML_ERROR, "status = %lld\n", (long long)_st); \
-} while (0)
+ _st; \
+})
#define mlog_bug_on_msg(cond, fmt, args...) do { \
if (cond) { \
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index b08050bd3f2e..ccd4dcfc3645 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -18,7 +18,7 @@
*
* linux/fs/minix/dir.c
*
- * Copyright (C) 1991, 1992 Linux Torvalds
+ * Copyright (C) 1991, 1992 Linus Torvalds
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
@@ -2047,22 +2047,19 @@ int ocfs2_check_dir_for_entry(struct inode *dir,
const char *name,
int namelen)
{
- int ret;
+ int ret = 0;
struct ocfs2_dir_lookup_result lookup = { NULL, };
trace_ocfs2_check_dir_for_entry(
(unsigned long long)OCFS2_I(dir)->ip_blkno, namelen, name);
- ret = -EEXIST;
- if (ocfs2_find_entry(name, namelen, dir, &lookup) == 0)
- goto bail;
+ if (ocfs2_find_entry(name, namelen, dir, &lookup) == 0) {
+ ret = -EEXIST;
+ mlog_errno(ret);
+ }
- ret = 0;
-bail:
ocfs2_free_dir_lookup_result(&lookup);
- if (ret)
- mlog_errno(ret);
return ret;
}
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 11849a44dc5a..956edf67be20 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -1391,6 +1391,11 @@ static int __ocfs2_cluster_lock(struct ocfs2_super *osb,
int noqueue_attempted = 0;
int dlm_locked = 0;
+ if (!(lockres->l_flags & OCFS2_LOCK_INITIALIZED)) {
+ mlog_errno(-EINVAL);
+ return -EINVAL;
+ }
+
ocfs2_init_mask_waiter(&mw);
if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB)
@@ -2954,7 +2959,7 @@ static int ocfs2_dlm_init_debug(struct ocfs2_super *osb)
osb->osb_debug_root,
osb,
&ocfs2_dlm_debug_fops);
- if (!dlm_debug->d_locking_state) {
+ if (IS_ERR_OR_NULL(dlm_debug->d_locking_state)) {
ret = -EINVAL;
mlog(ML_ERROR,
"Unable to create locking state debugfs file.\n");
diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c
index 29651167190d..540dc4bdd042 100644
--- a/fs/ocfs2/export.c
+++ b/fs/ocfs2/export.c
@@ -82,7 +82,6 @@ static struct dentry *ocfs2_get_dentry(struct super_block *sb,
}
status = ocfs2_test_inode_bit(osb, blkno, &set);
- trace_ocfs2_get_dentry_test_bit(status, set);
if (status < 0) {
if (status == -EINVAL) {
/*
@@ -96,6 +95,7 @@ static struct dentry *ocfs2_get_dentry(struct super_block *sb,
goto unlock_nfs_sync;
}
+ trace_ocfs2_get_dentry_test_bit(status, set);
/* If the inode allocator bit is clear, this inode must be stale */
if (!set) {
status = -ESTALE;
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 266845de2100..91f03ce98108 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2392,7 +2392,6 @@ relock:
/*
* for completing the rest of the request.
*/
- *ppos += written;
count -= written;
written_buffered = generic_perform_write(file, from, *ppos);
/*
@@ -2407,7 +2406,6 @@ relock:
goto out_dio;
}
- iocb->ki_pos = *ppos + written_buffered;
/* We need to ensure that the page cache pages are written to
* disk and invalidated to preserve the expected O_DIRECT
* semantics.
@@ -2416,6 +2414,7 @@ relock:
ret = filemap_write_and_wait_range(file->f_mapping, *ppos,
endbyte);
if (ret == 0) {
+ iocb->ki_pos = *ppos + written_buffered;
written += written_buffered;
invalidate_mapping_pages(mapping,
*ppos >> PAGE_CACHE_SHIFT,
@@ -2438,10 +2437,14 @@ out_dio:
/* buffered aio wouldn't have proper lock coverage today */
BUG_ON(ret == -EIOCBQUEUED && !(file->f_flags & O_DIRECT));
+ if (unlikely(written <= 0))
+ goto no_sync;
+
if (((file->f_flags & O_DSYNC) && !direct_io) || IS_SYNC(inode) ||
((file->f_flags & O_DIRECT) && !direct_io)) {
- ret = filemap_fdatawrite_range(file->f_mapping, *ppos,
- *ppos + count - 1);
+ ret = filemap_fdatawrite_range(file->f_mapping,
+ iocb->ki_pos - written,
+ iocb->ki_pos - 1);
if (ret < 0)
written = ret;
@@ -2452,10 +2455,12 @@ out_dio:
}
if (!ret)
- ret = filemap_fdatawait_range(file->f_mapping, *ppos,
- *ppos + count - 1);
+ ret = filemap_fdatawait_range(file->f_mapping,
+ iocb->ki_pos - written,
+ iocb->ki_pos - 1);
}
+no_sync:
/*
* deep in g_f_a_w_n()->ocfs2_direct_IO we pass in a ocfs2_dio_end_io
* function pointer which is called when o_direct io completes so that
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 3025c0da6b8a..be71ca0937f7 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -624,7 +624,7 @@ static int ocfs2_remove_inode(struct inode *inode,
ocfs2_get_system_file_inode(osb, INODE_ALLOC_SYSTEM_INODE,
le16_to_cpu(di->i_suballoc_slot));
if (!inode_alloc_inode) {
- status = -EEXIST;
+ status = -ENOENT;
mlog_errno(status);
goto bail;
}
@@ -742,7 +742,7 @@ static int ocfs2_wipe_inode(struct inode *inode,
ORPHAN_DIR_SYSTEM_INODE,
orphaned_slot);
if (!orphan_dir_inode) {
- status = -EEXIST;
+ status = -ENOENT;
mlog_errno(status);
goto bail;
}
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
index 044013455621..857bbbcd39f3 100644
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -666,7 +666,7 @@ int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb,
if (le32_to_cpu(alloc->id1.bitmap1.i_used) !=
ocfs2_local_alloc_count_bits(alloc)) {
ocfs2_error(osb->sb, "local alloc inode %llu says it has "
- "%u free bits, but a count shows %u",
+ "%u used bits, but a count shows %u",
(unsigned long long)le64_to_cpu(alloc->i_blkno),
le32_to_cpu(alloc->id1.bitmap1.i_used),
ocfs2_local_alloc_count_bits(alloc));
@@ -839,7 +839,7 @@ static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb,
u32 *numbits,
struct ocfs2_alloc_reservation *resv)
{
- int numfound, bitoff, left, startoff, lastzero;
+ int numfound = 0, bitoff, left, startoff, lastzero;
int local_resv = 0;
struct ocfs2_alloc_reservation r;
void *bitmap = NULL;
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index b5c3a5ea3ee6..09f90cbf0e24 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -2322,10 +2322,10 @@ int ocfs2_orphan_del(struct ocfs2_super *osb,
trace_ocfs2_orphan_del(
(unsigned long long)OCFS2_I(orphan_dir_inode)->ip_blkno,
- name, namelen);
+ name, strlen(name));
/* find it's spot in the orphan directory */
- status = ocfs2_find_entry(name, namelen, orphan_dir_inode,
+ status = ocfs2_find_entry(name, strlen(name), orphan_dir_inode,
&lookup);
if (status) {
mlog_errno(status);
@@ -2808,7 +2808,7 @@ int ocfs2_mv_orphaned_inode_to_new(struct inode *dir,
ORPHAN_DIR_SYSTEM_INODE,
osb->slot_num);
if (!orphan_dir_inode) {
- status = -EEXIST;
+ status = -ENOENT;
mlog_errno(status);
goto leave;
}
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index ee541f92dab4..df3a500789c7 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -4276,7 +4276,7 @@ static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir,
error = posix_acl_create(dir, &mode, &default_acl, &acl);
if (error) {
mlog_errno(error);
- goto out;
+ return error;
}
error = ocfs2_create_inode_in_orphan(dir, mode,
diff --git a/fs/ocfs2/slot_map.c b/fs/ocfs2/slot_map.c
index d5493e361a38..e78a203d44c8 100644
--- a/fs/ocfs2/slot_map.c
+++ b/fs/ocfs2/slot_map.c
@@ -427,7 +427,7 @@ int ocfs2_init_slot_info(struct ocfs2_super *osb)
if (!si) {
status = -ENOMEM;
mlog_errno(status);
- goto bail;
+ return status;
}
si->si_extended = ocfs2_uses_extended_slot_map(osb);
@@ -452,7 +452,7 @@ int ocfs2_init_slot_info(struct ocfs2_super *osb)
osb->slot_info = (struct ocfs2_slot_info *)si;
bail:
- if (status < 0 && si)
+ if (status < 0)
__ocfs2_free_slot_info(si);
return status;
diff --git a/fs/ocfs2/stack_o2cb.c b/fs/ocfs2/stack_o2cb.c
index 1724d43d3da1..220cae7bbdbc 100644
--- a/fs/ocfs2/stack_o2cb.c
+++ b/fs/ocfs2/stack_o2cb.c
@@ -295,7 +295,7 @@ static int o2cb_cluster_check(void)
set_bit(node_num, netmap);
if (!memcmp(hbmap, netmap, sizeof(hbmap)))
return 0;
- if (i < O2CB_MAP_STABILIZE_COUNT)
+ if (i < O2CB_MAP_STABILIZE_COUNT - 1)
msleep(1000);
}
diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c
index 720aa389e0ea..2768eb1da2b8 100644
--- a/fs/ocfs2/stack_user.c
+++ b/fs/ocfs2/stack_user.c
@@ -1004,10 +1004,8 @@ static int user_cluster_connect(struct ocfs2_cluster_connection *conn)
BUG_ON(conn == NULL);
lc = kzalloc(sizeof(struct ocfs2_live_connection), GFP_KERNEL);
- if (!lc) {
- rc = -ENOMEM;
- goto out;
- }
+ if (!lc)
+ return -ENOMEM;
init_waitqueue_head(&lc->oc_wait);
init_completion(&lc->oc_sync_wait);
@@ -1063,7 +1061,7 @@ static int user_cluster_connect(struct ocfs2_cluster_connection *conn)
}
out:
- if (rc && lc)
+ if (rc)
kfree(lc);
return rc;
}
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index 0cb889a17ae1..4479029630bb 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -2499,6 +2499,8 @@ static int _ocfs2_free_suballoc_bits(handle_t *handle,
alloc_bh, OCFS2_JOURNAL_ACCESS_WRITE);
if (status < 0) {
mlog_errno(status);
+ ocfs2_block_group_set_bits(handle, alloc_inode, group, group_bh,
+ start_bit, count);
goto bail;
}
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 26675185b886..837ddce4b659 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1112,7 +1112,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
osb->osb_debug_root = debugfs_create_dir(osb->uuid_str,
ocfs2_debugfs_root);
- if (!osb->osb_debug_root) {
+ if (IS_ERR_OR_NULL(osb->osb_debug_root)) {
status = -EINVAL;
mlog(ML_ERROR, "Unable to create per-mount debugfs root.\n");
goto read_super_error;
@@ -1122,7 +1122,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
osb->osb_debug_root,
osb,
&ocfs2_osb_debug_fops);
- if (!osb->osb_ctxt) {
+ if (IS_ERR_OR_NULL(osb->osb_ctxt)) {
status = -EINVAL;
mlog_errno(status);
goto read_super_error;
@@ -1606,8 +1606,9 @@ static int __init ocfs2_init(void)
}
ocfs2_debugfs_root = debugfs_create_dir("ocfs2", NULL);
- if (!ocfs2_debugfs_root) {
- status = -ENOMEM;
+ if (IS_ERR_OR_NULL(ocfs2_debugfs_root)) {
+ status = ocfs2_debugfs_root ?
+ PTR_ERR(ocfs2_debugfs_root) : -ENOMEM;
mlog(ML_ERROR, "Unable to create ocfs2 debugfs root.\n");
goto out4;
}
@@ -2069,6 +2070,8 @@ static int ocfs2_initialize_super(struct super_block *sb,
cbits = le32_to_cpu(di->id2.i_super.s_clustersize_bits);
bbits = le32_to_cpu(di->id2.i_super.s_blocksize_bits);
sb->s_maxbytes = ocfs2_max_file_offset(bbits, cbits);
+ memcpy(sb->s_uuid, di->id2.i_super.s_uuid,
+ sizeof(di->id2.i_super.s_uuid));
osb->osb_dx_mask = (1 << (cbits - bbits)) - 1;
@@ -2333,7 +2336,7 @@ static int ocfs2_initialize_super(struct super_block *sb,
mlog_errno(status);
goto bail;
}
- cleancache_init_shared_fs((char *)&di->id2.i_super.s_uuid, sb);
+ cleancache_init_shared_fs(sb);
bail:
return status;
@@ -2563,22 +2566,22 @@ static void ocfs2_handle_error(struct super_block *sb)
ocfs2_set_ro_flag(osb, 0);
}
-static char error_buf[1024];
-
-void __ocfs2_error(struct super_block *sb,
- const char *function,
- const char *fmt, ...)
+void __ocfs2_error(struct super_block *sb, const char *function,
+ const char *fmt, ...)
{
+ struct va_format vaf;
va_list args;
va_start(args, fmt);
- vsnprintf(error_buf, sizeof(error_buf), fmt, args);
- va_end(args);
+ vaf.fmt = fmt;
+ vaf.va = &args;
/* Not using mlog here because we want to show the actual
* function the error came from. */
- printk(KERN_CRIT "OCFS2: ERROR (device %s): %s: %s\n",
- sb->s_id, function, error_buf);
+ printk(KERN_CRIT "OCFS2: ERROR (device %s): %s: %pV\n",
+ sb->s_id, function, &vaf);
+
+ va_end(args);
ocfs2_handle_error(sb);
}
@@ -2586,18 +2589,21 @@ void __ocfs2_error(struct super_block *sb,
/* Handle critical errors. This is intentionally more drastic than
* ocfs2_handle_error, so we only use for things like journal errors,
* etc. */
-void __ocfs2_abort(struct super_block* sb,
- const char *function,
+void __ocfs2_abort(struct super_block *sb, const char *function,
const char *fmt, ...)
{
+ struct va_format vaf;
va_list args;
va_start(args, fmt);
- vsnprintf(error_buf, sizeof(error_buf), fmt, args);
- va_end(args);
- printk(KERN_CRIT "OCFS2: abort (device %s): %s: %s\n",
- sb->s_id, function, error_buf);
+ vaf.fmt = fmt;
+ vaf.va = &args;
+
+ printk(KERN_CRIT "OCFS2: abort (device %s): %s: %pV\n",
+ sb->s_id, function, &vaf);
+
+ va_end(args);
/* We don't have the cluster support yet to go straight to
* hard readonly in here. Until then, we want to keep
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 85b190dc132f..4ca7533be479 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -1238,6 +1238,10 @@ static int ocfs2_xattr_block_get(struct inode *inode,
i,
&block_off,
&name_offset);
+ if (ret) {
+ mlog_errno(ret);
+ goto cleanup;
+ }
xs->base = bucket_block(xs->bucket, block_off);
}
if (ocfs2_xattr_is_local(xs->here)) {
@@ -5665,6 +5669,10 @@ static int ocfs2_delete_xattr_in_bucket(struct inode *inode,
ret = ocfs2_get_xattr_tree_value_root(inode->i_sb, bucket,
i, &xv, NULL);
+ if (ret) {
+ mlog_errno(ret);
+ break;
+ }
ret = ocfs2_lock_xattr_remove_allocators(inode, xv,
args->ref_ci,
diff --git a/fs/open.c b/fs/open.c
index 33f9cbf2610b..6a83c47d5904 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -570,6 +570,7 @@ static int chown_common(struct path *path, uid_t user, gid_t group)
uid = make_kuid(current_user_ns(), user);
gid = make_kgid(current_user_ns(), group);
+retry_deleg:
newattrs.ia_valid = ATTR_CTIME;
if (user != (uid_t) -1) {
if (!uid_valid(uid))
@@ -586,7 +587,6 @@ static int chown_common(struct path *path, uid_t user, gid_t group)
if (!S_ISDIR(inode->i_mode))
newattrs.ia_valid |=
ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_KILL_PRIV;
-retry_deleg:
mutex_lock(&inode->i_mutex);
error = security_path_chown(path, uid, gid);
if (!error)
@@ -988,9 +988,6 @@ struct file *file_open_root(struct dentry *dentry, struct vfsmount *mnt,
return ERR_PTR(err);
if (flags & O_CREAT)
return ERR_PTR(-EINVAL);
- if (!filename && (flags & O_DIRECTORY))
- if (!dentry->d_inode->i_op->lookup)
- return ERR_PTR(-ENOTDIR);
return do_file_open_root(dentry, mnt, filename, &op);
}
EXPORT_SYMBOL(file_open_root);
diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
index 39d1373128e9..44a549beeafa 100644
--- a/fs/pstore/ram.c
+++ b/fs/pstore/ram.c
@@ -539,6 +539,9 @@ static int ramoops_probe(struct platform_device *pdev)
mem_address = pdata->mem_address;
record_size = pdata->record_size;
dump_oops = pdata->dump_oops;
+ ramoops_console_size = pdata->console_size;
+ ramoops_pmsg_size = pdata->pmsg_size;
+ ramoops_ftrace_size = pdata->ftrace_size;
pr_info("attached 0x%lx@0x%llx, ecc: %d/%d\n",
cxt->size, (unsigned long long)cxt->phys_addr,
diff --git a/fs/read_write.c b/fs/read_write.c
index 99a6ef946d01..69128b378646 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -695,25 +695,23 @@ unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to)
}
EXPORT_SYMBOL(iov_shorten);
-static ssize_t do_iter_readv_writev(struct file *filp, int rw, const struct iovec *iov,
- unsigned long nr_segs, size_t len, loff_t *ppos, iter_fn_t fn)
+static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter,
+ loff_t *ppos, iter_fn_t fn)
{
struct kiocb kiocb;
- struct iov_iter iter;
ssize_t ret;
init_sync_kiocb(&kiocb, filp);
kiocb.ki_pos = *ppos;
- iov_iter_init(&iter, rw, iov, nr_segs, len);
- ret = fn(&kiocb, &iter);
+ ret = fn(&kiocb, iter);
BUG_ON(ret == -EIOCBQUEUED);
*ppos = kiocb.ki_pos;
return ret;
}
-static ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov,
- unsigned long nr_segs, size_t len, loff_t *ppos, iov_fn_t fn)
+static ssize_t do_sync_readv_writev(struct file *filp, struct iov_iter *iter,
+ loff_t *ppos, iov_fn_t fn)
{
struct kiocb kiocb;
ssize_t ret;
@@ -721,30 +719,23 @@ static ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov,
init_sync_kiocb(&kiocb, filp);
kiocb.ki_pos = *ppos;
- ret = fn(&kiocb, iov, nr_segs, kiocb.ki_pos);
+ ret = fn(&kiocb, iter->iov, iter->nr_segs, kiocb.ki_pos);
BUG_ON(ret == -EIOCBQUEUED);
*ppos = kiocb.ki_pos;
return ret;
}
/* Do it by hand, with file-ops */
-static ssize_t do_loop_readv_writev(struct file *filp, struct iovec *iov,
- unsigned long nr_segs, loff_t *ppos, io_fn_t fn)
+static ssize_t do_loop_readv_writev(struct file *filp, struct iov_iter *iter,
+ loff_t *ppos, io_fn_t fn)
{
- struct iovec *vector = iov;
ssize_t ret = 0;
- while (nr_segs > 0) {
- void __user *base;
- size_t len;
+ while (iov_iter_count(iter)) {
+ struct iovec iovec = iov_iter_iovec(iter);
ssize_t nr;
- base = vector->iov_base;
- len = vector->iov_len;
- vector++;
- nr_segs--;
-
- nr = fn(filp, base, len, ppos);
+ nr = fn(filp, iovec.iov_base, iovec.iov_len, ppos);
if (nr < 0) {
if (!ret)
@@ -752,8 +743,9 @@ static ssize_t do_loop_readv_writev(struct file *filp, struct iovec *iov,
break;
}
ret += nr;
- if (nr != len)
+ if (nr != iovec.iov_len)
break;
+ iov_iter_advance(iter, nr);
}
return ret;
@@ -844,17 +836,20 @@ static ssize_t do_readv_writev(int type, struct file *file,
size_t tot_len;
struct iovec iovstack[UIO_FASTIOV];
struct iovec *iov = iovstack;
+ struct iov_iter iter;
ssize_t ret;
io_fn_t fn;
iov_fn_t fnv;
iter_fn_t iter_fn;
- ret = rw_copy_check_uvector(type, uvector, nr_segs,
- ARRAY_SIZE(iovstack), iovstack, &iov);
- if (ret <= 0)
- goto out;
+ ret = import_iovec(type, uvector, nr_segs,
+ ARRAY_SIZE(iovstack), &iov, &iter);
+ if (ret < 0)
+ return ret;
- tot_len = ret;
+ tot_len = iov_iter_count(&iter);
+ if (!tot_len)
+ goto out;
ret = rw_verify_area(type, file, pos, tot_len);
if (ret < 0)
goto out;
@@ -872,20 +867,17 @@ static ssize_t do_readv_writev(int type, struct file *file,
}
if (iter_fn)
- ret = do_iter_readv_writev(file, type, iov, nr_segs, tot_len,
- pos, iter_fn);
+ ret = do_iter_readv_writev(file, &iter, pos, iter_fn);
else if (fnv)
- ret = do_sync_readv_writev(file, iov, nr_segs, tot_len,
- pos, fnv);
+ ret = do_sync_readv_writev(file, &iter, pos, fnv);
else
- ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn);
+ ret = do_loop_readv_writev(file, &iter, pos, fn);
if (type != READ)
file_end_write(file);
out:
- if (iov != iovstack)
- kfree(iov);
+ kfree(iov);
if ((ret + (type == READ)) > 0) {
if (type == READ)
fsnotify_access(file);
@@ -1024,17 +1016,20 @@ static ssize_t compat_do_readv_writev(int type, struct file *file,
compat_ssize_t tot_len;
struct iovec iovstack[UIO_FASTIOV];
struct iovec *iov = iovstack;
+ struct iov_iter iter;
ssize_t ret;
io_fn_t fn;
iov_fn_t fnv;
iter_fn_t iter_fn;
- ret = compat_rw_copy_check_uvector(type, uvector, nr_segs,
- UIO_FASTIOV, iovstack, &iov);
- if (ret <= 0)
- goto out;
+ ret = compat_import_iovec(type, uvector, nr_segs,
+ UIO_FASTIOV, &iov, &iter);
+ if (ret < 0)
+ return ret;
- tot_len = ret;
+ tot_len = iov_iter_count(&iter);
+ if (!tot_len)
+ goto out;
ret = rw_verify_area(type, file, pos, tot_len);
if (ret < 0)
goto out;
@@ -1052,20 +1047,17 @@ static ssize_t compat_do_readv_writev(int type, struct file *file,
}
if (iter_fn)
- ret = do_iter_readv_writev(file, type, iov, nr_segs, tot_len,
- pos, iter_fn);
+ ret = do_iter_readv_writev(file, &iter, pos, iter_fn);
else if (fnv)
- ret = do_sync_readv_writev(file, iov, nr_segs, tot_len,
- pos, fnv);
+ ret = do_sync_readv_writev(file, &iter, pos, fnv);
else
- ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn);
+ ret = do_loop_readv_writev(file, &iter, pos, fn);
if (type != READ)
file_end_write(file);
out:
- if (iov != iovstack)
- kfree(iov);
+ kfree(iov);
if ((ret + (type == READ)) > 0) {
if (type == READ)
fsnotify_access(file);
diff --git a/fs/splice.c b/fs/splice.c
index 4bbfa95b5bfe..41cbb16299e0 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -1533,34 +1533,29 @@ static long vmsplice_to_user(struct file *file, const struct iovec __user *uiov,
struct iovec iovstack[UIO_FASTIOV];
struct iovec *iov = iovstack;
struct iov_iter iter;
- ssize_t count;
pipe = get_pipe_info(file);
if (!pipe)
return -EBADF;
- ret = rw_copy_check_uvector(READ, uiov, nr_segs,
- ARRAY_SIZE(iovstack), iovstack, &iov);
- if (ret <= 0)
- goto out;
-
- count = ret;
- iov_iter_init(&iter, READ, iov, nr_segs, count);
+ ret = import_iovec(READ, uiov, nr_segs,
+ ARRAY_SIZE(iovstack), &iov, &iter);
+ if (ret < 0)
+ return ret;
+ sd.total_len = iov_iter_count(&iter);
sd.len = 0;
- sd.total_len = count;
sd.flags = flags;
sd.u.data = &iter;
sd.pos = 0;
- pipe_lock(pipe);
- ret = __splice_from_pipe(pipe, &sd, pipe_to_user);
- pipe_unlock(pipe);
-
-out:
- if (iov != iovstack)
- kfree(iov);
+ if (sd.total_len) {
+ pipe_lock(pipe);
+ ret = __splice_from_pipe(pipe, &sd, pipe_to_user);
+ pipe_unlock(pipe);
+ }
+ kfree(iov);
return ret;
}
diff --git a/fs/stat.c b/fs/stat.c
index ae0c3cef9927..19636af5e75c 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -66,7 +66,7 @@ int vfs_getattr(struct path *path, struct kstat *stat)
{
int retval;
- retval = security_inode_getattr(path->mnt, path->dentry);
+ retval = security_inode_getattr(path);
if (retval)
return retval;
return vfs_getattr_nosec(path, stat);
diff --git a/fs/super.c b/fs/super.c
index 2b7dc90ccdbb..928c20f47af9 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -224,7 +224,7 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags)
s->s_maxbytes = MAX_NON_LFS;
s->s_op = &default_op;
s->s_time_gran = 1000000000;
- s->cleancache_poolid = -1;
+ s->cleancache_poolid = CLEANCACHE_NO_POOL;
s->s_shrink.seeks = DEFAULT_SEEKS;
s->s_shrink.scan_objects = super_cache_scan;
diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c
index 2554d8835b48..b400c04371f0 100644
--- a/fs/sysfs/group.c
+++ b/fs/sysfs/group.c
@@ -41,7 +41,7 @@ static int create_files(struct kernfs_node *parent, struct kobject *kobj,
if (grp->attrs) {
for (i = 0, attr = grp->attrs; *attr && !error; i++, attr++) {
- umode_t mode = 0;
+ umode_t mode = (*attr)->mode;
/*
* In update mode, we're changing the permissions or
@@ -55,9 +55,14 @@ static int create_files(struct kernfs_node *parent, struct kobject *kobj,
if (!mode)
continue;
}
+
+ WARN(mode & ~(SYSFS_PREALLOC | 0664),
+ "Attribute %s: Invalid permissions 0%o\n",
+ (*attr)->name, mode);
+
+ mode &= SYSFS_PREALLOC | 0664;
error = sysfs_add_file_mode_ns(parent, *attr, false,
- (*attr)->mode | mode,
- NULL);
+ mode, NULL);
if (unlikely(error))
break;
}
diff --git a/fs/tracefs/Makefile b/fs/tracefs/Makefile
new file mode 100644
index 000000000000..82fa35b656c4
--- /dev/null
+++ b/fs/tracefs/Makefile
@@ -0,0 +1,4 @@
+tracefs-objs := inode.o
+
+obj-$(CONFIG_TRACING) += tracefs.o
+
diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c
new file mode 100644
index 000000000000..d92bdf3b079a
--- /dev/null
+++ b/fs/tracefs/inode.c
@@ -0,0 +1,650 @@
+/*
+ * inode.c - part of tracefs, a pseudo file system for activating tracing
+ *
+ * Based on debugfs by: Greg Kroah-Hartman <greg@kroah.com>
+ *
+ * Copyright (C) 2014 Red Hat Inc, author: Steven Rostedt <srostedt@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * tracefs is the file system that is used by the tracing infrastructure.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/mount.h>
+#include <linux/kobject.h>
+#include <linux/namei.h>
+#include <linux/tracefs.h>
+#include <linux/fsnotify.h>
+#include <linux/seq_file.h>
+#include <linux/parser.h>
+#include <linux/magic.h>
+#include <linux/slab.h>
+
+#define TRACEFS_DEFAULT_MODE 0700
+
+static struct vfsmount *tracefs_mount;
+static int tracefs_mount_count;
+static bool tracefs_registered;
+
+static ssize_t default_read_file(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ return 0;
+}
+
+static ssize_t default_write_file(struct file *file, const char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ return count;
+}
+
+static const struct file_operations tracefs_file_operations = {
+ .read = default_read_file,
+ .write = default_write_file,
+ .open = simple_open,
+ .llseek = noop_llseek,
+};
+
+static struct tracefs_dir_ops {
+ int (*mkdir)(const char *name);
+ int (*rmdir)(const char *name);
+} tracefs_ops;
+
+static char *get_dname(struct dentry *dentry)
+{
+ const char *dname;
+ char *name;
+ int len = dentry->d_name.len;
+
+ dname = dentry->d_name.name;
+ name = kmalloc(len + 1, GFP_KERNEL);
+ if (!name)
+ return NULL;
+ memcpy(name, dname, len);
+ name[len] = 0;
+ return name;
+}
+
+static int tracefs_syscall_mkdir(struct inode *inode, struct dentry *dentry, umode_t mode)
+{
+ char *name;
+ int ret;
+
+ name = get_dname(dentry);
+ if (!name)
+ return -ENOMEM;
+
+ /*
+ * The mkdir call can call the generic functions that create
+ * the files within the tracefs system. It is up to the individual
+ * mkdir routine to handle races.
+ */
+ mutex_unlock(&inode->i_mutex);
+ ret = tracefs_ops.mkdir(name);
+ mutex_lock(&inode->i_mutex);
+
+ kfree(name);
+
+ return ret;
+}
+
+static int tracefs_syscall_rmdir(struct inode *inode, struct dentry *dentry)
+{
+ char *name;
+ int ret;
+
+ name = get_dname(dentry);
+ if (!name)
+ return -ENOMEM;
+
+ /*
+ * The rmdir call can call the generic functions that create
+ * the files within the tracefs system. It is up to the individual
+ * rmdir routine to handle races.
+ * This time we need to unlock not only the parent (inode) but
+ * also the directory that is being deleted.
+ */
+ mutex_unlock(&inode->i_mutex);
+ mutex_unlock(&dentry->d_inode->i_mutex);
+
+ ret = tracefs_ops.rmdir(name);
+
+ mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT);
+ mutex_lock(&dentry->d_inode->i_mutex);
+
+ kfree(name);
+
+ return ret;
+}
+
+static const struct inode_operations tracefs_dir_inode_operations = {
+ .lookup = simple_lookup,
+ .mkdir = tracefs_syscall_mkdir,
+ .rmdir = tracefs_syscall_rmdir,
+};
+
+static struct inode *tracefs_get_inode(struct super_block *sb)
+{
+ struct inode *inode = new_inode(sb);
+ if (inode) {
+ inode->i_ino = get_next_ino();
+ inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+ }
+ return inode;
+}
+
+struct tracefs_mount_opts {
+ kuid_t uid;
+ kgid_t gid;
+ umode_t mode;
+};
+
+enum {
+ Opt_uid,
+ Opt_gid,
+ Opt_mode,
+ Opt_err
+};
+
+static const match_table_t tokens = {
+ {Opt_uid, "uid=%u"},
+ {Opt_gid, "gid=%u"},
+ {Opt_mode, "mode=%o"},
+ {Opt_err, NULL}
+};
+
+struct tracefs_fs_info {
+ struct tracefs_mount_opts mount_opts;
+};
+
+static int tracefs_parse_options(char *data, struct tracefs_mount_opts *opts)
+{
+ substring_t args[MAX_OPT_ARGS];
+ int option;
+ int token;
+ kuid_t uid;
+ kgid_t gid;
+ char *p;
+
+ opts->mode = TRACEFS_DEFAULT_MODE;
+
+ while ((p = strsep(&data, ",")) != NULL) {
+ if (!*p)
+ continue;
+
+ token = match_token(p, tokens, args);
+ switch (token) {
+ case Opt_uid:
+ if (match_int(&args[0], &option))
+ return -EINVAL;
+ uid = make_kuid(current_user_ns(), option);
+ if (!uid_valid(uid))
+ return -EINVAL;
+ opts->uid = uid;
+ break;
+ case Opt_gid:
+ if (match_int(&args[0], &option))
+ return -EINVAL;
+ gid = make_kgid(current_user_ns(), option);
+ if (!gid_valid(gid))
+ return -EINVAL;
+ opts->gid = gid;
+ break;
+ case Opt_mode:
+ if (match_octal(&args[0], &option))
+ return -EINVAL;
+ opts->mode = option & S_IALLUGO;
+ break;
+ /*
+ * We might like to report bad mount options here;
+ * but traditionally tracefs has ignored all mount options
+ */
+ }
+ }
+
+ return 0;
+}
+
+static int tracefs_apply_options(struct super_block *sb)
+{
+ struct tracefs_fs_info *fsi = sb->s_fs_info;
+ struct inode *inode = sb->s_root->d_inode;
+ struct tracefs_mount_opts *opts = &fsi->mount_opts;
+
+ inode->i_mode &= ~S_IALLUGO;
+ inode->i_mode |= opts->mode;
+
+ inode->i_uid = opts->uid;
+ inode->i_gid = opts->gid;
+
+ return 0;
+}
+
+static int tracefs_remount(struct super_block *sb, int *flags, char *data)
+{
+ int err;
+ struct tracefs_fs_info *fsi = sb->s_fs_info;
+
+ sync_filesystem(sb);
+ err = tracefs_parse_options(data, &fsi->mount_opts);
+ if (err)
+ goto fail;
+
+ tracefs_apply_options(sb);
+
+fail:
+ return err;
+}
+
+static int tracefs_show_options(struct seq_file *m, struct dentry *root)
+{
+ struct tracefs_fs_info *fsi = root->d_sb->s_fs_info;
+ struct tracefs_mount_opts *opts = &fsi->mount_opts;
+
+ if (!uid_eq(opts->uid, GLOBAL_ROOT_UID))
+ seq_printf(m, ",uid=%u",
+ from_kuid_munged(&init_user_ns, opts->uid));
+ if (!gid_eq(opts->gid, GLOBAL_ROOT_GID))
+ seq_printf(m, ",gid=%u",
+ from_kgid_munged(&init_user_ns, opts->gid));
+ if (opts->mode != TRACEFS_DEFAULT_MODE)
+ seq_printf(m, ",mode=%o", opts->mode);
+
+ return 0;
+}
+
+static const struct super_operations tracefs_super_operations = {
+ .statfs = simple_statfs,
+ .remount_fs = tracefs_remount,
+ .show_options = tracefs_show_options,
+};
+
+static int trace_fill_super(struct super_block *sb, void *data, int silent)
+{
+ static struct tree_descr trace_files[] = {{""}};
+ struct tracefs_fs_info *fsi;
+ int err;
+
+ save_mount_options(sb, data);
+
+ fsi = kzalloc(sizeof(struct tracefs_fs_info), GFP_KERNEL);
+ sb->s_fs_info = fsi;
+ if (!fsi) {
+ err = -ENOMEM;
+ goto fail;
+ }
+
+ err = tracefs_parse_options(data, &fsi->mount_opts);
+ if (err)
+ goto fail;
+
+ err = simple_fill_super(sb, TRACEFS_MAGIC, trace_files);
+ if (err)
+ goto fail;
+
+ sb->s_op = &tracefs_super_operations;
+
+ tracefs_apply_options(sb);
+
+ return 0;
+
+fail:
+ kfree(fsi);
+ sb->s_fs_info = NULL;
+ return err;
+}
+
+static struct dentry *trace_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name,
+ void *data)
+{
+ return mount_single(fs_type, flags, data, trace_fill_super);
+}
+
+static struct file_system_type trace_fs_type = {
+ .owner = THIS_MODULE,
+ .name = "tracefs",
+ .mount = trace_mount,
+ .kill_sb = kill_litter_super,
+};
+MODULE_ALIAS_FS("tracefs");
+
+static struct dentry *start_creating(const char *name, struct dentry *parent)
+{
+ struct dentry *dentry;
+ int error;
+
+ pr_debug("tracefs: creating file '%s'\n",name);
+
+ error = simple_pin_fs(&trace_fs_type, &tracefs_mount,
+ &tracefs_mount_count);
+ if (error)
+ return ERR_PTR(error);
+
+ /* If the parent is not specified, we create it in the root.
+ * We need the root dentry to do this, which is in the super
+ * block. A pointer to that is in the struct vfsmount that we
+ * have around.
+ */
+ if (!parent)
+ parent = tracefs_mount->mnt_root;
+
+ mutex_lock(&parent->d_inode->i_mutex);
+ dentry = lookup_one_len(name, parent, strlen(name));
+ if (!IS_ERR(dentry) && dentry->d_inode) {
+ dput(dentry);
+ dentry = ERR_PTR(-EEXIST);
+ }
+ if (IS_ERR(dentry))
+ mutex_unlock(&parent->d_inode->i_mutex);
+ return dentry;
+}
+
+static struct dentry *failed_creating(struct dentry *dentry)
+{
+ mutex_unlock(&dentry->d_parent->d_inode->i_mutex);
+ dput(dentry);
+ simple_release_fs(&tracefs_mount, &tracefs_mount_count);
+ return NULL;
+}
+
+static struct dentry *end_creating(struct dentry *dentry)
+{
+ mutex_unlock(&dentry->d_parent->d_inode->i_mutex);
+ return dentry;
+}
+
+/**
+ * tracefs_create_file - create a file in the tracefs filesystem
+ * @name: a pointer to a string containing the name of the file to create.
+ * @mode: the permission that the file should have.
+ * @parent: a pointer to the parent dentry for this file. This should be a
+ * directory dentry if set. If this parameter is NULL, then the
+ * file will be created in the root of the tracefs filesystem.
+ * @data: a pointer to something that the caller will want to get to later
+ * on. The inode.i_private pointer will point to this value on
+ * the open() call.
+ * @fops: a pointer to a struct file_operations that should be used for
+ * this file.
+ *
+ * This is the basic "create a file" function for tracefs. It allows for a
+ * wide range of flexibility in creating a file, or a directory (if you want
+ * to create a directory, the tracefs_create_dir() function is
+ * recommended to be used instead.)
+ *
+ * This function will return a pointer to a dentry if it succeeds. This
+ * pointer must be passed to the tracefs_remove() function when the file is
+ * to be removed (no automatic cleanup happens if your module is unloaded,
+ * you are responsible here.) If an error occurs, %NULL will be returned.
+ *
+ * If tracefs is not enabled in the kernel, the value -%ENODEV will be
+ * returned.
+ */
+struct dentry *tracefs_create_file(const char *name, umode_t mode,
+ struct dentry *parent, void *data,
+ const struct file_operations *fops)
+{
+ struct dentry *dentry;
+ struct inode *inode;
+
+ if (!(mode & S_IFMT))
+ mode |= S_IFREG;
+ BUG_ON(!S_ISREG(mode));
+ dentry = start_creating(name, parent);
+
+ if (IS_ERR(dentry))
+ return NULL;
+
+ inode = tracefs_get_inode(dentry->d_sb);
+ if (unlikely(!inode))
+ return failed_creating(dentry);
+
+ inode->i_mode = mode;
+ inode->i_fop = fops ? fops : &tracefs_file_operations;
+ inode->i_private = data;
+ d_instantiate(dentry, inode);
+ fsnotify_create(dentry->d_parent->d_inode, dentry);
+ return end_creating(dentry);
+}
+
+static struct dentry *__create_dir(const char *name, struct dentry *parent,
+ const struct inode_operations *ops)
+{
+ struct dentry *dentry = start_creating(name, parent);
+ struct inode *inode;
+
+ if (IS_ERR(dentry))
+ return NULL;
+
+ inode = tracefs_get_inode(dentry->d_sb);
+ if (unlikely(!inode))
+ return failed_creating(dentry);
+
+ inode->i_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO;
+ inode->i_op = ops;
+ inode->i_fop = &simple_dir_operations;
+
+ /* directory inodes start off with i_nlink == 2 (for "." entry) */
+ inc_nlink(inode);
+ d_instantiate(dentry, inode);
+ inc_nlink(dentry->d_parent->d_inode);
+ fsnotify_mkdir(dentry->d_parent->d_inode, dentry);
+ return end_creating(dentry);
+}
+
+/**
+ * tracefs_create_dir - create a directory in the tracefs filesystem
+ * @name: a pointer to a string containing the name of the directory to
+ * create.
+ * @parent: a pointer to the parent dentry for this file. This should be a
+ * directory dentry if set. If this parameter is NULL, then the
+ * directory will be created in the root of the tracefs filesystem.
+ *
+ * This function creates a directory in tracefs with the given name.
+ *
+ * This function will return a pointer to a dentry if it succeeds. This
+ * pointer must be passed to the tracefs_remove() function when the file is
+ * to be removed. If an error occurs, %NULL will be returned.
+ *
+ * If tracing is not enabled in the kernel, the value -%ENODEV will be
+ * returned.
+ */
+struct dentry *tracefs_create_dir(const char *name, struct dentry *parent)
+{
+ return __create_dir(name, parent, &simple_dir_inode_operations);
+}
+
+/**
+ * tracefs_create_instance_dir - create the tracing instances directory
+ * @name: The name of the instances directory to create
+ * @parent: The parent directory that the instances directory will exist
+ * @mkdir: The function to call when a mkdir is performed.
+ * @rmdir: The function to call when a rmdir is performed.
+ *
+ * Only one instances directory is allowed.
+ *
+ * The instances directory is special as it allows for mkdir and rmdir to
+ * to be done by userspace. When a mkdir or rmdir is performed, the inode
+ * locks are released and the methhods passed in (@mkdir and @rmdir) are
+ * called without locks and with the name of the directory being created
+ * within the instances directory.
+ *
+ * Returns the dentry of the instances directory.
+ */
+struct dentry *tracefs_create_instance_dir(const char *name, struct dentry *parent,
+ int (*mkdir)(const char *name),
+ int (*rmdir)(const char *name))
+{
+ struct dentry *dentry;
+
+ /* Only allow one instance of the instances directory. */
+ if (WARN_ON(tracefs_ops.mkdir || tracefs_ops.rmdir))
+ return NULL;
+
+ dentry = __create_dir(name, parent, &tracefs_dir_inode_operations);
+ if (!dentry)
+ return NULL;
+
+ tracefs_ops.mkdir = mkdir;
+ tracefs_ops.rmdir = rmdir;
+
+ return dentry;
+}
+
+static inline int tracefs_positive(struct dentry *dentry)
+{
+ return dentry->d_inode && !d_unhashed(dentry);
+}
+
+static int __tracefs_remove(struct dentry *dentry, struct dentry *parent)
+{
+ int ret = 0;
+
+ if (tracefs_positive(dentry)) {
+ if (dentry->d_inode) {
+ dget(dentry);
+ switch (dentry->d_inode->i_mode & S_IFMT) {
+ case S_IFDIR:
+ ret = simple_rmdir(parent->d_inode, dentry);
+ break;
+ default:
+ simple_unlink(parent->d_inode, dentry);
+ break;
+ }
+ if (!ret)
+ d_delete(dentry);
+ dput(dentry);
+ }
+ }
+ return ret;
+}
+
+/**
+ * tracefs_remove - removes a file or directory from the tracefs filesystem
+ * @dentry: a pointer to a the dentry of the file or directory to be
+ * removed.
+ *
+ * This function removes a file or directory in tracefs that was previously
+ * created with a call to another tracefs function (like
+ * tracefs_create_file() or variants thereof.)
+ */
+void tracefs_remove(struct dentry *dentry)
+{
+ struct dentry *parent;
+ int ret;
+
+ if (IS_ERR_OR_NULL(dentry))
+ return;
+
+ parent = dentry->d_parent;
+ if (!parent || !parent->d_inode)
+ return;
+
+ mutex_lock(&parent->d_inode->i_mutex);
+ ret = __tracefs_remove(dentry, parent);
+ mutex_unlock(&parent->d_inode->i_mutex);
+ if (!ret)
+ simple_release_fs(&tracefs_mount, &tracefs_mount_count);
+}
+
+/**
+ * tracefs_remove_recursive - recursively removes a directory
+ * @dentry: a pointer to a the dentry of the directory to be removed.
+ *
+ * This function recursively removes a directory tree in tracefs that
+ * was previously created with a call to another tracefs function
+ * (like tracefs_create_file() or variants thereof.)
+ */
+void tracefs_remove_recursive(struct dentry *dentry)
+{
+ struct dentry *child, *parent;
+
+ if (IS_ERR_OR_NULL(dentry))
+ return;
+
+ parent = dentry->d_parent;
+ if (!parent || !parent->d_inode)
+ return;
+
+ parent = dentry;
+ down:
+ mutex_lock(&parent->d_inode->i_mutex);
+ loop:
+ /*
+ * The parent->d_subdirs is protected by the d_lock. Outside that
+ * lock, the child can be unlinked and set to be freed which can
+ * use the d_u.d_child as the rcu head and corrupt this list.
+ */
+ spin_lock(&parent->d_lock);
+ list_for_each_entry(child, &parent->d_subdirs, d_child) {
+ if (!tracefs_positive(child))
+ continue;
+
+ /* perhaps simple_empty(child) makes more sense */
+ if (!list_empty(&child->d_subdirs)) {
+ spin_unlock(&parent->d_lock);
+ mutex_unlock(&parent->d_inode->i_mutex);
+ parent = child;
+ goto down;
+ }
+
+ spin_unlock(&parent->d_lock);
+
+ if (!__tracefs_remove(child, parent))
+ simple_release_fs(&tracefs_mount, &tracefs_mount_count);
+
+ /*
+ * The parent->d_lock protects agaist child from unlinking
+ * from d_subdirs. When releasing the parent->d_lock we can
+ * no longer trust that the next pointer is valid.
+ * Restart the loop. We'll skip this one with the
+ * tracefs_positive() check.
+ */
+ goto loop;
+ }
+ spin_unlock(&parent->d_lock);
+
+ mutex_unlock(&parent->d_inode->i_mutex);
+ child = parent;
+ parent = parent->d_parent;
+ mutex_lock(&parent->d_inode->i_mutex);
+
+ if (child != dentry)
+ /* go up */
+ goto loop;
+
+ if (!__tracefs_remove(child, parent))
+ simple_release_fs(&tracefs_mount, &tracefs_mount_count);
+ mutex_unlock(&parent->d_inode->i_mutex);
+}
+
+/**
+ * tracefs_initialized - Tells whether tracefs has been registered
+ */
+bool tracefs_initialized(void)
+{
+ return tracefs_registered;
+}
+
+static struct kobject *trace_kobj;
+
+static int __init tracefs_init(void)
+{
+ int retval;
+
+ trace_kobj = kobject_create_and_add("tracing", kernel_kobj);
+ if (!trace_kobj)
+ return -EINVAL;
+
+ retval = register_filesystem(&trace_fs_type);
+ if (!retval)
+ tracefs_registered = true;
+
+ return retval;
+}
+core_initcall(tracefs_init);