diff options
Diffstat (limited to 'fs')
43 files changed, 630 insertions, 520 deletions
@@ -1367,6 +1367,39 @@ out: return ret; } +#ifdef CONFIG_COMPAT +COMPAT_SYSCALL_DEFINE2(io_setup, unsigned, nr_events, u32 __user *, ctx32p) +{ + struct kioctx *ioctx = NULL; + unsigned long ctx; + long ret; + + ret = get_user(ctx, ctx32p); + if (unlikely(ret)) + goto out; + + ret = -EINVAL; + if (unlikely(ctx || nr_events == 0)) { + pr_debug("EINVAL: ctx %lu nr_events %u\n", + ctx, nr_events); + goto out; + } + + ioctx = ioctx_alloc(nr_events); + ret = PTR_ERR(ioctx); + if (!IS_ERR(ioctx)) { + /* truncating is ok because it's a user address */ + ret = put_user((u32)ioctx->user_id, ctx32p); + if (ret) + kill_ioctx(current->mm, ioctx, NULL); + percpu_ref_put(&ioctx->users); + } + +out: + return ret; +} +#endif + /* sys_io_destroy: * Destroy the aio_context specified. May cancel any outstanding * AIOs and block on completion. Will fail with -ENOSYS if not @@ -1591,8 +1624,8 @@ out_put_req: return ret; } -long do_io_submit(aio_context_t ctx_id, long nr, - struct iocb __user *__user *iocbpp, bool compat) +static long do_io_submit(aio_context_t ctx_id, long nr, + struct iocb __user *__user *iocbpp, bool compat) { struct kioctx *ctx; long ret = 0; @@ -1662,6 +1695,44 @@ SYSCALL_DEFINE3(io_submit, aio_context_t, ctx_id, long, nr, return do_io_submit(ctx_id, nr, iocbpp, 0); } +#ifdef CONFIG_COMPAT +static inline long +copy_iocb(long nr, u32 __user *ptr32, struct iocb __user * __user *ptr64) +{ + compat_uptr_t uptr; + int i; + + for (i = 0; i < nr; ++i) { + if (get_user(uptr, ptr32 + i)) + return -EFAULT; + if (put_user(compat_ptr(uptr), ptr64 + i)) + return -EFAULT; + } + return 0; +} + +#define MAX_AIO_SUBMITS (PAGE_SIZE/sizeof(struct iocb *)) + +COMPAT_SYSCALL_DEFINE3(io_submit, compat_aio_context_t, ctx_id, + int, nr, u32 __user *, iocb) +{ + struct iocb __user * __user *iocb64; + long ret; + + if (unlikely(nr < 0)) + return -EINVAL; + + if (nr > MAX_AIO_SUBMITS) + nr = MAX_AIO_SUBMITS; + + iocb64 = compat_alloc_user_space(nr * sizeof(*iocb64)); + ret = copy_iocb(nr, iocb, iocb64); + if (!ret) + ret = do_io_submit(ctx_id, nr, iocb64, 1); + return ret; +} +#endif + /* lookup_kiocb * Finds a given iocb for cancellation. */ @@ -1761,3 +1832,25 @@ SYSCALL_DEFINE5(io_getevents, aio_context_t, ctx_id, } return ret; } + +#ifdef CONFIG_COMPAT +COMPAT_SYSCALL_DEFINE5(io_getevents, compat_aio_context_t, ctx_id, + compat_long_t, min_nr, + compat_long_t, nr, + struct io_event __user *, events, + struct compat_timespec __user *, timeout) +{ + struct timespec t; + struct timespec __user *ut = NULL; + + if (timeout) { + if (compat_get_timespec(&t, timeout)) + return -EFAULT; + + ut = compat_alloc_user_space(sizeof(*ut)); + if (copy_to_user(ut, &t, sizeof(t))) + return -EFAULT; + } + return sys_io_getevents(ctx_id, min_nr, nr, events, ut); +} +#endif diff --git a/fs/befs/befs.h b/fs/befs/befs.h index c6bad51d8ec7..b914cfb03820 100644 --- a/fs/befs/befs.h +++ b/fs/befs/befs.h @@ -129,6 +129,7 @@ static inline befs_inode_addr blockno2iaddr(struct super_block *sb, befs_blocknr_t blockno) { befs_inode_addr iaddr; + iaddr.allocation_group = blockno >> BEFS_SB(sb)->ag_shift; iaddr.start = blockno - (iaddr.allocation_group << BEFS_SB(sb)->ag_shift); @@ -140,7 +141,7 @@ blockno2iaddr(struct super_block *sb, befs_blocknr_t blockno) static inline unsigned int befs_iaddrs_per_block(struct super_block *sb) { - return BEFS_SB(sb)->block_size / sizeof (befs_disk_inode_addr); + return BEFS_SB(sb)->block_size / sizeof(befs_disk_inode_addr); } #include "endian.h" diff --git a/fs/befs/befs_fs_types.h b/fs/befs/befs_fs_types.h index eb557d9dc8be..69c9d8cde955 100644 --- a/fs/befs/befs_fs_types.h +++ b/fs/befs/befs_fs_types.h @@ -55,12 +55,12 @@ enum super_flags { }; #define BEFS_BYTEORDER_NATIVE 0x42494745 -#define BEFS_BYTEORDER_NATIVE_LE (__force fs32)cpu_to_le32(BEFS_BYTEORDER_NATIVE) -#define BEFS_BYTEORDER_NATIVE_BE (__force fs32)cpu_to_be32(BEFS_BYTEORDER_NATIVE) +#define BEFS_BYTEORDER_NATIVE_LE ((__force fs32)cpu_to_le32(BEFS_BYTEORDER_NATIVE)) +#define BEFS_BYTEORDER_NATIVE_BE ((__force fs32)cpu_to_be32(BEFS_BYTEORDER_NATIVE)) #define BEFS_SUPER_MAGIC BEFS_SUPER_MAGIC1 -#define BEFS_SUPER_MAGIC1_LE (__force fs32)cpu_to_le32(BEFS_SUPER_MAGIC1) -#define BEFS_SUPER_MAGIC1_BE (__force fs32)cpu_to_be32(BEFS_SUPER_MAGIC1) +#define BEFS_SUPER_MAGIC1_LE ((__force fs32)cpu_to_le32(BEFS_SUPER_MAGIC1)) +#define BEFS_SUPER_MAGIC1_BE ((__force fs32)cpu_to_be32(BEFS_SUPER_MAGIC1)) /* * Flags of inode @@ -79,7 +79,7 @@ enum inode_flags { BEFS_INODE_WAS_WRITTEN = 0x00020000, BEFS_NO_TRANSACTION = 0x00040000, }; -/* +/* * On-Disk datastructures of BeFS */ @@ -139,7 +139,7 @@ typedef struct { } PACKED befs_super_block; -/* +/* * Note: the indirect and dbl_indir block_runs may * be longer than one block! */ diff --git a/fs/befs/btree.c b/fs/befs/btree.c index 7e135ea73fdd..d509887c580c 100644 --- a/fs/befs/btree.c +++ b/fs/befs/btree.c @@ -12,8 +12,8 @@ * * Dominic Giampaolo, author of "Practical File System * Design with the Be File System", for such a helpful book. - * - * Marcus J. Ranum, author of the b+tree package in + * + * Marcus J. Ranum, author of the b+tree package in * comp.sources.misc volume 10. This code is not copied from that * work, but it is partially based on it. * @@ -38,38 +38,38 @@ */ /* Befs B+tree structure: - * + * * The first thing in the tree is the tree superblock. It tells you * all kinds of useful things about the tree, like where the rootnode * is located, and the size of the nodes (always 1024 with current version * of BeOS). * * The rest of the tree consists of a series of nodes. Nodes contain a header - * (struct befs_btree_nodehead), the packed key data, an array of shorts + * (struct befs_btree_nodehead), the packed key data, an array of shorts * containing the ending offsets for each of the keys, and an array of - * befs_off_t values. In interior nodes, the keys are the ending keys for - * the childnode they point to, and the values are offsets into the - * datastream containing the tree. + * befs_off_t values. In interior nodes, the keys are the ending keys for + * the childnode they point to, and the values are offsets into the + * datastream containing the tree. */ /* Note: - * - * The book states 2 confusing things about befs b+trees. First, + * + * The book states 2 confusing things about befs b+trees. First, * it states that the overflow field of node headers is used by internal nodes * to point to another node that "effectively continues this one". Here is what * I believe that means. Each key in internal nodes points to another node that - * contains key values less than itself. Inspection reveals that the last key - * in the internal node is not the last key in the index. Keys that are - * greater than the last key in the internal node go into the overflow node. + * contains key values less than itself. Inspection reveals that the last key + * in the internal node is not the last key in the index. Keys that are + * greater than the last key in the internal node go into the overflow node. * I imagine there is a performance reason for this. * - * Second, it states that the header of a btree node is sufficient to - * distinguish internal nodes from leaf nodes. Without saying exactly how. + * Second, it states that the header of a btree node is sufficient to + * distinguish internal nodes from leaf nodes. Without saying exactly how. * After figuring out the first, it becomes obvious that internal nodes have * overflow nodes and leafnodes do not. */ -/* +/* * Currently, this code is only good for directory B+trees. * In order to be used for other BFS indexes, it needs to be extended to handle * duplicate keys and non-string keytypes (int32, int64, float, double). @@ -237,8 +237,8 @@ befs_bt_read_node(struct super_block *sb, const befs_data_stream *ds, * with @key (usually the disk block number of an inode). * * On failure, returns BEFS_ERR or BEFS_BT_NOT_FOUND. - * - * Algorithm: + * + * Algorithm: * Read the superblock and rootnode of the b+tree. * Drill down through the interior nodes using befs_find_key(). * Once at the correct leaf node, use befs_find_key() again to get the @@ -402,12 +402,12 @@ befs_find_key(struct super_block *sb, struct befs_btree_node *node, * * Here's how it works: Key_no is the index of the key/value pair to * return in keybuf/value. - * Bufsize is the size of keybuf (BEFS_NAME_LEN+1 is a good size). Keysize is + * Bufsize is the size of keybuf (BEFS_NAME_LEN+1 is a good size). Keysize is * the number of characters in the key (just a convenience). * * Algorithm: * Get the first leafnode of the tree. See if the requested key is in that - * node. If not, follow the node->right link to the next leafnode. Repeat + * node. If not, follow the node->right link to the next leafnode. Repeat * until the (key_no)th key is found or the tree is out of keys. */ int @@ -536,7 +536,7 @@ befs_btree_read(struct super_block *sb, const befs_data_stream *ds, * @node_off: Pointer to offset of current node within datastream. Modified * by the function. * - * Helper function for btree traverse. Moves the current position to the + * Helper function for btree traverse. Moves the current position to the * start of the first leaf node. * * Also checks for an empty tree. If there are no keys, returns BEFS_BT_EMPTY. @@ -592,10 +592,10 @@ befs_btree_seekleaf(struct super_block *sb, const befs_data_stream *ds, } /** - * befs_leafnode - Determine if the btree node is a leaf node or an + * befs_leafnode - Determine if the btree node is a leaf node or an * interior node * @node: Pointer to node structure to test - * + * * Return 1 if leaf, 0 if interior */ static int @@ -656,7 +656,7 @@ befs_bt_valarray(struct befs_btree_node *node) * @node: Pointer to the node structure to find the keydata array within * * Returns a pointer to the start of the keydata array - * of the node pointed to by the node header + * of the node pointed to by the node header */ static char * befs_bt_keydata(struct befs_btree_node *node) @@ -702,7 +702,7 @@ befs_bt_get_key(struct super_block *sb, struct befs_btree_node *node, /** * befs_compare_strings - compare two strings - * @key1: pointer to the first key to be compared + * @key1: pointer to the first key to be compared * @keylen1: length in bytes of key1 * @key2: pointer to the second key to be compared * @keylen2: length in bytes of key2 diff --git a/fs/befs/btree.h b/fs/befs/btree.h index f2a8f637e9e0..60c6c728e64e 100644 --- a/fs/befs/btree.h +++ b/fs/befs/btree.h @@ -1,13 +1,11 @@ /* * btree.h - * + * */ - int befs_btree_find(struct super_block *sb, const befs_data_stream *ds, - const char *key, befs_off_t * value); + const char *key, befs_off_t *value); int befs_btree_read(struct super_block *sb, const befs_data_stream *ds, loff_t key_no, size_t bufsize, char *keybuf, - size_t * keysize, befs_off_t * value); - + size_t *keysize, befs_off_t *value); diff --git a/fs/befs/datastream.c b/fs/befs/datastream.c index b4c7ba013c0d..720b3bc5c16a 100644 --- a/fs/befs/datastream.c +++ b/fs/befs/datastream.c @@ -84,13 +84,11 @@ befs_read_datastream(struct super_block *sb, const befs_data_stream *ds, * * Takes a file position and gives back a brun who's starting block * is block number fblock of the file. - * + * * Returns BEFS_OK or BEFS_ERR. - * + * * Calls specialized functions for each of the three possible * datastream regions. - * - * 2001-11-15 Will Dyson */ int befs_fblock2brun(struct super_block *sb, const befs_data_stream *data, @@ -120,7 +118,7 @@ befs_fblock2brun(struct super_block *sb, const befs_data_stream *data, /** * befs_read_lsmylink - read long symlink from datastream. - * @sb: Filesystem superblock + * @sb: Filesystem superblock * @ds: Datastream to read from * @buff: Buffer in which to place long symlink data * @len: Length of the long symlink in bytes diff --git a/fs/befs/datastream.h b/fs/befs/datastream.h index 91ba8203d83f..7ff9ff09ec6e 100644 --- a/fs/befs/datastream.h +++ b/fs/befs/datastream.h @@ -5,10 +5,10 @@ struct buffer_head *befs_read_datastream(struct super_block *sb, const befs_data_stream *ds, - befs_off_t pos, uint * off); + befs_off_t pos, uint *off); int befs_fblock2brun(struct super_block *sb, const befs_data_stream *data, - befs_blocknr_t fblock, befs_block_run * run); + befs_blocknr_t fblock, befs_block_run *run); size_t befs_read_lsymlink(struct super_block *sb, const befs_data_stream *data, void *buff, befs_off_t len); @@ -17,4 +17,3 @@ befs_blocknr_t befs_count_blocks(struct super_block *sb, const befs_data_stream *ds); extern const befs_inode_addr BAD_IADDR; - diff --git a/fs/befs/debug.c b/fs/befs/debug.c index 85c13392e9e8..36656c86f50e 100644 --- a/fs/befs/debug.c +++ b/fs/befs/debug.c @@ -1,6 +1,6 @@ /* * linux/fs/befs/debug.c - * + * * Copyright (C) 2001 Will Dyson (will_dyson at pobox.com) * * With help from the ntfs-tng driver by Anton Altparmakov @@ -57,6 +57,7 @@ befs_debug(const struct super_block *sb, const char *fmt, ...) struct va_format vaf; va_list args; + va_start(args, fmt); vaf.fmt = fmt; vaf.va = &args; @@ -67,7 +68,7 @@ befs_debug(const struct super_block *sb, const char *fmt, ...) } void -befs_dump_inode(const struct super_block *sb, befs_inode * inode) +befs_dump_inode(const struct super_block *sb, befs_inode *inode) { #ifdef CONFIG_BEFS_DEBUG @@ -151,7 +152,7 @@ befs_dump_inode(const struct super_block *sb, befs_inode * inode) */ void -befs_dump_super_block(const struct super_block *sb, befs_super_block * sup) +befs_dump_super_block(const struct super_block *sb, befs_super_block *sup) { #ifdef CONFIG_BEFS_DEBUG @@ -202,7 +203,7 @@ befs_dump_super_block(const struct super_block *sb, befs_super_block * sup) #if 0 /* unused */ void -befs_dump_small_data(const struct super_block *sb, befs_small_data * sd) +befs_dump_small_data(const struct super_block *sb, befs_small_data *sd) { } @@ -221,7 +222,8 @@ befs_dump_run(const struct super_block *sb, befs_disk_block_run run) #endif /* 0 */ void -befs_dump_index_entry(const struct super_block *sb, befs_disk_btree_super * super) +befs_dump_index_entry(const struct super_block *sb, + befs_disk_btree_super *super) { #ifdef CONFIG_BEFS_DEBUG @@ -242,7 +244,7 @@ befs_dump_index_entry(const struct super_block *sb, befs_disk_btree_super * supe } void -befs_dump_index_node(const struct super_block *sb, befs_btree_nodehead * node) +befs_dump_index_node(const struct super_block *sb, befs_btree_nodehead *node) { #ifdef CONFIG_BEFS_DEBUG diff --git a/fs/befs/inode.c b/fs/befs/inode.c index fa4b718de597..5367a6470a69 100644 --- a/fs/befs/inode.c +++ b/fs/befs/inode.c @@ -1,6 +1,6 @@ /* * inode.c - * + * * Copyright (C) 2001 Will Dyson <will_dyson@pobox.com> */ @@ -10,12 +10,12 @@ #include "inode.h" /* - Validates the correctness of the befs inode - Returns BEFS_OK if the inode should be used, otherwise - returns BEFS_BAD_INODE -*/ + * Validates the correctness of the befs inode + * Returns BEFS_OK if the inode should be used, otherwise + * returns BEFS_BAD_INODE + */ int -befs_check_inode(struct super_block *sb, befs_inode * raw_inode, +befs_check_inode(struct super_block *sb, befs_inode *raw_inode, befs_blocknr_t inode) { u32 magic1 = fs32_to_cpu(sb, raw_inode->magic1); diff --git a/fs/befs/inode.h b/fs/befs/inode.h index 9dc7fd9b7570..2219e412f49b 100644 --- a/fs/befs/inode.h +++ b/fs/befs/inode.h @@ -1,8 +1,7 @@ /* * inode.h - * + * */ -int befs_check_inode(struct super_block *sb, befs_inode * raw_inode, +int befs_check_inode(struct super_block *sb, befs_inode *raw_inode, befs_blocknr_t inode); - diff --git a/fs/befs/io.c b/fs/befs/io.c index b4a558126ee1..227cb86e07fe 100644 --- a/fs/befs/io.c +++ b/fs/befs/io.c @@ -3,7 +3,7 @@ * * Copyright (C) 2001 Will Dyson <will_dyson@pobox.com * - * Based on portions of file.c and inode.c + * Based on portions of file.c and inode.c * by Makoto Kato (m_kato@ga2.so-net.ne.jp) * * Many thanks to Dominic Giampaolo, author of Practical File System @@ -19,8 +19,7 @@ /* * Converts befs notion of disk addr to a disk offset and uses * linux kernel function sb_bread() to get the buffer containing - * the offset. -Will Dyson - * + * the offset. */ struct buffer_head * @@ -55,7 +54,7 @@ befs_bread_iaddr(struct super_block *sb, befs_inode_addr iaddr) befs_debug(sb, "<--- %s", __func__); return bh; - error: +error: befs_debug(sb, "<--- %s ERROR", __func__); return NULL; } diff --git a/fs/befs/io.h b/fs/befs/io.h index 78d7bc6e60de..9b3e1967cb31 100644 --- a/fs/befs/io.h +++ b/fs/befs/io.h @@ -4,4 +4,3 @@ struct buffer_head *befs_bread_iaddr(struct super_block *sb, befs_inode_addr iaddr); - diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index 647a276eba56..19407165f4aa 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c @@ -18,6 +18,7 @@ #include <linux/parser.h> #include <linux/namei.h> #include <linux/sched.h> +#include <linux/exportfs.h> #include "befs.h" #include "btree.h" @@ -37,7 +38,8 @@ static int befs_readdir(struct file *, struct dir_context *); static int befs_get_block(struct inode *, sector_t, struct buffer_head *, int); static int befs_readpage(struct file *file, struct page *page); static sector_t befs_bmap(struct address_space *mapping, sector_t block); -static struct dentry *befs_lookup(struct inode *, struct dentry *, unsigned int); +static struct dentry *befs_lookup(struct inode *, struct dentry *, + unsigned int); static struct inode *befs_iget(struct super_block *, unsigned long); static struct inode *befs_alloc_inode(struct super_block *sb); static void befs_destroy_inode(struct inode *inode); @@ -51,6 +53,10 @@ static void befs_put_super(struct super_block *); static int befs_remount(struct super_block *, int *, char *); static int befs_statfs(struct dentry *, struct kstatfs *); static int parse_options(char *, struct befs_mount_options *); +static struct dentry *befs_fh_to_dentry(struct super_block *sb, + struct fid *fid, int fh_len, int fh_type); +static struct dentry *befs_fh_to_parent(struct super_block *sb, + struct fid *fid, int fh_len, int fh_type); static const struct super_operations befs_sops = { .alloc_inode = befs_alloc_inode, /* allocate a new inode */ @@ -83,9 +89,14 @@ static const struct address_space_operations befs_symlink_aops = { .readpage = befs_symlink_readpage, }; -/* +static const struct export_operations befs_export_operations = { + .fh_to_dentry = befs_fh_to_dentry, + .fh_to_parent = befs_fh_to_parent, +}; + +/* * Called by generic_file_read() to read a page of data - * + * * In turn, simply calls a generic block read function and * passes it the address of befs_get_block, for mapping file * positions to disk blocks. @@ -102,15 +113,13 @@ befs_bmap(struct address_space *mapping, sector_t block) return generic_block_bmap(mapping, block, befs_get_block); } -/* - * Generic function to map a file position (block) to a +/* + * Generic function to map a file position (block) to a * disk offset (passed back in bh_result). * * Used by many higher level functions. * * Calls befs_fblock2brun() in datastream.c to do the real work. - * - * -WD 10-26-01 */ static int @@ -269,15 +278,15 @@ befs_alloc_inode(struct super_block *sb) struct befs_inode_info *bi; bi = kmem_cache_alloc(befs_inode_cachep, GFP_KERNEL); - if (!bi) - return NULL; - return &bi->vfs_inode; + if (!bi) + return NULL; + return &bi->vfs_inode; } static void befs_i_callback(struct rcu_head *head) { struct inode *inode = container_of(head, struct inode, i_rcu); - kmem_cache_free(befs_inode_cachep, BEFS_I(inode)); + kmem_cache_free(befs_inode_cachep, BEFS_I(inode)); } static void befs_destroy_inode(struct inode *inode) @@ -287,7 +296,7 @@ static void befs_destroy_inode(struct inode *inode) static void init_once(void *foo) { - struct befs_inode_info *bi = (struct befs_inode_info *) foo; + struct befs_inode_info *bi = (struct befs_inode_info *) foo; inode_init_once(&bi->vfs_inode); } @@ -338,7 +347,7 @@ static struct inode *befs_iget(struct super_block *sb, unsigned long ino) /* * set uid and gid. But since current BeOS is single user OS, so * you can change by "uid" or "gid" options. - */ + */ inode->i_uid = befs_sb->mount_opts.use_uid ? befs_sb->mount_opts.uid : @@ -353,14 +362,14 @@ static struct inode *befs_iget(struct super_block *sb, unsigned long ino) * BEFS's time is 64 bits, but current VFS is 32 bits... * BEFS don't have access time. Nor inode change time. VFS * doesn't have creation time. - * Also, the lower 16 bits of the last_modified_time and + * Also, the lower 16 bits of the last_modified_time and * create_time are just a counter to help ensure uniqueness * for indexing purposes. (PFD, page 54) */ inode->i_mtime.tv_sec = fs64_to_cpu(sb, raw_inode->last_modified_time) >> 16; - inode->i_mtime.tv_nsec = 0; /* lower 16 bits are not a time */ + inode->i_mtime.tv_nsec = 0; /* lower 16 bits are not a time */ inode->i_ctime = inode->i_mtime; inode->i_atime = inode->i_mtime; @@ -414,10 +423,10 @@ static struct inode *befs_iget(struct super_block *sb, unsigned long ino) unlock_new_inode(inode); return inode; - unacquire_bh: +unacquire_bh: brelse(bh); - unacquire_none: +unacquire_none: iget_failed(inode); befs_debug(sb, "<--- %s - Bad inode", __func__); return ERR_PTR(-EIO); @@ -442,7 +451,7 @@ befs_init_inodecache(void) } /* Called at fs teardown. - * + * * Taken from NFS implementation by Al Viro. */ static void @@ -491,13 +500,10 @@ fail: } /* - * UTF-8 to NLS charset convert routine - * + * UTF-8 to NLS charset convert routine * - * Changed 8/10/01 by Will Dyson. Now use uni2char() / char2uni() rather than - * the nls tables directly + * Uses uni2char() / char2uni() rather than the nls tables directly */ - static int befs_utf2nls(struct super_block *sb, const char *in, int in_len, char **out, int *out_len) @@ -521,9 +527,8 @@ befs_utf2nls(struct super_block *sb, const char *in, } *out = result = kmalloc(maxlen, GFP_NOFS); - if (!*out) { + if (!*out) return -ENOMEM; - } for (i = o = 0; i < in_len; i += utflen, o += unilen) { @@ -546,7 +551,7 @@ befs_utf2nls(struct super_block *sb, const char *in, return o; - conv_err: +conv_err: befs_error(sb, "Name using character set %s contains a character that " "cannot be converted to unicode.", nls->charset); befs_debug(sb, "<--- %s", __func__); @@ -561,18 +566,18 @@ befs_utf2nls(struct super_block *sb, const char *in, * @in_len: Length of input string in bytes * @out: The output string in UTF-8 format * @out_len: Length of the output buffer - * + * * Converts input string @in, which is in the format of the loaded NLS map, * into a utf8 string. - * + * * The destination string @out is allocated by this function and the caller is * responsible for freeing it with kfree() - * + * * On return, *@out_len is the length of @out in bytes. * * On success, the return value is the number of utf8 characters written to * the output buffer @out. - * + * * On Failure, a negative number coresponding to the error code is returned. */ @@ -585,9 +590,11 @@ befs_nls2utf(struct super_block *sb, const char *in, wchar_t uni; int unilen, utflen; char *result; - /* There're nls characters that will translate to 3-chars-wide UTF-8 - * characters, a additional byte is needed to save the final \0 - * in special cases */ + /* + * There are nls characters that will translate to 3-chars-wide UTF-8 + * characters, an additional byte is needed to save the final \0 + * in special cases + */ int maxlen = (3 * in_len) + 1; befs_debug(sb, "---> %s\n", __func__); @@ -624,14 +631,41 @@ befs_nls2utf(struct super_block *sb, const char *in, return i; - conv_err: - befs_error(sb, "Name using charecter set %s contains a charecter that " +conv_err: + befs_error(sb, "Name using character set %s contains a character that " "cannot be converted to unicode.", nls->charset); befs_debug(sb, "<--- %s", __func__); kfree(result); return -EILSEQ; } +static struct inode *befs_nfs_get_inode(struct super_block *sb, uint64_t ino, + uint32_t generation) +{ + /* No need to handle i_generation */ + return befs_iget(sb, ino); +} + +/* + * Map a NFS file handle to a corresponding dentry + */ +static struct dentry *befs_fh_to_dentry(struct super_block *sb, + struct fid *fid, int fh_len, int fh_type) +{ + return generic_fh_to_dentry(sb, fid, fh_len, fh_type, + befs_nfs_get_inode); +} + +/* + * Find the parent for a file specified by NFS handle + */ +static struct dentry *befs_fh_to_parent(struct super_block *sb, + struct fid *fid, int fh_len, int fh_type) +{ + return generic_fh_to_parent(sb, fid, fh_len, fh_type, + befs_nfs_get_inode); +} + enum { Opt_uid, Opt_gid, Opt_charset, Opt_debug, Opt_err, }; @@ -666,6 +700,7 @@ parse_options(char *options, struct befs_mount_options *opts) while ((p = strsep(&options, ",")) != NULL) { int token; + if (!*p) continue; @@ -721,7 +756,7 @@ parse_options(char *options, struct befs_mount_options *opts) } /* This function has the responsibiltiy of getting the - * filesystem ready for unmounting. + * filesystem ready for unmounting. * Basically, we free everything that we allocated in * befs_read_inode */ @@ -782,8 +817,7 @@ befs_fill_super(struct super_block *sb, void *data, int silent) * Linux 2.4.10 and later refuse to read blocks smaller than * the logical block size for the device. But we also need to read at * least 1k to get the second 512 bytes of the volume. - * -WD 10-26-01 - */ + */ blocksize = sb_min_blocksize(sb, 1024); if (!blocksize) { if (!silent) @@ -791,7 +825,8 @@ befs_fill_super(struct super_block *sb, void *data, int silent) goto unacquire_priv_sbp; } - if (!(bh = sb_bread(sb, sb_block))) { + bh = sb_bread(sb, sb_block); + if (!bh) { if (!silent) befs_error(sb, "unable to read superblock"); goto unacquire_priv_sbp; @@ -816,7 +851,7 @@ befs_fill_super(struct super_block *sb, void *data, int silent) brelse(bh); - if( befs_sb->num_blocks > ~((sector_t)0) ) { + if (befs_sb->num_blocks > ~((sector_t)0)) { if (!silent) befs_error(sb, "blocks count: %llu is larger than the host can use", befs_sb->num_blocks); @@ -831,6 +866,7 @@ befs_fill_super(struct super_block *sb, void *data, int silent) /* Set real blocksize of fs */ sb_set_blocksize(sb, (ulong) befs_sb->block_size); sb->s_op = &befs_sops; + sb->s_export_op = &befs_export_operations; root = befs_iget(sb, iaddr2blockno(sb, &(befs_sb->root_dir))); if (IS_ERR(root)) { ret = PTR_ERR(root); @@ -861,16 +897,16 @@ befs_fill_super(struct super_block *sb, void *data, int silent) } return 0; -/*****************/ - unacquire_bh: + +unacquire_bh: brelse(bh); - unacquire_priv_sbp: +unacquire_priv_sbp: kfree(befs_sb->mount_opts.iocharset); kfree(sb->s_fs_info); sb->s_fs_info = NULL; - unacquire_none: +unacquire_none: return ret; } @@ -919,7 +955,7 @@ static struct file_system_type befs_fs_type = { .name = "befs", .mount = befs_mount, .kill_sb = kill_block_super, - .fs_flags = FS_REQUIRES_DEV, + .fs_flags = FS_REQUIRES_DEV, }; MODULE_ALIAS_FS("befs"); @@ -956,9 +992,9 @@ exit_befs_fs(void) } /* -Macros that typecheck the init and exit functions, -ensures that they are called at init and cleanup, -and eliminates warnings about unused functions. -*/ + * Macros that typecheck the init and exit functions, + * ensures that they are called at init and cleanup, + * and eliminates warnings about unused functions. + */ module_init(init_befs_fs) module_exit(exit_befs_fs) diff --git a/fs/befs/super.h b/fs/befs/super.h index dc4556376a22..ec1df30a7e9a 100644 --- a/fs/befs/super.h +++ b/fs/befs/super.h @@ -2,7 +2,5 @@ * super.h */ -int befs_load_sb(struct super_block *sb, befs_super_block * disk_sb); - +int befs_load_sb(struct super_block *sb, befs_super_block *disk_sb); int befs_check_sb(struct super_block *sb); - diff --git a/fs/compat.c b/fs/compat.c index 543b48c29ac3..3f4908c28698 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -487,45 +487,6 @@ COMPAT_SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd, return compat_sys_fcntl64(fd, cmd, arg); } -COMPAT_SYSCALL_DEFINE2(io_setup, unsigned, nr_reqs, u32 __user *, ctx32p) -{ - long ret; - aio_context_t ctx64; - - mm_segment_t oldfs = get_fs(); - if (unlikely(get_user(ctx64, ctx32p))) - return -EFAULT; - - set_fs(KERNEL_DS); - /* The __user pointer cast is valid because of the set_fs() */ - ret = sys_io_setup(nr_reqs, (aio_context_t __user *) &ctx64); - set_fs(oldfs); - /* truncating is ok because it's a user address */ - if (!ret) - ret = put_user((u32) ctx64, ctx32p); - return ret; -} - -COMPAT_SYSCALL_DEFINE5(io_getevents, compat_aio_context_t, ctx_id, - compat_long_t, min_nr, - compat_long_t, nr, - struct io_event __user *, events, - struct compat_timespec __user *, timeout) -{ - struct timespec t; - struct timespec __user *ut = NULL; - - if (timeout) { - if (compat_get_timespec(&t, timeout)) - return -EFAULT; - - ut = compat_alloc_user_space(sizeof(*ut)); - if (copy_to_user(ut, &t, sizeof(t)) ) - return -EFAULT; - } - return sys_io_getevents(ctx_id, min_nr, nr, events, ut); -} - /* A write operation does a read from user space and vice versa */ #define vrfy_dir(type) ((type) == READ ? VERIFY_WRITE : VERIFY_READ) @@ -602,42 +563,6 @@ out: return ret; } -static inline long -copy_iocb(long nr, u32 __user *ptr32, struct iocb __user * __user *ptr64) -{ - compat_uptr_t uptr; - int i; - - for (i = 0; i < nr; ++i) { - if (get_user(uptr, ptr32 + i)) - return -EFAULT; - if (put_user(compat_ptr(uptr), ptr64 + i)) - return -EFAULT; - } - return 0; -} - -#define MAX_AIO_SUBMITS (PAGE_SIZE/sizeof(struct iocb *)) - -COMPAT_SYSCALL_DEFINE3(io_submit, compat_aio_context_t, ctx_id, - int, nr, u32 __user *, iocb) -{ - struct iocb __user * __user *iocb64; - long ret; - - if (unlikely(nr < 0)) - return -EINVAL; - - if (nr > MAX_AIO_SUBMITS) - nr = MAX_AIO_SUBMITS; - - iocb64 = compat_alloc_user_space(nr * sizeof(*iocb64)); - ret = copy_iocb(nr, iocb, iocb64); - if (!ret) - ret = do_io_submit(ctx_id, nr, iocb64, 1); - return ret; -} - struct compat_ncp_mount_data { compat_int_t version; compat_uint_t ncp_fd; diff --git a/fs/exec.c b/fs/exec.c index 8112eacf10f3..eadbf5069c38 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -19,7 +19,7 @@ * current->executable is only used by the procfs. This allows a dispatch * table to check for several different types of binary formats. We keep * trying until we recognize the file or we run out of supported binary - * formats. + * formats. */ #include <linux/slab.h> @@ -1268,6 +1268,13 @@ int flush_old_exec(struct linux_binprm * bprm) flush_thread(); current->personality &= ~bprm->per_clear; + /* + * We have to apply CLOEXEC before we change whether the process is + * dumpable (in setup_new_exec) to avoid a race with a process in userspace + * trying to access the should-be-closed file descriptors of a process + * undergoing exec(2). + */ + do_close_on_exec(current->files); return 0; out: @@ -1330,7 +1337,6 @@ void setup_new_exec(struct linux_binprm * bprm) group */ current->self_exec_id++; flush_signal_handlers(current, 0); - do_close_on_exec(current->files); } EXPORT_SYMBOL(setup_new_exec); diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index e173afe92661..0093ea2512a8 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -1478,6 +1478,10 @@ struct inode *ext2_iget (struct super_block *sb, unsigned long ino) inode->i_size |= ((__u64)le32_to_cpu(raw_inode->i_size_high)) << 32; else ei->i_dir_acl = le32_to_cpu(raw_inode->i_dir_acl); + if (i_size_read(inode) < 0) { + ret = -EFSCORRUPTED; + goto bad_inode; + } ei->i_dtime = 0; inode->i_generation = le32_to_cpu(raw_inode->i_generation); ei->i_state = 0; diff --git a/fs/namespace.c b/fs/namespace.c index f7e28f8ea04d..b5b1259e064f 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -96,10 +96,6 @@ static inline struct hlist_head *mp_hash(struct dentry *dentry) return &mountpoint_hashtable[tmp & mp_hash_mask]; } -/* - * allocation is serialized by namespace_sem, but we need the spinlock to - * serialize with freeing. - */ static int mnt_alloc_id(struct mount *mnt) { int res; @@ -1034,6 +1030,8 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root, if (IS_MNT_SLAVE(old)) list_add(&mnt->mnt_slave, &old->mnt_slave); mnt->mnt_master = old->mnt_master; + } else { + CLEAR_MNT_SHARED(mnt); } if (flag & CL_MAKE_SHARED) set_mnt_shared(mnt); @@ -1828,9 +1826,7 @@ struct vfsmount *clone_private_mount(const struct path *path) if (IS_MNT_UNBINDABLE(old_mnt)) return ERR_PTR(-EINVAL); - down_read(&namespace_sem); new_mnt = clone_mnt(old_mnt, path->dentry, CL_PRIVATE); - up_read(&namespace_sem); if (IS_ERR(new_mnt)) return ERR_CAST(new_mnt); diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index cb22a9f9ae7e..fad81041f5ab 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1273,8 +1273,8 @@ out_error: */ static int nfs_weak_revalidate(struct dentry *dentry, unsigned int flags) { - int error; struct inode *inode = d_inode(dentry); + int error = 0; /* * I believe we can only get a negative dentry here in the case of a @@ -1293,7 +1293,8 @@ static int nfs_weak_revalidate(struct dentry *dentry, unsigned int flags) return 0; } - error = nfs_revalidate_inode(NFS_SERVER(inode), inode); + if (nfs_mapping_need_revalidate_inode(inode)) + error = __nfs_revalidate_inode(NFS_SERVER(inode), inode); dfprintk(LOOKUPCACHE, "NFS: %s: inode %lu is %s\n", __func__, inode->i_ino, error ? "invalid" : "valid"); return !error; @@ -2285,8 +2286,7 @@ static int nfs_access_get_cached(struct inode *inode, struct rpc_cred *cred, str if (cache == NULL) goto out; /* Found an entry, is our attribute cache valid? */ - if (!nfs_attribute_cache_expired(inode) && - !(nfsi->cache_validity & NFS_INO_INVALID_ATTR)) + if (!nfs_check_cache_invalid(inode, NFS_INO_INVALID_ACCESS)) break; err = -ECHILD; if (!may_block) @@ -2334,12 +2334,12 @@ static int nfs_access_get_cached_rcu(struct inode *inode, struct rpc_cred *cred, cache = NULL; if (cache == NULL) goto out; - err = nfs_revalidate_inode_rcu(NFS_SERVER(inode), inode); - if (err) + if (nfs_check_cache_invalid(inode, NFS_INO_INVALID_ACCESS)) goto out; res->jiffies = cache->jiffies; res->cred = cache->cred; res->mask = cache->mask; + err = 0; out: rcu_read_unlock(); return err; @@ -2491,12 +2491,13 @@ EXPORT_SYMBOL_GPL(nfs_may_open); static int nfs_execute_ok(struct inode *inode, int mask) { struct nfs_server *server = NFS_SERVER(inode); - int ret; + int ret = 0; - if (mask & MAY_NOT_BLOCK) - ret = nfs_revalidate_inode_rcu(server, inode); - else - ret = nfs_revalidate_inode(server, inode); + if (nfs_check_cache_invalid(inode, NFS_INO_INVALID_ACCESS)) { + if (mask & MAY_NOT_BLOCK) + return -ECHILD; + ret = __nfs_revalidate_inode(server, inode); + } if (ret == 0 && !execute_ok(inode)) ret = -EACCES; return ret; diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 55208b9b3c11..157cb43ce9db 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -101,21 +101,11 @@ EXPORT_SYMBOL_GPL(nfs_file_release); static int nfs_revalidate_file_size(struct inode *inode, struct file *filp) { struct nfs_server *server = NFS_SERVER(inode); - struct nfs_inode *nfsi = NFS_I(inode); - const unsigned long force_reval = NFS_INO_REVAL_PAGECACHE|NFS_INO_REVAL_FORCED; - unsigned long cache_validity = nfsi->cache_validity; - - if (NFS_PROTO(inode)->have_delegation(inode, FMODE_READ) && - (cache_validity & force_reval) != force_reval) - goto out_noreval; if (filp->f_flags & O_DIRECT) goto force_reval; - if (nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE) - goto force_reval; - if (nfs_attribute_timeout(inode)) + if (nfs_check_cache_invalid(inode, NFS_INO_REVAL_PAGECACHE)) goto force_reval; -out_noreval: return 0; force_reval: return __nfs_revalidate_inode(server, inode); diff --git a/fs/nfs/filelayout/filelayoutdev.c b/fs/nfs/filelayout/filelayoutdev.c index a5589b791439..f956ca20a8a3 100644 --- a/fs/nfs/filelayout/filelayoutdev.c +++ b/fs/nfs/filelayout/filelayoutdev.c @@ -282,7 +282,8 @@ nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx) s->nfs_client->cl_minorversion); out_test_devid: - if (filelayout_test_devid_unavailable(devid)) + if (ret->ds_clp == NULL || + filelayout_test_devid_unavailable(devid)) ret = NULL; out: return ret; diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 9e111d07f667..45962fe5098c 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -1126,7 +1126,8 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task, case -EPIPE: dprintk("%s DS connection error %d\n", __func__, task->tk_status); - nfs4_mark_deviceid_unavailable(devid); + nfs4_delete_deviceid(devid->ld, devid->nfs_client, + &devid->deviceid); rpc_wake_up(&tbl->slot_tbl_waitq); /* fall through */ default: @@ -1175,7 +1176,8 @@ static int ff_layout_async_handle_error_v3(struct rpc_task *task, default: dprintk("%s DS connection error %d\n", __func__, task->tk_status); - nfs4_mark_deviceid_unavailable(devid); + nfs4_delete_deviceid(devid->ld, devid->nfs_client, + &devid->deviceid); } /* FIXME: Need to prevent infinite looping here. */ return -NFS4ERR_RESET_TO_PNFS; diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c index 3cc39d1c1206..e5a6f248697b 100644 --- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c +++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c @@ -177,7 +177,7 @@ out_err: static void ff_layout_mark_devid_invalid(struct pnfs_layout_segment *lseg, struct nfs4_deviceid_node *devid) { - nfs4_mark_deviceid_unavailable(devid); + nfs4_delete_deviceid(devid->ld, devid->nfs_client, &devid->deviceid); if (!ff_layout_has_available_ds(lseg)) pnfs_error_mark_layout_for_return(lseg->pls_layout->plh_inode, lseg); diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 5864146e05e6..011e4f8c1e01 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -160,6 +160,43 @@ int nfs_sync_mapping(struct address_space *mapping) return ret; } +static int nfs_attribute_timeout(struct inode *inode) +{ + struct nfs_inode *nfsi = NFS_I(inode); + + return !time_in_range_open(jiffies, nfsi->read_cache_jiffies, nfsi->read_cache_jiffies + nfsi->attrtimeo); +} + +static bool nfs_check_cache_invalid_delegated(struct inode *inode, unsigned long flags) +{ + unsigned long cache_validity = READ_ONCE(NFS_I(inode)->cache_validity); + + /* Special case for the pagecache or access cache */ + if (flags == NFS_INO_REVAL_PAGECACHE && + !(cache_validity & NFS_INO_REVAL_FORCED)) + return false; + return (cache_validity & flags) != 0; +} + +static bool nfs_check_cache_invalid_not_delegated(struct inode *inode, unsigned long flags) +{ + unsigned long cache_validity = READ_ONCE(NFS_I(inode)->cache_validity); + + if ((cache_validity & flags) != 0) + return true; + if (nfs_attribute_timeout(inode)) + return true; + return false; +} + +bool nfs_check_cache_invalid(struct inode *inode, unsigned long flags) +{ + if (NFS_PROTO(inode)->have_delegation(inode, FMODE_READ)) + return nfs_check_cache_invalid_delegated(inode, flags); + + return nfs_check_cache_invalid_not_delegated(inode, flags); +} + static void nfs_set_cache_invalid(struct inode *inode, unsigned long flags) { struct nfs_inode *nfsi = NFS_I(inode); @@ -795,6 +832,8 @@ void nfs_close_context(struct nfs_open_context *ctx, int is_sync) if (!is_sync) return; inode = d_inode(ctx->dentry); + if (NFS_PROTO(inode)->have_delegation(inode, FMODE_READ)) + return; nfsi = NFS_I(inode); if (inode->i_mapping->nrpages == 0) return; @@ -1044,13 +1083,6 @@ out: return status; } -int nfs_attribute_timeout(struct inode *inode) -{ - struct nfs_inode *nfsi = NFS_I(inode); - - return !time_in_range_open(jiffies, nfsi->read_cache_jiffies, nfsi->read_cache_jiffies + nfsi->attrtimeo); -} - int nfs_attribute_cache_expired(struct inode *inode) { if (nfs_have_delegated_attributes(inode)) @@ -1073,15 +1105,6 @@ int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) } EXPORT_SYMBOL_GPL(nfs_revalidate_inode); -int nfs_revalidate_inode_rcu(struct nfs_server *server, struct inode *inode) -{ - if (!(NFS_I(inode)->cache_validity & - (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_LABEL)) - && !nfs_attribute_cache_expired(inode)) - return NFS_STALE(inode) ? -ESTALE : 0; - return -ECHILD; -} - static int nfs_invalidate_mapping(struct inode *inode, struct address_space *mapping) { struct nfs_inode *nfsi = NFS_I(inode); @@ -1114,17 +1137,8 @@ static int nfs_invalidate_mapping(struct inode *inode, struct address_space *map bool nfs_mapping_need_revalidate_inode(struct inode *inode) { - unsigned long cache_validity = NFS_I(inode)->cache_validity; - - if (NFS_PROTO(inode)->have_delegation(inode, FMODE_READ)) { - const unsigned long force_reval = - NFS_INO_REVAL_PAGECACHE|NFS_INO_REVAL_FORCED; - return (cache_validity & force_reval) == force_reval; - } - - return (cache_validity & NFS_INO_REVAL_PAGECACHE) - || nfs_attribute_timeout(inode) - || NFS_STALE(inode); + return nfs_check_cache_invalid(inode, NFS_INO_REVAL_PAGECACHE) || + NFS_STALE(inode); } int nfs_revalidate_mapping_rcu(struct inode *inode) @@ -1536,13 +1550,6 @@ static int nfs_post_op_update_inode_locked(struct inode *inode, struct nfs_fattr { unsigned long invalid = NFS_INO_INVALID_ATTR; - /* - * Don't revalidate the pagecache if we hold a delegation, but do - * force an attribute update - */ - if (NFS_PROTO(inode)->have_delegation(inode, FMODE_READ)) - invalid = NFS_INO_INVALID_ATTR|NFS_INO_REVAL_FORCED; - if (S_ISDIR(inode->i_mode)) invalid |= NFS_INO_INVALID_DATA; nfs_set_cache_invalid(inode, invalid); diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 6b79c2ca9b9a..09ca5095c04e 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -381,6 +381,7 @@ extern int nfs_drop_inode(struct inode *); extern void nfs_clear_inode(struct inode *); extern void nfs_evict_inode(struct inode *); void nfs_zap_acl_cache(struct inode *inode); +extern bool nfs_check_cache_invalid(struct inode *, unsigned long); extern int nfs_wait_bit_killable(struct wait_bit_key *key, int mode); extern int nfs_wait_atomic_killable(atomic_t *p); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index d33242c8d95d..6dcbc5defb7a 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1089,8 +1089,15 @@ static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo) spin_lock(&dir->i_lock); nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; - if (!cinfo->atomic || cinfo->before != dir->i_version) + if (cinfo->atomic && cinfo->before == dir->i_version) { + nfsi->cache_validity &= ~NFS_INO_REVAL_PAGECACHE; + nfsi->attrtimeo_timestamp = jiffies; + } else { nfs_force_lookup_revalidate(dir); + if (cinfo->before != dir->i_version) + nfsi->cache_validity |= NFS_INO_INVALID_ACCESS | + NFS_INO_INVALID_ACL; + } dir->i_version = cinfo->after; nfsi->attr_gencount = nfs_inc_attr_generation_counter(); nfs_fscache_invalidate(dir); @@ -3115,6 +3122,16 @@ static void nfs4_close_done(struct rpc_task *task, void *data) res_stateid = &calldata->res.stateid; renew_lease(server, calldata->timestamp); break; + case -NFS4ERR_ACCESS: + if (calldata->arg.bitmask != NULL) { + calldata->arg.bitmask = NULL; + calldata->res.fattr = NULL; + task->tk_status = 0; + rpc_restart_call_prepare(task); + goto out_release; + + } + break; case -NFS4ERR_ADMIN_REVOKED: case -NFS4ERR_STALE_STATEID: case -NFS4ERR_EXPIRED: @@ -3140,7 +3157,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data) res_stateid, calldata->arg.fmode); out_release: nfs_release_seqid(calldata->arg.seqid); - nfs_refresh_inode(calldata->inode, calldata->res.fattr); + nfs_refresh_inode(calldata->inode, &calldata->fattr); dprintk("%s: done, ret = %d!\n", __func__, task->tk_status); } @@ -3193,9 +3210,10 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) goto out_wait; } - if (calldata->arg.fmode == 0) { + if (calldata->arg.fmode == 0) task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE]; + if (calldata->arg.fmode == 0 || calldata->arg.fmode == FMODE_READ) { /* Close-to-open cache consistency revalidation */ if (!nfs4_have_delegation(inode, FMODE_READ)) calldata->arg.bitmask = NFS_SERVER(inode)->cache_consistency_bitmask; @@ -3207,7 +3225,10 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) nfs4_map_atomic_open_share(NFS_SERVER(inode), calldata->arg.fmode, 0); - nfs_fattr_init(calldata->res.fattr); + if (calldata->res.fattr == NULL) + calldata->arg.bitmask = NULL; + else if (calldata->arg.bitmask == NULL) + calldata->res.fattr = NULL; calldata->timestamp = jiffies; if (nfs4_setup_sequence(NFS_SERVER(inode), &calldata->arg.seq_args, @@ -3274,6 +3295,7 @@ int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait) calldata->arg.seqid = alloc_seqid(&state->owner->so_seqid, gfp_mask); if (IS_ERR(calldata->arg.seqid)) goto out_free_calldata; + nfs_fattr_init(&calldata->fattr); calldata->arg.fmode = 0; calldata->lr.arg.ld_private = &calldata->lr.ld_private; calldata->res.fattr = &calldata->fattr; @@ -5673,6 +5695,14 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata) case -NFS4ERR_STALE_STATEID: task->tk_status = 0; break; + case -NFS4ERR_ACCESS: + if (data->args.bitmask) { + data->args.bitmask = NULL; + data->res.fattr = NULL; + task->tk_status = 0; + rpc_restart_call_prepare(task); + return; + } default: if (nfs4_async_handle_error(task, data->res.server, NULL, NULL) == -EAGAIN) { @@ -5692,6 +5722,7 @@ static void nfs4_delegreturn_release(void *calldata) if (data->lr.roc) pnfs_roc_release(&data->lr.arg, &data->lr.res, data->res.lr_ret); + nfs_post_op_update_inode_force_wcc(inode, &data->fattr); nfs_iput_and_deactive(inode); } kfree(calldata); @@ -5780,10 +5811,6 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co if (status != 0) goto out; status = data->rpc_status; - if (status == 0) - nfs_post_op_update_inode_force_wcc(inode, &data->fattr); - else - nfs_refresh_inode(inode, &data->fattr); out: rpc_put_task(task); return status; diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 95baf7d340f0..1d152f4470cd 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -494,21 +494,18 @@ nfs4_alloc_state_owner(struct nfs_server *server, } static void -nfs4_drop_state_owner(struct nfs4_state_owner *sp) -{ - struct rb_node *rb_node = &sp->so_server_node; - - if (!RB_EMPTY_NODE(rb_node)) { - struct nfs_server *server = sp->so_server; - struct nfs_client *clp = server->nfs_client; - - spin_lock(&clp->cl_lock); - if (!RB_EMPTY_NODE(rb_node)) { - rb_erase(rb_node, &server->state_owners); - RB_CLEAR_NODE(rb_node); - } - spin_unlock(&clp->cl_lock); - } +nfs4_reset_state_owner(struct nfs4_state_owner *sp) +{ + /* This state_owner is no longer usable, but must + * remain in place so that state recovery can find it + * and the opens associated with it. + * It may also be used for new 'open' request to + * return a delegation to the server. + * So update the 'create_time' so that it looks like + * a new state_owner. This will cause the server to + * request an OPEN_CONFIRM to start a new sequence. + */ + sp->so_seqid.create_time = ktime_get(); } static void nfs4_free_state_owner(struct nfs4_state_owner *sp) @@ -797,21 +794,33 @@ void nfs4_close_sync(struct nfs4_state *state, fmode_t fmode) /* * Search the state->lock_states for an existing lock_owner - * that is compatible with current->files + * that is compatible with either of the given owners. + * If the second is non-zero, then the first refers to a Posix-lock + * owner (current->files) and the second refers to a flock/OFD + * owner (struct file*). In that case, prefer a match for the first + * owner. + * If both sorts of locks are held on the one file we cannot know + * which stateid was intended to be used, so a "correct" choice cannot + * be made. Failing that, a "consistent" choice is preferable. The + * consistent choice we make is to prefer the first owner, that of a + * Posix lock. */ static struct nfs4_lock_state * __nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner, fl_owner_t fl_owner2) { - struct nfs4_lock_state *pos; + struct nfs4_lock_state *pos, *ret = NULL; list_for_each_entry(pos, &state->lock_states, ls_locks) { - if (pos->ls_owner != fl_owner && - pos->ls_owner != fl_owner2) - continue; - atomic_inc(&pos->ls_count); - return pos; + if (pos->ls_owner == fl_owner) { + ret = pos; + break; + } + if (pos->ls_owner == fl_owner2) + ret = pos; } - return NULL; + if (ret) + atomic_inc(&ret->ls_count); + return ret; } /* @@ -1101,7 +1110,7 @@ void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid) sp = container_of(seqid->sequence, struct nfs4_state_owner, so_seqid); if (status == -NFS4ERR_BAD_SEQID) - nfs4_drop_state_owner(sp); + nfs4_reset_state_owner(sp); if (!nfs4_has_session(sp->so_server->nfs_client)) nfs_increment_seqid(status, seqid); } diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 1af6268a7d8c..e9255cb453e6 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -502,11 +502,13 @@ static int nfs4_stat_to_errno(int); (compound_encode_hdr_maxsz + \ encode_sequence_maxsz + \ encode_putfh_maxsz + \ + encode_layoutreturn_maxsz + \ encode_open_downgrade_maxsz) #define NFS4_dec_open_downgrade_sz \ (compound_decode_hdr_maxsz + \ decode_sequence_maxsz + \ decode_putfh_maxsz + \ + decode_layoutreturn_maxsz + \ decode_open_downgrade_maxsz) #define NFS4_enc_close_sz (compound_encode_hdr_maxsz + \ encode_sequence_maxsz + \ @@ -2277,9 +2279,9 @@ static void nfs4_xdr_enc_close(struct rpc_rqst *req, struct xdr_stream *xdr, encode_putfh(xdr, args->fh, &hdr); if (args->lr_args) encode_layoutreturn(xdr, args->lr_args, &hdr); - encode_close(xdr, args, &hdr); if (args->bitmask != NULL) encode_getfattr(xdr, args->bitmask, &hdr); + encode_close(xdr, args, &hdr); encode_nops(&hdr); } @@ -2356,6 +2358,8 @@ static void nfs4_xdr_enc_open_downgrade(struct rpc_rqst *req, encode_compound_hdr(xdr, req, &hdr); encode_sequence(xdr, &args->seq_args, &hdr); encode_putfh(xdr, args->fh, &hdr); + if (args->lr_args) + encode_layoutreturn(xdr, args->lr_args, &hdr); encode_open_downgrade(xdr, args, &hdr); encode_nops(&hdr); } @@ -2701,7 +2705,8 @@ static void nfs4_xdr_enc_delegreturn(struct rpc_rqst *req, encode_putfh(xdr, args->fhandle, &hdr); if (args->lr_args) encode_layoutreturn(xdr, args->lr_args, &hdr); - encode_getfattr(xdr, args->bitmask, &hdr); + if (args->bitmask) + encode_getfattr(xdr, args->bitmask, &hdr); encode_delegreturn(xdr, args->stateid, &hdr); encode_nops(&hdr); } @@ -6151,6 +6156,12 @@ static int nfs4_xdr_dec_open_downgrade(struct rpc_rqst *rqstp, status = decode_putfh(xdr); if (status) goto out; + if (res->lr_res) { + status = decode_layoutreturn(xdr, res->lr_res); + res->lr_ret = status; + if (status) + goto out; + } status = decode_open_downgrade(xdr, res); out: return status; @@ -6484,16 +6495,12 @@ static int nfs4_xdr_dec_close(struct rpc_rqst *rqstp, struct xdr_stream *xdr, if (status) goto out; } + if (res->fattr != NULL) { + status = decode_getfattr(xdr, res->fattr, res->server); + if (status != 0) + goto out; + } status = decode_close(xdr, res); - if (status != 0) - goto out; - /* - * Note: Server may do delete on close for this file - * in which case the getattr call will fail with - * an ESTALE error. Shouldn't be a problem, - * though, since fattr->valid will remain unset. - */ - decode_getfattr(xdr, res->fattr, res->server); out: return status; } @@ -6966,9 +6973,11 @@ static int nfs4_xdr_dec_delegreturn(struct rpc_rqst *rqstp, if (status) goto out; } - status = decode_getfattr(xdr, res->fattr, res->server); - if (status != 0) - goto out; + if (res->fattr) { + status = decode_getfattr(xdr, res->fattr, res->server); + if (status != 0) + goto out; + } status = decode_delegreturn(xdr); out: return status; diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 896df7bdf85f..59554f3adf29 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1251,6 +1251,7 @@ bool pnfs_roc(struct inode *ino, nfs4_stateid stateid; enum pnfs_iomode iomode = 0; bool layoutreturn = false, roc = false; + bool skip_read = false; if (!nfs_have_layout(ino)) return false; @@ -1270,18 +1271,27 @@ retry: } /* no roc if we hold a delegation */ - if (nfs4_check_delegation(ino, FMODE_READ)) - goto out_noroc; + if (nfs4_check_delegation(ino, FMODE_READ)) { + if (nfs4_check_delegation(ino, FMODE_WRITE)) + goto out_noroc; + skip_read = true; + } list_for_each_entry(ctx, &nfsi->open_files, list) { state = ctx->state; + if (state == NULL) + continue; /* Don't return layout if there is open file state */ - if (state != NULL && state->state != 0) + if (state->state & FMODE_WRITE) goto out_noroc; + if (state->state & FMODE_READ) + skip_read = true; } list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list) { + if (skip_read && lseg->pls_range.iomode == IOMODE_READ) + continue; /* If we are sending layoutreturn, invalidate all valid lsegs */ if (!test_and_clear_bit(NFS_LSEG_ROC, &lseg->pls_flags)) continue; diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c index 741077deef3b..a3645249f7ec 100644 --- a/fs/notify/inode_mark.c +++ b/fs/notify/inode_mark.c @@ -150,12 +150,10 @@ int fsnotify_add_inode_mark(struct fsnotify_mark *mark, */ void fsnotify_unmount_inodes(struct super_block *sb) { - struct inode *inode, *next_i, *need_iput = NULL; + struct inode *inode, *iput_inode = NULL; spin_lock(&sb->s_inode_list_lock); - list_for_each_entry_safe(inode, next_i, &sb->s_inodes, i_sb_list) { - struct inode *need_iput_tmp; - + list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { /* * We cannot __iget() an inode in state I_FREEING, * I_WILL_FREE, or I_NEW which is fine because by that point @@ -178,49 +176,24 @@ void fsnotify_unmount_inodes(struct super_block *sb) continue; } - need_iput_tmp = need_iput; - need_iput = NULL; - - /* In case fsnotify_inode_delete() drops a reference. */ - if (inode != need_iput_tmp) - __iget(inode); - else - need_iput_tmp = NULL; + __iget(inode); spin_unlock(&inode->i_lock); - - /* In case the dropping of a reference would nuke next_i. */ - while (&next_i->i_sb_list != &sb->s_inodes) { - spin_lock(&next_i->i_lock); - if (!(next_i->i_state & (I_FREEING | I_WILL_FREE)) && - atomic_read(&next_i->i_count)) { - __iget(next_i); - need_iput = next_i; - spin_unlock(&next_i->i_lock); - break; - } - spin_unlock(&next_i->i_lock); - next_i = list_next_entry(next_i, i_sb_list); - } - - /* - * We can safely drop s_inode_list_lock here because either - * we actually hold references on both inode and next_i or - * end of list. Also no new inodes will be added since the - * umount has begun. - */ spin_unlock(&sb->s_inode_list_lock); - if (need_iput_tmp) - iput(need_iput_tmp); + if (iput_inode) + iput(iput_inode); /* for each watch, send FS_UNMOUNT and then remove it */ fsnotify(inode, FS_UNMOUNT, inode, FSNOTIFY_EVENT_INODE, NULL, 0); fsnotify_inode_delete(inode); - iput(inode); + iput_inode = inode; spin_lock(&sb->s_inode_list_lock); } spin_unlock(&sb->s_inode_list_lock); + + if (iput_inode) + iput(iput_inode); } diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c index 87e577a49b0d..cec495a921e3 100644 --- a/fs/ocfs2/quota_global.c +++ b/fs/ocfs2/quota_global.c @@ -634,7 +634,15 @@ static void qsync_work_fn(struct work_struct *work) dqi_sync_work.work); struct super_block *sb = oinfo->dqi_gqinode->i_sb; - dquot_scan_active(sb, ocfs2_sync_dquot_helper, oinfo->dqi_type); + /* + * We have to be careful here not to deadlock on s_umount as umount + * disabling quotas may be in progress and it waits for this work to + * complete. If trylock fails, we'll do the sync next time... + */ + if (down_read_trylock(&sb->s_umount)) { + dquot_scan_active(sb, ocfs2_sync_dquot_helper, oinfo->dqi_type); + up_read(&sb->s_umount); + } schedule_delayed_work(&oinfo->dqi_sync_work, msecs_to_jiffies(oinfo->dqi_syncms)); } diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c index 8a54fd8a4fa5..32c5a40c1257 100644 --- a/fs/ocfs2/quota_local.c +++ b/fs/ocfs2/quota_local.c @@ -454,7 +454,7 @@ out: /* Sync changes in local quota file into global quota file and * reinitialize local quota file. * The function expects local quota file to be already locked and - * dqonoff_mutex locked. */ + * s_umount locked in shared mode. */ static int ocfs2_recover_local_quota_file(struct inode *lqinode, int type, struct ocfs2_quota_recovery *rec) @@ -597,7 +597,7 @@ int ocfs2_finish_quota_recovery(struct ocfs2_super *osb, printk(KERN_NOTICE "ocfs2: Finishing quota recovery on device (%s) for " "slot %u\n", osb->dev_str, slot_num); - mutex_lock(&sb_dqopt(sb)->dqonoff_mutex); + down_read(&sb->s_umount); for (type = 0; type < OCFS2_MAXQUOTAS; type++) { if (list_empty(&(rec->r_list[type]))) continue; @@ -674,7 +674,7 @@ out_put: break; } out: - mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex); + up_read(&sb->s_umount); kfree(rec); return status; } @@ -840,7 +840,10 @@ static int ocfs2_local_free_info(struct super_block *sb, int type) } ocfs2_release_local_quota_bitmaps(&oinfo->dqi_chunk); - /* dqonoff_mutex protects us against racing with recovery thread... */ + /* + * s_umount held in exclusive mode protects us against racing with + * recovery thread... + */ if (oinfo->dqi_rec) { ocfs2_free_quota_recovery(oinfo->dqi_rec); mark_clean = 0; diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index d171d2c53f7f..f8933cb53d68 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c @@ -4834,7 +4834,7 @@ int ocfs2_reflink_remap_range(struct file *file_in, ret = vfs_clone_file_prep_inodes(inode_in, pos_in, inode_out, pos_out, &len, is_dedupe); - if (ret || len == 0) + if (ret <= 0) goto out_unlock; /* Lock out changes to the allocation maps and remap. */ diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index c894d945b084..a24e42f95341 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -985,7 +985,6 @@ static void ocfs2_disable_quotas(struct ocfs2_super *osb) for (type = 0; type < OCFS2_MAXQUOTAS; type++) { if (!sb_has_quota_loaded(sb, type)) continue; - /* Cancel periodic syncing before we grab dqonoff_mutex */ oinfo = sb_dqinfo(sb, type)->dqi_priv; cancel_delayed_work_sync(&oinfo->dqi_sync_work); inode = igrab(sb->s_dquot.files[type]); diff --git a/fs/pnode.c b/fs/pnode.c index 234a9ac49958..06a793f4ae38 100644 --- a/fs/pnode.c +++ b/fs/pnode.c @@ -67,49 +67,47 @@ int get_dominating_id(struct mount *mnt, const struct path *root) static int do_make_slave(struct mount *mnt) { - struct mount *peer_mnt = mnt, *master = mnt->mnt_master; - struct mount *slave_mnt; + struct mount *master, *slave_mnt; - /* - * slave 'mnt' to a peer mount that has the - * same root dentry. If none is available then - * slave it to anything that is available. - */ - while ((peer_mnt = next_peer(peer_mnt)) != mnt && - peer_mnt->mnt.mnt_root != mnt->mnt.mnt_root) ; - - if (peer_mnt == mnt) { - peer_mnt = next_peer(mnt); - if (peer_mnt == mnt) - peer_mnt = NULL; - } - if (mnt->mnt_group_id && IS_MNT_SHARED(mnt) && - list_empty(&mnt->mnt_share)) - mnt_release_group_id(mnt); - - list_del_init(&mnt->mnt_share); - mnt->mnt_group_id = 0; - - if (peer_mnt) - master = peer_mnt; - - if (master) { - list_for_each_entry(slave_mnt, &mnt->mnt_slave_list, mnt_slave) - slave_mnt->mnt_master = master; - list_move(&mnt->mnt_slave, &master->mnt_slave_list); - list_splice(&mnt->mnt_slave_list, master->mnt_slave_list.prev); - INIT_LIST_HEAD(&mnt->mnt_slave_list); + if (list_empty(&mnt->mnt_share)) { + if (IS_MNT_SHARED(mnt)) { + mnt_release_group_id(mnt); + CLEAR_MNT_SHARED(mnt); + } + master = mnt->mnt_master; + if (!master) { + struct list_head *p = &mnt->mnt_slave_list; + while (!list_empty(p)) { + slave_mnt = list_first_entry(p, + struct mount, mnt_slave); + list_del_init(&slave_mnt->mnt_slave); + slave_mnt->mnt_master = NULL; + } + return 0; + } } else { - struct list_head *p = &mnt->mnt_slave_list; - while (!list_empty(p)) { - slave_mnt = list_first_entry(p, - struct mount, mnt_slave); - list_del_init(&slave_mnt->mnt_slave); - slave_mnt->mnt_master = NULL; + struct mount *m; + /* + * slave 'mnt' to a peer mount that has the + * same root dentry. If none is available then + * slave it to anything that is available. + */ + for (m = master = next_peer(mnt); m != mnt; m = next_peer(m)) { + if (m->mnt.mnt_root == mnt->mnt.mnt_root) { + master = m; + break; + } } + list_del_init(&mnt->mnt_share); + mnt->mnt_group_id = 0; + CLEAR_MNT_SHARED(mnt); } + list_for_each_entry(slave_mnt, &mnt->mnt_slave_list, mnt_slave) + slave_mnt->mnt_master = master; + list_move(&mnt->mnt_slave, &master->mnt_slave_list); + list_splice(&mnt->mnt_slave_list, master->mnt_slave_list.prev); + INIT_LIST_HEAD(&mnt->mnt_slave_list); mnt->mnt_master = master; - CLEAR_MNT_SHARED(mnt); return 0; } diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 8738a0d62c09..406fed92362a 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -119,8 +119,7 @@ * spinlock to internal buffers before writing. * * Lock ordering (including related VFS locks) is the following: - * dqonoff_mutex > i_mutex > journal_lock > dquot->dq_lock > dqio_mutex - * dqonoff_mutex > i_mutex comes from dquot_quota_sync, dquot_enable, etc. + * s_umount > i_mutex > journal_lock > dquot->dq_lock > dqio_mutex */ static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_list_lock); @@ -572,7 +571,8 @@ int dquot_scan_active(struct super_block *sb, struct dquot *dquot, *old_dquot = NULL; int ret = 0; - mutex_lock(&sb_dqopt(sb)->dqonoff_mutex); + WARN_ON_ONCE(!rwsem_is_locked(&sb->s_umount)); + spin_lock(&dq_list_lock); list_for_each_entry(dquot, &inuse_list, dq_inuse) { if (!test_bit(DQ_ACTIVE_B, &dquot->dq_flags)) @@ -603,7 +603,6 @@ int dquot_scan_active(struct super_block *sb, spin_unlock(&dq_list_lock); out: dqput(old_dquot); - mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex); return ret; } EXPORT_SYMBOL(dquot_scan_active); @@ -617,7 +616,8 @@ int dquot_writeback_dquots(struct super_block *sb, int type) int cnt; int err, ret = 0; - mutex_lock(&dqopt->dqonoff_mutex); + WARN_ON_ONCE(!rwsem_is_locked(&sb->s_umount)); + for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (type != -1 && cnt != type) continue; @@ -653,7 +653,6 @@ int dquot_writeback_dquots(struct super_block *sb, int type) && info_dirty(&dqopt->info[cnt])) sb->dq_op->write_info(sb, cnt); dqstats_inc(DQST_SYNCS); - mutex_unlock(&dqopt->dqonoff_mutex); return ret; } @@ -683,7 +682,6 @@ int dquot_quota_sync(struct super_block *sb, int type) * Now when everything is written we can discard the pagecache so * that userspace sees the changes. */ - mutex_lock(&dqopt->dqonoff_mutex); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (type != -1 && cnt != type) continue; @@ -693,7 +691,6 @@ int dquot_quota_sync(struct super_block *sb, int type) truncate_inode_pages(&dqopt->files[cnt]->i_data, 0); inode_unlock(dqopt->files[cnt]); } - mutex_unlock(&dqopt->dqonoff_mutex); return 0; } @@ -935,7 +932,7 @@ static int dqinit_needed(struct inode *inode, int type) return 0; } -/* This routine is guarded by dqonoff_mutex mutex */ +/* This routine is guarded by s_umount semaphore */ static void add_dquot_ref(struct super_block *sb, int type) { struct inode *inode, *old_inode = NULL; @@ -2050,21 +2047,13 @@ int dquot_get_next_id(struct super_block *sb, struct kqid *qid) struct quota_info *dqopt = sb_dqopt(sb); int err; - mutex_lock(&dqopt->dqonoff_mutex); - if (!sb_has_quota_active(sb, qid->type)) { - err = -ESRCH; - goto out; - } - if (!dqopt->ops[qid->type]->get_next_id) { - err = -ENOSYS; - goto out; - } + if (!sb_has_quota_active(sb, qid->type)) + return -ESRCH; + if (!dqopt->ops[qid->type]->get_next_id) + return -ENOSYS; mutex_lock(&dqopt->dqio_mutex); err = dqopt->ops[qid->type]->get_next_id(sb, qid); mutex_unlock(&dqopt->dqio_mutex); -out: - mutex_unlock(&dqopt->dqonoff_mutex); - return err; } EXPORT_SYMBOL(dquot_get_next_id); @@ -2107,6 +2096,10 @@ int dquot_disable(struct super_block *sb, int type, unsigned int flags) struct quota_info *dqopt = sb_dqopt(sb); struct inode *toputinode[MAXQUOTAS]; + /* s_umount should be held in exclusive mode */ + if (WARN_ON_ONCE(down_read_trylock(&sb->s_umount))) + up_read(&sb->s_umount); + /* Cannot turn off usage accounting without turning off limits, or * suspend quotas and simultaneously turn quotas off. */ if ((flags & DQUOT_USAGE_ENABLED && !(flags & DQUOT_LIMITS_ENABLED)) @@ -2114,18 +2107,14 @@ int dquot_disable(struct super_block *sb, int type, unsigned int flags) DQUOT_USAGE_ENABLED))) return -EINVAL; - /* We need to serialize quota_off() for device */ - mutex_lock(&dqopt->dqonoff_mutex); - /* * Skip everything if there's nothing to do. We have to do this because * sometimes we are called when fill_super() failed and calling * sync_fs() in such cases does no good. */ - if (!sb_any_quota_loaded(sb)) { - mutex_unlock(&dqopt->dqonoff_mutex); + if (!sb_any_quota_loaded(sb)) return 0; - } + for (cnt = 0; cnt < MAXQUOTAS; cnt++) { toputinode[cnt] = NULL; if (type != -1 && cnt != type) @@ -2179,7 +2168,6 @@ int dquot_disable(struct super_block *sb, int type, unsigned int flags) dqopt->info[cnt].dqi_bgrace = 0; dqopt->ops[cnt] = NULL; } - mutex_unlock(&dqopt->dqonoff_mutex); /* Skip syncing and setting flags if quota files are hidden */ if (dqopt->flags & DQUOT_QUOTA_SYS_FILE) @@ -2196,20 +2184,14 @@ int dquot_disable(struct super_block *sb, int type, unsigned int flags) * must also discard the blockdev buffers so that we see the * changes done by userspace on the next quotaon() */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) - if (toputinode[cnt]) { - mutex_lock(&dqopt->dqonoff_mutex); - /* If quota was reenabled in the meantime, we have - * nothing to do */ - if (!sb_has_quota_loaded(sb, cnt)) { - inode_lock(toputinode[cnt]); - toputinode[cnt]->i_flags &= ~(S_IMMUTABLE | + /* This can happen when suspending quotas on remount-ro... */ + if (toputinode[cnt] && !sb_has_quota_loaded(sb, cnt)) { + inode_lock(toputinode[cnt]); + toputinode[cnt]->i_flags &= ~(S_IMMUTABLE | S_NOATIME | S_NOQUOTA); - truncate_inode_pages(&toputinode[cnt]->i_data, - 0); - inode_unlock(toputinode[cnt]); - mark_inode_dirty_sync(toputinode[cnt]); - } - mutex_unlock(&dqopt->dqonoff_mutex); + truncate_inode_pages(&toputinode[cnt]->i_data, 0); + inode_unlock(toputinode[cnt]); + mark_inode_dirty_sync(toputinode[cnt]); } if (sb->s_bdev) invalidate_bdev(sb->s_bdev); @@ -2281,6 +2263,10 @@ static int vfs_load_quota_inode(struct inode *inode, int type, int format_id, error = -EINVAL; goto out_fmt; } + if (sb_has_quota_loaded(sb, type)) { + error = -EBUSY; + goto out_fmt; + } if (!(dqopt->flags & DQUOT_QUOTA_SYS_FILE)) { /* As we bypass the pagecache we must now flush all the @@ -2292,11 +2278,6 @@ static int vfs_load_quota_inode(struct inode *inode, int type, int format_id, sync_filesystem(sb); invalidate_bdev(sb->s_bdev); } - mutex_lock(&dqopt->dqonoff_mutex); - if (sb_has_quota_loaded(sb, type)) { - error = -EBUSY; - goto out_lock; - } if (!(dqopt->flags & DQUOT_QUOTA_SYS_FILE)) { /* We don't want quota and atime on quota files (deadlocks @@ -2317,7 +2298,7 @@ static int vfs_load_quota_inode(struct inode *inode, int type, int format_id, error = -EIO; dqopt->files[type] = igrab(inode); if (!dqopt->files[type]) - goto out_lock; + goto out_file_flags; error = -EINVAL; if (!fmt->qf_ops->check_quota_file(sb, type)) goto out_file_init; @@ -2340,14 +2321,13 @@ static int vfs_load_quota_inode(struct inode *inode, int type, int format_id, spin_unlock(&dq_state_lock); add_dquot_ref(sb, type); - mutex_unlock(&dqopt->dqonoff_mutex); return 0; out_file_init: dqopt->files[type] = NULL; iput(inode); -out_lock: +out_file_flags: if (oldflags != -1) { inode_lock(inode); /* Set the flags back (in the case of accidental quotaon() @@ -2356,7 +2336,6 @@ out_lock: inode->i_flags |= oldflags; inode_unlock(inode); } - mutex_unlock(&dqopt->dqonoff_mutex); out_fmt: put_quota_format(fmt); @@ -2371,15 +2350,16 @@ int dquot_resume(struct super_block *sb, int type) int ret = 0, cnt; unsigned int flags; + /* s_umount should be held in exclusive mode */ + if (WARN_ON_ONCE(down_read_trylock(&sb->s_umount))) + up_read(&sb->s_umount); + for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (type != -1 && cnt != type) continue; - - mutex_lock(&dqopt->dqonoff_mutex); - if (!sb_has_quota_suspended(sb, cnt)) { - mutex_unlock(&dqopt->dqonoff_mutex); + if (!sb_has_quota_suspended(sb, cnt)) continue; - } + inode = dqopt->files[cnt]; dqopt->files[cnt] = NULL; spin_lock(&dq_state_lock); @@ -2388,7 +2368,6 @@ int dquot_resume(struct super_block *sb, int type) cnt); dqopt->flags &= ~dquot_state_flag(DQUOT_STATE_FLAGS, cnt); spin_unlock(&dq_state_lock); - mutex_unlock(&dqopt->dqonoff_mutex); flags = dquot_generic_flag(flags, cnt); ret = vfs_load_quota_inode(inode, cnt, @@ -2424,42 +2403,30 @@ EXPORT_SYMBOL(dquot_quota_on); int dquot_enable(struct inode *inode, int type, int format_id, unsigned int flags) { - int ret = 0; struct super_block *sb = inode->i_sb; - struct quota_info *dqopt = sb_dqopt(sb); /* Just unsuspend quotas? */ BUG_ON(flags & DQUOT_SUSPENDED); + /* s_umount should be held in exclusive mode */ + if (WARN_ON_ONCE(down_read_trylock(&sb->s_umount))) + up_read(&sb->s_umount); if (!flags) return 0; /* Just updating flags needed? */ if (sb_has_quota_loaded(sb, type)) { - mutex_lock(&dqopt->dqonoff_mutex); - /* Now do a reliable test... */ - if (!sb_has_quota_loaded(sb, type)) { - mutex_unlock(&dqopt->dqonoff_mutex); - goto load_quota; - } if (flags & DQUOT_USAGE_ENABLED && - sb_has_quota_usage_enabled(sb, type)) { - ret = -EBUSY; - goto out_lock; - } + sb_has_quota_usage_enabled(sb, type)) + return -EBUSY; if (flags & DQUOT_LIMITS_ENABLED && - sb_has_quota_limits_enabled(sb, type)) { - ret = -EBUSY; - goto out_lock; - } + sb_has_quota_limits_enabled(sb, type)) + return -EBUSY; spin_lock(&dq_state_lock); sb_dqopt(sb)->flags |= dquot_state_flag(flags, type); spin_unlock(&dq_state_lock); -out_lock: - mutex_unlock(&dqopt->dqonoff_mutex); - return ret; + return 0; } -load_quota: return vfs_load_quota_inode(inode, type, format_id, flags); } EXPORT_SYMBOL(dquot_enable); @@ -2751,7 +2718,6 @@ int dquot_get_state(struct super_block *sb, struct qc_state *state) struct quota_info *dqopt = sb_dqopt(sb); int type; - mutex_lock(&sb_dqopt(sb)->dqonoff_mutex); memset(state, 0, sizeof(*state)); for (type = 0; type < MAXQUOTAS; type++) { if (!sb_has_quota_active(sb, type)) @@ -2773,7 +2739,6 @@ int dquot_get_state(struct super_block *sb, struct qc_state *state) tstate->nextents = 1; /* We don't know... */ spin_unlock(&dq_data_lock); } - mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex); return 0; } EXPORT_SYMBOL(dquot_get_state); @@ -2787,18 +2752,13 @@ int dquot_set_dqinfo(struct super_block *sb, int type, struct qc_info *ii) if ((ii->i_fieldmask & QC_WARNS_MASK) || (ii->i_fieldmask & QC_RT_SPC_TIMER)) return -EINVAL; - mutex_lock(&sb_dqopt(sb)->dqonoff_mutex); - if (!sb_has_quota_active(sb, type)) { - err = -ESRCH; - goto out; - } + if (!sb_has_quota_active(sb, type)) + return -ESRCH; mi = sb_dqopt(sb)->info + type; if (ii->i_fieldmask & QC_FLAGS) { if ((ii->i_flags & QCI_ROOT_SQUASH && - mi->dqi_format->qf_fmt_id != QFMT_VFS_OLD)) { - err = -EINVAL; - goto out; - } + mi->dqi_format->qf_fmt_id != QFMT_VFS_OLD)) + return -EINVAL; } spin_lock(&dq_data_lock); if (ii->i_fieldmask & QC_SPC_TIMER) @@ -2815,8 +2775,6 @@ int dquot_set_dqinfo(struct super_block *sb, int type, struct qc_info *ii) mark_info_dirty(sb, type); /* Force write to disk */ sb->dq_op->write_info(sb, type); -out: - mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex); return err; } EXPORT_SYMBOL(dquot_set_dqinfo); diff --git a/fs/quota/quota.c b/fs/quota/quota.c index 5acd0c4769af..07e08c7d05ca 100644 --- a/fs/quota/quota.c +++ b/fs/quota/quota.c @@ -104,13 +104,9 @@ static int quota_getfmt(struct super_block *sb, int type, void __user *addr) { __u32 fmt; - mutex_lock(&sb_dqopt(sb)->dqonoff_mutex); - if (!sb_has_quota_active(sb, type)) { - mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex); + if (!sb_has_quota_active(sb, type)) return -ESRCH; - } fmt = sb_dqopt(sb)->info[type].dqi_format->qf_fmt_id; - mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex); if (copy_to_user(addr, &fmt, sizeof(fmt))) return -EFAULT; return 0; @@ -789,9 +785,14 @@ static int quotactl_cmd_write(int cmd) } return 1; } - #endif /* CONFIG_BLOCK */ +/* Return true if quotactl command is manipulating quota on/off state */ +static bool quotactl_cmd_onoff(int cmd) +{ + return (cmd == Q_QUOTAON) || (cmd == Q_QUOTAOFF); +} + /* * look up a superblock on which quota ops will be performed * - use the name of a block device to find the superblock thereon @@ -809,7 +810,9 @@ static struct super_block *quotactl_block(const char __user *special, int cmd) putname(tmp); if (IS_ERR(bdev)) return ERR_CAST(bdev); - if (quotactl_cmd_write(cmd)) + if (quotactl_cmd_onoff(cmd)) + sb = get_super_exclusive_thawed(bdev); + else if (quotactl_cmd_write(cmd)) sb = get_super_thawed(bdev); else sb = get_super(bdev); @@ -872,7 +875,10 @@ SYSCALL_DEFINE4(quotactl, unsigned int, cmd, const char __user *, special, ret = do_quotactl(sb, type, cmds, id, addr, pathp); - drop_super(sb); + if (!quotactl_cmd_onoff(cmds)) + drop_super(sb); + else + drop_super_exclusive(sb); out: if (pathp && !IS_ERR(pathp)) path_put(pathp); diff --git a/fs/read_write.c b/fs/read_write.c index da6de12b5c46..7537b6b6b5a2 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -1669,6 +1669,9 @@ static int clone_verify_area(struct file *file, loff_t pos, u64 len, bool write) * Check that the two inodes are eligible for cloning, the ranges make * sense, and then flush all dirty data. Caller must ensure that the * inodes have been locked against any other modifications. + * + * Returns: 0 for "nothing to clone", 1 for "something to clone", or + * the usual negative error code. */ int vfs_clone_file_prep_inodes(struct inode *inode_in, loff_t pos_in, struct inode *inode_out, loff_t pos_out, @@ -1695,17 +1698,15 @@ int vfs_clone_file_prep_inodes(struct inode *inode_in, loff_t pos_in, /* Are we going all the way to the end? */ isize = i_size_read(inode_in); - if (isize == 0) { - *len = 0; + if (isize == 0) return 0; - } /* Zero length dedupe exits immediately; reflink goes to EOF. */ if (*len == 0) { - if (is_dedupe) { - *len = 0; + if (is_dedupe || pos_in == isize) return 0; - } + if (pos_in > isize) + return -EINVAL; *len = isize - pos_in; } @@ -1769,7 +1770,7 @@ int vfs_clone_file_prep_inodes(struct inode *inode_in, loff_t pos_in, return -EBADE; } - return 0; + return 1; } EXPORT_SYMBOL(vfs_clone_file_prep_inodes); @@ -1955,6 +1956,9 @@ int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same) goto out; ret = 0; + if (off + len > i_size_read(src)) + return -EINVAL; + /* pre-format output fields to sane values */ for (i = 0; i < count; i++) { same->info[i].bytes_deduped = 0ULL; diff --git a/fs/seq_file.c b/fs/seq_file.c index 368bfb92b115..a11f271800ef 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c @@ -190,6 +190,13 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos) */ m->version = file->f_version; + /* + * if request is to read from zero offset, reset iterator to first + * record as it might have been already advanced by previous requests + */ + if (*ppos == 0) + m->index = 0; + /* Don't assume *ppos is where we left it */ if (unlikely(*ppos != m->read_pos)) { while ((err = traverse(m, *ppos)) == -EAGAIN) diff --git a/fs/splice.c b/fs/splice.c index 8ed7c9d8c0fb..873d83104e79 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -1087,7 +1087,13 @@ EXPORT_SYMBOL(do_splice_direct); static int wait_for_space(struct pipe_inode_info *pipe, unsigned flags) { - while (pipe->nrbufs == pipe->buffers) { + for (;;) { + if (unlikely(!pipe->readers)) { + send_sig(SIGPIPE, current, 0); + return -EPIPE; + } + if (pipe->nrbufs != pipe->buffers) + return 0; if (flags & SPLICE_F_NONBLOCK) return -EAGAIN; if (signal_pending(current)) @@ -1096,7 +1102,6 @@ static int wait_for_space(struct pipe_inode_info *pipe, unsigned flags) pipe_wait(pipe); pipe->waiting_writers--; } - return 0; } static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe, diff --git a/fs/super.c b/fs/super.c index c183835566c1..1709ed029a2c 100644 --- a/fs/super.c +++ b/fs/super.c @@ -244,7 +244,6 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags, mutex_init(&s->s_vfs_rename_mutex); lockdep_set_class(&s->s_vfs_rename_mutex, &type->s_vfs_rename_key); mutex_init(&s->s_dquot.dqio_mutex); - mutex_init(&s->s_dquot.dqonoff_mutex); s->s_maxbytes = MAX_NON_LFS; s->s_op = &default_op; s->s_time_gran = 1000000000; @@ -558,6 +557,13 @@ void drop_super(struct super_block *sb) EXPORT_SYMBOL(drop_super); +void drop_super_exclusive(struct super_block *sb) +{ + up_write(&sb->s_umount); + put_super(sb); +} +EXPORT_SYMBOL(drop_super_exclusive); + /** * iterate_supers - call function for all active superblocks * @f: function to call @@ -628,15 +634,7 @@ void iterate_supers_type(struct file_system_type *type, EXPORT_SYMBOL(iterate_supers_type); -/** - * get_super - get the superblock of a device - * @bdev: device to get the superblock for - * - * Scans the superblock list and finds the superblock of the file system - * mounted on the device given. %NULL is returned if no match is found. - */ - -struct super_block *get_super(struct block_device *bdev) +static struct super_block *__get_super(struct block_device *bdev, bool excl) { struct super_block *sb; @@ -651,11 +649,17 @@ rescan: if (sb->s_bdev == bdev) { sb->s_count++; spin_unlock(&sb_lock); - down_read(&sb->s_umount); + if (!excl) + down_read(&sb->s_umount); + else + down_write(&sb->s_umount); /* still alive? */ if (sb->s_root && (sb->s_flags & MS_BORN)) return sb; - up_read(&sb->s_umount); + if (!excl) + up_read(&sb->s_umount); + else + up_write(&sb->s_umount); /* nope, got unmounted */ spin_lock(&sb_lock); __put_super(sb); @@ -666,32 +670,67 @@ rescan: return NULL; } -EXPORT_SYMBOL(get_super); - /** - * get_super_thawed - get thawed superblock of a device + * get_super - get the superblock of a device * @bdev: device to get the superblock for * * Scans the superblock list and finds the superblock of the file system - * mounted on the device. The superblock is returned once it is thawed - * (or immediately if it was not frozen). %NULL is returned if no match - * is found. + * mounted on the device given. %NULL is returned if no match is found. */ -struct super_block *get_super_thawed(struct block_device *bdev) +struct super_block *get_super(struct block_device *bdev) +{ + return __get_super(bdev, false); +} +EXPORT_SYMBOL(get_super); + +static struct super_block *__get_super_thawed(struct block_device *bdev, + bool excl) { while (1) { - struct super_block *s = get_super(bdev); + struct super_block *s = __get_super(bdev, excl); if (!s || s->s_writers.frozen == SB_UNFROZEN) return s; - up_read(&s->s_umount); + if (!excl) + up_read(&s->s_umount); + else + up_write(&s->s_umount); wait_event(s->s_writers.wait_unfrozen, s->s_writers.frozen == SB_UNFROZEN); put_super(s); } } + +/** + * get_super_thawed - get thawed superblock of a device + * @bdev: device to get the superblock for + * + * Scans the superblock list and finds the superblock of the file system + * mounted on the device. The superblock is returned once it is thawed + * (or immediately if it was not frozen). %NULL is returned if no match + * is found. + */ +struct super_block *get_super_thawed(struct block_device *bdev) +{ + return __get_super_thawed(bdev, false); +} EXPORT_SYMBOL(get_super_thawed); /** + * get_super_exclusive_thawed - get thawed superblock of a device + * @bdev: device to get the superblock for + * + * Scans the superblock list and finds the superblock of the file system + * mounted on the device. The superblock is returned once it is thawed + * (or immediately if it was not frozen) and s_umount semaphore is held + * in exclusive mode. %NULL is returned if no match is found. + */ +struct super_block *get_super_exclusive_thawed(struct block_device *bdev) +{ + return __get_super_thawed(bdev, true); +} +EXPORT_SYMBOL(get_super_exclusive_thawed); + +/** * get_active_super - get an active reference to the superblock of a device * @bdev: device to get the superblock for * diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index 45ceb94e89e4..1bc0bd6a9848 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -1191,7 +1191,7 @@ out: return err; } -void ufs_truncate_blocks(struct inode *inode) +static void ufs_truncate_blocks(struct inode *inode) { if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))) diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index aca2d4bd4303..07593a362cd0 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -1161,7 +1161,7 @@ xfs_reflink_remap_range( ret = vfs_clone_file_prep_inodes(inode_in, pos_in, inode_out, pos_out, &len, is_dedupe); - if (ret || len == 0) + if (ret <= 0) goto out_unlock; trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out); |