summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2016-10-30 12:42:58 -0400
committerDavid S. Miller <davem@davemloft.net>2016-10-30 12:42:58 -0400
commit27058af401e49d88a905df000dd26f443fcfa8ce (patch)
tree819f32113d3b8374b9fbf72e2202d4c4d4511a60 /fs
parent357f4aae859b5d74554b0ccbb18556f1df4166c3 (diff)
parent2a26d99b251b8625d27aed14e97fc10707a3a81f (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
Mostly simple overlapping changes. For example, David Ahern's adjacency list revamp in 'net-next' conflicted with an adjacency list traversal bug fix in 'net'. Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'fs')
-rw-r--r--fs/afs/cmservice.c6
-rw-r--r--fs/afs/fsclient.c4
-rw-r--r--fs/afs/internal.h2
-rw-r--r--fs/afs/rxrpc.c3
-rw-r--r--fs/befs/befs.h19
-rw-r--r--fs/befs/btree.c60
-rw-r--r--fs/befs/datastream.c253
-rw-r--r--fs/befs/debug.c1
-rw-r--r--fs/befs/io.c26
-rw-r--r--fs/befs/io.h2
-rw-r--r--fs/befs/linuxvfs.c130
-rw-r--r--fs/befs/super.c36
-rw-r--r--fs/btrfs/compression.c4
-rw-r--r--fs/btrfs/ctree.h3
-rw-r--r--fs/btrfs/disk-io.c33
-rw-r--r--fs/btrfs/extent_io.c64
-rw-r--r--fs/btrfs/extent_io.h22
-rw-r--r--fs/btrfs/free-space-tree.c19
-rw-r--r--fs/btrfs/send.c58
-rw-r--r--fs/btrfs/tests/extent-io-tests.c87
-rw-r--r--fs/btrfs/tests/free-space-tree-tests.c189
-rw-r--r--fs/btrfs/tree-log.c20
-rw-r--r--fs/ceph/file.c3
-rw-r--r--fs/ceph/inode.c3
-rw-r--r--fs/ceph/super.c2
-rw-r--r--fs/ceph/xattr.c4
-rw-r--r--fs/cifs/cifs_debug.c1
-rw-r--r--fs/cifs/cifs_fs_sb.h1
-rw-r--r--fs/cifs/cifs_ioctl.h8
-rw-r--r--fs/cifs/cifsacl.c123
-rw-r--r--fs/cifs/cifsfs.c11
-rw-r--r--fs/cifs/cifsglob.h50
-rw-r--r--fs/cifs/cifsproto.h2
-rw-r--r--fs/cifs/cifssmb.c4
-rw-r--r--fs/cifs/connect.c43
-rw-r--r--fs/cifs/file.c105
-rw-r--r--fs/cifs/ioctl.c16
-rw-r--r--fs/cifs/misc.c15
-rw-r--r--fs/cifs/readdir.c6
-rw-r--r--fs/cifs/smb2inode.c6
-rw-r--r--fs/cifs/smb2misc.c16
-rw-r--r--fs/cifs/smb2ops.c62
-rw-r--r--fs/cifs/smb2pdu.c587
-rw-r--r--fs/cifs/smb2pdu.h2
-rw-r--r--fs/cifs/xattr.c62
-rw-r--r--fs/crypto/crypto.c15
-rw-r--r--fs/crypto/policy.c4
-rw-r--r--fs/exec.c9
-rw-r--r--fs/exofs/dir.c2
-rw-r--r--fs/ext2/inode.c7
-rw-r--r--fs/ext4/block_validity.c4
-rw-r--r--fs/ext4/mballoc.h17
-rw-r--r--fs/ext4/namei.c18
-rw-r--r--fs/ext4/super.c21
-rw-r--r--fs/ext4/sysfs.c4
-rw-r--r--fs/ext4/xattr.c20
-rw-r--r--fs/f2fs/gc.c10
-rw-r--r--fs/iomap.c5
-rw-r--r--fs/isofs/inode.c8
-rw-r--r--fs/jbd2/transaction.c3
-rw-r--r--fs/kernfs/dir.c84
-rw-r--r--fs/kernfs/file.c1
-rw-r--r--fs/locks.c6
-rw-r--r--fs/namei.c25
-rw-r--r--fs/namespace.c1
-rw-r--r--fs/nfs/blocklayout/blocklayout.c3
-rw-r--r--fs/nfs/nfs4proc.c2
-rw-r--r--fs/orangefs/dcache.c5
-rw-r--r--fs/orangefs/file.c14
-rw-r--r--fs/orangefs/namei.c8
-rw-r--r--fs/orangefs/orangefs-kernel.h7
-rw-r--r--fs/overlayfs/copy_up.c67
-rw-r--r--fs/overlayfs/dir.c5
-rw-r--r--fs/overlayfs/inode.c44
-rw-r--r--fs/overlayfs/super.c44
-rw-r--r--fs/proc/array.c9
-rw-r--r--fs/proc/base.c14
-rw-r--r--fs/proc/task_mmu.c29
-rw-r--r--fs/proc/task_nommu.c28
-rw-r--r--fs/read_write.c29
-rw-r--r--fs/super.c43
-rw-r--r--fs/sysfs/dir.c6
-rw-r--r--fs/ubifs/dir.c28
-rw-r--r--fs/ubifs/xattr.c2
-rw-r--r--fs/xfs/libxfs/xfs_bmap.c418
-rw-r--r--fs/xfs/libxfs/xfs_bmap.h8
-rw-r--r--fs/xfs/libxfs/xfs_btree.c2
-rw-r--r--fs/xfs/libxfs/xfs_dquot_buf.c3
-rw-r--r--fs/xfs/libxfs/xfs_format.h1
-rw-r--r--fs/xfs/libxfs/xfs_inode_buf.c13
-rw-r--r--fs/xfs/libxfs/xfs_inode_buf.h2
-rw-r--r--fs/xfs/xfs_file.c232
-rw-r--r--fs/xfs/xfs_icache.c8
-rw-r--r--fs/xfs/xfs_iomap.c57
-rw-r--r--fs/xfs/xfs_mount.c1
-rw-r--r--fs/xfs/xfs_reflink.c499
-rw-r--r--fs/xfs/xfs_reflink.h11
-rw-r--r--fs/xfs/xfs_sysfs.c4
-rw-r--r--fs/xfs/xfs_trace.h4
99 files changed, 2368 insertions, 1719 deletions
diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c
index 2037e7a77a37..d764236072b1 100644
--- a/fs/afs/cmservice.c
+++ b/fs/afs/cmservice.c
@@ -91,11 +91,9 @@ static const struct afs_call_type afs_SRXCBTellMeAboutYourself = {
*/
bool afs_cm_incoming_call(struct afs_call *call)
{
- u32 operation_id = ntohl(call->operation_ID);
+ _enter("{CB.OP %u}", call->operation_ID);
- _enter("{CB.OP %u}", operation_id);
-
- switch (operation_id) {
+ switch (call->operation_ID) {
case CBCallBack:
call->type = &afs_SRXCBCallBack;
return true;
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index 96f4d764d1a6..31c616ab9b40 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -364,7 +364,7 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call)
buffer = kmap(page);
ret = afs_extract_data(call, buffer,
call->count, true);
- kunmap(buffer);
+ kunmap(page);
if (ret < 0)
return ret;
}
@@ -397,7 +397,7 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call)
page = call->reply3;
buffer = kmap(page);
memset(buffer + call->count, 0, PAGE_SIZE - call->count);
- kunmap(buffer);
+ kunmap(page);
}
_leave(" = 0 [done]");
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 5497c8496055..535a38d2c1d0 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -112,7 +112,7 @@ struct afs_call {
bool need_attention; /* T if RxRPC poked us */
u16 service_id; /* RxRPC service ID to call */
__be16 port; /* target UDP port */
- __be32 operation_ID; /* operation ID for an incoming call */
+ u32 operation_ID; /* operation ID for an incoming call */
u32 count; /* count for use in unmarshalling */
__be32 tmp; /* place to extract temporary data */
afs_dataversion_t store_version; /* updated version expected from store */
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index 477928b25940..25f05a8d21b1 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -676,10 +676,11 @@ static int afs_deliver_cm_op_id(struct afs_call *call)
ASSERTCMP(call->offset, <, 4);
/* the operation ID forms the first four bytes of the request data */
- ret = afs_extract_data(call, &call->operation_ID, 4, true);
+ ret = afs_extract_data(call, &call->tmp, 4, true);
if (ret < 0)
return ret;
+ call->operation_ID = ntohl(call->tmp);
call->state = AFS_CALL_AWAIT_REQUEST;
call->offset = 0;
diff --git a/fs/befs/befs.h b/fs/befs/befs.h
index e0f59263a96d..c6bad51d8ec7 100644
--- a/fs/befs/befs.h
+++ b/fs/befs/befs.h
@@ -43,7 +43,10 @@ struct befs_sb_info {
u32 ag_shift;
u32 num_ags;
- /* jornal log entry */
+ /* State of the superblock */
+ u32 flags;
+
+ /* Journal log entry */
befs_block_run log_blocks;
befs_off_t log_start;
befs_off_t log_end;
@@ -79,7 +82,7 @@ enum befs_err {
BEFS_BT_END,
BEFS_BT_EMPTY,
BEFS_BT_MATCH,
- BEFS_BT_PARMATCH,
+ BEFS_BT_OVERFLOW,
BEFS_BT_NOT_FOUND
};
@@ -140,18 +143,6 @@ befs_iaddrs_per_block(struct super_block *sb)
return BEFS_SB(sb)->block_size / sizeof (befs_disk_inode_addr);
}
-static inline int
-befs_iaddr_is_empty(const befs_inode_addr *iaddr)
-{
- return (!iaddr->allocation_group) && (!iaddr->start) && (!iaddr->len);
-}
-
-static inline size_t
-befs_brun_size(struct super_block *sb, befs_block_run run)
-{
- return BEFS_SB(sb)->block_size * run.len;
-}
-
#include "endian.h"
#endif /* _LINUX_BEFS_H */
diff --git a/fs/befs/btree.c b/fs/befs/btree.c
index 307645f9e284..7e135ea73fdd 100644
--- a/fs/befs/btree.c
+++ b/fs/befs/btree.c
@@ -85,7 +85,7 @@ struct befs_btree_node {
};
/* local constants */
-static const befs_off_t befs_bt_inval = 0xffffffffffffffffULL;
+static const befs_off_t BEFS_BT_INVAL = 0xffffffffffffffffULL;
/* local functions */
static int befs_btree_seekleaf(struct super_block *sb, const befs_data_stream *ds,
@@ -156,8 +156,6 @@ befs_bt_read_super(struct super_block *sb, const befs_data_stream *ds,
sup->max_depth = fs32_to_cpu(sb, od_sup->max_depth);
sup->data_type = fs32_to_cpu(sb, od_sup->data_type);
sup->root_node_ptr = fs64_to_cpu(sb, od_sup->root_node_ptr);
- sup->free_node_ptr = fs64_to_cpu(sb, od_sup->free_node_ptr);
- sup->max_size = fs64_to_cpu(sb, od_sup->max_size);
brelse(bh);
if (sup->magic != BEFS_BTREE_MAGIC) {
@@ -183,8 +181,8 @@ befs_bt_read_super(struct super_block *sb, const befs_data_stream *ds,
* Calls befs_read_datastream to read in the indicated btree node and
* makes sure its header fields are in cpu byteorder, byteswapping if
* necessary.
- * Note: node->bh must be NULL when this function called first
- * time. Don't forget brelse(node->bh) after last call.
+ * Note: node->bh must be NULL when this function is called the first time.
+ * Don't forget brelse(node->bh) after last call.
*
* On success, returns BEFS_OK and *@node contains the btree node that
* starts at @node_off, with the node->head fields in cpu byte order.
@@ -244,7 +242,7 @@ befs_bt_read_node(struct super_block *sb, const befs_data_stream *ds,
* Read the superblock and rootnode of the b+tree.
* Drill down through the interior nodes using befs_find_key().
* Once at the correct leaf node, use befs_find_key() again to get the
- * actuall value stored with the key.
+ * actual value stored with the key.
*/
int
befs_btree_find(struct super_block *sb, const befs_data_stream *ds,
@@ -283,9 +281,9 @@ befs_btree_find(struct super_block *sb, const befs_data_stream *ds,
while (!befs_leafnode(this_node)) {
res = befs_find_key(sb, this_node, key, &node_off);
- if (res == BEFS_BT_NOT_FOUND)
+ /* if no key set, try the overflow node */
+ if (res == BEFS_BT_OVERFLOW)
node_off = this_node->head.overflow;
- /* if no match, go to overflow node */
if (befs_bt_read_node(sb, ds, this_node, node_off) != BEFS_OK) {
befs_error(sb, "befs_btree_find() failed to read "
"node at %llu", node_off);
@@ -293,15 +291,15 @@ befs_btree_find(struct super_block *sb, const befs_data_stream *ds,
}
}
- /* at the correct leaf node now */
-
+ /* at a leaf node now, check if it is correct */
res = befs_find_key(sb, this_node, key, value);
brelse(this_node->bh);
kfree(this_node);
if (res != BEFS_BT_MATCH) {
- befs_debug(sb, "<--- %s Key %s not found", __func__, key);
+ befs_error(sb, "<--- %s Key %s not found", __func__, key);
+ befs_debug(sb, "<--- %s ERROR", __func__);
*value = 0;
return BEFS_BT_NOT_FOUND;
}
@@ -324,16 +322,12 @@ befs_btree_find(struct super_block *sb, const befs_data_stream *ds,
* @findkey: Keystring to search for
* @value: If key is found, the value stored with the key is put here
*
- * finds exact match if one exists, and returns BEFS_BT_MATCH
- * If no exact match, finds first key in node that is greater
- * (alphabetically) than the search key and returns BEFS_BT_PARMATCH
- * (for partial match, I guess). Can you think of something better to
- * call it?
- *
- * If no key was a match or greater than the search key, return
- * BEFS_BT_NOT_FOUND.
+ * Finds exact match if one exists, and returns BEFS_BT_MATCH.
+ * If there is no match and node's value array is too small for key, return
+ * BEFS_BT_OVERFLOW.
+ * If no match and node should countain this key, return BEFS_BT_NOT_FOUND.
*
- * Use binary search instead of a linear.
+ * Uses binary search instead of a linear.
*/
static int
befs_find_key(struct super_block *sb, struct befs_btree_node *node,
@@ -348,18 +342,16 @@ befs_find_key(struct super_block *sb, struct befs_btree_node *node,
befs_debug(sb, "---> %s %s", __func__, findkey);
- *value = 0;
-
findkey_len = strlen(findkey);
- /* if node can not contain key, just skeep this node */
+ /* if node can not contain key, just skip this node */
last = node->head.all_key_count - 1;
thiskey = befs_bt_get_key(sb, node, last, &keylen);
eq = befs_compare_strings(thiskey, keylen, findkey, findkey_len);
if (eq < 0) {
- befs_debug(sb, "<--- %s %s not found", __func__, findkey);
- return BEFS_BT_NOT_FOUND;
+ befs_debug(sb, "<--- node can't contain %s", findkey);
+ return BEFS_BT_OVERFLOW;
}
valarray = befs_bt_valarray(node);
@@ -387,12 +379,15 @@ befs_find_key(struct super_block *sb, struct befs_btree_node *node,
else
first = mid + 1;
}
+
+ /* return an existing value so caller can arrive to a leaf node */
if (eq < 0)
*value = fs64_to_cpu(sb, valarray[mid + 1]);
else
*value = fs64_to_cpu(sb, valarray[mid]);
- befs_debug(sb, "<--- %s found %s at %d", __func__, thiskey, mid);
- return BEFS_BT_PARMATCH;
+ befs_error(sb, "<--- %s %s not found", __func__, findkey);
+ befs_debug(sb, "<--- %s ERROR", __func__);
+ return BEFS_BT_NOT_FOUND;
}
/**
@@ -405,7 +400,7 @@ befs_find_key(struct super_block *sb, struct befs_btree_node *node,
* @keysize: Length of the returned key
* @value: Value stored with the returned key
*
- * Heres how it works: Key_no is the index of the key/value pair to
+ * Here's how it works: Key_no is the index of the key/value pair to
* return in keybuf/value.
* Bufsize is the size of keybuf (BEFS_NAME_LEN+1 is a good size). Keysize is
* the number of characters in the key (just a convenience).
@@ -422,7 +417,7 @@ befs_btree_read(struct super_block *sb, const befs_data_stream *ds,
{
struct befs_btree_node *this_node;
befs_btree_super bt_super;
- befs_off_t node_off = 0;
+ befs_off_t node_off;
int cur_key;
fs64 *valarray;
char *keystart;
@@ -467,7 +462,7 @@ befs_btree_read(struct super_block *sb, const befs_data_stream *ds,
while (key_sum + this_node->head.all_key_count <= key_no) {
/* no more nodes to look in: key_no is too large */
- if (this_node->head.right == befs_bt_inval) {
+ if (this_node->head.right == BEFS_BT_INVAL) {
*keysize = 0;
*value = 0;
befs_debug(sb,
@@ -541,7 +536,6 @@ befs_btree_read(struct super_block *sb, const befs_data_stream *ds,
* @node_off: Pointer to offset of current node within datastream. Modified
* by the function.
*
- *
* Helper function for btree traverse. Moves the current position to the
* start of the first leaf node.
*
@@ -608,7 +602,7 @@ static int
befs_leafnode(struct befs_btree_node *node)
{
/* all interior nodes (and only interior nodes) have an overflow node */
- if (node->head.overflow == befs_bt_inval)
+ if (node->head.overflow == BEFS_BT_INVAL)
return 1;
else
return 0;
@@ -715,7 +709,7 @@ befs_bt_get_key(struct super_block *sb, struct befs_btree_node *node,
*
* Returns 0 if @key1 and @key2 are equal.
* Returns >0 if @key1 is greater.
- * Returns <0 if @key2 is greater..
+ * Returns <0 if @key2 is greater.
*/
static int
befs_compare_strings(const void *key1, int keylen1,
diff --git a/fs/befs/datastream.c b/fs/befs/datastream.c
index af1bc19b7c85..b4c7ba013c0d 100644
--- a/fs/befs/datastream.c
+++ b/fs/befs/datastream.c
@@ -22,22 +22,22 @@ const befs_inode_addr BAD_IADDR = { 0, 0, 0 };
static int befs_find_brun_direct(struct super_block *sb,
const befs_data_stream *data,
- befs_blocknr_t blockno, befs_block_run * run);
+ befs_blocknr_t blockno, befs_block_run *run);
static int befs_find_brun_indirect(struct super_block *sb,
const befs_data_stream *data,
befs_blocknr_t blockno,
- befs_block_run * run);
+ befs_block_run *run);
static int befs_find_brun_dblindirect(struct super_block *sb,
const befs_data_stream *data,
befs_blocknr_t blockno,
- befs_block_run * run);
+ befs_block_run *run);
/**
* befs_read_datastream - get buffer_head containing data, starting from pos.
* @sb: Filesystem superblock
- * @ds: datastrem to find data with
+ * @ds: datastream to find data with
* @pos: start of data
* @off: offset of data in buffer_head->b_data
*
@@ -46,7 +46,7 @@ static int befs_find_brun_dblindirect(struct super_block *sb,
*/
struct buffer_head *
befs_read_datastream(struct super_block *sb, const befs_data_stream *ds,
- befs_off_t pos, uint * off)
+ befs_off_t pos, uint *off)
{
struct buffer_head *bh;
befs_block_run run;
@@ -75,7 +75,13 @@ befs_read_datastream(struct super_block *sb, const befs_data_stream *ds,
return bh;
}
-/*
+/**
+ * befs_fblock2brun - give back block run for fblock
+ * @sb: the superblock
+ * @data: datastream to read from
+ * @fblock: the blocknumber with the file position to find
+ * @run: The found run is passed back through this pointer
+ *
* Takes a file position and gives back a brun who's starting block
* is block number fblock of the file.
*
@@ -88,7 +94,7 @@ befs_read_datastream(struct super_block *sb, const befs_data_stream *ds,
*/
int
befs_fblock2brun(struct super_block *sb, const befs_data_stream *data,
- befs_blocknr_t fblock, befs_block_run * run)
+ befs_blocknr_t fblock, befs_block_run *run)
{
int err;
befs_off_t pos = fblock << BEFS_SB(sb)->block_shift;
@@ -115,7 +121,7 @@ befs_fblock2brun(struct super_block *sb, const befs_data_stream *data,
/**
* befs_read_lsmylink - read long symlink from datastream.
* @sb: Filesystem superblock
- * @ds: Datastrem to read from
+ * @ds: Datastream to read from
* @buff: Buffer in which to place long symlink data
* @len: Length of the long symlink in bytes
*
@@ -128,6 +134,7 @@ befs_read_lsymlink(struct super_block *sb, const befs_data_stream *ds,
befs_off_t bytes_read = 0; /* bytes readed */
u16 plen;
struct buffer_head *bh;
+
befs_debug(sb, "---> %s length: %llu", __func__, len);
while (bytes_read < len) {
@@ -183,13 +190,13 @@ befs_count_blocks(struct super_block *sb, const befs_data_stream *ds)
metablocks += ds->indirect.len;
/*
- Double indir block, plus all the indirect blocks it mapps
- In the double-indirect range, all block runs of data are
- BEFS_DBLINDIR_BRUN_LEN blocks long. Therefore, we know
- how many data block runs are in the double-indirect region,
- and from that we know how many indirect blocks it takes to
- map them. We assume that the indirect blocks are also
- BEFS_DBLINDIR_BRUN_LEN blocks long.
+ * Double indir block, plus all the indirect blocks it maps.
+ * In the double-indirect range, all block runs of data are
+ * BEFS_DBLINDIR_BRUN_LEN blocks long. Therefore, we know
+ * how many data block runs are in the double-indirect region,
+ * and from that we know how many indirect blocks it takes to
+ * map them. We assume that the indirect blocks are also
+ * BEFS_DBLINDIR_BRUN_LEN blocks long.
*/
if (ds->size > ds->max_indirect_range && ds->max_indirect_range != 0) {
uint dbl_bytes;
@@ -212,58 +219,50 @@ befs_count_blocks(struct super_block *sb, const befs_data_stream *ds)
return blocks;
}
-/*
- Finds the block run that starts at file block number blockno
- in the file represented by the datastream data, if that
- blockno is in the direct region of the datastream.
-
- sb: the superblock
- data: the datastream
- blockno: the blocknumber to find
- run: The found run is passed back through this pointer
-
- Return value is BEFS_OK if the blockrun is found, BEFS_ERR
- otherwise.
-
- Algorithm:
- Linear search. Checks each element of array[] to see if it
- contains the blockno-th filesystem block. This is necessary
- because the block runs map variable amounts of data. Simply
- keeps a count of the number of blocks searched so far (sum),
- incrementing this by the length of each block run as we come
- across it. Adds sum to *count before returning (this is so
- you can search multiple arrays that are logicaly one array,
- as in the indirect region code).
-
- When/if blockno is found, if blockno is inside of a block
- run as stored on disk, we offset the start and length members
- of the block run, so that blockno is the start and len is
- still valid (the run ends in the same place).
-
- 2001-11-15 Will Dyson
-*/
+/**
+ * befs_find_brun_direct - find a direct block run in the datastream
+ * @sb: the superblock
+ * @data: the datastream
+ * @blockno: the blocknumber to find
+ * @run: The found run is passed back through this pointer
+ *
+ * Finds the block run that starts at file block number blockno
+ * in the file represented by the datastream data, if that
+ * blockno is in the direct region of the datastream.
+ *
+ * Return value is BEFS_OK if the blockrun is found, BEFS_ERR
+ * otherwise.
+ *
+ * Algorithm:
+ * Linear search. Checks each element of array[] to see if it
+ * contains the blockno-th filesystem block. This is necessary
+ * because the block runs map variable amounts of data. Simply
+ * keeps a count of the number of blocks searched so far (sum),
+ * incrementing this by the length of each block run as we come
+ * across it. Adds sum to *count before returning (this is so
+ * you can search multiple arrays that are logicaly one array,
+ * as in the indirect region code).
+ *
+ * When/if blockno is found, if blockno is inside of a block
+ * run as stored on disk, we offset the start and length members
+ * of the block run, so that blockno is the start and len is
+ * still valid (the run ends in the same place).
+ */
static int
befs_find_brun_direct(struct super_block *sb, const befs_data_stream *data,
- befs_blocknr_t blockno, befs_block_run * run)
+ befs_blocknr_t blockno, befs_block_run *run)
{
int i;
const befs_block_run *array = data->direct;
befs_blocknr_t sum;
- befs_blocknr_t max_block =
- data->max_direct_range >> BEFS_SB(sb)->block_shift;
befs_debug(sb, "---> %s, find %lu", __func__, (unsigned long)blockno);
- if (blockno > max_block) {
- befs_error(sb, "%s passed block outside of direct region",
- __func__);
- return BEFS_ERR;
- }
-
for (i = 0, sum = 0; i < BEFS_NUM_DIRECT_BLOCKS;
sum += array[i].len, i++) {
if (blockno >= sum && blockno < sum + (array[i].len)) {
int offset = blockno - sum;
+
run->allocation_group = array[i].allocation_group;
run->start = array[i].start + offset;
run->len = array[i].len - offset;
@@ -275,38 +274,39 @@ befs_find_brun_direct(struct super_block *sb, const befs_data_stream *data,
}
}
+ befs_error(sb, "%s failed to find file block %lu", __func__,
+ (unsigned long)blockno);
befs_debug(sb, "---> %s ERROR", __func__);
return BEFS_ERR;
}
-/*
- Finds the block run that starts at file block number blockno
- in the file represented by the datastream data, if that
- blockno is in the indirect region of the datastream.
-
- sb: the superblock
- data: the datastream
- blockno: the blocknumber to find
- run: The found run is passed back through this pointer
-
- Return value is BEFS_OK if the blockrun is found, BEFS_ERR
- otherwise.
-
- Algorithm:
- For each block in the indirect run of the datastream, read
- it in and search through it for search_blk.
-
- XXX:
- Really should check to make sure blockno is inside indirect
- region.
-
- 2001-11-15 Will Dyson
-*/
+/**
+ * befs_find_brun_indirect - find a block run in the datastream
+ * @sb: the superblock
+ * @data: the datastream
+ * @blockno: the blocknumber to find
+ * @run: The found run is passed back through this pointer
+ *
+ * Finds the block run that starts at file block number blockno
+ * in the file represented by the datastream data, if that
+ * blockno is in the indirect region of the datastream.
+ *
+ * Return value is BEFS_OK if the blockrun is found, BEFS_ERR
+ * otherwise.
+ *
+ * Algorithm:
+ * For each block in the indirect run of the datastream, read
+ * it in and search through it for search_blk.
+ *
+ * XXX:
+ * Really should check to make sure blockno is inside indirect
+ * region.
+ */
static int
befs_find_brun_indirect(struct super_block *sb,
const befs_data_stream *data,
befs_blocknr_t blockno,
- befs_block_run * run)
+ befs_block_run *run)
{
int i, j;
befs_blocknr_t sum = 0;
@@ -326,11 +326,12 @@ befs_find_brun_indirect(struct super_block *sb,
/* Examine blocks of the indirect run one at a time */
for (i = 0; i < indirect.len; i++) {
- indirblock = befs_bread(sb, indirblockno + i);
+ indirblock = sb_bread(sb, indirblockno + i);
if (indirblock == NULL) {
- befs_debug(sb, "---> %s failed to read "
+ befs_error(sb, "---> %s failed to read "
"disk block %lu from the indirect brun",
__func__, (unsigned long)indirblockno + i);
+ befs_debug(sb, "<--- %s ERROR", __func__);
return BEFS_ERR;
}
@@ -370,52 +371,51 @@ befs_find_brun_indirect(struct super_block *sb,
return BEFS_ERR;
}
-/*
- Finds the block run that starts at file block number blockno
- in the file represented by the datastream data, if that
- blockno is in the double-indirect region of the datastream.
-
- sb: the superblock
- data: the datastream
- blockno: the blocknumber to find
- run: The found run is passed back through this pointer
-
- Return value is BEFS_OK if the blockrun is found, BEFS_ERR
- otherwise.
-
- Algorithm:
- The block runs in the double-indirect region are different.
- They are always allocated 4 fs blocks at a time, so each
- block run maps a constant amount of file data. This means
- that we can directly calculate how many block runs into the
- double-indirect region we need to go to get to the one that
- maps a particular filesystem block.
-
- We do this in two stages. First we calculate which of the
- inode addresses in the double-indirect block will point us
- to the indirect block that contains the mapping for the data,
- then we calculate which of the inode addresses in that
- indirect block maps the data block we are after.
-
- Oh, and once we've done that, we actually read in the blocks
- that contain the inode addresses we calculated above. Even
- though the double-indirect run may be several blocks long,
- we can calculate which of those blocks will contain the index
- we are after and only read that one. We then follow it to
- the indirect block and perform a similar process to find
- the actual block run that maps the data block we are interested
- in.
-
- Then we offset the run as in befs_find_brun_array() and we are
- done.
-
- 2001-11-15 Will Dyson
-*/
+/**
+ * befs_find_brun_dblindirect - find a block run in the datastream
+ * @sb: the superblock
+ * @data: the datastream
+ * @blockno: the blocknumber to find
+ * @run: The found run is passed back through this pointer
+ *
+ * Finds the block run that starts at file block number blockno
+ * in the file represented by the datastream data, if that
+ * blockno is in the double-indirect region of the datastream.
+ *
+ * Return value is BEFS_OK if the blockrun is found, BEFS_ERR
+ * otherwise.
+ *
+ * Algorithm:
+ * The block runs in the double-indirect region are different.
+ * They are always allocated 4 fs blocks at a time, so each
+ * block run maps a constant amount of file data. This means
+ * that we can directly calculate how many block runs into the
+ * double-indirect region we need to go to get to the one that
+ * maps a particular filesystem block.
+ *
+ * We do this in two stages. First we calculate which of the
+ * inode addresses in the double-indirect block will point us
+ * to the indirect block that contains the mapping for the data,
+ * then we calculate which of the inode addresses in that
+ * indirect block maps the data block we are after.
+ *
+ * Oh, and once we've done that, we actually read in the blocks
+ * that contain the inode addresses we calculated above. Even
+ * though the double-indirect run may be several blocks long,
+ * we can calculate which of those blocks will contain the index
+ * we are after and only read that one. We then follow it to
+ * the indirect block and perform a similar process to find
+ * the actual block run that maps the data block we are interested
+ * in.
+ *
+ * Then we offset the run as in befs_find_brun_array() and we are
+ * done.
+ */
static int
befs_find_brun_dblindirect(struct super_block *sb,
const befs_data_stream *data,
befs_blocknr_t blockno,
- befs_block_run * run)
+ befs_block_run *run)
{
int dblindir_indx;
int indir_indx;
@@ -430,10 +430,9 @@ befs_find_brun_dblindirect(struct super_block *sb,
struct buffer_head *indir_block;
befs_block_run indir_run;
befs_disk_inode_addr *iaddr_array;
- struct befs_sb_info *befs_sb = BEFS_SB(sb);
befs_blocknr_t indir_start_blk =
- data->max_indirect_range >> befs_sb->block_shift;
+ data->max_indirect_range >> BEFS_SB(sb)->block_shift;
off_t dbl_indir_off = blockno - indir_start_blk;
@@ -471,7 +470,7 @@ befs_find_brun_dblindirect(struct super_block *sb,
}
dbl_indir_block =
- befs_bread(sb, iaddr2blockno(sb, &data->double_indirect) +
+ sb_bread(sb, iaddr2blockno(sb, &data->double_indirect) +
dbl_which_block);
if (dbl_indir_block == NULL) {
befs_error(sb, "%s couldn't read the "
@@ -479,7 +478,6 @@ befs_find_brun_dblindirect(struct super_block *sb,
(unsigned long)
iaddr2blockno(sb, &data->double_indirect) +
dbl_which_block);
- brelse(dbl_indir_block);
return BEFS_ERR;
}
@@ -499,12 +497,11 @@ befs_find_brun_dblindirect(struct super_block *sb,
}
indir_block =
- befs_bread(sb, iaddr2blockno(sb, &indir_run) + which_block);
+ sb_bread(sb, iaddr2blockno(sb, &indir_run) + which_block);
if (indir_block == NULL) {
befs_error(sb, "%s couldn't read the indirect block "
"at blockno %lu", __func__, (unsigned long)
iaddr2blockno(sb, &indir_run) + which_block);
- brelse(indir_block);
return BEFS_ERR;
}
diff --git a/fs/befs/debug.c b/fs/befs/debug.c
index 4de7cffcd662..85c13392e9e8 100644
--- a/fs/befs/debug.c
+++ b/fs/befs/debug.c
@@ -169,6 +169,7 @@ befs_dump_super_block(const struct super_block *sb, befs_super_block * sup)
befs_debug(sb, " num_blocks %llu", fs64_to_cpu(sb, sup->num_blocks));
befs_debug(sb, " used_blocks %llu", fs64_to_cpu(sb, sup->used_blocks));
+ befs_debug(sb, " inode_size %u", fs32_to_cpu(sb, sup->inode_size));
befs_debug(sb, " magic2 %08x", fs32_to_cpu(sb, sup->magic2));
befs_debug(sb, " blocks_per_ag %u",
diff --git a/fs/befs/io.c b/fs/befs/io.c
index 523c8af2d770..b4a558126ee1 100644
--- a/fs/befs/io.c
+++ b/fs/befs/io.c
@@ -27,7 +27,7 @@ struct buffer_head *
befs_bread_iaddr(struct super_block *sb, befs_inode_addr iaddr)
{
struct buffer_head *bh;
- befs_blocknr_t block = 0;
+ befs_blocknr_t block;
struct befs_sb_info *befs_sb = BEFS_SB(sb);
befs_debug(sb, "---> Enter %s "
@@ -59,27 +59,3 @@ befs_bread_iaddr(struct super_block *sb, befs_inode_addr iaddr)
befs_debug(sb, "<--- %s ERROR", __func__);
return NULL;
}
-
-struct buffer_head *
-befs_bread(struct super_block *sb, befs_blocknr_t block)
-{
- struct buffer_head *bh;
-
- befs_debug(sb, "---> Enter %s %lu", __func__, (unsigned long)block);
-
- bh = sb_bread(sb, block);
-
- if (bh == NULL) {
- befs_error(sb, "Failed to read block %lu",
- (unsigned long)block);
- goto error;
- }
-
- befs_debug(sb, "<--- %s", __func__);
-
- return bh;
-
- error:
- befs_debug(sb, "<--- %s ERROR", __func__);
- return NULL;
-}
diff --git a/fs/befs/io.h b/fs/befs/io.h
index 9b78266b6aa5..78d7bc6e60de 100644
--- a/fs/befs/io.h
+++ b/fs/befs/io.h
@@ -5,5 +5,3 @@
struct buffer_head *befs_bread_iaddr(struct super_block *sb,
befs_inode_addr iaddr);
-struct buffer_head *befs_bread(struct super_block *sb, befs_blocknr_t block);
-
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index bfe9f9994935..647a276eba56 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -120,7 +120,7 @@ befs_get_block(struct inode *inode, sector_t block,
struct super_block *sb = inode->i_sb;
befs_data_stream *ds = &BEFS_I(inode)->i_data.ds;
befs_block_run run = BAD_IADDR;
- int res = 0;
+ int res;
ulong disk_off;
befs_debug(sb, "---> befs_get_block() for inode %lu, block %ld",
@@ -179,15 +179,16 @@ befs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
kfree(utfname);
} else {
- ret = befs_btree_find(sb, ds, dentry->d_name.name, &offset);
+ ret = befs_btree_find(sb, ds, name, &offset);
}
if (ret == BEFS_BT_NOT_FOUND) {
befs_debug(sb, "<--- %s %pd not found", __func__, dentry);
+ d_add(dentry, NULL);
return ERR_PTR(-ENOENT);
} else if (ret != BEFS_OK || offset == 0) {
- befs_warning(sb, "<--- %s Error", __func__);
+ befs_error(sb, "<--- %s Error", __func__);
return ERR_PTR(-ENODATA);
}
@@ -211,56 +212,55 @@ befs_readdir(struct file *file, struct dir_context *ctx)
befs_off_t value;
int result;
size_t keysize;
- unsigned char d_type;
char keybuf[BEFS_NAME_LEN + 1];
befs_debug(sb, "---> %s name %pD, inode %ld, ctx->pos %lld",
__func__, file, inode->i_ino, ctx->pos);
-more:
- result = befs_btree_read(sb, ds, ctx->pos, BEFS_NAME_LEN + 1,
- keybuf, &keysize, &value);
+ while (1) {
+ result = befs_btree_read(sb, ds, ctx->pos, BEFS_NAME_LEN + 1,
+ keybuf, &keysize, &value);
- if (result == BEFS_ERR) {
- befs_debug(sb, "<--- %s ERROR", __func__);
- befs_error(sb, "IO error reading %pD (inode %lu)",
- file, inode->i_ino);
- return -EIO;
-
- } else if (result == BEFS_BT_END) {
- befs_debug(sb, "<--- %s END", __func__);
- return 0;
-
- } else if (result == BEFS_BT_EMPTY) {
- befs_debug(sb, "<--- %s Empty directory", __func__);
- return 0;
- }
+ if (result == BEFS_ERR) {
+ befs_debug(sb, "<--- %s ERROR", __func__);
+ befs_error(sb, "IO error reading %pD (inode %lu)",
+ file, inode->i_ino);
+ return -EIO;
- d_type = DT_UNKNOWN;
+ } else if (result == BEFS_BT_END) {
+ befs_debug(sb, "<--- %s END", __func__);
+ return 0;
- /* Convert to NLS */
- if (BEFS_SB(sb)->nls) {
- char *nlsname;
- int nlsnamelen;
- result =
- befs_utf2nls(sb, keybuf, keysize, &nlsname, &nlsnamelen);
- if (result < 0) {
- befs_debug(sb, "<--- %s ERROR", __func__);
- return result;
+ } else if (result == BEFS_BT_EMPTY) {
+ befs_debug(sb, "<--- %s Empty directory", __func__);
+ return 0;
}
- if (!dir_emit(ctx, nlsname, nlsnamelen,
- (ino_t) value, d_type)) {
+
+ /* Convert to NLS */
+ if (BEFS_SB(sb)->nls) {
+ char *nlsname;
+ int nlsnamelen;
+
+ result =
+ befs_utf2nls(sb, keybuf, keysize, &nlsname,
+ &nlsnamelen);
+ if (result < 0) {
+ befs_debug(sb, "<--- %s ERROR", __func__);
+ return result;
+ }
+ if (!dir_emit(ctx, nlsname, nlsnamelen,
+ (ino_t) value, DT_UNKNOWN)) {
+ kfree(nlsname);
+ return 0;
+ }
kfree(nlsname);
- return 0;
+ } else {
+ if (!dir_emit(ctx, keybuf, keysize,
+ (ino_t) value, DT_UNKNOWN))
+ return 0;
}
- kfree(nlsname);
- } else {
- if (!dir_emit(ctx, keybuf, keysize,
- (ino_t) value, d_type))
- return 0;
+ ctx->pos++;
}
- ctx->pos++;
- goto more;
}
static struct inode *
@@ -299,7 +299,6 @@ static struct inode *befs_iget(struct super_block *sb, unsigned long ino)
struct befs_sb_info *befs_sb = BEFS_SB(sb);
struct befs_inode_info *befs_ino;
struct inode *inode;
- long ret = -EIO;
befs_debug(sb, "---> %s inode = %lu", __func__, ino);
@@ -318,7 +317,7 @@ static struct inode *befs_iget(struct super_block *sb, unsigned long ino)
befs_ino->i_inode_num.allocation_group,
befs_ino->i_inode_num.start, befs_ino->i_inode_num.len);
- bh = befs_bread(sb, inode->i_ino);
+ bh = sb_bread(sb, inode->i_ino);
if (!bh) {
befs_error(sb, "unable to read inode block - "
"inode = %lu", inode->i_ino);
@@ -421,7 +420,7 @@ static struct inode *befs_iget(struct super_block *sb, unsigned long ino)
unacquire_none:
iget_failed(inode);
befs_debug(sb, "<--- %s - Bad inode", __func__);
- return ERR_PTR(ret);
+ return ERR_PTR(-EIO);
}
/* Initialize the inode cache. Called at fs setup.
@@ -436,10 +435,9 @@ befs_init_inodecache(void)
0, (SLAB_RECLAIM_ACCOUNT|
SLAB_MEM_SPREAD|SLAB_ACCOUNT),
init_once);
- if (befs_inode_cachep == NULL) {
- pr_err("%s: Couldn't initialize inode slabcache\n", __func__);
+ if (befs_inode_cachep == NULL)
return -ENOMEM;
- }
+
return 0;
}
@@ -524,8 +522,6 @@ befs_utf2nls(struct super_block *sb, const char *in,
*out = result = kmalloc(maxlen, GFP_NOFS);
if (!*out) {
- befs_error(sb, "%s cannot allocate memory", __func__);
- *out_len = 0;
return -ENOMEM;
}
@@ -604,7 +600,6 @@ befs_nls2utf(struct super_block *sb, const char *in,
*out = result = kmalloc(maxlen, GFP_NOFS);
if (!*out) {
- befs_error(sb, "%s cannot allocate memory", __func__);
*out_len = 0;
return -ENOMEM;
}
@@ -637,10 +632,6 @@ befs_nls2utf(struct super_block *sb, const char *in,
return -EILSEQ;
}
-/**
- * Use the
- *
- */
enum {
Opt_uid, Opt_gid, Opt_charset, Opt_debug, Opt_err,
};
@@ -760,19 +751,19 @@ befs_fill_super(struct super_block *sb, void *data, int silent)
long ret = -EINVAL;
const unsigned long sb_block = 0;
const off_t x86_sb_off = 512;
+ int blocksize;
save_mount_options(sb, data);
sb->s_fs_info = kzalloc(sizeof(*befs_sb), GFP_KERNEL);
- if (sb->s_fs_info == NULL) {
- pr_err("(%s): Unable to allocate memory for private "
- "portion of superblock. Bailing.\n", sb->s_id);
+ if (sb->s_fs_info == NULL)
goto unacquire_none;
- }
+
befs_sb = BEFS_SB(sb);
if (!parse_options((char *) data, &befs_sb->mount_opts)) {
- befs_error(sb, "cannot parse mount options");
+ if (!silent)
+ befs_error(sb, "cannot parse mount options");
goto unacquire_priv_sbp;
}
@@ -793,10 +784,16 @@ befs_fill_super(struct super_block *sb, void *data, int silent)
* least 1k to get the second 512 bytes of the volume.
* -WD 10-26-01
*/
- sb_min_blocksize(sb, 1024);
+ blocksize = sb_min_blocksize(sb, 1024);
+ if (!blocksize) {
+ if (!silent)
+ befs_error(sb, "unable to set blocksize");
+ goto unacquire_priv_sbp;
+ }
if (!(bh = sb_bread(sb, sb_block))) {
- befs_error(sb, "unable to read superblock");
+ if (!silent)
+ befs_error(sb, "unable to read superblock");
goto unacquire_priv_sbp;
}
@@ -820,9 +817,9 @@ befs_fill_super(struct super_block *sb, void *data, int silent)
brelse(bh);
if( befs_sb->num_blocks > ~((sector_t)0) ) {
- befs_error(sb, "blocks count: %llu "
- "is larger than the host can use",
- befs_sb->num_blocks);
+ if (!silent)
+ befs_error(sb, "blocks count: %llu is larger than the host can use",
+ befs_sb->num_blocks);
goto unacquire_priv_sbp;
}
@@ -841,7 +838,8 @@ befs_fill_super(struct super_block *sb, void *data, int silent)
}
sb->s_root = d_make_root(root);
if (!sb->s_root) {
- befs_error(sb, "get root inode failed");
+ if (!silent)
+ befs_error(sb, "get root inode failed");
goto unacquire_priv_sbp;
}
@@ -870,9 +868,9 @@ befs_fill_super(struct super_block *sb, void *data, int silent)
unacquire_priv_sbp:
kfree(befs_sb->mount_opts.iocharset);
kfree(sb->s_fs_info);
+ sb->s_fs_info = NULL;
unacquire_none:
- sb->s_fs_info = NULL;
return ret;
}
diff --git a/fs/befs/super.c b/fs/befs/super.c
index aeafc4d84278..7c50025c99d8 100644
--- a/fs/befs/super.c
+++ b/fs/befs/super.c
@@ -13,24 +13,20 @@
#include "befs.h"
#include "super.h"
-/**
- * load_befs_sb -- Read from disk and properly byteswap all the fields
+/*
+ * befs_load_sb -- Read from disk and properly byteswap all the fields
* of the befs superblock
- *
- *
- *
- *
*/
int
-befs_load_sb(struct super_block *sb, befs_super_block * disk_sb)
+befs_load_sb(struct super_block *sb, befs_super_block *disk_sb)
{
struct befs_sb_info *befs_sb = BEFS_SB(sb);
/* Check the byte order of the filesystem */
if (disk_sb->fs_byte_order == BEFS_BYTEORDER_NATIVE_LE)
- befs_sb->byte_order = BEFS_BYTESEX_LE;
+ befs_sb->byte_order = BEFS_BYTESEX_LE;
else if (disk_sb->fs_byte_order == BEFS_BYTEORDER_NATIVE_BE)
- befs_sb->byte_order = BEFS_BYTESEX_BE;
+ befs_sb->byte_order = BEFS_BYTESEX_BE;
befs_sb->magic1 = fs32_to_cpu(sb, disk_sb->magic1);
befs_sb->magic2 = fs32_to_cpu(sb, disk_sb->magic2);
@@ -45,6 +41,8 @@ befs_load_sb(struct super_block *sb, befs_super_block * disk_sb)
befs_sb->ag_shift = fs32_to_cpu(sb, disk_sb->ag_shift);
befs_sb->num_ags = fs32_to_cpu(sb, disk_sb->num_ags);
+ befs_sb->flags = fs32_to_cpu(sb, disk_sb->flags);
+
befs_sb->log_blocks = fsrun_to_cpu(sb, disk_sb->log_blocks);
befs_sb->log_start = fs64_to_cpu(sb, disk_sb->log_start);
befs_sb->log_end = fs64_to_cpu(sb, disk_sb->log_end);
@@ -84,15 +82,15 @@ befs_check_sb(struct super_block *sb)
}
if (befs_sb->block_size > PAGE_SIZE) {
- befs_error(sb, "blocksize(%u) cannot be larger"
+ befs_error(sb, "blocksize(%u) cannot be larger "
"than system pagesize(%lu)", befs_sb->block_size,
PAGE_SIZE);
return BEFS_ERR;
}
/*
- * block_shift and block_size encode the same information
- * in different ways as a consistency check.
+ * block_shift and block_size encode the same information
+ * in different ways as a consistency check.
*/
if ((1 << befs_sb->block_shift) != befs_sb->block_size) {
@@ -101,10 +99,18 @@ befs_check_sb(struct super_block *sb)
return BEFS_ERR;
}
- if (befs_sb->log_start != befs_sb->log_end) {
+
+ /* ag_shift also encodes the same information as blocks_per_ag in a
+ * different way, non-fatal consistency check
+ */
+ if ((1 << befs_sb->ag_shift) != befs_sb->blocks_per_ag)
+ befs_error(sb, "ag_shift disagrees with blocks_per_ag.");
+
+ if (befs_sb->log_start != befs_sb->log_end ||
+ befs_sb->flags == BEFS_DIRTY) {
befs_error(sb, "Filesystem not clean! There are blocks in the "
- "journal. You must boot into BeOS and mount this volume "
- "to make it clean.");
+ "journal. You must boot into BeOS and mount this "
+ "volume to make it clean.");
return BEFS_ERR;
}
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index ccc70d96958d..d4d8b7e36b2f 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -698,7 +698,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
ret = btrfs_map_bio(root, comp_bio, mirror_num, 0);
if (ret) {
- bio->bi_error = ret;
+ comp_bio->bi_error = ret;
bio_endio(comp_bio);
}
@@ -728,7 +728,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
ret = btrfs_map_bio(root, comp_bio, mirror_num, 0);
if (ret) {
- bio->bi_error = ret;
+ comp_bio->bi_error = ret;
bio_endio(comp_bio);
}
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 6c21bad26a27..0b8ce2b9f7d0 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -252,7 +252,8 @@ struct btrfs_super_block {
#define BTRFS_FEATURE_COMPAT_SAFE_CLEAR 0ULL
#define BTRFS_FEATURE_COMPAT_RO_SUPP \
- (BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)
+ (BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE | \
+ BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE_VALID)
#define BTRFS_FEATURE_COMPAT_RO_SAFE_SET 0ULL
#define BTRFS_FEATURE_COMPAT_RO_SAFE_CLEAR 0ULL
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index e720d3e6ec20..3a57f99d96aa 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2586,6 +2586,7 @@ int open_ctree(struct super_block *sb,
int num_backups_tried = 0;
int backup_index = 0;
int max_active;
+ int clear_free_space_tree = 0;
tree_root = fs_info->tree_root = btrfs_alloc_root(fs_info, GFP_KERNEL);
chunk_root = fs_info->chunk_root = btrfs_alloc_root(fs_info, GFP_KERNEL);
@@ -3148,6 +3149,26 @@ retry_root_backup:
if (sb->s_flags & MS_RDONLY)
return 0;
+ if (btrfs_test_opt(fs_info, CLEAR_CACHE) &&
+ btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
+ clear_free_space_tree = 1;
+ } else if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE) &&
+ !btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID)) {
+ btrfs_warn(fs_info, "free space tree is invalid");
+ clear_free_space_tree = 1;
+ }
+
+ if (clear_free_space_tree) {
+ btrfs_info(fs_info, "clearing free space tree");
+ ret = btrfs_clear_free_space_tree(fs_info);
+ if (ret) {
+ btrfs_warn(fs_info,
+ "failed to clear free space tree: %d", ret);
+ close_ctree(tree_root);
+ return ret;
+ }
+ }
+
if (btrfs_test_opt(tree_root->fs_info, FREE_SPACE_TREE) &&
!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
btrfs_info(fs_info, "creating free space tree");
@@ -3185,18 +3206,6 @@ retry_root_backup:
btrfs_qgroup_rescan_resume(fs_info);
- if (btrfs_test_opt(tree_root->fs_info, CLEAR_CACHE) &&
- btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
- btrfs_info(fs_info, "clearing free space tree");
- ret = btrfs_clear_free_space_tree(fs_info);
- if (ret) {
- btrfs_warn(fs_info,
- "failed to clear free space tree: %d", ret);
- close_ctree(tree_root);
- return ret;
- }
- }
-
if (!fs_info->uuid_root) {
btrfs_info(fs_info, "creating UUID tree");
ret = btrfs_create_uuid_tree(fs_info);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index ee40384c394d..66a755150056 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -5558,17 +5558,45 @@ void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
}
}
-/*
- * The extent buffer bitmap operations are done with byte granularity because
- * bitmap items are not guaranteed to be aligned to a word and therefore a
- * single word in a bitmap may straddle two pages in the extent buffer.
- */
-#define BIT_BYTE(nr) ((nr) / BITS_PER_BYTE)
-#define BYTE_MASK ((1 << BITS_PER_BYTE) - 1)
-#define BITMAP_FIRST_BYTE_MASK(start) \
- ((BYTE_MASK << ((start) & (BITS_PER_BYTE - 1))) & BYTE_MASK)
-#define BITMAP_LAST_BYTE_MASK(nbits) \
- (BYTE_MASK >> (-(nbits) & (BITS_PER_BYTE - 1)))
+void le_bitmap_set(u8 *map, unsigned int start, int len)
+{
+ u8 *p = map + BIT_BYTE(start);
+ const unsigned int size = start + len;
+ int bits_to_set = BITS_PER_BYTE - (start % BITS_PER_BYTE);
+ u8 mask_to_set = BITMAP_FIRST_BYTE_MASK(start);
+
+ while (len - bits_to_set >= 0) {
+ *p |= mask_to_set;
+ len -= bits_to_set;
+ bits_to_set = BITS_PER_BYTE;
+ mask_to_set = ~(u8)0;
+ p++;
+ }
+ if (len) {
+ mask_to_set &= BITMAP_LAST_BYTE_MASK(size);
+ *p |= mask_to_set;
+ }
+}
+
+void le_bitmap_clear(u8 *map, unsigned int start, int len)
+{
+ u8 *p = map + BIT_BYTE(start);
+ const unsigned int size = start + len;
+ int bits_to_clear = BITS_PER_BYTE - (start % BITS_PER_BYTE);
+ u8 mask_to_clear = BITMAP_FIRST_BYTE_MASK(start);
+
+ while (len - bits_to_clear >= 0) {
+ *p &= ~mask_to_clear;
+ len -= bits_to_clear;
+ bits_to_clear = BITS_PER_BYTE;
+ mask_to_clear = ~(u8)0;
+ p++;
+ }
+ if (len) {
+ mask_to_clear &= BITMAP_LAST_BYTE_MASK(size);
+ *p &= ~mask_to_clear;
+ }
+}
/*
* eb_bitmap_offset() - calculate the page and offset of the byte containing the
@@ -5612,7 +5640,7 @@ static inline void eb_bitmap_offset(struct extent_buffer *eb,
int extent_buffer_test_bit(struct extent_buffer *eb, unsigned long start,
unsigned long nr)
{
- char *kaddr;
+ u8 *kaddr;
struct page *page;
unsigned long i;
size_t offset;
@@ -5634,13 +5662,13 @@ int extent_buffer_test_bit(struct extent_buffer *eb, unsigned long start,
void extent_buffer_bitmap_set(struct extent_buffer *eb, unsigned long start,
unsigned long pos, unsigned long len)
{
- char *kaddr;
+ u8 *kaddr;
struct page *page;
unsigned long i;
size_t offset;
const unsigned int size = pos + len;
int bits_to_set = BITS_PER_BYTE - (pos % BITS_PER_BYTE);
- unsigned int mask_to_set = BITMAP_FIRST_BYTE_MASK(pos);
+ u8 mask_to_set = BITMAP_FIRST_BYTE_MASK(pos);
eb_bitmap_offset(eb, start, pos, &i, &offset);
page = eb->pages[i];
@@ -5651,7 +5679,7 @@ void extent_buffer_bitmap_set(struct extent_buffer *eb, unsigned long start,
kaddr[offset] |= mask_to_set;
len -= bits_to_set;
bits_to_set = BITS_PER_BYTE;
- mask_to_set = ~0U;
+ mask_to_set = ~(u8)0;
if (++offset >= PAGE_SIZE && len > 0) {
offset = 0;
page = eb->pages[++i];
@@ -5676,13 +5704,13 @@ void extent_buffer_bitmap_set(struct extent_buffer *eb, unsigned long start,
void extent_buffer_bitmap_clear(struct extent_buffer *eb, unsigned long start,
unsigned long pos, unsigned long len)
{
- char *kaddr;
+ u8 *kaddr;
struct page *page;
unsigned long i;
size_t offset;
const unsigned int size = pos + len;
int bits_to_clear = BITS_PER_BYTE - (pos % BITS_PER_BYTE);
- unsigned int mask_to_clear = BITMAP_FIRST_BYTE_MASK(pos);
+ u8 mask_to_clear = BITMAP_FIRST_BYTE_MASK(pos);
eb_bitmap_offset(eb, start, pos, &i, &offset);
page = eb->pages[i];
@@ -5693,7 +5721,7 @@ void extent_buffer_bitmap_clear(struct extent_buffer *eb, unsigned long start,
kaddr[offset] &= ~mask_to_clear;
len -= bits_to_clear;
bits_to_clear = BITS_PER_BYTE;
- mask_to_clear = ~0U;
+ mask_to_clear = ~(u8)0;
if (++offset >= PAGE_SIZE && len > 0) {
offset = 0;
page = eb->pages[++i];
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 4a094f1dc7ef..ab31d145227e 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -59,6 +59,28 @@
*/
#define EXTENT_PAGE_PRIVATE 1
+/*
+ * The extent buffer bitmap operations are done with byte granularity instead of
+ * word granularity for two reasons:
+ * 1. The bitmaps must be little-endian on disk.
+ * 2. Bitmap items are not guaranteed to be aligned to a word and therefore a
+ * single word in a bitmap may straddle two pages in the extent buffer.
+ */
+#define BIT_BYTE(nr) ((nr) / BITS_PER_BYTE)
+#define BYTE_MASK ((1 << BITS_PER_BYTE) - 1)
+#define BITMAP_FIRST_BYTE_MASK(start) \
+ ((BYTE_MASK << ((start) & (BITS_PER_BYTE - 1))) & BYTE_MASK)
+#define BITMAP_LAST_BYTE_MASK(nbits) \
+ (BYTE_MASK >> (-(nbits) & (BITS_PER_BYTE - 1)))
+
+static inline int le_test_bit(int nr, const u8 *addr)
+{
+ return 1U & (addr[BIT_BYTE(nr)] >> (nr & (BITS_PER_BYTE-1)));
+}
+
+extern void le_bitmap_set(u8 *map, unsigned int start, int len);
+extern void le_bitmap_clear(u8 *map, unsigned int start, int len);
+
struct extent_state;
struct btrfs_root;
struct btrfs_io_bio;
diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c
index e4a42a8e4f84..57401b474ec6 100644
--- a/fs/btrfs/free-space-tree.c
+++ b/fs/btrfs/free-space-tree.c
@@ -151,7 +151,7 @@ static inline u32 free_space_bitmap_size(u64 size, u32 sectorsize)
return DIV_ROUND_UP((u32)div_u64(size, sectorsize), BITS_PER_BYTE);
}
-static unsigned long *alloc_bitmap(u32 bitmap_size)
+static u8 *alloc_bitmap(u32 bitmap_size)
{
void *mem;
@@ -180,8 +180,7 @@ int convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans,
struct btrfs_free_space_info *info;
struct btrfs_key key, found_key;
struct extent_buffer *leaf;
- unsigned long *bitmap;
- char *bitmap_cursor;
+ u8 *bitmap, *bitmap_cursor;
u64 start, end;
u64 bitmap_range, i;
u32 bitmap_size, flags, expected_extent_count;
@@ -231,7 +230,7 @@ int convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans,
block_group->sectorsize);
last = div_u64(found_key.objectid + found_key.offset - start,
block_group->sectorsize);
- bitmap_set(bitmap, first, last - first);
+ le_bitmap_set(bitmap, first, last - first);
extent_count++;
nr++;
@@ -270,7 +269,7 @@ int convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans,
goto out;
}
- bitmap_cursor = (char *)bitmap;
+ bitmap_cursor = bitmap;
bitmap_range = block_group->sectorsize * BTRFS_FREE_SPACE_BITMAP_BITS;
i = start;
while (i < end) {
@@ -319,7 +318,7 @@ int convert_free_space_to_extents(struct btrfs_trans_handle *trans,
struct btrfs_free_space_info *info;
struct btrfs_key key, found_key;
struct extent_buffer *leaf;
- unsigned long *bitmap;
+ u8 *bitmap;
u64 start, end;
/* Initialize to silence GCC. */
u64 extent_start = 0;
@@ -363,7 +362,7 @@ int convert_free_space_to_extents(struct btrfs_trans_handle *trans,
break;
} else if (found_key.type == BTRFS_FREE_SPACE_BITMAP_KEY) {
unsigned long ptr;
- char *bitmap_cursor;
+ u8 *bitmap_cursor;
u32 bitmap_pos, data_size;
ASSERT(found_key.objectid >= start);
@@ -373,7 +372,7 @@ int convert_free_space_to_extents(struct btrfs_trans_handle *trans,
bitmap_pos = div_u64(found_key.objectid - start,
block_group->sectorsize *
BITS_PER_BYTE);
- bitmap_cursor = ((char *)bitmap) + bitmap_pos;
+ bitmap_cursor = bitmap + bitmap_pos;
data_size = free_space_bitmap_size(found_key.offset,
block_group->sectorsize);
@@ -410,7 +409,7 @@ int convert_free_space_to_extents(struct btrfs_trans_handle *trans,
offset = start;
bitnr = 0;
while (offset < end) {
- bit = !!test_bit(bitnr, bitmap);
+ bit = !!le_test_bit(bitnr, bitmap);
if (prev_bit == 0 && bit == 1) {
extent_start = offset;
} else if (prev_bit == 1 && bit == 0) {
@@ -1185,6 +1184,7 @@ int btrfs_create_free_space_tree(struct btrfs_fs_info *fs_info)
}
btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE);
+ btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID);
clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
ret = btrfs_commit_transaction(trans, tree_root);
@@ -1253,6 +1253,7 @@ int btrfs_clear_free_space_tree(struct btrfs_fs_info *fs_info)
return PTR_ERR(trans);
btrfs_clear_fs_compat_ro(fs_info, FREE_SPACE_TREE);
+ btrfs_clear_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID);
fs_info->free_space_root = NULL;
ret = clear_free_space_tree(trans, free_space_root);
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 01bc36cec26e..71261b459863 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -5805,6 +5805,64 @@ static int changed_extent(struct send_ctx *sctx,
int ret = 0;
if (sctx->cur_ino != sctx->cmp_key->objectid) {
+
+ if (result == BTRFS_COMPARE_TREE_CHANGED) {
+ struct extent_buffer *leaf_l;
+ struct extent_buffer *leaf_r;
+ struct btrfs_file_extent_item *ei_l;
+ struct btrfs_file_extent_item *ei_r;
+
+ leaf_l = sctx->left_path->nodes[0];
+ leaf_r = sctx->right_path->nodes[0];
+ ei_l = btrfs_item_ptr(leaf_l,
+ sctx->left_path->slots[0],
+ struct btrfs_file_extent_item);
+ ei_r = btrfs_item_ptr(leaf_r,
+ sctx->right_path->slots[0],
+ struct btrfs_file_extent_item);
+
+ /*
+ * We may have found an extent item that has changed
+ * only its disk_bytenr field and the corresponding
+ * inode item was not updated. This case happens due to
+ * very specific timings during relocation when a leaf
+ * that contains file extent items is COWed while
+ * relocation is ongoing and its in the stage where it
+ * updates data pointers. So when this happens we can
+ * safely ignore it since we know it's the same extent,
+ * but just at different logical and physical locations
+ * (when an extent is fully replaced with a new one, we
+ * know the generation number must have changed too,
+ * since snapshot creation implies committing the current
+ * transaction, and the inode item must have been updated
+ * as well).
+ * This replacement of the disk_bytenr happens at
+ * relocation.c:replace_file_extents() through
+ * relocation.c:btrfs_reloc_cow_block().
+ */
+ if (btrfs_file_extent_generation(leaf_l, ei_l) ==
+ btrfs_file_extent_generation(leaf_r, ei_r) &&
+ btrfs_file_extent_ram_bytes(leaf_l, ei_l) ==
+ btrfs_file_extent_ram_bytes(leaf_r, ei_r) &&
+ btrfs_file_extent_compression(leaf_l, ei_l) ==
+ btrfs_file_extent_compression(leaf_r, ei_r) &&
+ btrfs_file_extent_encryption(leaf_l, ei_l) ==
+ btrfs_file_extent_encryption(leaf_r, ei_r) &&
+ btrfs_file_extent_other_encoding(leaf_l, ei_l) ==
+ btrfs_file_extent_other_encoding(leaf_r, ei_r) &&
+ btrfs_file_extent_type(leaf_l, ei_l) ==
+ btrfs_file_extent_type(leaf_r, ei_r) &&
+ btrfs_file_extent_disk_bytenr(leaf_l, ei_l) !=
+ btrfs_file_extent_disk_bytenr(leaf_r, ei_r) &&
+ btrfs_file_extent_disk_num_bytes(leaf_l, ei_l) ==
+ btrfs_file_extent_disk_num_bytes(leaf_r, ei_r) &&
+ btrfs_file_extent_offset(leaf_l, ei_l) ==
+ btrfs_file_extent_offset(leaf_r, ei_r) &&
+ btrfs_file_extent_num_bytes(leaf_l, ei_l) ==
+ btrfs_file_extent_num_bytes(leaf_r, ei_r))
+ return 0;
+ }
+
inconsistent_snapshot_error(sctx, result, "extent");
return -EIO;
}
diff --git a/fs/btrfs/tests/extent-io-tests.c b/fs/btrfs/tests/extent-io-tests.c
index d19ab0317283..caad80bb9bd0 100644
--- a/fs/btrfs/tests/extent-io-tests.c
+++ b/fs/btrfs/tests/extent-io-tests.c
@@ -273,20 +273,37 @@ out:
return ret;
}
-/**
- * test_bit_in_byte - Determine whether a bit is set in a byte
- * @nr: bit number to test
- * @addr: Address to start counting from
- */
-static inline int test_bit_in_byte(int nr, const u8 *addr)
+static int check_eb_bitmap(unsigned long *bitmap, struct extent_buffer *eb,
+ unsigned long len)
{
- return 1UL & (addr[nr / BITS_PER_BYTE] >> (nr & (BITS_PER_BYTE - 1)));
+ unsigned long i;
+
+ for (i = 0; i < len * BITS_PER_BYTE; i++) {
+ int bit, bit1;
+
+ bit = !!test_bit(i, bitmap);
+ bit1 = !!extent_buffer_test_bit(eb, 0, i);
+ if (bit1 != bit) {
+ test_msg("Bits do not match\n");
+ return -EINVAL;
+ }
+
+ bit1 = !!extent_buffer_test_bit(eb, i / BITS_PER_BYTE,
+ i % BITS_PER_BYTE);
+ if (bit1 != bit) {
+ test_msg("Offset bits do not match\n");
+ return -EINVAL;
+ }
+ }
+ return 0;
}
static int __test_eb_bitmaps(unsigned long *bitmap, struct extent_buffer *eb,
unsigned long len)
{
- unsigned long i, x;
+ unsigned long i, j;
+ u32 x;
+ int ret;
memset(bitmap, 0, len);
memset_extent_buffer(eb, 0, 0, len);
@@ -297,16 +314,18 @@ static int __test_eb_bitmaps(unsigned long *bitmap, struct extent_buffer *eb,
bitmap_set(bitmap, 0, len * BITS_PER_BYTE);
extent_buffer_bitmap_set(eb, 0, 0, len * BITS_PER_BYTE);
- if (memcmp_extent_buffer(eb, bitmap, 0, len) != 0) {
+ ret = check_eb_bitmap(bitmap, eb, len);
+ if (ret) {
test_msg("Setting all bits failed\n");
- return -EINVAL;
+ return ret;
}
bitmap_clear(bitmap, 0, len * BITS_PER_BYTE);
extent_buffer_bitmap_clear(eb, 0, 0, len * BITS_PER_BYTE);
- if (memcmp_extent_buffer(eb, bitmap, 0, len) != 0) {
+ ret = check_eb_bitmap(bitmap, eb, len);
+ if (ret) {
test_msg("Clearing all bits failed\n");
- return -EINVAL;
+ return ret;
}
/* Straddling pages test */
@@ -316,9 +335,10 @@ static int __test_eb_bitmaps(unsigned long *bitmap, struct extent_buffer *eb,
sizeof(long) * BITS_PER_BYTE);
extent_buffer_bitmap_set(eb, PAGE_SIZE - sizeof(long) / 2, 0,
sizeof(long) * BITS_PER_BYTE);
- if (memcmp_extent_buffer(eb, bitmap, 0, len) != 0) {
+ ret = check_eb_bitmap(bitmap, eb, len);
+ if (ret) {
test_msg("Setting straddling pages failed\n");
- return -EINVAL;
+ return ret;
}
bitmap_set(bitmap, 0, len * BITS_PER_BYTE);
@@ -328,9 +348,10 @@ static int __test_eb_bitmaps(unsigned long *bitmap, struct extent_buffer *eb,
extent_buffer_bitmap_set(eb, 0, 0, len * BITS_PER_BYTE);
extent_buffer_bitmap_clear(eb, PAGE_SIZE - sizeof(long) / 2, 0,
sizeof(long) * BITS_PER_BYTE);
- if (memcmp_extent_buffer(eb, bitmap, 0, len) != 0) {
+ ret = check_eb_bitmap(bitmap, eb, len);
+ if (ret) {
test_msg("Clearing straddling pages failed\n");
- return -EINVAL;
+ return ret;
}
}
@@ -339,28 +360,22 @@ static int __test_eb_bitmaps(unsigned long *bitmap, struct extent_buffer *eb,
* something repetitive that could miss some hypothetical off-by-n bug.
*/
x = 0;
- for (i = 0; i < len / sizeof(long); i++) {
- x = (0x19660dULL * (u64)x + 0x3c6ef35fULL) & 0xffffffffUL;
- bitmap[i] = x;
- }
- write_extent_buffer(eb, bitmap, 0, len);
-
- for (i = 0; i < len * BITS_PER_BYTE; i++) {
- int bit, bit1;
-
- bit = !!test_bit_in_byte(i, (u8 *)bitmap);
- bit1 = !!extent_buffer_test_bit(eb, 0, i);
- if (bit1 != bit) {
- test_msg("Testing bit pattern failed\n");
- return -EINVAL;
+ bitmap_clear(bitmap, 0, len * BITS_PER_BYTE);
+ extent_buffer_bitmap_clear(eb, 0, 0, len * BITS_PER_BYTE);
+ for (i = 0; i < len * BITS_PER_BYTE / 32; i++) {
+ x = (0x19660dULL * (u64)x + 0x3c6ef35fULL) & 0xffffffffU;
+ for (j = 0; j < 32; j++) {
+ if (x & (1U << j)) {
+ bitmap_set(bitmap, i * 32 + j, 1);
+ extent_buffer_bitmap_set(eb, 0, i * 32 + j, 1);
+ }
}
+ }
- bit1 = !!extent_buffer_test_bit(eb, i / BITS_PER_BYTE,
- i % BITS_PER_BYTE);
- if (bit1 != bit) {
- test_msg("Testing bit pattern with offset failed\n");
- return -EINVAL;
- }
+ ret = check_eb_bitmap(bitmap, eb, len);
+ if (ret) {
+ test_msg("Random bit pattern failed\n");
+ return ret;
}
return 0;
diff --git a/fs/btrfs/tests/free-space-tree-tests.c b/fs/btrfs/tests/free-space-tree-tests.c
index 7508d3b42780..6e144048a72e 100644
--- a/fs/btrfs/tests/free-space-tree-tests.c
+++ b/fs/btrfs/tests/free-space-tree-tests.c
@@ -24,20 +24,15 @@
#include "../transaction.h"
struct free_space_extent {
- u64 start, length;
+ u64 start;
+ u64 length;
};
-/*
- * The test cases align their operations to this in order to hit some of the
- * edge cases in the bitmap code.
- */
-#define BITMAP_RANGE (BTRFS_FREE_SPACE_BITMAP_BITS * PAGE_SIZE)
-
static int __check_free_space_extents(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info,
struct btrfs_block_group_cache *cache,
struct btrfs_path *path,
- struct free_space_extent *extents,
+ const struct free_space_extent * const extents,
unsigned int num_extents)
{
struct btrfs_free_space_info *info;
@@ -126,7 +121,7 @@ static int check_free_space_extents(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info,
struct btrfs_block_group_cache *cache,
struct btrfs_path *path,
- struct free_space_extent *extents,
+ const struct free_space_extent * const extents,
unsigned int num_extents)
{
struct btrfs_free_space_info *info;
@@ -168,9 +163,10 @@ static int check_free_space_extents(struct btrfs_trans_handle *trans,
static int test_empty_block_group(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info,
struct btrfs_block_group_cache *cache,
- struct btrfs_path *path)
+ struct btrfs_path *path,
+ u32 alignment)
{
- struct free_space_extent extents[] = {
+ const struct free_space_extent extents[] = {
{cache->key.objectid, cache->key.offset},
};
@@ -181,9 +177,10 @@ static int test_empty_block_group(struct btrfs_trans_handle *trans,
static int test_remove_all(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info,
struct btrfs_block_group_cache *cache,
- struct btrfs_path *path)
+ struct btrfs_path *path,
+ u32 alignment)
{
- struct free_space_extent extents[] = {};
+ const struct free_space_extent extents[] = {};
int ret;
ret = __remove_from_free_space_tree(trans, fs_info, cache, path,
@@ -201,16 +198,17 @@ static int test_remove_all(struct btrfs_trans_handle *trans,
static int test_remove_beginning(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info,
struct btrfs_block_group_cache *cache,
- struct btrfs_path *path)
+ struct btrfs_path *path,
+ u32 alignment)
{
- struct free_space_extent extents[] = {
- {cache->key.objectid + BITMAP_RANGE,
- cache->key.offset - BITMAP_RANGE},
+ const struct free_space_extent extents[] = {
+ {cache->key.objectid + alignment,
+ cache->key.offset - alignment},
};
int ret;
ret = __remove_from_free_space_tree(trans, fs_info, cache, path,
- cache->key.objectid, BITMAP_RANGE);
+ cache->key.objectid, alignment);
if (ret) {
test_msg("Could not remove free space\n");
return ret;
@@ -224,17 +222,18 @@ static int test_remove_beginning(struct btrfs_trans_handle *trans,
static int test_remove_end(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info,
struct btrfs_block_group_cache *cache,
- struct btrfs_path *path)
+ struct btrfs_path *path,
+ u32 alignment)
{
- struct free_space_extent extents[] = {
- {cache->key.objectid, cache->key.offset - BITMAP_RANGE},
+ const struct free_space_extent extents[] = {
+ {cache->key.objectid, cache->key.offset - alignment},
};
int ret;
ret = __remove_from_free_space_tree(trans, fs_info, cache, path,
cache->key.objectid +
- cache->key.offset - BITMAP_RANGE,
- BITMAP_RANGE);
+ cache->key.offset - alignment,
+ alignment);
if (ret) {
test_msg("Could not remove free space\n");
return ret;
@@ -247,18 +246,19 @@ static int test_remove_end(struct btrfs_trans_handle *trans,
static int test_remove_middle(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info,
struct btrfs_block_group_cache *cache,
- struct btrfs_path *path)
+ struct btrfs_path *path,
+ u32 alignment)
{
- struct free_space_extent extents[] = {
- {cache->key.objectid, BITMAP_RANGE},
- {cache->key.objectid + 2 * BITMAP_RANGE,
- cache->key.offset - 2 * BITMAP_RANGE},
+ const struct free_space_extent extents[] = {
+ {cache->key.objectid, alignment},
+ {cache->key.objectid + 2 * alignment,
+ cache->key.offset - 2 * alignment},
};
int ret;
ret = __remove_from_free_space_tree(trans, fs_info, cache, path,
- cache->key.objectid + BITMAP_RANGE,
- BITMAP_RANGE);
+ cache->key.objectid + alignment,
+ alignment);
if (ret) {
test_msg("Could not remove free space\n");
return ret;
@@ -271,10 +271,11 @@ static int test_remove_middle(struct btrfs_trans_handle *trans,
static int test_merge_left(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info,
struct btrfs_block_group_cache *cache,
- struct btrfs_path *path)
+ struct btrfs_path *path,
+ u32 alignment)
{
- struct free_space_extent extents[] = {
- {cache->key.objectid, 2 * BITMAP_RANGE},
+ const struct free_space_extent extents[] = {
+ {cache->key.objectid, 2 * alignment},
};
int ret;
@@ -287,15 +288,15 @@ static int test_merge_left(struct btrfs_trans_handle *trans,
}
ret = __add_to_free_space_tree(trans, fs_info, cache, path,
- cache->key.objectid, BITMAP_RANGE);
+ cache->key.objectid, alignment);
if (ret) {
test_msg("Could not add free space\n");
return ret;
}
ret = __add_to_free_space_tree(trans, fs_info, cache, path,
- cache->key.objectid + BITMAP_RANGE,
- BITMAP_RANGE);
+ cache->key.objectid + alignment,
+ alignment);
if (ret) {
test_msg("Could not add free space\n");
return ret;
@@ -308,10 +309,11 @@ static int test_merge_left(struct btrfs_trans_handle *trans,
static int test_merge_right(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info,
struct btrfs_block_group_cache *cache,
- struct btrfs_path *path)
+ struct btrfs_path *path,
+ u32 alignment)
{
- struct free_space_extent extents[] = {
- {cache->key.objectid + BITMAP_RANGE, 2 * BITMAP_RANGE},
+ const struct free_space_extent extents[] = {
+ {cache->key.objectid + alignment, 2 * alignment},
};
int ret;
@@ -324,16 +326,16 @@ static int test_merge_right(struct btrfs_trans_handle *trans,
}
ret = __add_to_free_space_tree(trans, fs_info, cache, path,
- cache->key.objectid + 2 * BITMAP_RANGE,
- BITMAP_RANGE);
+ cache->key.objectid + 2 * alignment,
+ alignment);
if (ret) {
test_msg("Could not add free space\n");
return ret;
}
ret = __add_to_free_space_tree(trans, fs_info, cache, path,
- cache->key.objectid + BITMAP_RANGE,
- BITMAP_RANGE);
+ cache->key.objectid + alignment,
+ alignment);
if (ret) {
test_msg("Could not add free space\n");
return ret;
@@ -346,10 +348,11 @@ static int test_merge_right(struct btrfs_trans_handle *trans,
static int test_merge_both(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info,
struct btrfs_block_group_cache *cache,
- struct btrfs_path *path)
+ struct btrfs_path *path,
+ u32 alignment)
{
- struct free_space_extent extents[] = {
- {cache->key.objectid, 3 * BITMAP_RANGE},
+ const struct free_space_extent extents[] = {
+ {cache->key.objectid, 3 * alignment},
};
int ret;
@@ -362,23 +365,23 @@ static int test_merge_both(struct btrfs_trans_handle *trans,
}
ret = __add_to_free_space_tree(trans, fs_info, cache, path,
- cache->key.objectid, BITMAP_RANGE);
+ cache->key.objectid, alignment);
if (ret) {
test_msg("Could not add free space\n");
return ret;
}
ret = __add_to_free_space_tree(trans, fs_info, cache, path,
- cache->key.objectid + 2 * BITMAP_RANGE,
- BITMAP_RANGE);
+ cache->key.objectid + 2 * alignment,
+ alignment);
if (ret) {
test_msg("Could not add free space\n");
return ret;
}
ret = __add_to_free_space_tree(trans, fs_info, cache, path,
- cache->key.objectid + BITMAP_RANGE,
- BITMAP_RANGE);
+ cache->key.objectid + alignment,
+ alignment);
if (ret) {
test_msg("Could not add free space\n");
return ret;
@@ -391,12 +394,13 @@ static int test_merge_both(struct btrfs_trans_handle *trans,
static int test_merge_none(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info,
struct btrfs_block_group_cache *cache,
- struct btrfs_path *path)
+ struct btrfs_path *path,
+ u32 alignment)
{
- struct free_space_extent extents[] = {
- {cache->key.objectid, BITMAP_RANGE},
- {cache->key.objectid + 2 * BITMAP_RANGE, BITMAP_RANGE},
- {cache->key.objectid + 4 * BITMAP_RANGE, BITMAP_RANGE},
+ const struct free_space_extent extents[] = {
+ {cache->key.objectid, alignment},
+ {cache->key.objectid + 2 * alignment, alignment},
+ {cache->key.objectid + 4 * alignment, alignment},
};
int ret;
@@ -409,23 +413,23 @@ static int test_merge_none(struct btrfs_trans_handle *trans,
}
ret = __add_to_free_space_tree(trans, fs_info, cache, path,
- cache->key.objectid, BITMAP_RANGE);
+ cache->key.objectid, alignment);
if (ret) {
test_msg("Could not add free space\n");
return ret;
}
ret = __add_to_free_space_tree(trans, fs_info, cache, path,
- cache->key.objectid + 4 * BITMAP_RANGE,
- BITMAP_RANGE);
+ cache->key.objectid + 4 * alignment,
+ alignment);
if (ret) {
test_msg("Could not add free space\n");
return ret;
}
ret = __add_to_free_space_tree(trans, fs_info, cache, path,
- cache->key.objectid + 2 * BITMAP_RANGE,
- BITMAP_RANGE);
+ cache->key.objectid + 2 * alignment,
+ alignment);
if (ret) {
test_msg("Could not add free space\n");
return ret;
@@ -438,10 +442,11 @@ static int test_merge_none(struct btrfs_trans_handle *trans,
typedef int (*test_func_t)(struct btrfs_trans_handle *,
struct btrfs_fs_info *,
struct btrfs_block_group_cache *,
- struct btrfs_path *);
+ struct btrfs_path *,
+ u32 alignment);
-static int run_test(test_func_t test_func, int bitmaps,
- u32 sectorsize, u32 nodesize)
+static int run_test(test_func_t test_func, int bitmaps, u32 sectorsize,
+ u32 nodesize, u32 alignment)
{
struct btrfs_fs_info *fs_info;
struct btrfs_root *root = NULL;
@@ -480,7 +485,7 @@ static int run_test(test_func_t test_func, int bitmaps,
btrfs_set_header_nritems(root->node, 0);
root->alloc_bytenr += 2 * nodesize;
- cache = btrfs_alloc_dummy_block_group(8 * BITMAP_RANGE, sectorsize);
+ cache = btrfs_alloc_dummy_block_group(8 * alignment, sectorsize);
if (!cache) {
test_msg("Couldn't allocate dummy block group cache\n");
ret = -ENOMEM;
@@ -514,7 +519,7 @@ static int run_test(test_func_t test_func, int bitmaps,
}
}
- ret = test_func(&trans, root->fs_info, cache, path);
+ ret = test_func(&trans, root->fs_info, cache, path, alignment);
if (ret)
goto out;
@@ -539,15 +544,27 @@ out:
return ret;
}
-static int run_test_both_formats(test_func_t test_func,
- u32 sectorsize, u32 nodesize)
+static int run_test_both_formats(test_func_t test_func, u32 sectorsize,
+ u32 nodesize, u32 alignment)
{
+ int test_ret = 0;
int ret;
- ret = run_test(test_func, 0, sectorsize, nodesize);
- if (ret)
- return ret;
- return run_test(test_func, 1, sectorsize, nodesize);
+ ret = run_test(test_func, 0, sectorsize, nodesize, alignment);
+ if (ret) {
+ test_msg("%pf failed with extents, sectorsize=%u, nodesize=%u, alignment=%u\n",
+ test_func, sectorsize, nodesize, alignment);
+ test_ret = ret;
+ }
+
+ ret = run_test(test_func, 1, sectorsize, nodesize, alignment);
+ if (ret) {
+ test_msg("%pf failed with bitmaps, sectorsize=%u, nodesize=%u, alignment=%u\n",
+ test_func, sectorsize, nodesize, alignment);
+ test_ret = ret;
+ }
+
+ return test_ret;
}
int btrfs_test_free_space_tree(u32 sectorsize, u32 nodesize)
@@ -563,18 +580,30 @@ int btrfs_test_free_space_tree(u32 sectorsize, u32 nodesize)
test_merge_both,
test_merge_none,
};
+ u32 bitmap_alignment;
+ int test_ret = 0;
int i;
+ /*
+ * Align some operations to a page to flush out bugs in the extent
+ * buffer bitmap handling of highmem.
+ */
+ bitmap_alignment = BTRFS_FREE_SPACE_BITMAP_BITS * PAGE_SIZE;
+
test_msg("Running free space tree tests\n");
for (i = 0; i < ARRAY_SIZE(tests); i++) {
- int ret = run_test_both_formats(tests[i], sectorsize,
- nodesize);
- if (ret) {
- test_msg("%pf : sectorsize %u failed\n",
- tests[i], sectorsize);
- return ret;
- }
+ int ret;
+
+ ret = run_test_both_formats(tests[i], sectorsize, nodesize,
+ sectorsize);
+ if (ret)
+ test_ret = ret;
+
+ ret = run_test_both_formats(tests[i], sectorsize, nodesize,
+ bitmap_alignment);
+ if (ret)
+ test_ret = ret;
}
- return 0;
+ return test_ret;
}
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 528cae123dc9..3d33c4e41e5f 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -2713,14 +2713,12 @@ static inline void btrfs_remove_all_log_ctxs(struct btrfs_root *root,
int index, int error)
{
struct btrfs_log_ctx *ctx;
+ struct btrfs_log_ctx *safe;
- if (!error) {
- INIT_LIST_HEAD(&root->log_ctxs[index]);
- return;
- }
-
- list_for_each_entry(ctx, &root->log_ctxs[index], list)
+ list_for_each_entry_safe(ctx, safe, &root->log_ctxs[index], list) {
+ list_del_init(&ctx->list);
ctx->log_ret = error;
+ }
INIT_LIST_HEAD(&root->log_ctxs[index]);
}
@@ -2961,13 +2959,9 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
mutex_unlock(&root->log_mutex);
out_wake_log_root:
- /*
- * We needn't get log_mutex here because we are sure all
- * the other tasks are blocked.
- */
+ mutex_lock(&log_root_tree->log_mutex);
btrfs_remove_all_log_ctxs(log_root_tree, index2, ret);
- mutex_lock(&log_root_tree->log_mutex);
log_root_tree->log_transid_committed++;
atomic_set(&log_root_tree->log_commit[index2], 0);
mutex_unlock(&log_root_tree->log_mutex);
@@ -2978,10 +2972,8 @@ out_wake_log_root:
if (waitqueue_active(&log_root_tree->log_commit_wait[index2]))
wake_up(&log_root_tree->log_commit_wait[index2]);
out:
- /* See above. */
- btrfs_remove_all_log_ctxs(root, index1, ret);
-
mutex_lock(&root->log_mutex);
+ btrfs_remove_all_log_ctxs(root, index1, ret);
root->log_transid_committed++;
atomic_set(&root->log_commit[index1], 0);
mutex_unlock(&root->log_mutex);
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 7bf08825cc11..18630e800208 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -1272,7 +1272,8 @@ again:
statret = __ceph_do_getattr(inode, page,
CEPH_STAT_CAP_INLINE_DATA, !!page);
if (statret < 0) {
- __free_page(page);
+ if (page)
+ __free_page(page);
if (statret == -ENODATA) {
BUG_ON(retry_op != READ_INLINE);
goto again;
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index bca1b49c1c4b..ef4d04647325 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -1511,7 +1511,8 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
ceph_fill_dirfrag(d_inode(parent), rinfo->dir_dir);
}
- if (ceph_frag_is_leftmost(frag) && req->r_readdir_offset == 2) {
+ if (ceph_frag_is_leftmost(frag) && req->r_readdir_offset == 2 &&
+ !(rinfo->hash_order && req->r_path2)) {
/* note dir version at start of readdir so we can tell
* if any dentries get dropped */
req->r_dir_release_cnt = atomic64_read(&ci->i_release_count);
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index a29ffce98187..b382e5910eea 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -845,6 +845,8 @@ static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc)
err = ceph_fs_debugfs_init(fsc);
if (err < 0)
goto fail;
+ } else {
+ root = dget(fsc->sb->s_root);
}
fsc->mount_state = CEPH_MOUNT_MOUNTED;
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index 40b703217977..febc28f9e2c2 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -16,7 +16,7 @@
static int __remove_xattr(struct ceph_inode_info *ci,
struct ceph_inode_xattr *xattr);
-const struct xattr_handler ceph_other_xattr_handler;
+static const struct xattr_handler ceph_other_xattr_handler;
/*
* List of handlers for synthetic system.* attributes. Other
@@ -1086,7 +1086,7 @@ static int ceph_set_xattr_handler(const struct xattr_handler *handler,
return __ceph_setxattr(inode, name, value, size, flags);
}
-const struct xattr_handler ceph_other_xattr_handler = {
+static const struct xattr_handler ceph_other_xattr_handler = {
.prefix = "", /* match any name => handlers called with full name */
.get = ceph_get_xattr_handler,
.set = ceph_set_xattr_handler,
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index 6c58e13fed2f..3d03e48a9213 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -152,6 +152,7 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
list_for_each(tmp1, &cifs_tcp_ses_list) {
server = list_entry(tmp1, struct TCP_Server_Info,
tcp_ses_list);
+ seq_printf(m, "\nNumber of credits: %d", server->credits);
i++;
list_for_each(tmp2, &server->smb_ses_list) {
ses = list_entry(tmp2, struct cifs_ses,
diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h
index 1418daa03d95..07ed81cf1552 100644
--- a/fs/cifs/cifs_fs_sb.h
+++ b/fs/cifs/cifs_fs_sb.h
@@ -49,6 +49,7 @@
#define CIFS_MOUNT_USE_PREFIX_PATH 0x1000000 /* make subpath with unaccessible
* root mountable
*/
+#define CIFS_MOUNT_UID_FROM_ACL 0x2000000 /* try to get UID via special SID */
struct cifs_sb_info {
struct rb_root tlink_tree;
diff --git a/fs/cifs/cifs_ioctl.h b/fs/cifs/cifs_ioctl.h
index 0065256881d8..57ff0756e30c 100644
--- a/fs/cifs/cifs_ioctl.h
+++ b/fs/cifs/cifs_ioctl.h
@@ -36,7 +36,15 @@ struct smb_mnt_fs_info {
__u64 cifs_posix_caps;
} __packed;
+struct smb_snapshot_array {
+ __u32 number_of_snapshots;
+ __u32 number_of_snapshots_returned;
+ __u32 snapshot_array_size;
+ /* snapshots[]; */
+} __packed;
+
#define CIFS_IOCTL_MAGIC 0xCF
#define CIFS_IOC_COPYCHUNK_FILE _IOW(CIFS_IOCTL_MAGIC, 3, int)
#define CIFS_IOC_SET_INTEGRITY _IO(CIFS_IOCTL_MAGIC, 4)
#define CIFS_IOC_GET_MNT_INFO _IOR(CIFS_IOCTL_MAGIC, 5, struct smb_mnt_fs_info)
+#define CIFS_ENUMERATE_SNAPSHOTS _IOR(CIFS_IOCTL_MAGIC, 6, struct smb_snapshot_array)
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
index 71e8a56e9479..15bac390dff9 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/cifs/cifsacl.c
@@ -42,6 +42,35 @@ static const struct cifs_sid sid_authusers = {
/* group users */
static const struct cifs_sid sid_user = {1, 2 , {0, 0, 0, 0, 0, 5}, {} };
+/* S-1-22-1 Unmapped Unix users */
+static const struct cifs_sid sid_unix_users = {1, 1, {0, 0, 0, 0, 0, 22},
+ {cpu_to_le32(1), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} };
+
+/* S-1-22-2 Unmapped Unix groups */
+static const struct cifs_sid sid_unix_groups = { 1, 1, {0, 0, 0, 0, 0, 22},
+ {cpu_to_le32(2), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} };
+
+/*
+ * See http://technet.microsoft.com/en-us/library/hh509017(v=ws.10).aspx
+ */
+
+/* S-1-5-88 MS NFS and Apple style UID/GID/mode */
+
+/* S-1-5-88-1 Unix uid */
+static const struct cifs_sid sid_unix_NFS_users = { 1, 2, {0, 0, 0, 0, 0, 5},
+ {cpu_to_le32(88),
+ cpu_to_le32(1), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} };
+
+/* S-1-5-88-2 Unix gid */
+static const struct cifs_sid sid_unix_NFS_groups = { 1, 2, {0, 0, 0, 0, 0, 5},
+ {cpu_to_le32(88),
+ cpu_to_le32(2), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} };
+
+/* S-1-5-88-3 Unix mode */
+static const struct cifs_sid sid_unix_NFS_mode = { 1, 2, {0, 0, 0, 0, 0, 5},
+ {cpu_to_le32(88),
+ cpu_to_le32(3), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} };
+
static const struct cred *root_cred;
static int
@@ -183,6 +212,62 @@ compare_sids(const struct cifs_sid *ctsid, const struct cifs_sid *cwsid)
return 0; /* sids compare/match */
}
+static bool
+is_well_known_sid(const struct cifs_sid *psid, uint32_t *puid, bool is_group)
+{
+ int i;
+ int num_subauth;
+ const struct cifs_sid *pwell_known_sid;
+
+ if (!psid || (puid == NULL))
+ return false;
+
+ num_subauth = psid->num_subauth;
+
+ /* check if Mac (or Windows NFS) vs. Samba format for Unix owner SID */
+ if (num_subauth == 2) {
+ if (is_group)
+ pwell_known_sid = &sid_unix_groups;
+ else
+ pwell_known_sid = &sid_unix_users;
+ } else if (num_subauth == 3) {
+ if (is_group)
+ pwell_known_sid = &sid_unix_NFS_groups;
+ else
+ pwell_known_sid = &sid_unix_NFS_users;
+ } else
+ return false;
+
+ /* compare the revision */
+ if (psid->revision != pwell_known_sid->revision)
+ return false;
+
+ /* compare all of the six auth values */
+ for (i = 0; i < NUM_AUTHS; ++i) {
+ if (psid->authority[i] != pwell_known_sid->authority[i]) {
+ cifs_dbg(FYI, "auth %d did not match\n", i);
+ return false;
+ }
+ }
+
+ if (num_subauth == 2) {
+ if (psid->sub_auth[0] != pwell_known_sid->sub_auth[0])
+ return false;
+
+ *puid = le32_to_cpu(psid->sub_auth[1]);
+ } else /* 3 subauths, ie Windows/Mac style */ {
+ *puid = le32_to_cpu(psid->sub_auth[0]);
+ if ((psid->sub_auth[0] != pwell_known_sid->sub_auth[0]) ||
+ (psid->sub_auth[1] != pwell_known_sid->sub_auth[1]))
+ return false;
+
+ *puid = le32_to_cpu(psid->sub_auth[2]);
+ }
+
+ cifs_dbg(FYI, "Unix UID %d returned from SID\n", *puid);
+ return true; /* well known sid found, uid returned */
+}
+
static void
cifs_copy_sid(struct cifs_sid *dst, const struct cifs_sid *src)
{
@@ -276,6 +361,43 @@ sid_to_id(struct cifs_sb_info *cifs_sb, struct cifs_sid *psid,
return -EIO;
}
+ if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UID_FROM_ACL) {
+ uint32_t unix_id;
+ bool is_group;
+
+ if (sidtype != SIDOWNER)
+ is_group = true;
+ else
+ is_group = false;
+
+ if (is_well_known_sid(psid, &unix_id, is_group) == false)
+ goto try_upcall_to_get_id;
+
+ if (is_group) {
+ kgid_t gid;
+ gid_t id;
+
+ id = (gid_t)unix_id;
+ gid = make_kgid(&init_user_ns, id);
+ if (gid_valid(gid)) {
+ fgid = gid;
+ goto got_valid_id;
+ }
+ } else {
+ kuid_t uid;
+ uid_t id;
+
+ id = (uid_t)unix_id;
+ uid = make_kuid(&init_user_ns, id);
+ if (uid_valid(uid)) {
+ fuid = uid;
+ goto got_valid_id;
+ }
+ }
+ /* If unable to find uid/gid easily from SID try via upcall */
+ }
+
+try_upcall_to_get_id:
sidstr = sid_to_key_str(psid, sidtype);
if (!sidstr)
return -ENOMEM;
@@ -329,6 +451,7 @@ out_revert_creds:
* Note that we return 0 here unconditionally. If the mapping
* fails then we just fall back to using the mnt_uid/mnt_gid.
*/
+got_valid_id:
if (sidtype == SIDOWNER)
fattr->cf_uid = fuid;
else
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index cca04e710421..15261ba464c5 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -64,15 +64,15 @@ unsigned int global_secflags = CIFSSEC_DEF;
unsigned int sign_CIFS_PDUs = 1;
static const struct super_operations cifs_super_ops;
unsigned int CIFSMaxBufSize = CIFS_MAX_MSGSIZE;
-module_param(CIFSMaxBufSize, uint, 0);
+module_param(CIFSMaxBufSize, uint, 0444);
MODULE_PARM_DESC(CIFSMaxBufSize, "Network buffer size (not including header). "
"Default: 16384 Range: 8192 to 130048");
unsigned int cifs_min_rcv = CIFS_MIN_RCV_POOL;
-module_param(cifs_min_rcv, uint, 0);
+module_param(cifs_min_rcv, uint, 0444);
MODULE_PARM_DESC(cifs_min_rcv, "Network buffers in pool. Default: 4 Range: "
"1 to 64");
unsigned int cifs_min_small = 30;
-module_param(cifs_min_small, uint, 0);
+module_param(cifs_min_small, uint, 0444);
MODULE_PARM_DESC(cifs_min_small, "Small network buffers in pool. Default: 30 "
"Range: 2 to 256");
unsigned int cifs_max_pending = CIFS_MAX_REQ;
@@ -271,7 +271,7 @@ cifs_alloc_inode(struct super_block *sb)
cifs_inode->createtime = 0;
cifs_inode->epoch = 0;
#ifdef CONFIG_CIFS_SMB2
- get_random_bytes(cifs_inode->lease_key, SMB2_LEASE_KEY_SIZE);
+ generate_random_uuid(cifs_inode->lease_key);
#endif
/*
* Can not set i_flags here - they get immediately overwritten to zero
@@ -469,6 +469,8 @@ cifs_show_options(struct seq_file *s, struct dentry *root)
seq_puts(s, ",posixpaths");
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID)
seq_puts(s, ",setuids");
+ if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UID_FROM_ACL)
+ seq_puts(s, ",idsfromsid");
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM)
seq_puts(s, ",serverino");
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
@@ -1262,7 +1264,6 @@ init_cifs(void)
GlobalTotalActiveXid = 0;
GlobalMaxActiveXid = 0;
spin_lock_init(&cifs_tcp_ses_lock);
- spin_lock_init(&cifs_file_list_lock);
spin_lock_init(&GlobalMid_Lock);
get_random_bytes(&cifs_lock_secret, sizeof(cifs_lock_secret));
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 8f1d8c1e72be..1f17f6bd7a60 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -75,6 +75,18 @@
#define SMB_ECHO_INTERVAL_MAX 600
#define SMB_ECHO_INTERVAL_DEFAULT 60
+/*
+ * Default number of credits to keep available for SMB3.
+ * This value is chosen somewhat arbitrarily. The Windows client
+ * defaults to 128 credits, the Windows server allows clients up to
+ * 512 credits (or 8K for later versions), and the NetApp server
+ * does not limit clients at all. Choose a high enough default value
+ * such that the client shouldn't limit performance, but allow mount
+ * to override (until you approach 64K, where we limit credits to 65000
+ * to reduce possibility of seeing more server credit overflow bugs.
+ */
+#define SMB2_MAX_CREDITS_AVAILABLE 32000
+
#include "cifspdu.h"
#ifndef XATTR_DOS_ATTRIB
@@ -376,6 +388,8 @@ struct smb_version_operations {
int (*calc_signature)(struct smb_rqst *, struct TCP_Server_Info *);
int (*set_integrity)(const unsigned int, struct cifs_tcon *tcon,
struct cifsFileInfo *src_file);
+ int (*enum_snapshots)(const unsigned int xid, struct cifs_tcon *tcon,
+ struct cifsFileInfo *src_file, void __user *);
int (*query_mf_symlink)(unsigned int, struct cifs_tcon *,
struct cifs_sb_info *, const unsigned char *,
char *, unsigned int *);
@@ -464,6 +478,7 @@ struct smb_vol {
bool retry:1;
bool intr:1;
bool setuids:1;
+ bool setuidfromacl:1;
bool override_uid:1;
bool override_gid:1;
bool dynperm:1;
@@ -510,6 +525,7 @@ struct smb_vol {
struct sockaddr_storage srcaddr; /* allow binding to a local IP */
struct nls_table *local_nls;
unsigned int echo_interval; /* echo interval in secs */
+ unsigned int max_credits; /* smb3 max_credits 10 < credits < 60000 */
};
#define CIFS_MOUNT_MASK (CIFS_MOUNT_NO_PERM | CIFS_MOUNT_SET_UID | \
@@ -567,7 +583,8 @@ struct TCP_Server_Info {
bool noblocksnd; /* use blocking sendmsg */
bool noautotune; /* do not autotune send buf sizes */
bool tcp_nodelay;
- int credits; /* send no more requests at once */
+ unsigned int credits; /* send no more requests at once */
+ unsigned int max_credits; /* can override large 32000 default at mnt */
unsigned int in_flight; /* number of requests on the wire to server */
spinlock_t req_lock; /* protect the two values above */
struct mutex srv_mutex;
@@ -833,6 +850,7 @@ struct cifs_tcon {
struct list_head tcon_list;
int tc_count;
struct list_head openFileList;
+ spinlock_t open_file_lock; /* protects list above */
struct cifs_ses *ses; /* pointer to session associated with */
char treeName[MAX_TREE_SIZE + 1]; /* UNC name of resource in ASCII */
char *nativeFileSystem;
@@ -889,7 +907,7 @@ struct cifs_tcon {
#endif /* CONFIG_CIFS_STATS2 */
__u64 bytes_read;
__u64 bytes_written;
- spinlock_t stat_lock;
+ spinlock_t stat_lock; /* protects the two fields above */
#endif /* CONFIG_CIFS_STATS */
FILE_SYSTEM_DEVICE_INFO fsDevInfo;
FILE_SYSTEM_ATTRIBUTE_INFO fsAttrInfo; /* ok if fs name truncated */
@@ -1040,20 +1058,24 @@ struct cifs_fid_locks {
};
struct cifsFileInfo {
+ /* following two lists are protected by tcon->open_file_lock */
struct list_head tlist; /* pointer to next fid owned by tcon */
struct list_head flist; /* next fid (file instance) for this inode */
+ /* lock list below protected by cifsi->lock_sem */
struct cifs_fid_locks *llist; /* brlocks held by this fid */
kuid_t uid; /* allows finding which FileInfo structure */
__u32 pid; /* process id who opened file */
struct cifs_fid fid; /* file id from remote */
+ struct list_head rlist; /* reconnect list */
/* BB add lock scope info here if needed */ ;
/* lock scope id (0 if none) */
struct dentry *dentry;
- unsigned int f_flags;
struct tcon_link *tlink;
+ unsigned int f_flags;
bool invalidHandle:1; /* file closed via session abend */
bool oplock_break_cancelled:1;
- int count; /* refcount protected by cifs_file_list_lock */
+ int count;
+ spinlock_t file_info_lock; /* protects four flag/count fields above */
struct mutex fh_mutex; /* prevents reopen race after dead ses*/
struct cifs_search_info srch_inf;
struct work_struct oplock_break; /* work for oplock breaks */
@@ -1120,7 +1142,7 @@ struct cifs_writedata {
/*
* Take a reference on the file private data. Must be called with
- * cifs_file_list_lock held.
+ * cfile->file_info_lock held.
*/
static inline void
cifsFileInfo_get_locked(struct cifsFileInfo *cifs_file)
@@ -1514,8 +1536,10 @@ require use of the stronger protocol */
* GlobalMid_Lock protects:
* list operations on pending_mid_q and oplockQ
* updates to XID counters, multiplex id and SMB sequence numbers
- * cifs_file_list_lock protects:
- * list operations on tcp and SMB session lists and tCon lists
+ * tcp_ses_lock protects:
+ * list operations on tcp and SMB session lists
+ * tcon->open_file_lock protects the list of open files hanging off the tcon
+ * cfile->file_info_lock protects counters and fields in cifs file struct
* f_owner.lock protects certain per file struct operations
* mapping->page_lock protects certain per page operations
*
@@ -1547,18 +1571,12 @@ GLOBAL_EXTERN struct list_head cifs_tcp_ses_list;
* tcp session, and the list of tcon's per smb session. It also protects
* the reference counters for the server, smb session, and tcon. Finally,
* changes to the tcon->tidStatus should be done while holding this lock.
+ * generally the locks should be taken in order tcp_ses_lock before
+ * tcon->open_file_lock and that before file->file_info_lock since the
+ * structure order is cifs_socket-->cifs_ses-->cifs_tcon-->cifs_file
*/
GLOBAL_EXTERN spinlock_t cifs_tcp_ses_lock;
-/*
- * This lock protects the cifs_file->llist and cifs_file->flist
- * list operations, and updates to some flags (cifs_file->invalidHandle)
- * It will be moved to either use the tcon->stat_lock or equivalent later.
- * If cifs_tcp_ses_lock and the lock below are both needed to be held, then
- * the cifs_tcp_ses_lock must be grabbed first and released last.
- */
-GLOBAL_EXTERN spinlock_t cifs_file_list_lock;
-
#ifdef CONFIG_CIFS_DNOTIFY_EXPERIMENTAL /* unused temporarily */
/* Outstanding dir notify requests */
GLOBAL_EXTERN struct list_head GlobalDnotifyReqList;
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 4ead72a001f9..ced0e42ce460 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -193,6 +193,8 @@ extern struct smb_vol *cifs_get_volume_info(char *mount_data,
extern int cifs_mount(struct cifs_sb_info *, struct smb_vol *);
extern void cifs_umount(struct cifs_sb_info *);
extern void cifs_mark_open_files_invalid(struct cifs_tcon *tcon);
+extern void cifs_reopen_persistent_handles(struct cifs_tcon *tcon);
+
extern bool cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset,
__u64 length, __u8 type,
struct cifsLockInfo **conf_lock,
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index f82d2823622f..3f3185febc58 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -98,13 +98,13 @@ cifs_mark_open_files_invalid(struct cifs_tcon *tcon)
struct list_head *tmp1;
/* list all files open on tree connection and mark them invalid */
- spin_lock(&cifs_file_list_lock);
+ spin_lock(&tcon->open_file_lock);
list_for_each_safe(tmp, tmp1, &tcon->openFileList) {
open_file = list_entry(tmp, struct cifsFileInfo, tlist);
open_file->invalidHandle = true;
open_file->oplock_break_cancelled = true;
}
- spin_unlock(&cifs_file_list_lock);
+ spin_unlock(&tcon->open_file_lock);
/*
* BB Add call to invalidate_inodes(sb) for all superblocks mounted
* to this tcon.
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 2e4f4bad8b1e..aab5227979e2 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -63,7 +63,6 @@ extern mempool_t *cifs_req_poolp;
#define TLINK_IDLE_EXPIRE (600 * HZ)
enum {
-
/* Mount options that take no arguments */
Opt_user_xattr, Opt_nouser_xattr,
Opt_forceuid, Opt_noforceuid,
@@ -76,7 +75,7 @@ enum {
Opt_noposixpaths, Opt_nounix,
Opt_nocase,
Opt_brl, Opt_nobrl,
- Opt_forcemandatorylock, Opt_setuids,
+ Opt_forcemandatorylock, Opt_setuidfromacl, Opt_setuids,
Opt_nosetuids, Opt_dynperm, Opt_nodynperm,
Opt_nohard, Opt_nosoft,
Opt_nointr, Opt_intr,
@@ -95,7 +94,7 @@ enum {
Opt_cruid, Opt_gid, Opt_file_mode,
Opt_dirmode, Opt_port,
Opt_rsize, Opt_wsize, Opt_actimeo,
- Opt_echo_interval,
+ Opt_echo_interval, Opt_max_credits,
/* Mount options which take string value */
Opt_user, Opt_pass, Opt_ip,
@@ -148,6 +147,7 @@ static const match_table_t cifs_mount_option_tokens = {
{ Opt_forcemandatorylock, "forcemand" },
{ Opt_setuids, "setuids" },
{ Opt_nosetuids, "nosetuids" },
+ { Opt_setuidfromacl, "idsfromsid" },
{ Opt_dynperm, "dynperm" },
{ Opt_nodynperm, "nodynperm" },
{ Opt_nohard, "nohard" },
@@ -190,6 +190,7 @@ static const match_table_t cifs_mount_option_tokens = {
{ Opt_wsize, "wsize=%s" },
{ Opt_actimeo, "actimeo=%s" },
{ Opt_echo_interval, "echo_interval=%s" },
+ { Opt_max_credits, "max_credits=%s" },
{ Opt_blank_user, "user=" },
{ Opt_blank_user, "username=" },
@@ -1376,6 +1377,9 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
case Opt_nosetuids:
vol->setuids = 0;
break;
+ case Opt_setuidfromacl:
+ vol->setuidfromacl = 1;
+ break;
case Opt_dynperm:
vol->dynperm = true;
break;
@@ -1586,6 +1590,15 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
}
vol->echo_interval = option;
break;
+ case Opt_max_credits:
+ if (get_option_ul(args, &option) || (option < 20) ||
+ (option > 60000)) {
+ cifs_dbg(VFS, "%s: Invalid max_credits value\n",
+ __func__);
+ goto cifs_parse_mount_err;
+ }
+ vol->max_credits = option;
+ break;
/* String Arguments */
@@ -2163,7 +2176,7 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
memcpy(&tcp_ses->dstaddr, &volume_info->dstaddr,
sizeof(tcp_ses->dstaddr));
#ifdef CONFIG_CIFS_SMB2
- get_random_bytes(tcp_ses->client_guid, SMB2_CLIENT_GUID_SIZE);
+ generate_random_uuid(tcp_ses->client_guid);
#endif
/*
* at this point we are the only ones with the pointer
@@ -3270,6 +3283,8 @@ int cifs_setup_cifs_sb(struct smb_vol *pvolume_info,
cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_PERM;
if (pvolume_info->setuids)
cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_SET_UID;
+ if (pvolume_info->setuidfromacl)
+ cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_UID_FROM_ACL;
if (pvolume_info->server_ino)
cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_SERVER_INUM;
if (pvolume_info->remap)
@@ -3598,7 +3613,11 @@ try_mount_again:
bdi_destroy(&cifs_sb->bdi);
goto out;
}
-
+ if ((volume_info->max_credits < 20) ||
+ (volume_info->max_credits > 60000))
+ server->max_credits = SMB2_MAX_CREDITS_AVAILABLE;
+ else
+ server->max_credits = volume_info->max_credits;
/* get a reference to a SMB session */
ses = cifs_get_smb_ses(server, volume_info);
if (IS_ERR(ses)) {
@@ -3688,14 +3707,16 @@ remote_path_check:
goto mount_fail_check;
}
- rc = cifs_are_all_path_components_accessible(server,
+ if (rc != -EREMOTE) {
+ rc = cifs_are_all_path_components_accessible(server,
xid, tcon, cifs_sb,
full_path);
- if (rc != 0) {
- cifs_dbg(VFS, "cannot query dirs between root and final path, "
- "enabling CIFS_MOUNT_USE_PREFIX_PATH\n");
- cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_USE_PREFIX_PATH;
- rc = 0;
+ if (rc != 0) {
+ cifs_dbg(VFS, "cannot query dirs between root and final path, "
+ "enabling CIFS_MOUNT_USE_PREFIX_PATH\n");
+ cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_USE_PREFIX_PATH;
+ rc = 0;
+ }
}
kfree(full_path);
}
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index a95fe8b1afe9..7f5f6176c6f1 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -305,6 +305,7 @@ cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
cfile->tlink = cifs_get_tlink(tlink);
INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
mutex_init(&cfile->fh_mutex);
+ spin_lock_init(&cfile->file_info_lock);
cifs_sb_active(inode->i_sb);
@@ -317,7 +318,7 @@ cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
oplock = 0;
}
- spin_lock(&cifs_file_list_lock);
+ spin_lock(&tcon->open_file_lock);
if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
oplock = fid->pending_open->oplock;
list_del(&fid->pending_open->olist);
@@ -326,12 +327,13 @@ cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
server->ops->set_fid(cfile, fid, oplock);
list_add(&cfile->tlist, &tcon->openFileList);
+
/* if readable file instance put first in list*/
if (file->f_mode & FMODE_READ)
list_add(&cfile->flist, &cinode->openFileList);
else
list_add_tail(&cfile->flist, &cinode->openFileList);
- spin_unlock(&cifs_file_list_lock);
+ spin_unlock(&tcon->open_file_lock);
if (fid->purge_cache)
cifs_zap_mapping(inode);
@@ -343,16 +345,16 @@ cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
struct cifsFileInfo *
cifsFileInfo_get(struct cifsFileInfo *cifs_file)
{
- spin_lock(&cifs_file_list_lock);
+ spin_lock(&cifs_file->file_info_lock);
cifsFileInfo_get_locked(cifs_file);
- spin_unlock(&cifs_file_list_lock);
+ spin_unlock(&cifs_file->file_info_lock);
return cifs_file;
}
/*
* Release a reference on the file private data. This may involve closing
* the filehandle out on the server. Must be called without holding
- * cifs_file_list_lock.
+ * tcon->open_file_lock and cifs_file->file_info_lock.
*/
void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
{
@@ -367,11 +369,15 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
struct cifs_pending_open open;
bool oplock_break_cancelled;
- spin_lock(&cifs_file_list_lock);
+ spin_lock(&tcon->open_file_lock);
+
+ spin_lock(&cifs_file->file_info_lock);
if (--cifs_file->count > 0) {
- spin_unlock(&cifs_file_list_lock);
+ spin_unlock(&cifs_file->file_info_lock);
+ spin_unlock(&tcon->open_file_lock);
return;
}
+ spin_unlock(&cifs_file->file_info_lock);
if (server->ops->get_lease_key)
server->ops->get_lease_key(inode, &fid);
@@ -395,7 +401,8 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
cifs_set_oplock_level(cifsi, 0);
}
- spin_unlock(&cifs_file_list_lock);
+
+ spin_unlock(&tcon->open_file_lock);
oplock_break_cancelled = cancel_work_sync(&cifs_file->oplock_break);
@@ -732,6 +739,15 @@ reopen_success:
* to the server to get the new inode info.
*/
+ /*
+ * If the server returned a read oplock and we have mandatory brlocks,
+ * set oplock level to None.
+ */
+ if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
+ cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
+ oplock = 0;
+ }
+
server->ops->set_fid(cfile, &cfile->fid, oplock);
if (oparms.reconnect)
cifs_relock_file(cfile);
@@ -753,6 +769,36 @@ int cifs_close(struct inode *inode, struct file *file)
return 0;
}
+void
+cifs_reopen_persistent_handles(struct cifs_tcon *tcon)
+{
+ struct cifsFileInfo *open_file;
+ struct list_head *tmp;
+ struct list_head *tmp1;
+ struct list_head tmp_list;
+
+ cifs_dbg(FYI, "Reopen persistent handles");
+ INIT_LIST_HEAD(&tmp_list);
+
+ /* list all files open on tree connection, reopen resilient handles */
+ spin_lock(&tcon->open_file_lock);
+ list_for_each(tmp, &tcon->openFileList) {
+ open_file = list_entry(tmp, struct cifsFileInfo, tlist);
+ if (!open_file->invalidHandle)
+ continue;
+ cifsFileInfo_get(open_file);
+ list_add_tail(&open_file->rlist, &tmp_list);
+ }
+ spin_unlock(&tcon->open_file_lock);
+
+ list_for_each_safe(tmp, tmp1, &tmp_list) {
+ open_file = list_entry(tmp, struct cifsFileInfo, rlist);
+ cifs_reopen_file(open_file, false /* do not flush */);
+ list_del_init(&open_file->rlist);
+ cifsFileInfo_put(open_file);
+ }
+}
+
int cifs_closedir(struct inode *inode, struct file *file)
{
int rc = 0;
@@ -772,10 +818,10 @@ int cifs_closedir(struct inode *inode, struct file *file)
server = tcon->ses->server;
cifs_dbg(FYI, "Freeing private data in close dir\n");
- spin_lock(&cifs_file_list_lock);
+ spin_lock(&cfile->file_info_lock);
if (server->ops->dir_needs_close(cfile)) {
cfile->invalidHandle = true;
- spin_unlock(&cifs_file_list_lock);
+ spin_unlock(&cfile->file_info_lock);
if (server->ops->close_dir)
rc = server->ops->close_dir(xid, tcon, &cfile->fid);
else
@@ -784,7 +830,7 @@ int cifs_closedir(struct inode *inode, struct file *file)
/* not much we can do if it fails anyway, ignore rc */
rc = 0;
} else
- spin_unlock(&cifs_file_list_lock);
+ spin_unlock(&cfile->file_info_lock);
buf = cfile->srch_inf.ntwrk_buf_start;
if (buf) {
@@ -1728,12 +1774,13 @@ struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
{
struct cifsFileInfo *open_file = NULL;
struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
+ struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
/* only filter by fsuid on multiuser mounts */
if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
fsuid_only = false;
- spin_lock(&cifs_file_list_lock);
+ spin_lock(&tcon->open_file_lock);
/* we could simply get the first_list_entry since write-only entries
are always at the end of the list but since the first entry might
have a close pending, we go through the whole list */
@@ -1744,8 +1791,8 @@ struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
if (!open_file->invalidHandle) {
/* found a good file */
/* lock it so it will not be closed on us */
- cifsFileInfo_get_locked(open_file);
- spin_unlock(&cifs_file_list_lock);
+ cifsFileInfo_get(open_file);
+ spin_unlock(&tcon->open_file_lock);
return open_file;
} /* else might as well continue, and look for
another, or simply have the caller reopen it
@@ -1753,7 +1800,7 @@ struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
} else /* write only file */
break; /* write only files are last so must be done */
}
- spin_unlock(&cifs_file_list_lock);
+ spin_unlock(&tcon->open_file_lock);
return NULL;
}
@@ -1762,6 +1809,7 @@ struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode,
{
struct cifsFileInfo *open_file, *inv_file = NULL;
struct cifs_sb_info *cifs_sb;
+ struct cifs_tcon *tcon;
bool any_available = false;
int rc;
unsigned int refind = 0;
@@ -1777,15 +1825,16 @@ struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode,
}
cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
+ tcon = cifs_sb_master_tcon(cifs_sb);
/* only filter by fsuid on multiuser mounts */
if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
fsuid_only = false;
- spin_lock(&cifs_file_list_lock);
+ spin_lock(&tcon->open_file_lock);
refind_writable:
if (refind > MAX_REOPEN_ATT) {
- spin_unlock(&cifs_file_list_lock);
+ spin_unlock(&tcon->open_file_lock);
return NULL;
}
list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
@@ -1796,8 +1845,8 @@ refind_writable:
if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
if (!open_file->invalidHandle) {
/* found a good writable file */
- cifsFileInfo_get_locked(open_file);
- spin_unlock(&cifs_file_list_lock);
+ cifsFileInfo_get(open_file);
+ spin_unlock(&tcon->open_file_lock);
return open_file;
} else {
if (!inv_file)
@@ -1813,24 +1862,24 @@ refind_writable:
if (inv_file) {
any_available = false;
- cifsFileInfo_get_locked(inv_file);
+ cifsFileInfo_get(inv_file);
}
- spin_unlock(&cifs_file_list_lock);
+ spin_unlock(&tcon->open_file_lock);
if (inv_file) {
rc = cifs_reopen_file(inv_file, false);
if (!rc)
return inv_file;
else {
- spin_lock(&cifs_file_list_lock);
+ spin_lock(&tcon->open_file_lock);
list_move_tail(&inv_file->flist,
&cifs_inode->openFileList);
- spin_unlock(&cifs_file_list_lock);
+ spin_unlock(&tcon->open_file_lock);
cifsFileInfo_put(inv_file);
- spin_lock(&cifs_file_list_lock);
++refind;
inv_file = NULL;
+ spin_lock(&tcon->open_file_lock);
goto refind_writable;
}
}
@@ -3612,15 +3661,17 @@ static int cifs_readpage(struct file *file, struct page *page)
static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
{
struct cifsFileInfo *open_file;
+ struct cifs_tcon *tcon =
+ cifs_sb_master_tcon(CIFS_SB(cifs_inode->vfs_inode.i_sb));
- spin_lock(&cifs_file_list_lock);
+ spin_lock(&tcon->open_file_lock);
list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
- spin_unlock(&cifs_file_list_lock);
+ spin_unlock(&tcon->open_file_lock);
return 1;
}
}
- spin_unlock(&cifs_file_list_lock);
+ spin_unlock(&tcon->open_file_lock);
return 0;
}
diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c
index 7a3b84e300f8..9f51b81119f2 100644
--- a/fs/cifs/ioctl.c
+++ b/fs/cifs/ioctl.c
@@ -189,7 +189,7 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
xid = get_xid();
cifs_sb = CIFS_SB(inode->i_sb);
-
+ cifs_dbg(VFS, "cifs ioctl 0x%x\n", command);
switch (command) {
case FS_IOC_GETFLAGS:
if (pSMBFile == NULL)
@@ -267,11 +267,23 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
tcon = tlink_tcon(pSMBFile->tlink);
rc = smb_mnt_get_fsinfo(xid, tcon, (void __user *)arg);
break;
+ case CIFS_ENUMERATE_SNAPSHOTS:
+ if (arg == 0) {
+ rc = -EINVAL;
+ goto cifs_ioc_exit;
+ }
+ tcon = tlink_tcon(pSMBFile->tlink);
+ if (tcon->ses->server->ops->enum_snapshots)
+ rc = tcon->ses->server->ops->enum_snapshots(xid, tcon,
+ pSMBFile, (void __user *)arg);
+ else
+ rc = -EOPNOTSUPP;
+ break;
default:
cifs_dbg(FYI, "unsupported ioctl\n");
break;
}
-
+cifs_ioc_exit:
free_xid(xid);
return rc;
}
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 813fe13c2ae1..c6729156f9a0 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -120,6 +120,7 @@ tconInfoAlloc(void)
++ret_buf->tc_count;
INIT_LIST_HEAD(&ret_buf->openFileList);
INIT_LIST_HEAD(&ret_buf->tcon_list);
+ spin_lock_init(&ret_buf->open_file_lock);
#ifdef CONFIG_CIFS_STATS
spin_lock_init(&ret_buf->stat_lock);
#endif
@@ -465,7 +466,7 @@ is_valid_oplock_break(char *buffer, struct TCP_Server_Info *srv)
continue;
cifs_stats_inc(&tcon->stats.cifs_stats.num_oplock_brks);
- spin_lock(&cifs_file_list_lock);
+ spin_lock(&tcon->open_file_lock);
list_for_each(tmp2, &tcon->openFileList) {
netfile = list_entry(tmp2, struct cifsFileInfo,
tlist);
@@ -495,11 +496,11 @@ is_valid_oplock_break(char *buffer, struct TCP_Server_Info *srv)
&netfile->oplock_break);
netfile->oplock_break_cancelled = false;
- spin_unlock(&cifs_file_list_lock);
+ spin_unlock(&tcon->open_file_lock);
spin_unlock(&cifs_tcp_ses_lock);
return true;
}
- spin_unlock(&cifs_file_list_lock);
+ spin_unlock(&tcon->open_file_lock);
spin_unlock(&cifs_tcp_ses_lock);
cifs_dbg(FYI, "No matching file for oplock break\n");
return true;
@@ -613,9 +614,9 @@ backup_cred(struct cifs_sb_info *cifs_sb)
void
cifs_del_pending_open(struct cifs_pending_open *open)
{
- spin_lock(&cifs_file_list_lock);
+ spin_lock(&tlink_tcon(open->tlink)->open_file_lock);
list_del(&open->olist);
- spin_unlock(&cifs_file_list_lock);
+ spin_unlock(&tlink_tcon(open->tlink)->open_file_lock);
}
void
@@ -635,7 +636,7 @@ void
cifs_add_pending_open(struct cifs_fid *fid, struct tcon_link *tlink,
struct cifs_pending_open *open)
{
- spin_lock(&cifs_file_list_lock);
+ spin_lock(&tlink_tcon(tlink)->open_file_lock);
cifs_add_pending_open_locked(fid, tlink, open);
- spin_unlock(&cifs_file_list_lock);
+ spin_unlock(&tlink_tcon(open->tlink)->open_file_lock);
}
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index 65cf85dcda09..8f6a2a5863b9 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -597,14 +597,14 @@ find_cifs_entry(const unsigned int xid, struct cifs_tcon *tcon, loff_t pos,
is_dir_changed(file)) || (index_to_find < first_entry_in_buffer)) {
/* close and restart search */
cifs_dbg(FYI, "search backing up - close and restart search\n");
- spin_lock(&cifs_file_list_lock);
+ spin_lock(&cfile->file_info_lock);
if (server->ops->dir_needs_close(cfile)) {
cfile->invalidHandle = true;
- spin_unlock(&cifs_file_list_lock);
+ spin_unlock(&cfile->file_info_lock);
if (server->ops->close_dir)
server->ops->close_dir(xid, tcon, &cfile->fid);
} else
- spin_unlock(&cifs_file_list_lock);
+ spin_unlock(&cfile->file_info_lock);
if (cfile->srch_inf.ntwrk_buf_start) {
cifs_dbg(FYI, "freeing SMB ff cache buf on search rewind\n");
if (cfile->srch_inf.smallBuf)
diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c
index 4f0231e685a9..1238cd3552f9 100644
--- a/fs/cifs/smb2inode.c
+++ b/fs/cifs/smb2inode.c
@@ -266,9 +266,15 @@ smb2_set_file_info(struct inode *inode, const char *full_path,
struct tcon_link *tlink;
int rc;
+ if ((buf->CreationTime == 0) && (buf->LastAccessTime == 0) &&
+ (buf->LastWriteTime == 0) && (buf->ChangeTime) &&
+ (buf->Attributes == 0))
+ return 0; /* would be a no op, no sense sending this */
+
tlink = cifs_sb_tlink(cifs_sb);
if (IS_ERR(tlink))
return PTR_ERR(tlink);
+
rc = smb2_open_op_close(xid, tlink_tcon(tlink), cifs_sb, full_path,
FILE_WRITE_ATTRIBUTES, FILE_OPEN, 0, buf,
SMB2_OP_SET_INFO);
diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c
index 389fb9f8c84e..3d383489b9cf 100644
--- a/fs/cifs/smb2misc.c
+++ b/fs/cifs/smb2misc.c
@@ -549,19 +549,19 @@ smb2_is_valid_lease_break(char *buffer)
list_for_each(tmp1, &server->smb_ses_list) {
ses = list_entry(tmp1, struct cifs_ses, smb_ses_list);
- spin_lock(&cifs_file_list_lock);
list_for_each(tmp2, &ses->tcon_list) {
tcon = list_entry(tmp2, struct cifs_tcon,
tcon_list);
+ spin_lock(&tcon->open_file_lock);
cifs_stats_inc(
&tcon->stats.cifs_stats.num_oplock_brks);
if (smb2_tcon_has_lease(tcon, rsp, lw)) {
- spin_unlock(&cifs_file_list_lock);
+ spin_unlock(&tcon->open_file_lock);
spin_unlock(&cifs_tcp_ses_lock);
return true;
}
+ spin_unlock(&tcon->open_file_lock);
}
- spin_unlock(&cifs_file_list_lock);
}
}
spin_unlock(&cifs_tcp_ses_lock);
@@ -603,7 +603,7 @@ smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server)
tcon = list_entry(tmp1, struct cifs_tcon, tcon_list);
cifs_stats_inc(&tcon->stats.cifs_stats.num_oplock_brks);
- spin_lock(&cifs_file_list_lock);
+ spin_lock(&tcon->open_file_lock);
list_for_each(tmp2, &tcon->openFileList) {
cfile = list_entry(tmp2, struct cifsFileInfo,
tlist);
@@ -615,7 +615,7 @@ smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server)
cifs_dbg(FYI, "file id match, oplock break\n");
cinode = CIFS_I(d_inode(cfile->dentry));
-
+ spin_lock(&cfile->file_info_lock);
if (!CIFS_CACHE_WRITE(cinode) &&
rsp->OplockLevel == SMB2_OPLOCK_LEVEL_NONE)
cfile->oplock_break_cancelled = true;
@@ -637,14 +637,14 @@ smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server)
clear_bit(
CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2,
&cinode->flags);
-
+ spin_unlock(&cfile->file_info_lock);
queue_work(cifsiod_wq, &cfile->oplock_break);
- spin_unlock(&cifs_file_list_lock);
+ spin_unlock(&tcon->open_file_lock);
spin_unlock(&cifs_tcp_ses_lock);
return true;
}
- spin_unlock(&cifs_file_list_lock);
+ spin_unlock(&tcon->open_file_lock);
spin_unlock(&cifs_tcp_ses_lock);
cifs_dbg(FYI, "No matching file for oplock break\n");
return true;
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index d203c0329626..5d456ebb3813 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -28,6 +28,7 @@
#include "cifs_unicode.h"
#include "smb2status.h"
#include "smb2glob.h"
+#include "cifs_ioctl.h"
static int
change_conf(struct TCP_Server_Info *server)
@@ -70,6 +71,10 @@ smb2_add_credits(struct TCP_Server_Info *server, const unsigned int add,
spin_lock(&server->req_lock);
val = server->ops->get_credits_field(server, optype);
*val += add;
+ if (*val > 65000) {
+ *val = 65000; /* Don't get near 64K credits, avoid srv bugs */
+ printk_once(KERN_WARNING "server overflowed SMB3 credits\n");
+ }
server->in_flight--;
if (server->in_flight == 0 && (optype & CIFS_OP_MASK) != CIFS_NEG_OP)
rc = change_conf(server);
@@ -287,7 +292,7 @@ SMB3_request_interfaces(const unsigned int xid, struct cifs_tcon *tcon)
cifs_dbg(FYI, "Link Speed %lld\n",
le64_to_cpu(out_buf->LinkSpeed));
}
-
+ kfree(out_buf);
return rc;
}
#endif /* STATS2 */
@@ -541,6 +546,7 @@ smb2_set_fid(struct cifsFileInfo *cfile, struct cifs_fid *fid, __u32 oplock)
server->ops->set_oplock_level(cinode, oplock, fid->epoch,
&fid->purge_cache);
cinode->can_cache_brlcks = CIFS_CACHE_WRITE(cinode);
+ memcpy(cfile->fid.create_guid, fid->create_guid, 16);
}
static void
@@ -699,6 +705,7 @@ smb2_clone_range(const unsigned int xid,
cchunk_out:
kfree(pcchunk);
+ kfree(retbuf);
return rc;
}
@@ -823,7 +830,6 @@ smb2_duplicate_extents(const unsigned int xid,
{
int rc;
unsigned int ret_data_len;
- char *retbuf = NULL;
struct duplicate_extents_to_file dup_ext_buf;
struct cifs_tcon *tcon = tlink_tcon(trgtfile->tlink);
@@ -849,7 +855,7 @@ smb2_duplicate_extents(const unsigned int xid,
FSCTL_DUPLICATE_EXTENTS_TO_FILE,
true /* is_fsctl */, (char *)&dup_ext_buf,
sizeof(struct duplicate_extents_to_file),
- (char **)&retbuf,
+ NULL,
&ret_data_len);
if (ret_data_len > 0)
@@ -872,7 +878,6 @@ smb3_set_integrity(const unsigned int xid, struct cifs_tcon *tcon,
struct cifsFileInfo *cfile)
{
struct fsctl_set_integrity_information_req integr_info;
- char *retbuf = NULL;
unsigned int ret_data_len;
integr_info.ChecksumAlgorithm = cpu_to_le16(CHECKSUM_TYPE_UNCHANGED);
@@ -884,9 +889,53 @@ smb3_set_integrity(const unsigned int xid, struct cifs_tcon *tcon,
FSCTL_SET_INTEGRITY_INFORMATION,
true /* is_fsctl */, (char *)&integr_info,
sizeof(struct fsctl_set_integrity_information_req),
+ NULL,
+ &ret_data_len);
+
+}
+
+static int
+smb3_enum_snapshots(const unsigned int xid, struct cifs_tcon *tcon,
+ struct cifsFileInfo *cfile, void __user *ioc_buf)
+{
+ char *retbuf = NULL;
+ unsigned int ret_data_len = 0;
+ int rc;
+ struct smb_snapshot_array snapshot_in;
+
+ rc = SMB2_ioctl(xid, tcon, cfile->fid.persistent_fid,
+ cfile->fid.volatile_fid,
+ FSCTL_SRV_ENUMERATE_SNAPSHOTS,
+ true /* is_fsctl */, NULL, 0 /* no input data */,
(char **)&retbuf,
&ret_data_len);
+ cifs_dbg(FYI, "enum snaphots ioctl returned %d and ret buflen is %d\n",
+ rc, ret_data_len);
+ if (rc)
+ return rc;
+ if (ret_data_len && (ioc_buf != NULL) && (retbuf != NULL)) {
+ /* Fixup buffer */
+ if (copy_from_user(&snapshot_in, ioc_buf,
+ sizeof(struct smb_snapshot_array))) {
+ rc = -EFAULT;
+ kfree(retbuf);
+ return rc;
+ }
+ if (snapshot_in.snapshot_array_size < sizeof(struct smb_snapshot_array)) {
+ rc = -ERANGE;
+ return rc;
+ }
+
+ if (ret_data_len > snapshot_in.snapshot_array_size)
+ ret_data_len = snapshot_in.snapshot_array_size;
+
+ if (copy_to_user(ioc_buf, retbuf, ret_data_len))
+ rc = -EFAULT;
+ }
+
+ kfree(retbuf);
+ return rc;
}
static int
@@ -1041,7 +1090,7 @@ smb2_set_lease_key(struct inode *inode, struct cifs_fid *fid)
static void
smb2_new_lease_key(struct cifs_fid *fid)
{
- get_random_bytes(fid->lease_key, SMB2_LEASE_KEY_SIZE);
+ generate_random_uuid(fid->lease_key);
}
#define SMB2_SYMLINK_STRUCT_SIZE \
@@ -1654,6 +1703,7 @@ struct smb_version_operations smb21_operations = {
.clone_range = smb2_clone_range,
.wp_retry_size = smb2_wp_retry_size,
.dir_needs_close = smb2_dir_needs_close,
+ .enum_snapshots = smb3_enum_snapshots,
};
struct smb_version_operations smb30_operations = {
@@ -1740,6 +1790,7 @@ struct smb_version_operations smb30_operations = {
.wp_retry_size = smb2_wp_retry_size,
.dir_needs_close = smb2_dir_needs_close,
.fallocate = smb3_fallocate,
+ .enum_snapshots = smb3_enum_snapshots,
};
#ifdef CONFIG_CIFS_SMB311
@@ -1827,6 +1878,7 @@ struct smb_version_operations smb311_operations = {
.wp_retry_size = smb2_wp_retry_size,
.dir_needs_close = smb2_dir_needs_close,
.fallocate = smb3_fallocate,
+ .enum_snapshots = smb3_enum_snapshots,
};
#endif /* CIFS_SMB311 */
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 29e06db5f187..5ca5ea4668a1 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -100,7 +100,21 @@ smb2_hdr_assemble(struct smb2_hdr *hdr, __le16 smb2_cmd /* command */ ,
hdr->ProtocolId = SMB2_PROTO_NUMBER;
hdr->StructureSize = cpu_to_le16(64);
hdr->Command = smb2_cmd;
- hdr->CreditRequest = cpu_to_le16(2); /* BB make this dynamic */
+ if (tcon && tcon->ses && tcon->ses->server) {
+ struct TCP_Server_Info *server = tcon->ses->server;
+
+ spin_lock(&server->req_lock);
+ /* Request up to 2 credits but don't go over the limit. */
+ if (server->credits >= server->max_credits)
+ hdr->CreditRequest = cpu_to_le16(0);
+ else
+ hdr->CreditRequest = cpu_to_le16(
+ min_t(int, server->max_credits -
+ server->credits, 2));
+ spin_unlock(&server->req_lock);
+ } else {
+ hdr->CreditRequest = cpu_to_le16(2);
+ }
hdr->ProcessId = cpu_to_le32((__u16)current->tgid);
if (!tcon)
@@ -236,8 +250,13 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon)
}
cifs_mark_open_files_invalid(tcon);
+
rc = SMB2_tcon(0, tcon->ses, tcon->treeName, tcon, nls_codepage);
mutex_unlock(&tcon->ses->session_mutex);
+
+ if (tcon->use_persistent)
+ cifs_reopen_persistent_handles(tcon);
+
cifs_dbg(FYI, "reconnect tcon rc = %d\n", rc);
if (rc)
goto out;
@@ -574,59 +593,42 @@ vneg_out:
return -EIO;
}
-int
-SMB2_sess_setup(const unsigned int xid, struct cifs_ses *ses,
- const struct nls_table *nls_cp)
+struct SMB2_sess_data {
+ unsigned int xid;
+ struct cifs_ses *ses;
+ struct nls_table *nls_cp;
+ void (*func)(struct SMB2_sess_data *);
+ int result;
+ u64 previous_session;
+
+ /* we will send the SMB in three pieces:
+ * a fixed length beginning part, an optional
+ * SPNEGO blob (which can be zero length), and a
+ * last part which will include the strings
+ * and rest of bcc area. This allows us to avoid
+ * a large buffer 17K allocation
+ */
+ int buf0_type;
+ struct kvec iov[2];
+};
+
+static int
+SMB2_sess_alloc_buffer(struct SMB2_sess_data *sess_data)
{
+ int rc;
+ struct cifs_ses *ses = sess_data->ses;
struct smb2_sess_setup_req *req;
- struct smb2_sess_setup_rsp *rsp = NULL;
- struct kvec iov[2];
- int rc = 0;
- int resp_buftype = CIFS_NO_BUFFER;
- __le32 phase = NtLmNegotiate; /* NTLMSSP, if needed, is multistage */
struct TCP_Server_Info *server = ses->server;
- u16 blob_length = 0;
- struct key *spnego_key = NULL;
- char *security_blob = NULL;
- unsigned char *ntlmssp_blob = NULL;
- bool use_spnego = false; /* else use raw ntlmssp */
-
- cifs_dbg(FYI, "Session Setup\n");
-
- if (!server) {
- WARN(1, "%s: server is NULL!\n", __func__);
- return -EIO;
- }
-
- /*
- * If we are here due to reconnect, free per-smb session key
- * in case signing was required.
- */
- kfree(ses->auth_key.response);
- ses->auth_key.response = NULL;
-
- /*
- * If memory allocation is successful, caller of this function
- * frees it.
- */
- ses->ntlmssp = kmalloc(sizeof(struct ntlmssp_auth), GFP_KERNEL);
- if (!ses->ntlmssp)
- return -ENOMEM;
- ses->ntlmssp->sesskey_per_smbsess = true;
-
- /* FIXME: allow for other auth types besides NTLMSSP (e.g. krb5) */
- if (ses->sectype != Kerberos && ses->sectype != RawNTLMSSP)
- ses->sectype = RawNTLMSSP;
-
-ssetup_ntlmssp_authenticate:
- if (phase == NtLmChallenge)
- phase = NtLmAuthenticate; /* if ntlmssp, now final phase */
rc = small_smb2_init(SMB2_SESSION_SETUP, NULL, (void **) &req);
if (rc)
return rc;
req->hdr.SessionId = 0; /* First session, not a reauthenticate */
+
+ /* if reconnect, we need to send previous sess id, otherwise it is 0 */
+ req->PreviousSessionId = sess_data->previous_session;
+
req->Flags = 0; /* MBZ */
/* to enable echos and oplocks */
req->hdr.CreditRequest = cpu_to_le16(3);
@@ -642,199 +644,368 @@ ssetup_ntlmssp_authenticate:
req->Capabilities = 0;
req->Channel = 0; /* MBZ */
- iov[0].iov_base = (char *)req;
+ sess_data->iov[0].iov_base = (char *)req;
/* 4 for rfc1002 length field and 1 for pad */
- iov[0].iov_len = get_rfc1002_length(req) + 4 - 1;
+ sess_data->iov[0].iov_len = get_rfc1002_length(req) + 4 - 1;
+ /*
+ * This variable will be used to clear the buffer
+ * allocated above in case of any error in the calling function.
+ */
+ sess_data->buf0_type = CIFS_SMALL_BUFFER;
- if (ses->sectype == Kerberos) {
-#ifdef CONFIG_CIFS_UPCALL
- struct cifs_spnego_msg *msg;
+ return 0;
+}
- spnego_key = cifs_get_spnego_key(ses);
- if (IS_ERR(spnego_key)) {
- rc = PTR_ERR(spnego_key);
- spnego_key = NULL;
- goto ssetup_exit;
- }
+static void
+SMB2_sess_free_buffer(struct SMB2_sess_data *sess_data)
+{
+ free_rsp_buf(sess_data->buf0_type, sess_data->iov[0].iov_base);
+ sess_data->buf0_type = CIFS_NO_BUFFER;
+}
- msg = spnego_key->payload.data[0];
- /*
- * check version field to make sure that cifs.upcall is
- * sending us a response in an expected form
- */
- if (msg->version != CIFS_SPNEGO_UPCALL_VERSION) {
- cifs_dbg(VFS,
- "bad cifs.upcall version. Expected %d got %d",
- CIFS_SPNEGO_UPCALL_VERSION, msg->version);
- rc = -EKEYREJECTED;
- goto ssetup_exit;
- }
- ses->auth_key.response = kmemdup(msg->data, msg->sesskey_len,
- GFP_KERNEL);
- if (!ses->auth_key.response) {
- cifs_dbg(VFS,
- "Kerberos can't allocate (%u bytes) memory",
- msg->sesskey_len);
- rc = -ENOMEM;
- goto ssetup_exit;
- }
- ses->auth_key.len = msg->sesskey_len;
- blob_length = msg->secblob_len;
- iov[1].iov_base = msg->data + msg->sesskey_len;
- iov[1].iov_len = blob_length;
-#else
- rc = -EOPNOTSUPP;
- goto ssetup_exit;
-#endif /* CONFIG_CIFS_UPCALL */
- } else if (phase == NtLmNegotiate) { /* if not krb5 must be ntlmssp */
- ntlmssp_blob = kmalloc(sizeof(struct _NEGOTIATE_MESSAGE),
- GFP_KERNEL);
- if (ntlmssp_blob == NULL) {
- rc = -ENOMEM;
- goto ssetup_exit;
- }
- build_ntlmssp_negotiate_blob(ntlmssp_blob, ses);
- if (use_spnego) {
- /* blob_length = build_spnego_ntlmssp_blob(
- &security_blob,
- sizeof(struct _NEGOTIATE_MESSAGE),
- ntlmssp_blob); */
- /* BB eventually need to add this */
- cifs_dbg(VFS, "spnego not supported for SMB2 yet\n");
- rc = -EOPNOTSUPP;
- kfree(ntlmssp_blob);
- goto ssetup_exit;
- } else {
- blob_length = sizeof(struct _NEGOTIATE_MESSAGE);
- /* with raw NTLMSSP we don't encapsulate in SPNEGO */
- security_blob = ntlmssp_blob;
- }
- iov[1].iov_base = security_blob;
- iov[1].iov_len = blob_length;
- } else if (phase == NtLmAuthenticate) {
- req->hdr.SessionId = ses->Suid;
- rc = build_ntlmssp_auth_blob(&ntlmssp_blob, &blob_length, ses,
- nls_cp);
- if (rc) {
- cifs_dbg(FYI, "build_ntlmssp_auth_blob failed %d\n",
- rc);
- goto ssetup_exit; /* BB double check error handling */
- }
- if (use_spnego) {
- /* blob_length = build_spnego_ntlmssp_blob(
- &security_blob,
- blob_length,
- ntlmssp_blob); */
- cifs_dbg(VFS, "spnego not supported for SMB2 yet\n");
- rc = -EOPNOTSUPP;
- kfree(ntlmssp_blob);
- goto ssetup_exit;
- } else {
- security_blob = ntlmssp_blob;
- }
- iov[1].iov_base = security_blob;
- iov[1].iov_len = blob_length;
- } else {
- cifs_dbg(VFS, "illegal ntlmssp phase\n");
- rc = -EIO;
- goto ssetup_exit;
- }
+static int
+SMB2_sess_sendreceive(struct SMB2_sess_data *sess_data)
+{
+ int rc;
+ struct smb2_sess_setup_req *req = sess_data->iov[0].iov_base;
/* Testing shows that buffer offset must be at location of Buffer[0] */
req->SecurityBufferOffset =
- cpu_to_le16(sizeof(struct smb2_sess_setup_req) -
- 1 /* pad */ - 4 /* rfc1001 len */);
- req->SecurityBufferLength = cpu_to_le16(blob_length);
+ cpu_to_le16(sizeof(struct smb2_sess_setup_req) -
+ 1 /* pad */ - 4 /* rfc1001 len */);
+ req->SecurityBufferLength = cpu_to_le16(sess_data->iov[1].iov_len);
- inc_rfc1001_len(req, blob_length - 1 /* pad */);
+ inc_rfc1001_len(req, sess_data->iov[1].iov_len - 1 /* pad */);
/* BB add code to build os and lm fields */
- rc = SendReceive2(xid, ses, iov, 2, &resp_buftype,
- CIFS_LOG_ERROR | CIFS_NEG_OP);
+ rc = SendReceive2(sess_data->xid, sess_data->ses,
+ sess_data->iov, 2,
+ &sess_data->buf0_type,
+ CIFS_LOG_ERROR | CIFS_NEG_OP);
- kfree(security_blob);
- rsp = (struct smb2_sess_setup_rsp *)iov[0].iov_base;
- ses->Suid = rsp->hdr.SessionId;
- if (resp_buftype != CIFS_NO_BUFFER &&
- rsp->hdr.Status == STATUS_MORE_PROCESSING_REQUIRED) {
- if (phase != NtLmNegotiate) {
- cifs_dbg(VFS, "Unexpected more processing error\n");
- goto ssetup_exit;
- }
- if (offsetof(struct smb2_sess_setup_rsp, Buffer) - 4 !=
- le16_to_cpu(rsp->SecurityBufferOffset)) {
- cifs_dbg(VFS, "Invalid security buffer offset %d\n",
- le16_to_cpu(rsp->SecurityBufferOffset));
- rc = -EIO;
- goto ssetup_exit;
+ return rc;
+}
+
+static int
+SMB2_sess_establish_session(struct SMB2_sess_data *sess_data)
+{
+ int rc = 0;
+ struct cifs_ses *ses = sess_data->ses;
+
+ mutex_lock(&ses->server->srv_mutex);
+ if (ses->server->sign && ses->server->ops->generate_signingkey) {
+ rc = ses->server->ops->generate_signingkey(ses);
+ kfree(ses->auth_key.response);
+ ses->auth_key.response = NULL;
+ if (rc) {
+ cifs_dbg(FYI,
+ "SMB3 session key generation failed\n");
+ mutex_unlock(&ses->server->srv_mutex);
+ goto keygen_exit;
}
+ }
+ if (!ses->server->session_estab) {
+ ses->server->sequence_number = 0x2;
+ ses->server->session_estab = true;
+ }
+ mutex_unlock(&ses->server->srv_mutex);
+
+ cifs_dbg(FYI, "SMB2/3 session established successfully\n");
+ spin_lock(&GlobalMid_Lock);
+ ses->status = CifsGood;
+ ses->need_reconnect = false;
+ spin_unlock(&GlobalMid_Lock);
- /* NTLMSSP Negotiate sent now processing challenge (response) */
- phase = NtLmChallenge; /* process ntlmssp challenge */
- rc = 0; /* MORE_PROCESSING is not an error here but expected */
- rc = decode_ntlmssp_challenge(rsp->Buffer,
- le16_to_cpu(rsp->SecurityBufferLength), ses);
+keygen_exit:
+ if (!ses->server->sign) {
+ kfree(ses->auth_key.response);
+ ses->auth_key.response = NULL;
+ }
+ return rc;
+}
+
+#ifdef CONFIG_CIFS_UPCALL
+static void
+SMB2_auth_kerberos(struct SMB2_sess_data *sess_data)
+{
+ int rc;
+ struct cifs_ses *ses = sess_data->ses;
+ struct cifs_spnego_msg *msg;
+ struct key *spnego_key = NULL;
+ struct smb2_sess_setup_rsp *rsp = NULL;
+
+ rc = SMB2_sess_alloc_buffer(sess_data);
+ if (rc)
+ goto out;
+
+ spnego_key = cifs_get_spnego_key(ses);
+ if (IS_ERR(spnego_key)) {
+ rc = PTR_ERR(spnego_key);
+ spnego_key = NULL;
+ goto out;
}
+ msg = spnego_key->payload.data[0];
/*
- * BB eventually add code for SPNEGO decoding of NtlmChallenge blob,
- * but at least the raw NTLMSSP case works.
+ * check version field to make sure that cifs.upcall is
+ * sending us a response in an expected form
*/
+ if (msg->version != CIFS_SPNEGO_UPCALL_VERSION) {
+ cifs_dbg(VFS,
+ "bad cifs.upcall version. Expected %d got %d",
+ CIFS_SPNEGO_UPCALL_VERSION, msg->version);
+ rc = -EKEYREJECTED;
+ goto out_put_spnego_key;
+ }
+
+ ses->auth_key.response = kmemdup(msg->data, msg->sesskey_len,
+ GFP_KERNEL);
+ if (!ses->auth_key.response) {
+ cifs_dbg(VFS,
+ "Kerberos can't allocate (%u bytes) memory",
+ msg->sesskey_len);
+ rc = -ENOMEM;
+ goto out_put_spnego_key;
+ }
+ ses->auth_key.len = msg->sesskey_len;
+
+ sess_data->iov[1].iov_base = msg->data + msg->sesskey_len;
+ sess_data->iov[1].iov_len = msg->secblob_len;
+
+ rc = SMB2_sess_sendreceive(sess_data);
+ if (rc)
+ goto out_put_spnego_key;
+
+ rsp = (struct smb2_sess_setup_rsp *)sess_data->iov[0].iov_base;
+ ses->Suid = rsp->hdr.SessionId;
+
+ ses->session_flags = le16_to_cpu(rsp->SessionFlags);
+ if (ses->session_flags & SMB2_SESSION_FLAG_ENCRYPT_DATA)
+ cifs_dbg(VFS, "SMB3 encryption not supported yet\n");
+
+ rc = SMB2_sess_establish_session(sess_data);
+out_put_spnego_key:
+ key_invalidate(spnego_key);
+ key_put(spnego_key);
+out:
+ sess_data->result = rc;
+ sess_data->func = NULL;
+ SMB2_sess_free_buffer(sess_data);
+}
+#else
+static void
+SMB2_auth_kerberos(struct SMB2_sess_data *sess_data)
+{
+ cifs_dbg(VFS, "Kerberos negotiated but upcall support disabled!\n");
+ sess_data->result = -EOPNOTSUPP;
+ sess_data->func = NULL;
+}
+#endif
+
+static void
+SMB2_sess_auth_rawntlmssp_authenticate(struct SMB2_sess_data *sess_data);
+
+static void
+SMB2_sess_auth_rawntlmssp_negotiate(struct SMB2_sess_data *sess_data)
+{
+ int rc;
+ struct cifs_ses *ses = sess_data->ses;
+ struct smb2_sess_setup_rsp *rsp = NULL;
+ char *ntlmssp_blob = NULL;
+ bool use_spnego = false; /* else use raw ntlmssp */
+ u16 blob_length = 0;
+
/*
- * No tcon so can't do
- * cifs_stats_inc(&tcon->stats.smb2_stats.smb2_com_fail[SMB2...]);
+ * If memory allocation is successful, caller of this function
+ * frees it.
*/
- if (rc != 0)
- goto ssetup_exit;
+ ses->ntlmssp = kmalloc(sizeof(struct ntlmssp_auth), GFP_KERNEL);
+ if (!ses->ntlmssp) {
+ rc = -ENOMEM;
+ goto out_err;
+ }
+ ses->ntlmssp->sesskey_per_smbsess = true;
+
+ rc = SMB2_sess_alloc_buffer(sess_data);
+ if (rc)
+ goto out_err;
+
+ ntlmssp_blob = kmalloc(sizeof(struct _NEGOTIATE_MESSAGE),
+ GFP_KERNEL);
+ if (ntlmssp_blob == NULL) {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ build_ntlmssp_negotiate_blob(ntlmssp_blob, ses);
+ if (use_spnego) {
+ /* BB eventually need to add this */
+ cifs_dbg(VFS, "spnego not supported for SMB2 yet\n");
+ rc = -EOPNOTSUPP;
+ goto out;
+ } else {
+ blob_length = sizeof(struct _NEGOTIATE_MESSAGE);
+ /* with raw NTLMSSP we don't encapsulate in SPNEGO */
+ }
+ sess_data->iov[1].iov_base = ntlmssp_blob;
+ sess_data->iov[1].iov_len = blob_length;
+
+ rc = SMB2_sess_sendreceive(sess_data);
+ rsp = (struct smb2_sess_setup_rsp *)sess_data->iov[0].iov_base;
+
+ /* If true, rc here is expected and not an error */
+ if (sess_data->buf0_type != CIFS_NO_BUFFER &&
+ rsp->hdr.Status == STATUS_MORE_PROCESSING_REQUIRED)
+ rc = 0;
+
+ if (rc)
+ goto out;
+
+ if (offsetof(struct smb2_sess_setup_rsp, Buffer) - 4 !=
+ le16_to_cpu(rsp->SecurityBufferOffset)) {
+ cifs_dbg(VFS, "Invalid security buffer offset %d\n",
+ le16_to_cpu(rsp->SecurityBufferOffset));
+ rc = -EIO;
+ goto out;
+ }
+ rc = decode_ntlmssp_challenge(rsp->Buffer,
+ le16_to_cpu(rsp->SecurityBufferLength), ses);
+ if (rc)
+ goto out;
+
+ cifs_dbg(FYI, "rawntlmssp session setup challenge phase\n");
+
+ ses->Suid = rsp->hdr.SessionId;
ses->session_flags = le16_to_cpu(rsp->SessionFlags);
if (ses->session_flags & SMB2_SESSION_FLAG_ENCRYPT_DATA)
cifs_dbg(VFS, "SMB3 encryption not supported yet\n");
-ssetup_exit:
- free_rsp_buf(resp_buftype, rsp);
-
- /* if ntlmssp, and negotiate succeeded, proceed to authenticate phase */
- if ((phase == NtLmChallenge) && (rc == 0))
- goto ssetup_ntlmssp_authenticate;
+out:
+ kfree(ntlmssp_blob);
+ SMB2_sess_free_buffer(sess_data);
if (!rc) {
- mutex_lock(&server->srv_mutex);
- if (server->sign && server->ops->generate_signingkey) {
- rc = server->ops->generate_signingkey(ses);
- kfree(ses->auth_key.response);
- ses->auth_key.response = NULL;
- if (rc) {
- cifs_dbg(FYI,
- "SMB3 session key generation failed\n");
- mutex_unlock(&server->srv_mutex);
- goto keygen_exit;
- }
- }
- if (!server->session_estab) {
- server->sequence_number = 0x2;
- server->session_estab = true;
- }
- mutex_unlock(&server->srv_mutex);
-
- cifs_dbg(FYI, "SMB2/3 session established successfully\n");
- spin_lock(&GlobalMid_Lock);
- ses->status = CifsGood;
- ses->need_reconnect = false;
- spin_unlock(&GlobalMid_Lock);
+ sess_data->result = 0;
+ sess_data->func = SMB2_sess_auth_rawntlmssp_authenticate;
+ return;
}
+out_err:
+ kfree(ses->ntlmssp);
+ ses->ntlmssp = NULL;
+ sess_data->result = rc;
+ sess_data->func = NULL;
+}
-keygen_exit:
- if (!server->sign) {
- kfree(ses->auth_key.response);
- ses->auth_key.response = NULL;
+static void
+SMB2_sess_auth_rawntlmssp_authenticate(struct SMB2_sess_data *sess_data)
+{
+ int rc;
+ struct cifs_ses *ses = sess_data->ses;
+ struct smb2_sess_setup_req *req;
+ struct smb2_sess_setup_rsp *rsp = NULL;
+ unsigned char *ntlmssp_blob = NULL;
+ bool use_spnego = false; /* else use raw ntlmssp */
+ u16 blob_length = 0;
+
+ rc = SMB2_sess_alloc_buffer(sess_data);
+ if (rc)
+ goto out;
+
+ req = (struct smb2_sess_setup_req *) sess_data->iov[0].iov_base;
+ req->hdr.SessionId = ses->Suid;
+
+ rc = build_ntlmssp_auth_blob(&ntlmssp_blob, &blob_length, ses,
+ sess_data->nls_cp);
+ if (rc) {
+ cifs_dbg(FYI, "build_ntlmssp_auth_blob failed %d\n", rc);
+ goto out;
}
- if (spnego_key) {
- key_invalidate(spnego_key);
- key_put(spnego_key);
+
+ if (use_spnego) {
+ /* BB eventually need to add this */
+ cifs_dbg(VFS, "spnego not supported for SMB2 yet\n");
+ rc = -EOPNOTSUPP;
+ goto out;
}
+ sess_data->iov[1].iov_base = ntlmssp_blob;
+ sess_data->iov[1].iov_len = blob_length;
+
+ rc = SMB2_sess_sendreceive(sess_data);
+ if (rc)
+ goto out;
+
+ rsp = (struct smb2_sess_setup_rsp *)sess_data->iov[0].iov_base;
+
+ ses->Suid = rsp->hdr.SessionId;
+ ses->session_flags = le16_to_cpu(rsp->SessionFlags);
+ if (ses->session_flags & SMB2_SESSION_FLAG_ENCRYPT_DATA)
+ cifs_dbg(VFS, "SMB3 encryption not supported yet\n");
+
+ rc = SMB2_sess_establish_session(sess_data);
+out:
+ kfree(ntlmssp_blob);
+ SMB2_sess_free_buffer(sess_data);
kfree(ses->ntlmssp);
+ ses->ntlmssp = NULL;
+ sess_data->result = rc;
+ sess_data->func = NULL;
+}
+static int
+SMB2_select_sec(struct cifs_ses *ses, struct SMB2_sess_data *sess_data)
+{
+ if (ses->sectype != Kerberos && ses->sectype != RawNTLMSSP)
+ ses->sectype = RawNTLMSSP;
+
+ switch (ses->sectype) {
+ case Kerberos:
+ sess_data->func = SMB2_auth_kerberos;
+ break;
+ case RawNTLMSSP:
+ sess_data->func = SMB2_sess_auth_rawntlmssp_negotiate;
+ break;
+ default:
+ cifs_dbg(VFS, "secType %d not supported!\n", ses->sectype);
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+int
+SMB2_sess_setup(const unsigned int xid, struct cifs_ses *ses,
+ const struct nls_table *nls_cp)
+{
+ int rc = 0;
+ struct TCP_Server_Info *server = ses->server;
+ struct SMB2_sess_data *sess_data;
+
+ cifs_dbg(FYI, "Session Setup\n");
+
+ if (!server) {
+ WARN(1, "%s: server is NULL!\n", __func__);
+ return -EIO;
+ }
+
+ sess_data = kzalloc(sizeof(struct SMB2_sess_data), GFP_KERNEL);
+ if (!sess_data)
+ return -ENOMEM;
+
+ rc = SMB2_select_sec(ses, sess_data);
+ if (rc)
+ goto out;
+ sess_data->xid = xid;
+ sess_data->ses = ses;
+ sess_data->buf0_type = CIFS_NO_BUFFER;
+ sess_data->nls_cp = (struct nls_table *) nls_cp;
+
+ while (sess_data->func)
+ sess_data->func(sess_data);
+
+ rc = sess_data->result;
+out:
+ kfree(sess_data);
return rc;
}
@@ -1164,7 +1335,7 @@ create_durable_v2_buf(struct cifs_fid *pfid)
buf->dcontext.Timeout = 0; /* Should this be configurable by workload */
buf->dcontext.Flags = cpu_to_le32(SMB2_DHANDLE_FLAG_PERSISTENT);
- get_random_bytes(buf->dcontext.CreateGuid, 16);
+ generate_random_uuid(buf->dcontext.CreateGuid);
memcpy(pfid->create_guid, buf->dcontext.CreateGuid, 16);
/* SMB2_CREATE_DURABLE_HANDLE_REQUEST is "DH2Q" */
@@ -2057,6 +2228,7 @@ smb2_async_readv(struct cifs_readdata *rdata)
if (rdata->credits) {
buf->CreditCharge = cpu_to_le16(DIV_ROUND_UP(rdata->bytes,
SMB2_MAX_BUFFER_SIZE));
+ buf->CreditRequest = buf->CreditCharge;
spin_lock(&server->req_lock);
server->credits += rdata->credits -
le16_to_cpu(buf->CreditCharge);
@@ -2243,6 +2415,7 @@ smb2_async_writev(struct cifs_writedata *wdata,
if (wdata->credits) {
req->hdr.CreditCharge = cpu_to_le16(DIV_ROUND_UP(wdata->bytes,
SMB2_MAX_BUFFER_SIZE));
+ req->hdr.CreditRequest = req->hdr.CreditCharge;
spin_lock(&server->req_lock);
server->credits += wdata->credits -
le16_to_cpu(req->hdr.CreditCharge);
diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h
index ff88d9feb01e..fd3709e8de33 100644
--- a/fs/cifs/smb2pdu.h
+++ b/fs/cifs/smb2pdu.h
@@ -276,7 +276,7 @@ struct smb2_sess_setup_req {
__le32 Channel;
__le16 SecurityBufferOffset;
__le16 SecurityBufferLength;
- __le64 PreviousSessionId;
+ __u64 PreviousSessionId;
__u8 Buffer[1]; /* variable length GSS security buffer */
} __packed;
diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c
index 5e23f64c0804..20af5187ba63 100644
--- a/fs/cifs/xattr.c
+++ b/fs/cifs/xattr.c
@@ -33,7 +33,8 @@
#define MAX_EA_VALUE_SIZE 65535
#define CIFS_XATTR_CIFS_ACL "system.cifs_acl"
-
+#define CIFS_XATTR_ATTRIB "cifs.dosattrib" /* full name: user.cifs.dosattrib */
+#define CIFS_XATTR_CREATETIME "cifs.creationtime" /* user.cifs.creationtime */
/* BB need to add server (Samba e.g) support for security and trusted prefix */
enum { XATTR_USER, XATTR_CIFS_ACL, XATTR_ACL_ACCESS, XATTR_ACL_DEFAULT };
@@ -144,6 +145,54 @@ out:
return rc;
}
+static int cifs_attrib_get(struct dentry *dentry,
+ struct inode *inode, void *value,
+ size_t size)
+{
+ ssize_t rc;
+ __u32 *pattribute;
+
+ rc = cifs_revalidate_dentry_attr(dentry);
+
+ if (rc)
+ return rc;
+
+ if ((value == NULL) || (size == 0))
+ return sizeof(__u32);
+ else if (size < sizeof(__u32))
+ return -ERANGE;
+
+ /* return dos attributes as pseudo xattr */
+ pattribute = (__u32 *)value;
+ *pattribute = CIFS_I(inode)->cifsAttrs;
+
+ return sizeof(__u32);
+}
+
+static int cifs_creation_time_get(struct dentry *dentry, struct inode *inode,
+ void *value, size_t size)
+{
+ ssize_t rc;
+ __u64 * pcreatetime;
+
+ rc = cifs_revalidate_dentry_attr(dentry);
+ if (rc)
+ return rc;
+
+ if ((value == NULL) || (size == 0))
+ return sizeof(__u64);
+ else if (size < sizeof(__u64))
+ return -ERANGE;
+
+ /* return dos attributes as pseudo xattr */
+ pcreatetime = (__u64 *)value;
+ *pcreatetime = CIFS_I(inode)->createtime;
+ return sizeof(__u64);
+
+ return rc;
+}
+
+
static int cifs_xattr_get(const struct xattr_handler *handler,
struct dentry *dentry, struct inode *inode,
const char *name, void *value, size_t size)
@@ -168,10 +217,19 @@ static int cifs_xattr_get(const struct xattr_handler *handler,
rc = -ENOMEM;
goto out;
}
- /* return dos attributes as pseudo xattr */
+
/* return alt name if available as pseudo attr */
switch (handler->flags) {
case XATTR_USER:
+ cifs_dbg(FYI, "%s:querying user xattr %s\n", __func__, name);
+ if (strcmp(name, CIFS_XATTR_ATTRIB) == 0) {
+ rc = cifs_attrib_get(dentry, inode, value, size);
+ break;
+ } else if (strcmp(name, CIFS_XATTR_CREATETIME) == 0) {
+ rc = cifs_creation_time_get(dentry, inode, value, size);
+ break;
+ }
+
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR)
goto out;
diff --git a/fs/crypto/crypto.c b/fs/crypto/crypto.c
index 61057b7dbddb..98f87fe8f186 100644
--- a/fs/crypto/crypto.c
+++ b/fs/crypto/crypto.c
@@ -151,7 +151,10 @@ static int do_page_crypto(struct inode *inode,
struct page *src_page, struct page *dest_page,
gfp_t gfp_flags)
{
- u8 xts_tweak[FS_XTS_TWEAK_SIZE];
+ struct {
+ __le64 index;
+ u8 padding[FS_XTS_TWEAK_SIZE - sizeof(__le64)];
+ } xts_tweak;
struct skcipher_request *req = NULL;
DECLARE_FS_COMPLETION_RESULT(ecr);
struct scatterlist dst, src;
@@ -171,17 +174,15 @@ static int do_page_crypto(struct inode *inode,
req, CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
page_crypt_complete, &ecr);
- BUILD_BUG_ON(FS_XTS_TWEAK_SIZE < sizeof(index));
- memcpy(xts_tweak, &index, sizeof(index));
- memset(&xts_tweak[sizeof(index)], 0,
- FS_XTS_TWEAK_SIZE - sizeof(index));
+ BUILD_BUG_ON(sizeof(xts_tweak) != FS_XTS_TWEAK_SIZE);
+ xts_tweak.index = cpu_to_le64(index);
+ memset(xts_tweak.padding, 0, sizeof(xts_tweak.padding));
sg_init_table(&dst, 1);
sg_set_page(&dst, dest_page, PAGE_SIZE, 0);
sg_init_table(&src, 1);
sg_set_page(&src, src_page, PAGE_SIZE, 0);
- skcipher_request_set_crypt(req, &src, &dst, PAGE_SIZE,
- xts_tweak);
+ skcipher_request_set_crypt(req, &src, &dst, PAGE_SIZE, &xts_tweak);
if (rw == FS_DECRYPT)
res = crypto_skcipher_decrypt(req);
else
diff --git a/fs/crypto/policy.c b/fs/crypto/policy.c
index ed115acb5dee..6865663aac69 100644
--- a/fs/crypto/policy.c
+++ b/fs/crypto/policy.c
@@ -109,6 +109,8 @@ int fscrypt_process_policy(struct file *filp,
if (ret)
return ret;
+ inode_lock(inode);
+
if (!inode_has_encryption_context(inode)) {
if (!S_ISDIR(inode->i_mode))
ret = -EINVAL;
@@ -127,6 +129,8 @@ int fscrypt_process_policy(struct file *filp,
ret = -EINVAL;
}
+ inode_unlock(inode);
+
mnt_drop_write_file(filp);
return ret;
}
diff --git a/fs/exec.c b/fs/exec.c
index 6fcfb3f7b137..4e497b9ee71e 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -191,6 +191,7 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
{
struct page *page;
int ret;
+ unsigned int gup_flags = FOLL_FORCE;
#ifdef CONFIG_STACK_GROWSUP
if (write) {
@@ -199,12 +200,16 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
return NULL;
}
#endif
+
+ if (write)
+ gup_flags |= FOLL_WRITE;
+
/*
* We are doing an exec(). 'current' is the process
* doing the exec and bprm->mm is the new process's mm.
*/
- ret = get_user_pages_remote(current, bprm->mm, pos, 1, write,
- 1, &page, NULL);
+ ret = get_user_pages_remote(current, bprm->mm, pos, 1, gup_flags,
+ &page, NULL);
if (ret <= 0)
return NULL;
diff --git a/fs/exofs/dir.c b/fs/exofs/dir.c
index 79101651fe9e..42f9a0a0c4ca 100644
--- a/fs/exofs/dir.c
+++ b/fs/exofs/dir.c
@@ -137,7 +137,7 @@ Espan:
bad_entry:
EXOFS_ERR(
"ERROR [exofs_check_page]: bad entry in directory(0x%lx): %s - "
- "offset=%lu, inode=0x%llu, rec_len=%d, name_len=%d\n",
+ "offset=%lu, inode=0x%llx, rec_len=%d, name_len=%d\n",
dir->i_ino, error, (page->index<<PAGE_SHIFT)+offs,
_LLU(le64_to_cpu(p->inode_no)),
rec_len, p->name_len);
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index d831e24dc885..41b8b44a391c 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -622,7 +622,7 @@ static int ext2_get_blocks(struct inode *inode,
u32 *bno, bool *new, bool *boundary,
int create)
{
- int err = -EIO;
+ int err;
int offsets[4];
Indirect chain[4];
Indirect *partial;
@@ -639,7 +639,7 @@ static int ext2_get_blocks(struct inode *inode,
depth = ext2_block_to_path(inode,iblock,offsets,&blocks_to_boundary);
if (depth == 0)
- return (err);
+ return -EIO;
partial = ext2_get_branch(inode, depth, offsets, chain, &err);
/* Simplest case - block found, no allocation needed */
@@ -761,7 +761,6 @@ static int ext2_get_blocks(struct inode *inode,
ext2_splice_branch(inode, iblock, partial, indirect_blks, count);
mutex_unlock(&ei->truncate_mutex);
got_it:
- *bno = le32_to_cpu(chain[depth-1].key);
if (count > blocks_to_boundary)
*boundary = true;
err = count;
@@ -772,6 +771,8 @@ cleanup:
brelse(partial->bh);
partial--;
}
+ if (err > 0)
+ *bno = le32_to_cpu(chain[depth-1].key);
return err;
}
diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c
index 02ddec6d8a7d..fdb19543af1e 100644
--- a/fs/ext4/block_validity.c
+++ b/fs/ext4/block_validity.c
@@ -128,12 +128,12 @@ static void debug_print_tree(struct ext4_sb_info *sbi)
node = rb_first(&sbi->system_blks);
while (node) {
entry = rb_entry(node, struct ext4_system_zone, node);
- printk("%s%llu-%llu", first ? "" : ", ",
+ printk(KERN_CONT "%s%llu-%llu", first ? "" : ", ",
entry->start_blk, entry->start_blk + entry->count - 1);
first = 0;
node = rb_next(node);
}
- printk("\n");
+ printk(KERN_CONT "\n");
}
int ext4_setup_system_zone(struct super_block *sb)
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
index 3ef1df6ae9ec..1aba469f8220 100644
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -27,16 +27,15 @@
#ifdef CONFIG_EXT4_DEBUG
extern ushort ext4_mballoc_debug;
-#define mb_debug(n, fmt, a...) \
- do { \
- if ((n) <= ext4_mballoc_debug) { \
- printk(KERN_DEBUG "(%s, %d): %s: ", \
- __FILE__, __LINE__, __func__); \
- printk(fmt, ## a); \
- } \
- } while (0)
+#define mb_debug(n, fmt, ...) \
+do { \
+ if ((n) <= ext4_mballoc_debug) { \
+ printk(KERN_DEBUG "(%s, %d): %s: " fmt, \
+ __FILE__, __LINE__, __func__, ##__VA_ARGS__); \
+ } \
+} while (0)
#else
-#define mb_debug(n, fmt, a...) no_printk(fmt, ## a)
+#define mb_debug(n, fmt, ...) no_printk(fmt, ##__VA_ARGS__)
#endif
#define EXT4_MB_HISTORY_ALLOC 1 /* allocation */
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index f92f10d4f66a..104f8bfba718 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -577,12 +577,13 @@ static inline unsigned dx_node_limit(struct inode *dir)
static void dx_show_index(char * label, struct dx_entry *entries)
{
int i, n = dx_get_count (entries);
- printk(KERN_DEBUG "%s index ", label);
+ printk(KERN_DEBUG "%s index", label);
for (i = 0; i < n; i++) {
- printk("%x->%lu ", i ? dx_get_hash(entries + i) :
- 0, (unsigned long)dx_get_block(entries + i));
+ printk(KERN_CONT " %x->%lu",
+ i ? dx_get_hash(entries + i) : 0,
+ (unsigned long)dx_get_block(entries + i));
}
- printk("\n");
+ printk(KERN_CONT "\n");
}
struct stats
@@ -679,7 +680,7 @@ static struct stats dx_show_leaf(struct inode *dir,
}
de = ext4_next_entry(de, size);
}
- printk("(%i)\n", names);
+ printk(KERN_CONT "(%i)\n", names);
return (struct stats) { names, space, 1 };
}
@@ -798,7 +799,7 @@ dx_probe(struct ext4_filename *fname, struct inode *dir,
q = entries + count - 1;
while (p <= q) {
m = p + (q - p) / 2;
- dxtrace(printk("."));
+ dxtrace(printk(KERN_CONT "."));
if (dx_get_hash(m) > hash)
q = m - 1;
else
@@ -810,7 +811,7 @@ dx_probe(struct ext4_filename *fname, struct inode *dir,
at = entries;
while (n--)
{
- dxtrace(printk(","));
+ dxtrace(printk(KERN_CONT ","));
if (dx_get_hash(++at) > hash)
{
at--;
@@ -821,7 +822,8 @@ dx_probe(struct ext4_filename *fname, struct inode *dir,
}
at = p - 1;
- dxtrace(printk(" %x->%u\n", at == entries ? 0 : dx_get_hash(at),
+ dxtrace(printk(KERN_CONT " %x->%u\n",
+ at == entries ? 0 : dx_get_hash(at),
dx_get_block(at)));
frame->entries = entries;
frame->at = at;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 6db81fbcbaa6..20da99da0a34 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -597,14 +597,15 @@ void __ext4_std_error(struct super_block *sb, const char *function,
void __ext4_abort(struct super_block *sb, const char *function,
unsigned int line, const char *fmt, ...)
{
+ struct va_format vaf;
va_list args;
save_error_info(sb, function, line);
va_start(args, fmt);
- printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: ", sb->s_id,
- function, line);
- vprintk(fmt, args);
- printk("\n");
+ vaf.fmt = fmt;
+ vaf.va = &args;
+ printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: %pV\n",
+ sb->s_id, function, line, &vaf);
va_end(args);
if ((sb->s_flags & MS_RDONLY) == 0) {
@@ -2715,12 +2716,12 @@ static void print_daily_error_info(unsigned long arg)
es->s_first_error_func,
le32_to_cpu(es->s_first_error_line));
if (es->s_first_error_ino)
- printk(": inode %u",
+ printk(KERN_CONT ": inode %u",
le32_to_cpu(es->s_first_error_ino));
if (es->s_first_error_block)
- printk(": block %llu", (unsigned long long)
+ printk(KERN_CONT ": block %llu", (unsigned long long)
le64_to_cpu(es->s_first_error_block));
- printk("\n");
+ printk(KERN_CONT "\n");
}
if (es->s_last_error_time) {
printk(KERN_NOTICE "EXT4-fs (%s): last error at time %u: %.*s:%d",
@@ -2729,12 +2730,12 @@ static void print_daily_error_info(unsigned long arg)
es->s_last_error_func,
le32_to_cpu(es->s_last_error_line));
if (es->s_last_error_ino)
- printk(": inode %u",
+ printk(KERN_CONT ": inode %u",
le32_to_cpu(es->s_last_error_ino));
if (es->s_last_error_block)
- printk(": block %llu", (unsigned long long)
+ printk(KERN_CONT ": block %llu", (unsigned long long)
le64_to_cpu(es->s_last_error_block));
- printk("\n");
+ printk(KERN_CONT "\n");
}
mod_timer(&sbi->s_err_report, jiffies + 24*60*60*HZ); /* Once a day */
}
diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c
index 73bcfd41f5f2..42145be5c6b4 100644
--- a/fs/ext4/sysfs.c
+++ b/fs/ext4/sysfs.c
@@ -223,14 +223,18 @@ static struct attribute *ext4_attrs[] = {
EXT4_ATTR_FEATURE(lazy_itable_init);
EXT4_ATTR_FEATURE(batched_discard);
EXT4_ATTR_FEATURE(meta_bg_resize);
+#ifdef CONFIG_EXT4_FS_ENCRYPTION
EXT4_ATTR_FEATURE(encryption);
+#endif
EXT4_ATTR_FEATURE(metadata_csum_seed);
static struct attribute *ext4_feat_attrs[] = {
ATTR_LIST(lazy_itable_init),
ATTR_LIST(batched_discard),
ATTR_LIST(meta_bg_resize),
+#ifdef CONFIG_EXT4_FS_ENCRYPTION
ATTR_LIST(encryption),
+#endif
ATTR_LIST(metadata_csum_seed),
NULL,
};
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index c15d63389957..d77be9e9f535 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -61,18 +61,12 @@
#include "acl.h"
#ifdef EXT4_XATTR_DEBUG
-# define ea_idebug(inode, f...) do { \
- printk(KERN_DEBUG "inode %s:%lu: ", \
- inode->i_sb->s_id, inode->i_ino); \
- printk(f); \
- printk("\n"); \
- } while (0)
-# define ea_bdebug(bh, f...) do { \
- printk(KERN_DEBUG "block %pg:%lu: ", \
- bh->b_bdev, (unsigned long) bh->b_blocknr); \
- printk(f); \
- printk("\n"); \
- } while (0)
+# define ea_idebug(inode, fmt, ...) \
+ printk(KERN_DEBUG "inode %s:%lu: " fmt "\n", \
+ inode->i_sb->s_id, inode->i_ino, ##__VA_ARGS__)
+# define ea_bdebug(bh, fmt, ...) \
+ printk(KERN_DEBUG "block %pg:%lu: " fmt "\n", \
+ bh->b_bdev, (unsigned long)bh->b_blocknr, ##__VA_ARGS__)
#else
# define ea_idebug(inode, fmt, ...) no_printk(fmt, ##__VA_ARGS__)
# define ea_bdebug(bh, fmt, ...) no_printk(fmt, ##__VA_ARGS__)
@@ -241,7 +235,7 @@ __xattr_check_inode(struct inode *inode, struct ext4_xattr_ibody_header *header,
int error = -EFSCORRUPTED;
if (((void *) header >= end) ||
- (header->h_magic != le32_to_cpu(EXT4_XATTR_MAGIC)))
+ (header->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC)))
goto errout;
error = ext4_xattr_check_names(entry, end, entry);
errout:
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 93985c64d8a8..6f14ee923acd 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -852,16 +852,16 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi,
for (segno = start_segno; segno < end_segno; segno++) {
- if (get_valid_blocks(sbi, segno, 1) == 0 ||
- unlikely(f2fs_cp_error(sbi)))
- goto next;
-
/* find segment summary of victim */
sum_page = find_get_page(META_MAPPING(sbi),
GET_SUM_BLOCK(sbi, segno));
- f2fs_bug_on(sbi, !PageUptodate(sum_page));
f2fs_put_page(sum_page, 0);
+ if (get_valid_blocks(sbi, segno, 1) == 0 ||
+ !PageUptodate(sum_page) ||
+ unlikely(f2fs_cp_error(sbi)))
+ goto next;
+
sum = page_address(sum_page);
f2fs_bug_on(sbi, type != GET_SUM_TYPE((&sum->footer)));
diff --git a/fs/iomap.c b/fs/iomap.c
index 013d1d36fbbf..a8ee8c33ca78 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -433,8 +433,7 @@ iomap_page_mkwrite_actor(struct inode *inode, loff_t pos, loff_t length,
struct page *page = data;
int ret;
- ret = __block_write_begin_int(page, pos & ~PAGE_MASK, length,
- NULL, iomap);
+ ret = __block_write_begin_int(page, pos, length, NULL, iomap);
if (ret)
return ret;
@@ -561,7 +560,7 @@ int iomap_fiemap(struct inode *inode, struct fiemap_extent_info *fi,
}
while (len > 0) {
- ret = iomap_apply(inode, start, len, 0, ops, &ctx,
+ ret = iomap_apply(inode, start, len, IOMAP_REPORT, ops, &ctx,
iomap_fiemap_actor);
/* inode with no (attribute) mapping will give ENOENT */
if (ret == -ENOENT)
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index ad0c745ebad7..871c8b392099 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -687,6 +687,11 @@ static int isofs_fill_super(struct super_block *s, void *data, int silent)
pri_bh = NULL;
root_found:
+ /* We don't support read-write mounts */
+ if (!(s->s_flags & MS_RDONLY)) {
+ error = -EACCES;
+ goto out_freebh;
+ }
if (joliet_level && (pri == NULL || !opt.rock)) {
/* This is the case of Joliet with the norock mount flag.
@@ -1501,9 +1506,6 @@ struct inode *__isofs_iget(struct super_block *sb,
static struct dentry *isofs_mount(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data)
{
- /* We don't support read-write mounts */
- if (!(flags & MS_RDONLY))
- return ERR_PTR(-EACCES);
return mount_bdev(fs_type, flags, dev_name, data, isofs_fill_super);
}
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 3d8246a9faa4..e1652665bd93 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -1149,6 +1149,7 @@ int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh)
JBUFFER_TRACE(jh, "file as BJ_Reserved");
spin_lock(&journal->j_list_lock);
__jbd2_journal_file_buffer(jh, transaction, BJ_Reserved);
+ spin_unlock(&journal->j_list_lock);
} else if (jh->b_transaction == journal->j_committing_transaction) {
/* first access by this transaction */
jh->b_modified = 0;
@@ -1156,8 +1157,8 @@ int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh)
JBUFFER_TRACE(jh, "set next transaction");
spin_lock(&journal->j_list_lock);
jh->b_next_transaction = transaction;
+ spin_unlock(&journal->j_list_lock);
}
- spin_unlock(&journal->j_list_lock);
jbd_unlock_bh_state(bh);
/*
diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
index dcd96aac02f5..cf4c636ff4da 100644
--- a/fs/kernfs/dir.c
+++ b/fs/kernfs/dir.c
@@ -110,8 +110,9 @@ static struct kernfs_node *kernfs_common_ancestor(struct kernfs_node *a,
* kn_to: /n1/n2/n3 [depth=3]
* result: /../..
*
- * return value: length of the string. If greater than buflen,
- * then contents of buf are undefined. On error, -1 is returned.
+ * Returns the length of the full path. If the full length is equal to or
+ * greater than @buflen, @buf contains the truncated path with the trailing
+ * '\0'. On error, -errno is returned.
*/
static int kernfs_path_from_node_locked(struct kernfs_node *kn_to,
struct kernfs_node *kn_from,
@@ -119,9 +120,8 @@ static int kernfs_path_from_node_locked(struct kernfs_node *kn_to,
{
struct kernfs_node *kn, *common;
const char parent_str[] = "/..";
- size_t depth_from, depth_to, len = 0, nlen = 0;
- char *p;
- int i;
+ size_t depth_from, depth_to, len = 0;
+ int i, j;
if (!kn_from)
kn_from = kernfs_root(kn_to)->kn;
@@ -131,7 +131,7 @@ static int kernfs_path_from_node_locked(struct kernfs_node *kn_to,
common = kernfs_common_ancestor(kn_from, kn_to);
if (WARN_ON(!common))
- return -1;
+ return -EINVAL;
depth_to = kernfs_depth(common, kn_to);
depth_from = kernfs_depth(common, kn_from);
@@ -144,22 +144,16 @@ static int kernfs_path_from_node_locked(struct kernfs_node *kn_to,
len < buflen ? buflen - len : 0);
/* Calculate how many bytes we need for the rest */
- for (kn = kn_to; kn != common; kn = kn->parent)
- nlen += strlen(kn->name) + 1;
-
- if (len + nlen >= buflen)
- return len + nlen;
-
- p = buf + len + nlen;
- *p = '\0';
- for (kn = kn_to; kn != common; kn = kn->parent) {
- size_t tmp = strlen(kn->name);
- p -= tmp;
- memcpy(p, kn->name, tmp);
- *(--p) = '/';
+ for (i = depth_to - 1; i >= 0; i--) {
+ for (kn = kn_to, j = 0; j < i; j++)
+ kn = kn->parent;
+ len += strlcpy(buf + len, "/",
+ len < buflen ? buflen - len : 0);
+ len += strlcpy(buf + len, kn->name,
+ len < buflen ? buflen - len : 0);
}
- return len + nlen;
+ return len;
}
/**
@@ -186,29 +180,6 @@ int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen)
}
/**
- * kernfs_path_len - determine the length of the full path of a given node
- * @kn: kernfs_node of interest
- *
- * The returned length doesn't include the space for the terminating '\0'.
- */
-size_t kernfs_path_len(struct kernfs_node *kn)
-{
- size_t len = 0;
- unsigned long flags;
-
- spin_lock_irqsave(&kernfs_rename_lock, flags);
-
- do {
- len += strlen(kn->name) + 1;
- kn = kn->parent;
- } while (kn && kn->parent);
-
- spin_unlock_irqrestore(&kernfs_rename_lock, flags);
-
- return len;
-}
-
-/**
* kernfs_path_from_node - build path of node @to relative to @from.
* @from: parent kernfs_node relative to which we need to build the path
* @to: kernfs_node of interest
@@ -220,8 +191,9 @@ size_t kernfs_path_len(struct kernfs_node *kn)
* path (which includes '..'s) as needed to reach from @from to @to is
* returned.
*
- * If @buf isn't long enough, the return value will be greater than @buflen
- * and @buf contents are undefined.
+ * Returns the length of the full path. If the full length is equal to or
+ * greater than @buflen, @buf contains the truncated path with the trailing
+ * '\0'. On error, -errno is returned.
*/
int kernfs_path_from_node(struct kernfs_node *to, struct kernfs_node *from,
char *buf, size_t buflen)
@@ -237,28 +209,6 @@ int kernfs_path_from_node(struct kernfs_node *to, struct kernfs_node *from,
EXPORT_SYMBOL_GPL(kernfs_path_from_node);
/**
- * kernfs_path - build full path of a given node
- * @kn: kernfs_node of interest
- * @buf: buffer to copy @kn's name into
- * @buflen: size of @buf
- *
- * Builds and returns the full path of @kn in @buf of @buflen bytes. The
- * path is built from the end of @buf so the returned pointer usually
- * doesn't match @buf. If @buf isn't long enough, @buf is nul terminated
- * and %NULL is returned.
- */
-char *kernfs_path(struct kernfs_node *kn, char *buf, size_t buflen)
-{
- int ret;
-
- ret = kernfs_path_from_node(kn, NULL, buf, buflen);
- if (ret < 0 || ret >= buflen)
- return NULL;
- return buf;
-}
-EXPORT_SYMBOL_GPL(kernfs_path);
-
-/**
* pr_cont_kernfs_name - pr_cont name of a kernfs_node
* @kn: kernfs_node of interest
*
diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
index 2bcb86e6e6ca..78219d5644e9 100644
--- a/fs/kernfs/file.c
+++ b/fs/kernfs/file.c
@@ -911,6 +911,7 @@ const struct file_operations kernfs_file_fops = {
.open = kernfs_fop_open,
.release = kernfs_fop_release,
.poll = kernfs_fop_poll,
+ .fsync = noop_fsync,
};
/**
diff --git a/fs/locks.c b/fs/locks.c
index ce93b416b490..22c5b4aa4961 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1609,6 +1609,7 @@ int fcntl_getlease(struct file *filp)
ctx = smp_load_acquire(&inode->i_flctx);
if (ctx && !list_empty_careful(&ctx->flc_lease)) {
+ percpu_down_read_preempt_disable(&file_rwsem);
spin_lock(&ctx->flc_lock);
time_out_leases(inode, &dispose);
list_for_each_entry(fl, &ctx->flc_lease, fl_list) {
@@ -1618,6 +1619,8 @@ int fcntl_getlease(struct file *filp)
break;
}
spin_unlock(&ctx->flc_lock);
+ percpu_up_read_preempt_enable(&file_rwsem);
+
locks_dispose_list(&dispose);
}
return type;
@@ -2529,11 +2532,14 @@ locks_remove_lease(struct file *filp, struct file_lock_context *ctx)
if (list_empty(&ctx->flc_lease))
return;
+ percpu_down_read_preempt_disable(&file_rwsem);
spin_lock(&ctx->flc_lock);
list_for_each_entry_safe(fl, tmp, &ctx->flc_lease, fl_list)
if (filp == fl->fl_file)
lease_modify(fl, F_UNLCK, &dispose);
spin_unlock(&ctx->flc_lock);
+ percpu_up_read_preempt_enable(&file_rwsem);
+
locks_dispose_list(&dispose);
}
diff --git a/fs/namei.c b/fs/namei.c
index a7f601cd521a..5b4eed221530 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -4668,6 +4668,31 @@ int generic_readlink(struct dentry *dentry, char __user *buffer, int buflen)
}
EXPORT_SYMBOL(generic_readlink);
+/**
+ * vfs_get_link - get symlink body
+ * @dentry: dentry on which to get symbolic link
+ * @done: caller needs to free returned data with this
+ *
+ * Calls security hook and i_op->get_link() on the supplied inode.
+ *
+ * It does not touch atime. That's up to the caller if necessary.
+ *
+ * Does not work on "special" symlinks like /proc/$$/fd/N
+ */
+const char *vfs_get_link(struct dentry *dentry, struct delayed_call *done)
+{
+ const char *res = ERR_PTR(-EINVAL);
+ struct inode *inode = d_inode(dentry);
+
+ if (d_is_symlink(dentry)) {
+ res = ERR_PTR(security_inode_readlink(dentry));
+ if (!res)
+ res = inode->i_op->get_link(dentry, inode, done);
+ }
+ return res;
+}
+EXPORT_SYMBOL(vfs_get_link);
+
/* get the link contents into pagecache */
const char *page_get_link(struct dentry *dentry, struct inode *inode,
struct delayed_call *callback)
diff --git a/fs/namespace.c b/fs/namespace.c
index 58aca9c931ac..e6c234b1a645 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2824,6 +2824,7 @@ static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns)
return new_ns;
}
+__latent_entropy
struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
struct user_namespace *user_ns, struct fs_struct *new_fs)
{
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 217847679f0e..2905479f214a 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -344,9 +344,10 @@ static void bl_write_cleanup(struct work_struct *work)
u64 start = hdr->args.offset & (loff_t)PAGE_MASK;
u64 end = (hdr->args.offset + hdr->args.count +
PAGE_SIZE - 1) & (loff_t)PAGE_MASK;
+ u64 lwb = hdr->args.offset + hdr->args.count;
ext_tree_mark_written(bl, start >> SECTOR_SHIFT,
- (end - start) >> SECTOR_SHIFT, end);
+ (end - start) >> SECTOR_SHIFT, lwb);
}
pnfs_ld_write_done(hdr);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index ad917bd72b38..7897826d7c51 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -1545,7 +1545,7 @@ static int update_open_stateid(struct nfs4_state *state,
struct nfs_client *clp = server->nfs_client;
struct nfs_inode *nfsi = NFS_I(state->inode);
struct nfs_delegation *deleg_cur;
- nfs4_stateid freeme = {0};
+ nfs4_stateid freeme = { };
int ret = 0;
fmode &= (FMODE_READ|FMODE_WRITE);
diff --git a/fs/orangefs/dcache.c b/fs/orangefs/dcache.c
index 1e8fe844e69f..5355efba4bc8 100644
--- a/fs/orangefs/dcache.c
+++ b/fs/orangefs/dcache.c
@@ -73,7 +73,7 @@ static int orangefs_revalidate_lookup(struct dentry *dentry)
}
}
- dentry->d_time = jiffies + orangefs_dcache_timeout_msecs*HZ/1000;
+ orangefs_set_timeout(dentry);
ret = 1;
out_release_op:
op_release(new_op);
@@ -94,8 +94,9 @@ out_drop:
static int orangefs_d_revalidate(struct dentry *dentry, unsigned int flags)
{
int ret;
+ unsigned long time = (unsigned long) dentry->d_fsdata;
- if (time_before(jiffies, dentry->d_time))
+ if (time_before(jiffies, time))
return 1;
if (flags & LOOKUP_RCU)
diff --git a/fs/orangefs/file.c b/fs/orangefs/file.c
index 66ea0cc37b18..02cc6139ec90 100644
--- a/fs/orangefs/file.c
+++ b/fs/orangefs/file.c
@@ -621,9 +621,9 @@ static int orangefs_file_release(struct inode *inode, struct file *file)
* readahead cache (if any); this forces an expensive refresh of
* data for the next caller of mmap (or 'get_block' accesses)
*/
- if (file->f_path.dentry->d_inode &&
- file->f_path.dentry->d_inode->i_mapping &&
- mapping_nrpages(&file->f_path.dentry->d_inode->i_data)) {
+ if (file_inode(file) &&
+ file_inode(file)->i_mapping &&
+ mapping_nrpages(&file_inode(file)->i_data)) {
if (orangefs_features & ORANGEFS_FEATURE_READAHEAD) {
gossip_debug(GOSSIP_INODE_DEBUG,
"calling flush_racache on %pU\n",
@@ -632,7 +632,7 @@ static int orangefs_file_release(struct inode *inode, struct file *file)
gossip_debug(GOSSIP_INODE_DEBUG,
"flush_racache finished\n");
}
- truncate_inode_pages(file->f_path.dentry->d_inode->i_mapping,
+ truncate_inode_pages(file_inode(file)->i_mapping,
0);
}
return 0;
@@ -648,7 +648,7 @@ static int orangefs_fsync(struct file *file,
{
int ret = -EINVAL;
struct orangefs_inode_s *orangefs_inode =
- ORANGEFS_I(file->f_path.dentry->d_inode);
+ ORANGEFS_I(file_inode(file));
struct orangefs_kernel_op_s *new_op = NULL;
/* required call */
@@ -661,7 +661,7 @@ static int orangefs_fsync(struct file *file,
ret = service_operation(new_op,
"orangefs_fsync",
- get_interruptible_flag(file->f_path.dentry->d_inode));
+ get_interruptible_flag(file_inode(file)));
gossip_debug(GOSSIP_FILE_DEBUG,
"orangefs_fsync got return value of %d\n",
@@ -669,7 +669,7 @@ static int orangefs_fsync(struct file *file,
op_release(new_op);
- orangefs_flush_inode(file->f_path.dentry->d_inode);
+ orangefs_flush_inode(file_inode(file));
return ret;
}
diff --git a/fs/orangefs/namei.c b/fs/orangefs/namei.c
index d15d3d2dba62..a290ff6ec756 100644
--- a/fs/orangefs/namei.c
+++ b/fs/orangefs/namei.c
@@ -72,7 +72,7 @@ static int orangefs_create(struct inode *dir,
d_instantiate(dentry, inode);
unlock_new_inode(inode);
- dentry->d_time = jiffies + orangefs_dcache_timeout_msecs*HZ/1000;
+ orangefs_set_timeout(dentry);
ORANGEFS_I(inode)->getattr_time = jiffies - 1;
gossip_debug(GOSSIP_NAME_DEBUG,
@@ -183,7 +183,7 @@ static struct dentry *orangefs_lookup(struct inode *dir, struct dentry *dentry,
goto out;
}
- dentry->d_time = jiffies + orangefs_dcache_timeout_msecs*HZ/1000;
+ orangefs_set_timeout(dentry);
inode = orangefs_iget(dir->i_sb, &new_op->downcall.resp.lookup.refn);
if (IS_ERR(inode)) {
@@ -322,7 +322,7 @@ static int orangefs_symlink(struct inode *dir,
d_instantiate(dentry, inode);
unlock_new_inode(inode);
- dentry->d_time = jiffies + orangefs_dcache_timeout_msecs*HZ/1000;
+ orangefs_set_timeout(dentry);
ORANGEFS_I(inode)->getattr_time = jiffies - 1;
gossip_debug(GOSSIP_NAME_DEBUG,
@@ -386,7 +386,7 @@ static int orangefs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode
d_instantiate(dentry, inode);
unlock_new_inode(inode);
- dentry->d_time = jiffies + orangefs_dcache_timeout_msecs*HZ/1000;
+ orangefs_set_timeout(dentry);
ORANGEFS_I(inode)->getattr_time = jiffies - 1;
gossip_debug(GOSSIP_NAME_DEBUG,
diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h
index 0a82048f3aaf..3bf803d732c5 100644
--- a/fs/orangefs/orangefs-kernel.h
+++ b/fs/orangefs/orangefs-kernel.h
@@ -580,4 +580,11 @@ static inline void orangefs_i_size_write(struct inode *inode, loff_t i_size)
#endif
}
+static inline void orangefs_set_timeout(struct dentry *dentry)
+{
+ unsigned long time = jiffies + orangefs_dcache_timeout_msecs*HZ/1000;
+
+ dentry->d_fsdata = (void *) time;
+}
+
#endif /* __ORANGEFSKERNEL_H */
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index 3f803b3a1f82..aeb60f791418 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -57,6 +57,7 @@ int ovl_copy_xattr(struct dentry *old, struct dentry *new)
ssize_t list_size, size, value_size = 0;
char *buf, *name, *value = NULL;
int uninitialized_var(error);
+ size_t slen;
if (!(old->d_inode->i_opflags & IOP_XATTR) ||
!(new->d_inode->i_opflags & IOP_XATTR))
@@ -79,7 +80,16 @@ int ovl_copy_xattr(struct dentry *old, struct dentry *new)
goto out;
}
- for (name = buf; name < (buf + list_size); name += strlen(name) + 1) {
+ for (name = buf; list_size; name += slen) {
+ slen = strnlen(name, list_size) + 1;
+
+ /* underlying fs providing us with an broken xattr list? */
+ if (WARN_ON(slen > list_size)) {
+ error = -EIO;
+ break;
+ }
+ list_size -= slen;
+
if (ovl_is_private_xattr(name))
continue;
retry:
@@ -174,40 +184,6 @@ out_fput:
return error;
}
-static char *ovl_read_symlink(struct dentry *realdentry)
-{
- int res;
- char *buf;
- struct inode *inode = realdentry->d_inode;
- mm_segment_t old_fs;
-
- res = -EINVAL;
- if (!inode->i_op->readlink)
- goto err;
-
- res = -ENOMEM;
- buf = (char *) __get_free_page(GFP_KERNEL);
- if (!buf)
- goto err;
-
- old_fs = get_fs();
- set_fs(get_ds());
- /* The cast to a user pointer is valid due to the set_fs() */
- res = inode->i_op->readlink(realdentry,
- (char __user *)buf, PAGE_SIZE - 1);
- set_fs(old_fs);
- if (res < 0) {
- free_page((unsigned long) buf);
- goto err;
- }
- buf[res] = '\0';
-
- return buf;
-
-err:
- return ERR_PTR(res);
-}
-
static int ovl_set_timestamps(struct dentry *upperdentry, struct kstat *stat)
{
struct iattr attr = {
@@ -354,19 +330,20 @@ out_cleanup:
int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
struct path *lowerpath, struct kstat *stat)
{
+ DEFINE_DELAYED_CALL(done);
struct dentry *workdir = ovl_workdir(dentry);
int err;
struct kstat pstat;
struct path parentpath;
+ struct dentry *lowerdentry = lowerpath->dentry;
struct dentry *upperdir;
struct dentry *upperdentry;
- const struct cred *old_cred;
- char *link = NULL;
+ const char *link = NULL;
if (WARN_ON(!workdir))
return -EROFS;
- ovl_do_check_copy_up(lowerpath->dentry);
+ ovl_do_check_copy_up(lowerdentry);
ovl_path_upper(parent, &parentpath);
upperdir = parentpath.dentry;
@@ -376,13 +353,11 @@ int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
return err;
if (S_ISLNK(stat->mode)) {
- link = ovl_read_symlink(lowerpath->dentry);
+ link = vfs_get_link(lowerdentry, &done);
if (IS_ERR(link))
return PTR_ERR(link);
}
- old_cred = ovl_override_creds(dentry->d_sb);
-
err = -EIO;
if (lock_rename(workdir, upperdir) != NULL) {
pr_err("overlayfs: failed to lock workdir+upperdir\n");
@@ -403,19 +378,16 @@ int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
}
out_unlock:
unlock_rename(workdir, upperdir);
- revert_creds(old_cred);
-
- if (link)
- free_page((unsigned long) link);
+ do_delayed_call(&done);
return err;
}
int ovl_copy_up(struct dentry *dentry)
{
- int err;
+ int err = 0;
+ const struct cred *old_cred = ovl_override_creds(dentry->d_sb);
- err = 0;
while (!err) {
struct dentry *next;
struct dentry *parent;
@@ -447,6 +419,7 @@ int ovl_copy_up(struct dentry *dentry)
dput(parent);
dput(next);
}
+ revert_creds(old_cred);
return err;
}
diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index 5f90ddf778ba..306b6c161840 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -14,6 +14,7 @@
#include <linux/cred.h>
#include <linux/posix_acl.h>
#include <linux/posix_acl_xattr.h>
+#include <linux/atomic.h>
#include "overlayfs.h"
void ovl_cleanup(struct inode *wdir, struct dentry *wdentry)
@@ -37,8 +38,10 @@ struct dentry *ovl_lookup_temp(struct dentry *workdir, struct dentry *dentry)
{
struct dentry *temp;
char name[20];
+ static atomic_t temp_id = ATOMIC_INIT(0);
- snprintf(name, sizeof(name), "#%lx", (unsigned long) dentry);
+ /* counter is allowed to wrap, since temp dentries are ephemeral */
+ snprintf(name, sizeof(name), "#%x", atomic_inc_return(&temp_id));
temp = lookup_one_len(name, workdir, strlen(name));
if (!IS_ERR(temp) && temp->d_inode) {
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index c18d6a4ff456..c58f01babf30 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -19,6 +19,7 @@ static int ovl_copy_up_truncate(struct dentry *dentry)
struct dentry *parent;
struct kstat stat;
struct path lowerpath;
+ const struct cred *old_cred;
parent = dget_parent(dentry);
err = ovl_copy_up(parent);
@@ -26,12 +27,14 @@ static int ovl_copy_up_truncate(struct dentry *dentry)
goto out_dput_parent;
ovl_path_lower(dentry, &lowerpath);
- err = vfs_getattr(&lowerpath, &stat);
- if (err)
- goto out_dput_parent;
- stat.size = 0;
- err = ovl_copy_up_one(parent, dentry, &lowerpath, &stat);
+ old_cred = ovl_override_creds(dentry->d_sb);
+ err = vfs_getattr(&lowerpath, &stat);
+ if (!err) {
+ stat.size = 0;
+ err = ovl_copy_up_one(parent, dentry, &lowerpath, &stat);
+ }
+ revert_creds(old_cred);
out_dput_parent:
dput(parent);
@@ -153,45 +156,18 @@ static const char *ovl_get_link(struct dentry *dentry,
struct inode *inode,
struct delayed_call *done)
{
- struct dentry *realdentry;
- struct inode *realinode;
const struct cred *old_cred;
const char *p;
if (!dentry)
return ERR_PTR(-ECHILD);
- realdentry = ovl_dentry_real(dentry);
- realinode = realdentry->d_inode;
-
- if (WARN_ON(!realinode->i_op->get_link))
- return ERR_PTR(-EPERM);
-
old_cred = ovl_override_creds(dentry->d_sb);
- p = realinode->i_op->get_link(realdentry, realinode, done);
+ p = vfs_get_link(ovl_dentry_real(dentry), done);
revert_creds(old_cred);
return p;
}
-static int ovl_readlink(struct dentry *dentry, char __user *buf, int bufsiz)
-{
- struct path realpath;
- struct inode *realinode;
- const struct cred *old_cred;
- int err;
-
- ovl_path_real(dentry, &realpath);
- realinode = realpath.dentry->d_inode;
-
- if (!realinode->i_op->readlink)
- return -EINVAL;
-
- old_cred = ovl_override_creds(dentry->d_sb);
- err = realinode->i_op->readlink(realpath.dentry, buf, bufsiz);
- revert_creds(old_cred);
- return err;
-}
-
bool ovl_is_private_xattr(const char *name)
{
return strncmp(name, OVL_XATTR_PREFIX,
@@ -375,7 +351,7 @@ static const struct inode_operations ovl_file_inode_operations = {
static const struct inode_operations ovl_symlink_inode_operations = {
.setattr = ovl_setattr,
.get_link = ovl_get_link,
- .readlink = ovl_readlink,
+ .readlink = generic_readlink,
.getattr = ovl_getattr,
.listxattr = ovl_listxattr,
.update_time = ovl_update_time,
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index 7e3f0127fc1a..bcf3965be819 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -273,12 +273,11 @@ static bool ovl_is_opaquedir(struct dentry *dentry)
{
int res;
char val;
- struct inode *inode = dentry->d_inode;
- if (!S_ISDIR(inode->i_mode) || !(inode->i_opflags & IOP_XATTR))
+ if (!d_is_dir(dentry))
return false;
- res = __vfs_getxattr(dentry, inode, OVL_XATTR_OPAQUE, &val, 1);
+ res = vfs_getxattr(dentry, OVL_XATTR_OPAQUE, &val, 1);
if (res == 1 && val == 'y')
return true;
@@ -419,16 +418,12 @@ static bool ovl_dentry_weird(struct dentry *dentry)
DCACHE_OP_COMPARE);
}
-static inline struct dentry *ovl_lookup_real(struct super_block *ovl_sb,
- struct dentry *dir,
+static inline struct dentry *ovl_lookup_real(struct dentry *dir,
const struct qstr *name)
{
- const struct cred *old_cred;
struct dentry *dentry;
- old_cred = ovl_override_creds(ovl_sb);
dentry = lookup_one_len_unlocked(name->name, dir, name->len);
- revert_creds(old_cred);
if (IS_ERR(dentry)) {
if (PTR_ERR(dentry) == -ENOENT)
@@ -469,6 +464,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
unsigned int flags)
{
struct ovl_entry *oe;
+ const struct cred *old_cred;
struct ovl_entry *poe = dentry->d_parent->d_fsdata;
struct path *stack = NULL;
struct dentry *upperdir, *upperdentry = NULL;
@@ -479,9 +475,10 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
unsigned int i;
int err;
+ old_cred = ovl_override_creds(dentry->d_sb);
upperdir = ovl_upperdentry_dereference(poe);
if (upperdir) {
- this = ovl_lookup_real(dentry->d_sb, upperdir, &dentry->d_name);
+ this = ovl_lookup_real(upperdir, &dentry->d_name);
err = PTR_ERR(this);
if (IS_ERR(this))
goto out;
@@ -514,8 +511,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
bool opaque = false;
struct path lowerpath = poe->lowerstack[i];
- this = ovl_lookup_real(dentry->d_sb,
- lowerpath.dentry, &dentry->d_name);
+ this = ovl_lookup_real(lowerpath.dentry, &dentry->d_name);
err = PTR_ERR(this);
if (IS_ERR(this)) {
/*
@@ -588,6 +584,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
ovl_copyattr(realdentry->d_inode, inode);
}
+ revert_creds(old_cred);
oe->opaque = upperopaque;
oe->__upperdentry = upperdentry;
memcpy(oe->lowerstack, stack, sizeof(struct path) * ctr);
@@ -606,6 +603,7 @@ out_put:
out_put_upper:
dput(upperdentry);
out:
+ revert_creds(old_cred);
return ERR_PTR(err);
}
@@ -834,6 +832,19 @@ retry:
if (err)
goto out_dput;
+ /*
+ * Try to remove POSIX ACL xattrs from workdir. We are good if:
+ *
+ * a) success (there was a POSIX ACL xattr and was removed)
+ * b) -ENODATA (there was no POSIX ACL xattr)
+ * c) -EOPNOTSUPP (POSIX ACL xattrs are not supported)
+ *
+ * There are various other error values that could effectively
+ * mean that the xattr doesn't exist (e.g. -ERANGE is returned
+ * if the xattr name is too long), but the set of filesystems
+ * allowed as upper are limited to "normal" ones, where checking
+ * for the above two errors is sufficient.
+ */
err = vfs_removexattr(work, XATTR_NAME_POSIX_ACL_DEFAULT);
if (err && err != -ENODATA && err != -EOPNOTSUPP)
goto out_dput;
@@ -1292,6 +1303,12 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
if (!oe)
goto out_put_cred;
+ sb->s_magic = OVERLAYFS_SUPER_MAGIC;
+ sb->s_op = &ovl_super_operations;
+ sb->s_xattr = ovl_xattr_handlers;
+ sb->s_fs_info = ufs;
+ sb->s_flags |= MS_POSIXACL | MS_NOREMOTELOCK;
+
root_dentry = d_make_root(ovl_new_inode(sb, S_IFDIR));
if (!root_dentry)
goto out_free_oe;
@@ -1315,12 +1332,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
ovl_inode_init(d_inode(root_dentry), realinode, !!upperpath.dentry);
ovl_copyattr(realinode, d_inode(root_dentry));
- sb->s_magic = OVERLAYFS_SUPER_MAGIC;
- sb->s_op = &ovl_super_operations;
- sb->s_xattr = ovl_xattr_handlers;
sb->s_root = root_dentry;
- sb->s_fs_info = ufs;
- sb->s_flags |= MS_POSIXACL | MS_NOREMOTELOCK;
return 0;
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 89600fd5963d..81818adb8e9e 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -412,10 +412,11 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
mm = get_task_mm(task);
if (mm) {
vsize = task_vsize(mm);
- if (permitted) {
- eip = KSTK_EIP(task);
- esp = KSTK_ESP(task);
- }
+ /*
+ * esp and eip are intentionally zeroed out. There is no
+ * non-racy way to read them without freezing the task.
+ * Programs that need reliable values can use ptrace(2).
+ */
}
get_task_comm(tcomm, task);
diff --git a/fs/proc/base.c b/fs/proc/base.c
index c2964d890c9a..ca651ac00660 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -832,6 +832,7 @@ static ssize_t mem_rw(struct file *file, char __user *buf,
unsigned long addr = *ppos;
ssize_t copied;
char *page;
+ unsigned int flags;
if (!mm)
return 0;
@@ -844,6 +845,11 @@ static ssize_t mem_rw(struct file *file, char __user *buf,
if (!atomic_inc_not_zero(&mm->mm_users))
goto free;
+ /* Maybe we should limit FOLL_FORCE to actual ptrace users? */
+ flags = FOLL_FORCE;
+ if (write)
+ flags |= FOLL_WRITE;
+
while (count > 0) {
int this_len = min_t(int, count, PAGE_SIZE);
@@ -852,7 +858,7 @@ static ssize_t mem_rw(struct file *file, char __user *buf,
break;
}
- this_len = access_remote_vm(mm, addr, page, this_len, write);
+ this_len = access_remote_vm(mm, addr, page, this_len, flags);
if (!this_len) {
if (!copied)
copied = -EIO;
@@ -964,8 +970,7 @@ static ssize_t environ_read(struct file *file, char __user *buf,
max_len = min_t(size_t, PAGE_SIZE, count);
this_len = min(max_len, this_len);
- retval = access_remote_vm(mm, (env_start + src),
- page, this_len, 0);
+ retval = access_remote_vm(mm, (env_start + src), page, this_len, 0);
if (retval <= 0) {
ret = retval;
@@ -1007,6 +1012,9 @@ static ssize_t auxv_read(struct file *file, char __user *buf,
{
struct mm_struct *mm = file->private_data;
unsigned int nwords = 0;
+
+ if (!mm)
+ return 0;
do {
nwords += 2;
} while (mm->saved_auxv[nwords - 2] != 0); /* AT_NULL */
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 6909582ce5e5..35b92d81692f 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -266,24 +266,15 @@ static int do_maps_open(struct inode *inode, struct file *file,
* /proc/PID/maps that is the stack of the main task.
*/
static int is_stack(struct proc_maps_private *priv,
- struct vm_area_struct *vma, int is_pid)
+ struct vm_area_struct *vma)
{
- int stack = 0;
-
- if (is_pid) {
- stack = vma->vm_start <= vma->vm_mm->start_stack &&
- vma->vm_end >= vma->vm_mm->start_stack;
- } else {
- struct inode *inode = priv->inode;
- struct task_struct *task;
-
- rcu_read_lock();
- task = pid_task(proc_pid(inode), PIDTYPE_PID);
- if (task)
- stack = vma_is_stack_for_task(vma, task);
- rcu_read_unlock();
- }
- return stack;
+ /*
+ * We make no effort to guess what a given thread considers to be
+ * its "stack". It's not even well-defined for programs written
+ * languages like Go.
+ */
+ return vma->vm_start <= vma->vm_mm->start_stack &&
+ vma->vm_end >= vma->vm_mm->start_stack;
}
static void
@@ -354,7 +345,7 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid)
goto done;
}
- if (is_stack(priv, vma, is_pid))
+ if (is_stack(priv, vma))
name = "[stack]";
}
@@ -1669,7 +1660,7 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid)
seq_file_path(m, file, "\n\t= ");
} else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) {
seq_puts(m, " heap");
- } else if (is_stack(proc_priv, vma, is_pid)) {
+ } else if (is_stack(proc_priv, vma)) {
seq_puts(m, " stack");
}
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
index faacb0c0d857..37175621e890 100644
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -124,25 +124,17 @@ unsigned long task_statm(struct mm_struct *mm,
}
static int is_stack(struct proc_maps_private *priv,
- struct vm_area_struct *vma, int is_pid)
+ struct vm_area_struct *vma)
{
struct mm_struct *mm = vma->vm_mm;
- int stack = 0;
-
- if (is_pid) {
- stack = vma->vm_start <= mm->start_stack &&
- vma->vm_end >= mm->start_stack;
- } else {
- struct inode *inode = priv->inode;
- struct task_struct *task;
-
- rcu_read_lock();
- task = pid_task(proc_pid(inode), PIDTYPE_PID);
- if (task)
- stack = vma_is_stack_for_task(vma, task);
- rcu_read_unlock();
- }
- return stack;
+
+ /*
+ * We make no effort to guess what a given thread considers to be
+ * its "stack". It's not even well-defined for programs written
+ * languages like Go.
+ */
+ return vma->vm_start <= mm->start_stack &&
+ vma->vm_end >= mm->start_stack;
}
/*
@@ -184,7 +176,7 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma,
if (file) {
seq_pad(m, ' ');
seq_file_path(m, file, "");
- } else if (mm && is_stack(priv, vma, is_pid)) {
+ } else if (mm && is_stack(priv, vma)) {
seq_pad(m, ' ');
seq_printf(m, "[stack]");
}
diff --git a/fs/read_write.c b/fs/read_write.c
index 66215a7b17cf..190e0d362581 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -730,6 +730,35 @@ static ssize_t do_loop_readv_writev(struct file *filp, struct iov_iter *iter,
/* A write operation does a read from user space and vice versa */
#define vrfy_dir(type) ((type) == READ ? VERIFY_WRITE : VERIFY_READ)
+/**
+ * rw_copy_check_uvector() - Copy an array of &struct iovec from userspace
+ * into the kernel and check that it is valid.
+ *
+ * @type: One of %CHECK_IOVEC_ONLY, %READ, or %WRITE.
+ * @uvector: Pointer to the userspace array.
+ * @nr_segs: Number of elements in userspace array.
+ * @fast_segs: Number of elements in @fast_pointer.
+ * @fast_pointer: Pointer to (usually small on-stack) kernel array.
+ * @ret_pointer: (output parameter) Pointer to a variable that will point to
+ * either @fast_pointer, a newly allocated kernel array, or NULL,
+ * depending on which array was used.
+ *
+ * This function copies an array of &struct iovec of @nr_segs from
+ * userspace into the kernel and checks that each element is valid (e.g.
+ * it does not point to a kernel address or cause overflow by being too
+ * large, etc.).
+ *
+ * As an optimization, the caller may provide a pointer to a small
+ * on-stack array in @fast_pointer, typically %UIO_FASTIOV elements long
+ * (the size of this array, or 0 if unused, should be given in @fast_segs).
+ *
+ * @ret_pointer will always point to the array that was used, so the
+ * caller must take care not to call kfree() on it e.g. in case the
+ * @fast_pointer array was used and it was allocated on the stack.
+ *
+ * Return: The total number of bytes covered by the iovec array on success
+ * or a negative error code on error.
+ */
ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector,
unsigned long nr_segs, unsigned long fast_segs,
struct iovec *fast_pointer,
diff --git a/fs/super.c b/fs/super.c
index c2ff475c1711..c183835566c1 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -1269,25 +1269,34 @@ EXPORT_SYMBOL(__sb_start_write);
static void sb_wait_write(struct super_block *sb, int level)
{
percpu_down_write(sb->s_writers.rw_sem + level-1);
- /*
- * We are going to return to userspace and forget about this lock, the
- * ownership goes to the caller of thaw_super() which does unlock.
- *
- * FIXME: we should do this before return from freeze_super() after we
- * called sync_filesystem(sb) and s_op->freeze_fs(sb), and thaw_super()
- * should re-acquire these locks before s_op->unfreeze_fs(sb). However
- * this leads to lockdep false-positives, so currently we do the early
- * release right after acquire.
- */
- percpu_rwsem_release(sb->s_writers.rw_sem + level-1, 0, _THIS_IP_);
}
-static void sb_freeze_unlock(struct super_block *sb)
+/*
+ * We are going to return to userspace and forget about these locks, the
+ * ownership goes to the caller of thaw_super() which does unlock().
+ */
+static void lockdep_sb_freeze_release(struct super_block *sb)
+{
+ int level;
+
+ for (level = SB_FREEZE_LEVELS - 1; level >= 0; level--)
+ percpu_rwsem_release(sb->s_writers.rw_sem + level, 0, _THIS_IP_);
+}
+
+/*
+ * Tell lockdep we are holding these locks before we call ->unfreeze_fs(sb).
+ */
+static void lockdep_sb_freeze_acquire(struct super_block *sb)
{
int level;
for (level = 0; level < SB_FREEZE_LEVELS; ++level)
percpu_rwsem_acquire(sb->s_writers.rw_sem + level, 0, _THIS_IP_);
+}
+
+static void sb_freeze_unlock(struct super_block *sb)
+{
+ int level;
for (level = SB_FREEZE_LEVELS - 1; level >= 0; level--)
percpu_up_write(sb->s_writers.rw_sem + level);
@@ -1379,10 +1388,11 @@ int freeze_super(struct super_block *sb)
}
}
/*
- * This is just for debugging purposes so that fs can warn if it
- * sees write activity when frozen is set to SB_FREEZE_COMPLETE.
+ * For debugging purposes so that fs can warn if it sees write activity
+ * when frozen is set to SB_FREEZE_COMPLETE, and for thaw_super().
*/
sb->s_writers.frozen = SB_FREEZE_COMPLETE;
+ lockdep_sb_freeze_release(sb);
up_write(&sb->s_umount);
return 0;
}
@@ -1399,7 +1409,7 @@ int thaw_super(struct super_block *sb)
int error;
down_write(&sb->s_umount);
- if (sb->s_writers.frozen == SB_UNFROZEN) {
+ if (sb->s_writers.frozen != SB_FREEZE_COMPLETE) {
up_write(&sb->s_umount);
return -EINVAL;
}
@@ -1409,11 +1419,14 @@ int thaw_super(struct super_block *sb)
goto out;
}
+ lockdep_sb_freeze_acquire(sb);
+
if (sb->s_op->unfreeze_fs) {
error = sb->s_op->unfreeze_fs(sb);
if (error) {
printk(KERN_ERR
"VFS:Filesystem thaw failed\n");
+ lockdep_sb_freeze_release(sb);
up_write(&sb->s_umount);
return error;
}
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 94374e435025..2b67bda2021b 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -21,14 +21,14 @@ DEFINE_SPINLOCK(sysfs_symlink_target_lock);
void sysfs_warn_dup(struct kernfs_node *parent, const char *name)
{
- char *buf, *path = NULL;
+ char *buf;
buf = kzalloc(PATH_MAX, GFP_KERNEL);
if (buf)
- path = kernfs_path(parent, buf, PATH_MAX);
+ kernfs_path(parent, buf, PATH_MAX);
WARN(1, KERN_WARNING "sysfs: cannot create duplicate filename '%s/%s'\n",
- path, name);
+ buf, name);
kfree(buf);
}
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index c8f60df2733e..ca16c5d7bab1 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -439,7 +439,7 @@ static unsigned int vfs_dent_type(uint8_t type)
*/
static int ubifs_readdir(struct file *file, struct dir_context *ctx)
{
- int err;
+ int err = 0;
struct qstr nm;
union ubifs_key key;
struct ubifs_dent_node *dent;
@@ -541,14 +541,20 @@ out:
kfree(file->private_data);
file->private_data = NULL;
- if (err != -ENOENT) {
+ if (err != -ENOENT)
ubifs_err(c, "cannot find next direntry, error %d", err);
- return err;
- }
+ else
+ /*
+ * -ENOENT is a non-fatal error in this context, the TNC uses
+ * it to indicate that the cursor moved past the current directory
+ * and readdir() has to stop.
+ */
+ err = 0;
+
/* 2 is a special value indicating that there are no more direntries */
ctx->pos = 2;
- return 0;
+ return err;
}
/* Free saved readdir() state when the directory is closed */
@@ -1060,9 +1066,9 @@ static void unlock_4_inodes(struct inode *inode1, struct inode *inode2,
mutex_unlock(&ubifs_inode(inode1)->ui_mutex);
}
-static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry,
- struct inode *new_dir, struct dentry *new_dentry,
- unsigned int flags)
+static int do_rename(struct inode *old_dir, struct dentry *old_dentry,
+ struct inode *new_dir, struct dentry *new_dentry,
+ unsigned int flags)
{
struct ubifs_info *c = old_dir->i_sb->s_fs_info;
struct inode *old_inode = d_inode(old_dentry);
@@ -1323,7 +1329,7 @@ static int ubifs_xrename(struct inode *old_dir, struct dentry *old_dentry,
return err;
}
-static int ubifs_rename2(struct inode *old_dir, struct dentry *old_dentry,
+static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry,
struct inode *new_dir, struct dentry *new_dentry,
unsigned int flags)
{
@@ -1336,7 +1342,7 @@ static int ubifs_rename2(struct inode *old_dir, struct dentry *old_dentry,
if (flags & RENAME_EXCHANGE)
return ubifs_xrename(old_dir, old_dentry, new_dir, new_dentry);
- return ubifs_rename(old_dir, old_dentry, new_dir, new_dentry, flags);
+ return do_rename(old_dir, old_dentry, new_dir, new_dentry, flags);
}
int ubifs_getattr(struct vfsmount *mnt, struct dentry *dentry,
@@ -1387,7 +1393,7 @@ const struct inode_operations ubifs_dir_inode_operations = {
.mkdir = ubifs_mkdir,
.rmdir = ubifs_rmdir,
.mknod = ubifs_mknod,
- .rename = ubifs_rename2,
+ .rename = ubifs_rename,
.setattr = ubifs_setattr,
.getattr = ubifs_getattr,
.listxattr = ubifs_listxattr,
diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c
index 6c2f4d41ed73..d9f9615bfd71 100644
--- a/fs/ubifs/xattr.c
+++ b/fs/ubifs/xattr.c
@@ -172,6 +172,7 @@ out_cancel:
host_ui->xattr_cnt -= 1;
host_ui->xattr_size -= CALC_DENT_SIZE(nm->len);
host_ui->xattr_size -= CALC_XATTR_BYTES(size);
+ host_ui->xattr_names -= nm->len;
mutex_unlock(&host_ui->ui_mutex);
out_free:
make_bad_inode(inode);
@@ -478,6 +479,7 @@ out_cancel:
host_ui->xattr_cnt += 1;
host_ui->xattr_size += CALC_DENT_SIZE(nm->len);
host_ui->xattr_size += CALC_XATTR_BYTES(ui->data_len);
+ host_ui->xattr_names += nm->len;
mutex_unlock(&host_ui->ui_mutex);
ubifs_release_budget(c, &req);
make_bad_inode(inode);
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index c27344cf38e1..c6eb21940783 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -3974,9 +3974,6 @@ xfs_bmap_remap_alloc(
* allocating, so skip that check by pretending to be freeing.
*/
error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING);
- if (error)
- goto error0;
-error0:
xfs_perag_put(args.pag);
if (error)
trace_xfs_bmap_remap_alloc_error(ap->ip, error, _RET_IP_);
@@ -3999,6 +3996,39 @@ xfs_bmap_alloc(
return xfs_bmap_btalloc(ap);
}
+/* Trim extent to fit a logical block range. */
+void
+xfs_trim_extent(
+ struct xfs_bmbt_irec *irec,
+ xfs_fileoff_t bno,
+ xfs_filblks_t len)
+{
+ xfs_fileoff_t distance;
+ xfs_fileoff_t end = bno + len;
+
+ if (irec->br_startoff + irec->br_blockcount <= bno ||
+ irec->br_startoff >= end) {
+ irec->br_blockcount = 0;
+ return;
+ }
+
+ if (irec->br_startoff < bno) {
+ distance = bno - irec->br_startoff;
+ if (isnullstartblock(irec->br_startblock))
+ irec->br_startblock = DELAYSTARTBLOCK;
+ if (irec->br_startblock != DELAYSTARTBLOCK &&
+ irec->br_startblock != HOLESTARTBLOCK)
+ irec->br_startblock += distance;
+ irec->br_startoff += distance;
+ irec->br_blockcount -= distance;
+ }
+
+ if (end < irec->br_startoff + irec->br_blockcount) {
+ distance = irec->br_startoff + irec->br_blockcount - end;
+ irec->br_blockcount -= distance;
+ }
+}
+
/*
* Trim the returned map to the required bounds
*/
@@ -4829,6 +4859,219 @@ xfs_bmap_split_indlen(
return stolen;
}
+int
+xfs_bmap_del_extent_delay(
+ struct xfs_inode *ip,
+ int whichfork,
+ xfs_extnum_t *idx,
+ struct xfs_bmbt_irec *got,
+ struct xfs_bmbt_irec *del)
+{
+ struct xfs_mount *mp = ip->i_mount;
+ struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
+ struct xfs_bmbt_irec new;
+ int64_t da_old, da_new, da_diff = 0;
+ xfs_fileoff_t del_endoff, got_endoff;
+ xfs_filblks_t got_indlen, new_indlen, stolen;
+ int error = 0, state = 0;
+ bool isrt;
+
+ XFS_STATS_INC(mp, xs_del_exlist);
+
+ isrt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip);
+ del_endoff = del->br_startoff + del->br_blockcount;
+ got_endoff = got->br_startoff + got->br_blockcount;
+ da_old = startblockval(got->br_startblock);
+ da_new = 0;
+
+ ASSERT(*idx >= 0);
+ ASSERT(*idx < ifp->if_bytes / sizeof(struct xfs_bmbt_rec));
+ ASSERT(del->br_blockcount > 0);
+ ASSERT(got->br_startoff <= del->br_startoff);
+ ASSERT(got_endoff >= del_endoff);
+
+ if (isrt) {
+ int64_t rtexts = XFS_FSB_TO_B(mp, del->br_blockcount);
+
+ do_div(rtexts, mp->m_sb.sb_rextsize);
+ xfs_mod_frextents(mp, rtexts);
+ }
+
+ /*
+ * Update the inode delalloc counter now and wait to update the
+ * sb counters as we might have to borrow some blocks for the
+ * indirect block accounting.
+ */
+ xfs_trans_reserve_quota_nblks(NULL, ip, -((long)del->br_blockcount), 0,
+ isrt ? XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS);
+ ip->i_delayed_blks -= del->br_blockcount;
+
+ if (whichfork == XFS_COW_FORK)
+ state |= BMAP_COWFORK;
+
+ if (got->br_startoff == del->br_startoff)
+ state |= BMAP_LEFT_CONTIG;
+ if (got_endoff == del_endoff)
+ state |= BMAP_RIGHT_CONTIG;
+
+ switch (state & (BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) {
+ case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
+ /*
+ * Matches the whole extent. Delete the entry.
+ */
+ xfs_iext_remove(ip, *idx, 1, state);
+ --*idx;
+ break;
+ case BMAP_LEFT_CONTIG:
+ /*
+ * Deleting the first part of the extent.
+ */
+ trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
+ got->br_startoff = del_endoff;
+ got->br_blockcount -= del->br_blockcount;
+ da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip,
+ got->br_blockcount), da_old);
+ got->br_startblock = nullstartblock((int)da_new);
+ xfs_bmbt_set_all(xfs_iext_get_ext(ifp, *idx), got);
+ trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+ break;
+ case BMAP_RIGHT_CONTIG:
+ /*
+ * Deleting the last part of the extent.
+ */
+ trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
+ got->br_blockcount = got->br_blockcount - del->br_blockcount;
+ da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip,
+ got->br_blockcount), da_old);
+ got->br_startblock = nullstartblock((int)da_new);
+ xfs_bmbt_set_all(xfs_iext_get_ext(ifp, *idx), got);
+ trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+ break;
+ case 0:
+ /*
+ * Deleting the middle of the extent.
+ *
+ * Distribute the original indlen reservation across the two new
+ * extents. Steal blocks from the deleted extent if necessary.
+ * Stealing blocks simply fudges the fdblocks accounting below.
+ * Warn if either of the new indlen reservations is zero as this
+ * can lead to delalloc problems.
+ */
+ trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
+
+ got->br_blockcount = del->br_startoff - got->br_startoff;
+ got_indlen = xfs_bmap_worst_indlen(ip, got->br_blockcount);
+
+ new.br_blockcount = got_endoff - del_endoff;
+ new_indlen = xfs_bmap_worst_indlen(ip, new.br_blockcount);
+
+ WARN_ON_ONCE(!got_indlen || !new_indlen);
+ stolen = xfs_bmap_split_indlen(da_old, &got_indlen, &new_indlen,
+ del->br_blockcount);
+
+ got->br_startblock = nullstartblock((int)got_indlen);
+ xfs_bmbt_set_all(xfs_iext_get_ext(ifp, *idx), got);
+ trace_xfs_bmap_post_update(ip, *idx, 0, _THIS_IP_);
+
+ new.br_startoff = del_endoff;
+ new.br_state = got->br_state;
+ new.br_startblock = nullstartblock((int)new_indlen);
+
+ ++*idx;
+ xfs_iext_insert(ip, *idx, 1, &new, state);
+
+ da_new = got_indlen + new_indlen - stolen;
+ del->br_blockcount -= stolen;
+ break;
+ }
+
+ ASSERT(da_old >= da_new);
+ da_diff = da_old - da_new;
+ if (!isrt)
+ da_diff += del->br_blockcount;
+ if (da_diff)
+ xfs_mod_fdblocks(mp, da_diff, false);
+ return error;
+}
+
+void
+xfs_bmap_del_extent_cow(
+ struct xfs_inode *ip,
+ xfs_extnum_t *idx,
+ struct xfs_bmbt_irec *got,
+ struct xfs_bmbt_irec *del)
+{
+ struct xfs_mount *mp = ip->i_mount;
+ struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
+ struct xfs_bmbt_irec new;
+ xfs_fileoff_t del_endoff, got_endoff;
+ int state = BMAP_COWFORK;
+
+ XFS_STATS_INC(mp, xs_del_exlist);
+
+ del_endoff = del->br_startoff + del->br_blockcount;
+ got_endoff = got->br_startoff + got->br_blockcount;
+
+ ASSERT(*idx >= 0);
+ ASSERT(*idx < ifp->if_bytes / sizeof(struct xfs_bmbt_rec));
+ ASSERT(del->br_blockcount > 0);
+ ASSERT(got->br_startoff <= del->br_startoff);
+ ASSERT(got_endoff >= del_endoff);
+ ASSERT(!isnullstartblock(got->br_startblock));
+
+ if (got->br_startoff == del->br_startoff)
+ state |= BMAP_LEFT_CONTIG;
+ if (got_endoff == del_endoff)
+ state |= BMAP_RIGHT_CONTIG;
+
+ switch (state & (BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) {
+ case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
+ /*
+ * Matches the whole extent. Delete the entry.
+ */
+ xfs_iext_remove(ip, *idx, 1, state);
+ --*idx;
+ break;
+ case BMAP_LEFT_CONTIG:
+ /*
+ * Deleting the first part of the extent.
+ */
+ trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
+ got->br_startoff = del_endoff;
+ got->br_blockcount -= del->br_blockcount;
+ got->br_startblock = del->br_startblock + del->br_blockcount;
+ xfs_bmbt_set_all(xfs_iext_get_ext(ifp, *idx), got);
+ trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+ break;
+ case BMAP_RIGHT_CONTIG:
+ /*
+ * Deleting the last part of the extent.
+ */
+ trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
+ got->br_blockcount -= del->br_blockcount;
+ xfs_bmbt_set_all(xfs_iext_get_ext(ifp, *idx), got);
+ trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+ break;
+ case 0:
+ /*
+ * Deleting the middle of the extent.
+ */
+ trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
+ got->br_blockcount = del->br_startoff - got->br_startoff;
+ xfs_bmbt_set_all(xfs_iext_get_ext(ifp, *idx), got);
+ trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+
+ new.br_startoff = del_endoff;
+ new.br_blockcount = got_endoff - del_endoff;
+ new.br_state = got->br_state;
+ new.br_startblock = del->br_startblock + del->br_blockcount;
+
+ ++*idx;
+ xfs_iext_insert(ip, *idx, 1, &new, state);
+ break;
+ }
+}
+
/*
* Called by xfs_bmapi to update file extent records and the btree
* after removing space (or undoing a delayed allocation).
@@ -5171,175 +5414,6 @@ done:
return error;
}
-/* Remove an extent from the CoW fork. Similar to xfs_bmap_del_extent. */
-int
-xfs_bunmapi_cow(
- struct xfs_inode *ip,
- struct xfs_bmbt_irec *del)
-{
- xfs_filblks_t da_new;
- xfs_filblks_t da_old;
- xfs_fsblock_t del_endblock = 0;
- xfs_fileoff_t del_endoff;
- int delay;
- struct xfs_bmbt_rec_host *ep;
- int error;
- struct xfs_bmbt_irec got;
- xfs_fileoff_t got_endoff;
- struct xfs_ifork *ifp;
- struct xfs_mount *mp;
- xfs_filblks_t nblks;
- struct xfs_bmbt_irec new;
- /* REFERENCED */
- uint qfield;
- xfs_filblks_t temp;
- xfs_filblks_t temp2;
- int state = BMAP_COWFORK;
- int eof;
- xfs_extnum_t eidx;
-
- mp = ip->i_mount;
- XFS_STATS_INC(mp, xs_del_exlist);
-
- ep = xfs_bmap_search_extents(ip, del->br_startoff, XFS_COW_FORK, &eof,
- &eidx, &got, &new);
-
- ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); ifp = ifp;
- ASSERT((eidx >= 0) && (eidx < ifp->if_bytes /
- (uint)sizeof(xfs_bmbt_rec_t)));
- ASSERT(del->br_blockcount > 0);
- ASSERT(got.br_startoff <= del->br_startoff);
- del_endoff = del->br_startoff + del->br_blockcount;
- got_endoff = got.br_startoff + got.br_blockcount;
- ASSERT(got_endoff >= del_endoff);
- delay = isnullstartblock(got.br_startblock);
- ASSERT(isnullstartblock(del->br_startblock) == delay);
- qfield = 0;
- error = 0;
- /*
- * If deleting a real allocation, must free up the disk space.
- */
- if (!delay) {
- nblks = del->br_blockcount;
- qfield = XFS_TRANS_DQ_BCOUNT;
- /*
- * Set up del_endblock and cur for later.
- */
- del_endblock = del->br_startblock + del->br_blockcount;
- da_old = da_new = 0;
- } else {
- da_old = startblockval(got.br_startblock);
- da_new = 0;
- nblks = 0;
- }
- qfield = qfield;
- nblks = nblks;
-
- /*
- * Set flag value to use in switch statement.
- * Left-contig is 2, right-contig is 1.
- */
- switch (((got.br_startoff == del->br_startoff) << 1) |
- (got_endoff == del_endoff)) {
- case 3:
- /*
- * Matches the whole extent. Delete the entry.
- */
- xfs_iext_remove(ip, eidx, 1, BMAP_COWFORK);
- --eidx;
- break;
-
- case 2:
- /*
- * Deleting the first part of the extent.
- */
- trace_xfs_bmap_pre_update(ip, eidx, state, _THIS_IP_);
- xfs_bmbt_set_startoff(ep, del_endoff);
- temp = got.br_blockcount - del->br_blockcount;
- xfs_bmbt_set_blockcount(ep, temp);
- if (delay) {
- temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
- da_old);
- xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
- trace_xfs_bmap_post_update(ip, eidx, state, _THIS_IP_);
- da_new = temp;
- break;
- }
- xfs_bmbt_set_startblock(ep, del_endblock);
- trace_xfs_bmap_post_update(ip, eidx, state, _THIS_IP_);
- break;
-
- case 1:
- /*
- * Deleting the last part of the extent.
- */
- temp = got.br_blockcount - del->br_blockcount;
- trace_xfs_bmap_pre_update(ip, eidx, state, _THIS_IP_);
- xfs_bmbt_set_blockcount(ep, temp);
- if (delay) {
- temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
- da_old);
- xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
- trace_xfs_bmap_post_update(ip, eidx, state, _THIS_IP_);
- da_new = temp;
- break;
- }
- trace_xfs_bmap_post_update(ip, eidx, state, _THIS_IP_);
- break;
-
- case 0:
- /*
- * Deleting the middle of the extent.
- */
- temp = del->br_startoff - got.br_startoff;
- trace_xfs_bmap_pre_update(ip, eidx, state, _THIS_IP_);
- xfs_bmbt_set_blockcount(ep, temp);
- new.br_startoff = del_endoff;
- temp2 = got_endoff - del_endoff;
- new.br_blockcount = temp2;
- new.br_state = got.br_state;
- if (!delay) {
- new.br_startblock = del_endblock;
- } else {
- temp = xfs_bmap_worst_indlen(ip, temp);
- xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
- temp2 = xfs_bmap_worst_indlen(ip, temp2);
- new.br_startblock = nullstartblock((int)temp2);
- da_new = temp + temp2;
- while (da_new > da_old) {
- if (temp) {
- temp--;
- da_new--;
- xfs_bmbt_set_startblock(ep,
- nullstartblock((int)temp));
- }
- if (da_new == da_old)
- break;
- if (temp2) {
- temp2--;
- da_new--;
- new.br_startblock =
- nullstartblock((int)temp2);
- }
- }
- }
- trace_xfs_bmap_post_update(ip, eidx, state, _THIS_IP_);
- xfs_iext_insert(ip, eidx + 1, 1, &new, state);
- ++eidx;
- break;
- }
-
- /*
- * Account for change in delayed indirect blocks.
- * Nothing to do for disk quota accounting here.
- */
- ASSERT(da_old >= da_new);
- if (da_old > da_new)
- xfs_mod_fdblocks(mp, (int64_t)(da_old - da_new), false);
-
- return error;
-}
-
/*
* Unmap (remove) blocks from a file.
* If nexts is nonzero then the number of extents to remove is limited to
diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h
index f97db7132564..7cae6ec27fa6 100644
--- a/fs/xfs/libxfs/xfs_bmap.h
+++ b/fs/xfs/libxfs/xfs_bmap.h
@@ -190,6 +190,8 @@ void xfs_bmap_trace_exlist(struct xfs_inode *ip, xfs_extnum_t cnt,
#define XFS_BMAP_TRACE_EXLIST(ip,c,w)
#endif
+void xfs_trim_extent(struct xfs_bmbt_irec *irec, xfs_fileoff_t bno,
+ xfs_filblks_t len);
int xfs_bmap_add_attrfork(struct xfs_inode *ip, int size, int rsvd);
void xfs_bmap_local_to_extents_empty(struct xfs_inode *ip, int whichfork);
void xfs_bmap_add_free(struct xfs_mount *mp, struct xfs_defer_ops *dfops,
@@ -221,7 +223,11 @@ int xfs_bunmapi(struct xfs_trans *tp, struct xfs_inode *ip,
xfs_fileoff_t bno, xfs_filblks_t len, int flags,
xfs_extnum_t nexts, xfs_fsblock_t *firstblock,
struct xfs_defer_ops *dfops, int *done);
-int xfs_bunmapi_cow(struct xfs_inode *ip, struct xfs_bmbt_irec *del);
+int xfs_bmap_del_extent_delay(struct xfs_inode *ip, int whichfork,
+ xfs_extnum_t *idx, struct xfs_bmbt_irec *got,
+ struct xfs_bmbt_irec *del);
+void xfs_bmap_del_extent_cow(struct xfs_inode *ip, xfs_extnum_t *idx,
+ struct xfs_bmbt_irec *got, struct xfs_bmbt_irec *del);
int xfs_check_nostate_extents(struct xfs_ifork *ifp, xfs_extnum_t idx,
xfs_extnum_t num);
uint xfs_default_attroffset(struct xfs_inode *ip);
diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
index 5c8e6f2ce44f..0e80993c8a59 100644
--- a/fs/xfs/libxfs/xfs_btree.c
+++ b/fs/xfs/libxfs/xfs_btree.c
@@ -4826,7 +4826,7 @@ xfs_btree_calc_size(
return rval;
}
-int
+static int
xfs_btree_count_blocks_helper(
struct xfs_btree_cur *cur,
int level,
diff --git a/fs/xfs/libxfs/xfs_dquot_buf.c b/fs/xfs/libxfs/xfs_dquot_buf.c
index 3cc3cf767474..ac9a003dd29a 100644
--- a/fs/xfs/libxfs/xfs_dquot_buf.c
+++ b/fs/xfs/libxfs/xfs_dquot_buf.c
@@ -191,8 +191,7 @@ xfs_dquot_buf_verify_crc(
if (mp->m_quotainfo)
ndquots = mp->m_quotainfo->qi_dqperchunk;
else
- ndquots = xfs_calc_dquots_per_chunk(
- XFS_BB_TO_FSB(mp, bp->b_length));
+ ndquots = xfs_calc_dquots_per_chunk(bp->b_length);
for (i = 0; i < ndquots; i++, d++) {
if (!xfs_verify_cksum((char *)d, sizeof(struct xfs_dqblk),
diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index f6547fc5e016..6b7579e7b60a 100644
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -865,7 +865,6 @@ typedef struct xfs_timestamp {
* padding field for v3 inodes.
*/
#define XFS_DINODE_MAGIC 0x494e /* 'IN' */
-#define XFS_DINODE_GOOD_VERSION(v) ((v) >= 1 && (v) <= 3)
typedef struct xfs_dinode {
__be16 di_magic; /* inode magic # = XFS_DINODE_MAGIC */
__be16 di_mode; /* mode and type of file */
diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c
index 8de9a3a29589..134424fac434 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.c
+++ b/fs/xfs/libxfs/xfs_inode_buf.c
@@ -57,6 +57,17 @@ xfs_inobp_check(
}
#endif
+bool
+xfs_dinode_good_version(
+ struct xfs_mount *mp,
+ __u8 version)
+{
+ if (xfs_sb_version_hascrc(&mp->m_sb))
+ return version == 3;
+
+ return version == 1 || version == 2;
+}
+
/*
* If we are doing readahead on an inode buffer, we might be in log recovery
* reading an inode allocation buffer that hasn't yet been replayed, and hence
@@ -91,7 +102,7 @@ xfs_inode_buf_verify(
dip = xfs_buf_offset(bp, (i << mp->m_sb.sb_inodelog));
di_ok = dip->di_magic == cpu_to_be16(XFS_DINODE_MAGIC) &&
- XFS_DINODE_GOOD_VERSION(dip->di_version);
+ xfs_dinode_good_version(mp, dip->di_version);
if (unlikely(XFS_TEST_ERROR(!di_ok, mp,
XFS_ERRTAG_ITOBP_INOTOBP,
XFS_RANDOM_ITOBP_INOTOBP))) {
diff --git a/fs/xfs/libxfs/xfs_inode_buf.h b/fs/xfs/libxfs/xfs_inode_buf.h
index 62d9d4681c8c..3cfe12a4f58a 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.h
+++ b/fs/xfs/libxfs/xfs_inode_buf.h
@@ -74,6 +74,8 @@ void xfs_inode_from_disk(struct xfs_inode *ip, struct xfs_dinode *from);
void xfs_log_dinode_to_disk(struct xfs_log_dinode *from,
struct xfs_dinode *to);
+bool xfs_dinode_good_version(struct xfs_mount *mp, __u8 version);
+
#if defined(DEBUG)
void xfs_inobp_check(struct xfs_mount *, struct xfs_buf *);
#else
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index a314fc7b56fa..6e4f7f900fea 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -249,6 +249,7 @@ xfs_file_dio_aio_read(
struct xfs_inode *ip = XFS_I(inode);
loff_t isize = i_size_read(inode);
size_t count = iov_iter_count(to);
+ loff_t end = iocb->ki_pos + count - 1;
struct iov_iter data;
struct xfs_buftarg *target;
ssize_t ret = 0;
@@ -272,49 +273,21 @@ xfs_file_dio_aio_read(
file_accessed(iocb->ki_filp);
- /*
- * Locking is a bit tricky here. If we take an exclusive lock for direct
- * IO, we effectively serialise all new concurrent read IO to this file
- * and block it behind IO that is currently in progress because IO in
- * progress holds the IO lock shared. We only need to hold the lock
- * exclusive to blow away the page cache, so only take lock exclusively
- * if the page cache needs invalidation. This allows the normal direct
- * IO case of no page cache pages to proceeed concurrently without
- * serialisation.
- */
xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
if (mapping->nrpages) {
- xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
- xfs_rw_ilock(ip, XFS_IOLOCK_EXCL);
+ ret = filemap_write_and_wait_range(mapping, iocb->ki_pos, end);
+ if (ret)
+ goto out_unlock;
/*
- * The generic dio code only flushes the range of the particular
- * I/O. Because we take an exclusive lock here, this whole
- * sequence is considerably more expensive for us. This has a
- * noticeable performance impact for any file with cached pages,
- * even when outside of the range of the particular I/O.
- *
- * Hence, amortize the cost of the lock against a full file
- * flush and reduce the chances of repeated iolock cycles going
- * forward.
+ * Invalidate whole pages. This can return an error if we fail
+ * to invalidate a page, but this should never happen on XFS.
+ * Warn if it does fail.
*/
- if (mapping->nrpages) {
- ret = filemap_write_and_wait(mapping);
- if (ret) {
- xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL);
- return ret;
- }
-
- /*
- * Invalidate whole pages. This can return an error if
- * we fail to invalidate a page, but this should never
- * happen on XFS. Warn if it does fail.
- */
- ret = invalidate_inode_pages2(mapping);
- WARN_ON_ONCE(ret);
- ret = 0;
- }
- xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
+ ret = invalidate_inode_pages2_range(mapping,
+ iocb->ki_pos >> PAGE_SHIFT, end >> PAGE_SHIFT);
+ WARN_ON_ONCE(ret);
+ ret = 0;
}
data = *to;
@@ -324,8 +297,9 @@ xfs_file_dio_aio_read(
iocb->ki_pos += ret;
iov_iter_advance(to, ret);
}
- xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
+out_unlock:
+ xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
return ret;
}
@@ -570,61 +544,49 @@ xfs_file_dio_aio_write(
if ((iocb->ki_pos | count) & target->bt_logical_sectormask)
return -EINVAL;
- /* "unaligned" here means not aligned to a filesystem block */
- if ((iocb->ki_pos & mp->m_blockmask) ||
- ((iocb->ki_pos + count) & mp->m_blockmask))
- unaligned_io = 1;
-
/*
- * We don't need to take an exclusive lock unless there page cache needs
- * to be invalidated or unaligned IO is being executed. We don't need to
- * consider the EOF extension case here because
- * xfs_file_aio_write_checks() will relock the inode as necessary for
- * EOF zeroing cases and fill out the new inode size as appropriate.
+ * Don't take the exclusive iolock here unless the I/O is unaligned to
+ * the file system block size. We don't need to consider the EOF
+ * extension case here because xfs_file_aio_write_checks() will relock
+ * the inode as necessary for EOF zeroing cases and fill out the new
+ * inode size as appropriate.
*/
- if (unaligned_io || mapping->nrpages)
+ if ((iocb->ki_pos & mp->m_blockmask) ||
+ ((iocb->ki_pos + count) & mp->m_blockmask)) {
+ unaligned_io = 1;
iolock = XFS_IOLOCK_EXCL;
- else
+ } else {
iolock = XFS_IOLOCK_SHARED;
- xfs_rw_ilock(ip, iolock);
-
- /*
- * Recheck if there are cached pages that need invalidate after we got
- * the iolock to protect against other threads adding new pages while
- * we were waiting for the iolock.
- */
- if (mapping->nrpages && iolock == XFS_IOLOCK_SHARED) {
- xfs_rw_iunlock(ip, iolock);
- iolock = XFS_IOLOCK_EXCL;
- xfs_rw_ilock(ip, iolock);
}
+ xfs_rw_ilock(ip, iolock);
+
ret = xfs_file_aio_write_checks(iocb, from, &iolock);
if (ret)
goto out;
count = iov_iter_count(from);
end = iocb->ki_pos + count - 1;
- /*
- * See xfs_file_dio_aio_read() for why we do a full-file flush here.
- */
if (mapping->nrpages) {
- ret = filemap_write_and_wait(VFS_I(ip)->i_mapping);
+ ret = filemap_write_and_wait_range(mapping, iocb->ki_pos, end);
if (ret)
goto out;
+
/*
* Invalidate whole pages. This can return an error if we fail
* to invalidate a page, but this should never happen on XFS.
* Warn if it does fail.
*/
- ret = invalidate_inode_pages2(VFS_I(ip)->i_mapping);
+ ret = invalidate_inode_pages2_range(mapping,
+ iocb->ki_pos >> PAGE_SHIFT, end >> PAGE_SHIFT);
WARN_ON_ONCE(ret);
ret = 0;
}
/*
* If we are doing unaligned IO, wait for all other IO to drain,
- * otherwise demote the lock if we had to flush cached pages
+ * otherwise demote the lock if we had to take the exclusive lock
+ * for other reasons in xfs_file_aio_write_checks.
*/
if (unaligned_io)
inode_dio_wait(inode);
@@ -947,134 +909,6 @@ out_unlock:
return error;
}
-/*
- * Flush all file writes out to disk.
- */
-static int
-xfs_file_wait_for_io(
- struct inode *inode,
- loff_t offset,
- size_t len)
-{
- loff_t rounding;
- loff_t ioffset;
- loff_t iendoffset;
- loff_t bs;
- int ret;
-
- bs = inode->i_sb->s_blocksize;
- inode_dio_wait(inode);
-
- rounding = max_t(xfs_off_t, bs, PAGE_SIZE);
- ioffset = round_down(offset, rounding);
- iendoffset = round_up(offset + len, rounding) - 1;
- ret = filemap_write_and_wait_range(inode->i_mapping, ioffset,
- iendoffset);
- return ret;
-}
-
-/* Hook up to the VFS reflink function */
-STATIC int
-xfs_file_share_range(
- struct file *file_in,
- loff_t pos_in,
- struct file *file_out,
- loff_t pos_out,
- u64 len,
- bool is_dedupe)
-{
- struct inode *inode_in;
- struct inode *inode_out;
- ssize_t ret;
- loff_t bs;
- loff_t isize;
- int same_inode;
- loff_t blen;
- unsigned int flags = 0;
-
- inode_in = file_inode(file_in);
- inode_out = file_inode(file_out);
- bs = inode_out->i_sb->s_blocksize;
-
- /* Don't touch certain kinds of inodes */
- if (IS_IMMUTABLE(inode_out))
- return -EPERM;
- if (IS_SWAPFILE(inode_in) ||
- IS_SWAPFILE(inode_out))
- return -ETXTBSY;
-
- /* Reflink only works within this filesystem. */
- if (inode_in->i_sb != inode_out->i_sb)
- return -EXDEV;
- same_inode = (inode_in->i_ino == inode_out->i_ino);
-
- /* Don't reflink dirs, pipes, sockets... */
- if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode))
- return -EISDIR;
- if (S_ISFIFO(inode_in->i_mode) || S_ISFIFO(inode_out->i_mode))
- return -EINVAL;
- if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode))
- return -EINVAL;
-
- /* Don't share DAX file data for now. */
- if (IS_DAX(inode_in) || IS_DAX(inode_out))
- return -EINVAL;
-
- /* Are we going all the way to the end? */
- isize = i_size_read(inode_in);
- if (isize == 0)
- return 0;
- if (len == 0)
- len = isize - pos_in;
-
- /* Ensure offsets don't wrap and the input is inside i_size */
- if (pos_in + len < pos_in || pos_out + len < pos_out ||
- pos_in + len > isize)
- return -EINVAL;
-
- /* Don't allow dedupe past EOF in the dest file */
- if (is_dedupe) {
- loff_t disize;
-
- disize = i_size_read(inode_out);
- if (pos_out >= disize || pos_out + len > disize)
- return -EINVAL;
- }
-
- /* If we're linking to EOF, continue to the block boundary. */
- if (pos_in + len == isize)
- blen = ALIGN(isize, bs) - pos_in;
- else
- blen = len;
-
- /* Only reflink if we're aligned to block boundaries */
- if (!IS_ALIGNED(pos_in, bs) || !IS_ALIGNED(pos_in + blen, bs) ||
- !IS_ALIGNED(pos_out, bs) || !IS_ALIGNED(pos_out + blen, bs))
- return -EINVAL;
-
- /* Don't allow overlapped reflink within the same file */
- if (same_inode && pos_out + blen > pos_in && pos_out < pos_in + blen)
- return -EINVAL;
-
- /* Wait for the completion of any pending IOs on srcfile */
- ret = xfs_file_wait_for_io(inode_in, pos_in, len);
- if (ret)
- goto out;
- ret = xfs_file_wait_for_io(inode_out, pos_out, len);
- if (ret)
- goto out;
-
- if (is_dedupe)
- flags |= XFS_REFLINK_DEDUPE;
- ret = xfs_reflink_remap_range(XFS_I(inode_in), pos_in, XFS_I(inode_out),
- pos_out, len, flags);
- if (ret < 0)
- goto out;
-
-out:
- return ret;
-}
-
STATIC ssize_t
xfs_file_copy_range(
struct file *file_in,
@@ -1086,7 +920,7 @@ xfs_file_copy_range(
{
int error;
- error = xfs_file_share_range(file_in, pos_in, file_out, pos_out,
+ error = xfs_reflink_remap_range(file_in, pos_in, file_out, pos_out,
len, false);
if (error)
return error;
@@ -1101,7 +935,7 @@ xfs_file_clone_range(
loff_t pos_out,
u64 len)
{
- return xfs_file_share_range(file_in, pos_in, file_out, pos_out,
+ return xfs_reflink_remap_range(file_in, pos_in, file_out, pos_out,
len, false);
}
@@ -1124,7 +958,7 @@ xfs_file_dedupe_range(
if (len > XFS_MAX_DEDUPE_LEN)
len = XFS_MAX_DEDUPE_LEN;
- error = xfs_file_share_range(src_file, loff, dst_file, dst_loff,
+ error = xfs_reflink_remap_range(src_file, loff, dst_file, dst_loff,
len, true);
if (error)
return error;
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 14796b744e0a..f295049db681 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -1656,9 +1656,9 @@ void
xfs_inode_set_cowblocks_tag(
xfs_inode_t *ip)
{
- trace_xfs_inode_set_eofblocks_tag(ip);
+ trace_xfs_inode_set_cowblocks_tag(ip);
return __xfs_inode_set_eofblocks_tag(ip, xfs_queue_cowblocks,
- trace_xfs_perag_set_eofblocks,
+ trace_xfs_perag_set_cowblocks,
XFS_ICI_COWBLOCKS_TAG);
}
@@ -1666,7 +1666,7 @@ void
xfs_inode_clear_cowblocks_tag(
xfs_inode_t *ip)
{
- trace_xfs_inode_clear_eofblocks_tag(ip);
+ trace_xfs_inode_clear_cowblocks_tag(ip);
return __xfs_inode_clear_eofblocks_tag(ip,
- trace_xfs_perag_clear_eofblocks, XFS_ICI_COWBLOCKS_TAG);
+ trace_xfs_perag_clear_cowblocks, XFS_ICI_COWBLOCKS_TAG);
}
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index d907eb9f8ef3..436e109bb01e 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -566,6 +566,17 @@ xfs_file_iomap_begin_delay(
xfs_bmap_search_extents(ip, offset_fsb, XFS_DATA_FORK, &eof, &idx,
&got, &prev);
if (!eof && got.br_startoff <= offset_fsb) {
+ if (xfs_is_reflink_inode(ip)) {
+ bool shared;
+
+ end_fsb = min(XFS_B_TO_FSB(mp, offset + count),
+ maxbytes_fsb);
+ xfs_trim_extent(&got, offset_fsb, end_fsb - offset_fsb);
+ error = xfs_reflink_reserve_cow(ip, &got, &shared);
+ if (error)
+ goto out_unlock;
+ }
+
trace_xfs_iomap_found(ip, offset, count, 0, &got);
goto done;
}
@@ -961,19 +972,13 @@ xfs_file_iomap_begin(
struct xfs_mount *mp = ip->i_mount;
struct xfs_bmbt_irec imap;
xfs_fileoff_t offset_fsb, end_fsb;
- bool shared, trimmed;
int nimaps = 1, error = 0;
+ bool shared = false, trimmed = false;
unsigned lockmode;
if (XFS_FORCED_SHUTDOWN(mp))
return -EIO;
- if ((flags & (IOMAP_WRITE | IOMAP_ZERO)) && xfs_is_reflink_inode(ip)) {
- error = xfs_reflink_reserve_cow_range(ip, offset, length);
- if (error < 0)
- return error;
- }
-
if ((flags & IOMAP_WRITE) && !IS_DAX(inode) &&
!xfs_get_extsz_hint(ip)) {
/* Reserve delalloc blocks for regular writeback. */
@@ -981,7 +986,16 @@ xfs_file_iomap_begin(
iomap);
}
- lockmode = xfs_ilock_data_map_shared(ip);
+ /*
+ * COW writes will allocate delalloc space, so we need to make sure
+ * to take the lock exclusively here.
+ */
+ if ((flags & (IOMAP_WRITE | IOMAP_ZERO)) && xfs_is_reflink_inode(ip)) {
+ lockmode = XFS_ILOCK_EXCL;
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
+ } else {
+ lockmode = xfs_ilock_data_map_shared(ip);
+ }
ASSERT(offset <= mp->m_super->s_maxbytes);
if ((xfs_fsize_t)offset + length > mp->m_super->s_maxbytes)
@@ -991,16 +1005,24 @@ xfs_file_iomap_begin(
error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap,
&nimaps, 0);
- if (error) {
- xfs_iunlock(ip, lockmode);
- return error;
+ if (error)
+ goto out_unlock;
+
+ if (flags & IOMAP_REPORT) {
+ /* Trim the mapping to the nearest shared extent boundary. */
+ error = xfs_reflink_trim_around_shared(ip, &imap, &shared,
+ &trimmed);
+ if (error)
+ goto out_unlock;
}
- /* Trim the mapping to the nearest shared extent boundary. */
- error = xfs_reflink_trim_around_shared(ip, &imap, &shared, &trimmed);
- if (error) {
- xfs_iunlock(ip, lockmode);
- return error;
+ if ((flags & (IOMAP_WRITE | IOMAP_ZERO)) && xfs_is_reflink_inode(ip)) {
+ error = xfs_reflink_reserve_cow(ip, &imap, &shared);
+ if (error)
+ goto out_unlock;
+
+ end_fsb = imap.br_startoff + imap.br_blockcount;
+ length = XFS_FSB_TO_B(mp, end_fsb) - offset;
}
if ((flags & IOMAP_WRITE) && imap_needs_alloc(inode, &imap, nimaps)) {
@@ -1039,6 +1061,9 @@ xfs_file_iomap_begin(
if (shared)
iomap->flags |= IOMAP_F_SHARED;
return 0;
+out_unlock:
+ xfs_iunlock(ip, lockmode);
+ return error;
}
static int
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index fc7873942bea..b341f10cf481 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -1009,6 +1009,7 @@ xfs_mountfs(
out_quota:
xfs_qm_unmount_quotas(mp);
out_rtunmount:
+ mp->m_super->s_flags &= ~MS_ACTIVE;
xfs_rtunmount_inodes(mp);
out_rele_rip:
IRELE(rip);
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index 5965e9455d91..a279b4e7f5fe 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -182,7 +182,8 @@ xfs_reflink_trim_around_shared(
if (!xfs_is_reflink_inode(ip) ||
ISUNWRITTEN(irec) ||
irec->br_startblock == HOLESTARTBLOCK ||
- irec->br_startblock == DELAYSTARTBLOCK) {
+ irec->br_startblock == DELAYSTARTBLOCK ||
+ isnullstartblock(irec->br_startblock)) {
*shared = false;
return 0;
}
@@ -227,50 +228,54 @@ xfs_reflink_trim_around_shared(
}
}
-/* Create a CoW reservation for a range of blocks within a file. */
-static int
-__xfs_reflink_reserve_cow(
+/*
+ * Trim the passed in imap to the next shared/unshared extent boundary, and
+ * if imap->br_startoff points to a shared extent reserve space for it in the
+ * COW fork. In this case *shared is set to true, else to false.
+ *
+ * Note that imap will always contain the block numbers for the existing blocks
+ * in the data fork, as the upper layers need them for read-modify-write
+ * operations.
+ */
+int
+xfs_reflink_reserve_cow(
struct xfs_inode *ip,
- xfs_fileoff_t *offset_fsb,
- xfs_fileoff_t end_fsb,
- bool *skipped)
+ struct xfs_bmbt_irec *imap,
+ bool *shared)
{
- struct xfs_bmbt_irec got, prev, imap;
- xfs_fileoff_t orig_end_fsb;
- int nimaps, eof = 0, error = 0;
- bool shared = false, trimmed = false;
+ struct xfs_bmbt_irec got, prev;
+ xfs_fileoff_t end_fsb, orig_end_fsb;
+ int eof = 0, error = 0;
+ bool trimmed;
xfs_extnum_t idx;
xfs_extlen_t align;
- /* Already reserved? Skip the refcount btree access. */
- xfs_bmap_search_extents(ip, *offset_fsb, XFS_COW_FORK, &eof, &idx,
+ /*
+ * Search the COW fork extent list first. This serves two purposes:
+ * first this implement the speculative preallocation using cowextisze,
+ * so that we also unshared block adjacent to shared blocks instead
+ * of just the shared blocks themselves. Second the lookup in the
+ * extent list is generally faster than going out to the shared extent
+ * tree.
+ */
+ xfs_bmap_search_extents(ip, imap->br_startoff, XFS_COW_FORK, &eof, &idx,
&got, &prev);
- if (!eof && got.br_startoff <= *offset_fsb) {
- end_fsb = orig_end_fsb = got.br_startoff + got.br_blockcount;
- trace_xfs_reflink_cow_found(ip, &got);
- goto done;
- }
+ if (!eof && got.br_startoff <= imap->br_startoff) {
+ trace_xfs_reflink_cow_found(ip, imap);
+ xfs_trim_extent(imap, got.br_startoff, got.br_blockcount);
- /* Read extent from the source file. */
- nimaps = 1;
- error = xfs_bmapi_read(ip, *offset_fsb, end_fsb - *offset_fsb,
- &imap, &nimaps, 0);
- if (error)
- goto out_unlock;
- ASSERT(nimaps == 1);
+ *shared = true;
+ return 0;
+ }
/* Trim the mapping to the nearest shared extent boundary. */
- error = xfs_reflink_trim_around_shared(ip, &imap, &shared, &trimmed);
+ error = xfs_reflink_trim_around_shared(ip, imap, shared, &trimmed);
if (error)
- goto out_unlock;
-
- end_fsb = orig_end_fsb = imap.br_startoff + imap.br_blockcount;
+ return error;
/* Not shared? Just report the (potentially capped) extent. */
- if (!shared) {
- *skipped = true;
- goto done;
- }
+ if (!*shared)
+ return 0;
/*
* Fork all the shared blocks from our write offset until the end of
@@ -278,72 +283,38 @@ __xfs_reflink_reserve_cow(
*/
error = xfs_qm_dqattach_locked(ip, 0);
if (error)
- goto out_unlock;
+ return error;
+
+ end_fsb = orig_end_fsb = imap->br_startoff + imap->br_blockcount;
align = xfs_eof_alignment(ip, xfs_get_cowextsz_hint(ip));
if (align)
end_fsb = roundup_64(end_fsb, align);
retry:
- error = xfs_bmapi_reserve_delalloc(ip, XFS_COW_FORK, *offset_fsb,
- end_fsb - *offset_fsb, &got,
- &prev, &idx, eof);
+ error = xfs_bmapi_reserve_delalloc(ip, XFS_COW_FORK, imap->br_startoff,
+ end_fsb - imap->br_startoff, &got, &prev, &idx, eof);
switch (error) {
case 0:
break;
case -ENOSPC:
case -EDQUOT:
/* retry without any preallocation */
- trace_xfs_reflink_cow_enospc(ip, &imap);
+ trace_xfs_reflink_cow_enospc(ip, imap);
if (end_fsb != orig_end_fsb) {
end_fsb = orig_end_fsb;
goto retry;
}
/*FALLTHRU*/
default:
- goto out_unlock;
+ return error;
}
if (end_fsb != orig_end_fsb)
xfs_inode_set_cowblocks_tag(ip);
trace_xfs_reflink_cow_alloc(ip, &got);
-done:
- *offset_fsb = end_fsb;
-out_unlock:
- return error;
-}
-
-/* Create a CoW reservation for part of a file. */
-int
-xfs_reflink_reserve_cow_range(
- struct xfs_inode *ip,
- xfs_off_t offset,
- xfs_off_t count)
-{
- struct xfs_mount *mp = ip->i_mount;
- xfs_fileoff_t offset_fsb, end_fsb;
- bool skipped = false;
- int error;
-
- trace_xfs_reflink_reserve_cow_range(ip, offset, count);
-
- offset_fsb = XFS_B_TO_FSBT(mp, offset);
- end_fsb = XFS_B_TO_FSB(mp, offset + count);
-
- xfs_ilock(ip, XFS_ILOCK_EXCL);
- while (offset_fsb < end_fsb) {
- error = __xfs_reflink_reserve_cow(ip, &offset_fsb, end_fsb,
- &skipped);
- if (error) {
- trace_xfs_reflink_reserve_cow_range_error(ip, error,
- _RET_IP_);
- break;
- }
- }
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
-
- return error;
+ return 0;
}
/* Allocate all CoW reservations covering a range of blocks in a file. */
@@ -358,9 +329,8 @@ __xfs_reflink_allocate_cow(
struct xfs_defer_ops dfops;
struct xfs_trans *tp;
xfs_fsblock_t first_block;
- xfs_fileoff_t next_fsb;
int nimaps = 1, error;
- bool skipped = false;
+ bool shared;
xfs_defer_init(&dfops, &first_block);
@@ -371,33 +341,38 @@ __xfs_reflink_allocate_cow(
xfs_ilock(ip, XFS_ILOCK_EXCL);
- next_fsb = *offset_fsb;
- error = __xfs_reflink_reserve_cow(ip, &next_fsb, end_fsb, &skipped);
+ /* Read extent from the source file. */
+ nimaps = 1;
+ error = xfs_bmapi_read(ip, *offset_fsb, end_fsb - *offset_fsb,
+ &imap, &nimaps, 0);
+ if (error)
+ goto out_unlock;
+ ASSERT(nimaps == 1);
+
+ error = xfs_reflink_reserve_cow(ip, &imap, &shared);
if (error)
goto out_trans_cancel;
- if (skipped) {
- *offset_fsb = next_fsb;
+ if (!shared) {
+ *offset_fsb = imap.br_startoff + imap.br_blockcount;
goto out_trans_cancel;
}
xfs_trans_ijoin(tp, ip, 0);
- error = xfs_bmapi_write(tp, ip, *offset_fsb, next_fsb - *offset_fsb,
+ error = xfs_bmapi_write(tp, ip, imap.br_startoff, imap.br_blockcount,
XFS_BMAPI_COWFORK, &first_block,
XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK),
&imap, &nimaps, &dfops);
if (error)
goto out_trans_cancel;
- /* We might not have been able to map the whole delalloc extent */
- *offset_fsb = min(*offset_fsb + imap.br_blockcount, next_fsb);
-
error = xfs_defer_finish(&tp, &dfops, NULL);
if (error)
goto out_trans_cancel;
error = xfs_trans_commit(tp);
+ *offset_fsb = imap.br_startoff + imap.br_blockcount;
out_unlock:
xfs_iunlock(ip, XFS_ILOCK_EXCL);
return error;
@@ -536,58 +511,49 @@ xfs_reflink_cancel_cow_blocks(
xfs_fileoff_t offset_fsb,
xfs_fileoff_t end_fsb)
{
- struct xfs_bmbt_irec irec;
- xfs_filblks_t count_fsb;
+ struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
+ struct xfs_bmbt_irec got, prev, del;
+ xfs_extnum_t idx;
xfs_fsblock_t firstfsb;
struct xfs_defer_ops dfops;
- int error = 0;
- int nimaps;
+ int error = 0, eof = 0;
if (!xfs_is_reflink_inode(ip))
return 0;
- /* Go find the old extent in the CoW fork. */
- while (offset_fsb < end_fsb) {
- nimaps = 1;
- count_fsb = (xfs_filblks_t)(end_fsb - offset_fsb);
- error = xfs_bmapi_read(ip, offset_fsb, count_fsb, &irec,
- &nimaps, XFS_BMAPI_COWFORK);
- if (error)
- break;
- ASSERT(nimaps == 1);
-
- trace_xfs_reflink_cancel_cow(ip, &irec);
+ xfs_bmap_search_extents(ip, offset_fsb, XFS_COW_FORK, &eof, &idx,
+ &got, &prev);
+ if (eof)
+ return 0;
- if (irec.br_startblock == DELAYSTARTBLOCK) {
- /* Free a delayed allocation. */
- xfs_mod_fdblocks(ip->i_mount, irec.br_blockcount,
- false);
- ip->i_delayed_blks -= irec.br_blockcount;
+ while (got.br_startoff < end_fsb) {
+ del = got;
+ xfs_trim_extent(&del, offset_fsb, end_fsb - offset_fsb);
+ trace_xfs_reflink_cancel_cow(ip, &del);
- /* Remove the mapping from the CoW fork. */
- error = xfs_bunmapi_cow(ip, &irec);
+ if (isnullstartblock(del.br_startblock)) {
+ error = xfs_bmap_del_extent_delay(ip, XFS_COW_FORK,
+ &idx, &got, &del);
if (error)
break;
- } else if (irec.br_startblock == HOLESTARTBLOCK) {
- /* empty */
} else {
xfs_trans_ijoin(*tpp, ip, 0);
xfs_defer_init(&dfops, &firstfsb);
/* Free the CoW orphan record. */
error = xfs_refcount_free_cow_extent(ip->i_mount,
- &dfops, irec.br_startblock,
- irec.br_blockcount);
+ &dfops, del.br_startblock,
+ del.br_blockcount);
if (error)
break;
xfs_bmap_add_free(ip->i_mount, &dfops,
- irec.br_startblock, irec.br_blockcount,
+ del.br_startblock, del.br_blockcount,
NULL);
/* Update quota accounting */
xfs_trans_mod_dquot_byino(*tpp, ip, XFS_TRANS_DQ_BCOUNT,
- -(long)irec.br_blockcount);
+ -(long)del.br_blockcount);
/* Roll the transaction */
error = xfs_defer_finish(tpp, &dfops, ip);
@@ -597,15 +563,18 @@ xfs_reflink_cancel_cow_blocks(
}
/* Remove the mapping from the CoW fork. */
- error = xfs_bunmapi_cow(ip, &irec);
- if (error)
- break;
+ xfs_bmap_del_extent_cow(ip, &idx, &got, &del);
}
- /* Roll on... */
- offset_fsb = irec.br_startoff + irec.br_blockcount;
+ if (++idx >= ifp->if_bytes / sizeof(struct xfs_bmbt_rec))
+ break;
+ xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx), &got);
}
+ /* clear tag if cow fork is emptied */
+ if (!ifp->if_bytes)
+ xfs_inode_clear_cowblocks_tag(ip);
+
return error;
}
@@ -668,25 +637,26 @@ xfs_reflink_end_cow(
xfs_off_t offset,
xfs_off_t count)
{
- struct xfs_bmbt_irec irec;
- struct xfs_bmbt_irec uirec;
+ struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
+ struct xfs_bmbt_irec got, prev, del;
struct xfs_trans *tp;
xfs_fileoff_t offset_fsb;
xfs_fileoff_t end_fsb;
- xfs_filblks_t count_fsb;
xfs_fsblock_t firstfsb;
struct xfs_defer_ops dfops;
- int error;
+ int error, eof = 0;
unsigned int resblks;
- xfs_filblks_t ilen;
xfs_filblks_t rlen;
- int nimaps;
+ xfs_extnum_t idx;
trace_xfs_reflink_end_cow(ip, offset, count);
+ /* No COW extents? That's easy! */
+ if (ifp->if_bytes == 0)
+ return 0;
+
offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset);
end_fsb = XFS_B_TO_FSB(ip->i_mount, offset + count);
- count_fsb = (xfs_filblks_t)(end_fsb - offset_fsb);
/* Start a rolling transaction to switch the mappings */
resblks = XFS_EXTENTADD_SPACE_RES(ip->i_mount, XFS_DATA_FORK);
@@ -698,72 +668,65 @@ xfs_reflink_end_cow(
xfs_ilock(ip, XFS_ILOCK_EXCL);
xfs_trans_ijoin(tp, ip, 0);
- /* Go find the old extent in the CoW fork. */
- while (offset_fsb < end_fsb) {
- /* Read extent from the source file */
- nimaps = 1;
- count_fsb = (xfs_filblks_t)(end_fsb - offset_fsb);
- error = xfs_bmapi_read(ip, offset_fsb, count_fsb, &irec,
- &nimaps, XFS_BMAPI_COWFORK);
- if (error)
- goto out_cancel;
- ASSERT(nimaps == 1);
+ xfs_bmap_search_extents(ip, end_fsb - 1, XFS_COW_FORK, &eof, &idx,
+ &got, &prev);
- ASSERT(irec.br_startblock != DELAYSTARTBLOCK);
- trace_xfs_reflink_cow_remap(ip, &irec);
+ /* If there is a hole at end_fsb - 1 go to the previous extent */
+ if (eof || got.br_startoff > end_fsb) {
+ ASSERT(idx > 0);
+ xfs_bmbt_get_all(xfs_iext_get_ext(ifp, --idx), &got);
+ }
- /*
- * We can have a hole in the CoW fork if part of a directio
- * write is CoW but part of it isn't.
- */
- rlen = ilen = irec.br_blockcount;
- if (irec.br_startblock == HOLESTARTBLOCK)
+ /* Walk backwards until we're out of the I/O range... */
+ while (got.br_startoff + got.br_blockcount > offset_fsb) {
+ del = got;
+ xfs_trim_extent(&del, offset_fsb, end_fsb - offset_fsb);
+
+ /* Extent delete may have bumped idx forward */
+ if (!del.br_blockcount) {
+ idx--;
goto next_extent;
+ }
+
+ ASSERT(!isnullstartblock(got.br_startblock));
/* Unmap the old blocks in the data fork. */
- while (rlen) {
- xfs_defer_init(&dfops, &firstfsb);
- error = __xfs_bunmapi(tp, ip, irec.br_startoff,
- &rlen, 0, 1, &firstfsb, &dfops);
- if (error)
- goto out_defer;
-
- /*
- * Trim the extent to whatever got unmapped.
- * Remember, bunmapi works backwards.
- */
- uirec.br_startblock = irec.br_startblock + rlen;
- uirec.br_startoff = irec.br_startoff + rlen;
- uirec.br_blockcount = irec.br_blockcount - rlen;
- irec.br_blockcount = rlen;
- trace_xfs_reflink_cow_remap_piece(ip, &uirec);
+ xfs_defer_init(&dfops, &firstfsb);
+ rlen = del.br_blockcount;
+ error = __xfs_bunmapi(tp, ip, del.br_startoff, &rlen, 0, 1,
+ &firstfsb, &dfops);
+ if (error)
+ goto out_defer;
- /* Free the CoW orphan record. */
- error = xfs_refcount_free_cow_extent(tp->t_mountp,
- &dfops, uirec.br_startblock,
- uirec.br_blockcount);
- if (error)
- goto out_defer;
+ /* Trim the extent to whatever got unmapped. */
+ if (rlen) {
+ xfs_trim_extent(&del, del.br_startoff + rlen,
+ del.br_blockcount - rlen);
+ }
+ trace_xfs_reflink_cow_remap(ip, &del);
- /* Map the new blocks into the data fork. */
- error = xfs_bmap_map_extent(tp->t_mountp, &dfops,
- ip, &uirec);
- if (error)
- goto out_defer;
+ /* Free the CoW orphan record. */
+ error = xfs_refcount_free_cow_extent(tp->t_mountp, &dfops,
+ del.br_startblock, del.br_blockcount);
+ if (error)
+ goto out_defer;
- /* Remove the mapping from the CoW fork. */
- error = xfs_bunmapi_cow(ip, &uirec);
- if (error)
- goto out_defer;
+ /* Map the new blocks into the data fork. */
+ error = xfs_bmap_map_extent(tp->t_mountp, &dfops, ip, &del);
+ if (error)
+ goto out_defer;
- error = xfs_defer_finish(&tp, &dfops, ip);
- if (error)
- goto out_defer;
- }
+ /* Remove the mapping from the CoW fork. */
+ xfs_bmap_del_extent_cow(ip, &idx, &got, &del);
+
+ error = xfs_defer_finish(&tp, &dfops, ip);
+ if (error)
+ goto out_defer;
next_extent:
- /* Roll on... */
- offset_fsb = irec.br_startoff + ilen;
+ if (idx < 0)
+ break;
+ xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx), &got);
}
error = xfs_trans_commit(tp);
@@ -774,7 +737,6 @@ next_extent:
out_defer:
xfs_defer_cancel(&dfops);
-out_cancel:
xfs_trans_cancel(tp);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
out:
@@ -1312,19 +1274,26 @@ out_error:
*/
int
xfs_reflink_remap_range(
- struct xfs_inode *src,
- xfs_off_t srcoff,
- struct xfs_inode *dest,
- xfs_off_t destoff,
- xfs_off_t len,
- unsigned int flags)
+ struct file *file_in,
+ loff_t pos_in,
+ struct file *file_out,
+ loff_t pos_out,
+ u64 len,
+ bool is_dedupe)
{
+ struct inode *inode_in = file_inode(file_in);
+ struct xfs_inode *src = XFS_I(inode_in);
+ struct inode *inode_out = file_inode(file_out);
+ struct xfs_inode *dest = XFS_I(inode_out);
struct xfs_mount *mp = src->i_mount;
+ loff_t bs = inode_out->i_sb->s_blocksize;
+ bool same_inode = (inode_in == inode_out);
xfs_fileoff_t sfsbno, dfsbno;
xfs_filblks_t fsblen;
- int error;
xfs_extlen_t cowextsize;
- bool is_same;
+ loff_t isize;
+ ssize_t ret;
+ loff_t blen;
if (!xfs_sb_version_hasreflink(&mp->m_sb))
return -EOPNOTSUPP;
@@ -1332,17 +1301,8 @@ xfs_reflink_remap_range(
if (XFS_FORCED_SHUTDOWN(mp))
return -EIO;
- /* Don't reflink realtime inodes */
- if (XFS_IS_REALTIME_INODE(src) || XFS_IS_REALTIME_INODE(dest))
- return -EINVAL;
-
- if (flags & ~XFS_REFLINK_ALL)
- return -EINVAL;
-
- trace_xfs_reflink_remap_range(src, srcoff, len, dest, destoff);
-
/* Lock both files against IO */
- if (src->i_ino == dest->i_ino) {
+ if (same_inode) {
xfs_ilock(src, XFS_IOLOCK_EXCL);
xfs_ilock(src, XFS_MMAPLOCK_EXCL);
} else {
@@ -1350,39 +1310,126 @@ xfs_reflink_remap_range(
xfs_lock_two_inodes(src, dest, XFS_MMAPLOCK_EXCL);
}
+ /* Don't touch certain kinds of inodes */
+ ret = -EPERM;
+ if (IS_IMMUTABLE(inode_out))
+ goto out_unlock;
+
+ ret = -ETXTBSY;
+ if (IS_SWAPFILE(inode_in) || IS_SWAPFILE(inode_out))
+ goto out_unlock;
+
+
+ /* Don't reflink dirs, pipes, sockets... */
+ ret = -EISDIR;
+ if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode))
+ goto out_unlock;
+ ret = -EINVAL;
+ if (S_ISFIFO(inode_in->i_mode) || S_ISFIFO(inode_out->i_mode))
+ goto out_unlock;
+ if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode))
+ goto out_unlock;
+
+ /* Don't reflink realtime inodes */
+ if (XFS_IS_REALTIME_INODE(src) || XFS_IS_REALTIME_INODE(dest))
+ goto out_unlock;
+
+ /* Don't share DAX file data for now. */
+ if (IS_DAX(inode_in) || IS_DAX(inode_out))
+ goto out_unlock;
+
+ /* Are we going all the way to the end? */
+ isize = i_size_read(inode_in);
+ if (isize == 0) {
+ ret = 0;
+ goto out_unlock;
+ }
+
+ if (len == 0)
+ len = isize - pos_in;
+
+ /* Ensure offsets don't wrap and the input is inside i_size */
+ if (pos_in + len < pos_in || pos_out + len < pos_out ||
+ pos_in + len > isize)
+ goto out_unlock;
+
+ /* Don't allow dedupe past EOF in the dest file */
+ if (is_dedupe) {
+ loff_t disize;
+
+ disize = i_size_read(inode_out);
+ if (pos_out >= disize || pos_out + len > disize)
+ goto out_unlock;
+ }
+
+ /* If we're linking to EOF, continue to the block boundary. */
+ if (pos_in + len == isize)
+ blen = ALIGN(isize, bs) - pos_in;
+ else
+ blen = len;
+
+ /* Only reflink if we're aligned to block boundaries */
+ if (!IS_ALIGNED(pos_in, bs) || !IS_ALIGNED(pos_in + blen, bs) ||
+ !IS_ALIGNED(pos_out, bs) || !IS_ALIGNED(pos_out + blen, bs))
+ goto out_unlock;
+
+ /* Don't allow overlapped reflink within the same file */
+ if (same_inode) {
+ if (pos_out + blen > pos_in && pos_out < pos_in + blen)
+ goto out_unlock;
+ }
+
+ /* Wait for the completion of any pending IOs on both files */
+ inode_dio_wait(inode_in);
+ if (!same_inode)
+ inode_dio_wait(inode_out);
+
+ ret = filemap_write_and_wait_range(inode_in->i_mapping,
+ pos_in, pos_in + len - 1);
+ if (ret)
+ goto out_unlock;
+
+ ret = filemap_write_and_wait_range(inode_out->i_mapping,
+ pos_out, pos_out + len - 1);
+ if (ret)
+ goto out_unlock;
+
+ trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out);
+
/*
* Check that the extents are the same.
*/
- if (flags & XFS_REFLINK_DEDUPE) {
- is_same = false;
- error = xfs_compare_extents(VFS_I(src), srcoff, VFS_I(dest),
- destoff, len, &is_same);
- if (error)
- goto out_error;
+ if (is_dedupe) {
+ bool is_same = false;
+
+ ret = xfs_compare_extents(inode_in, pos_in, inode_out, pos_out,
+ len, &is_same);
+ if (ret)
+ goto out_unlock;
if (!is_same) {
- error = -EBADE;
- goto out_error;
+ ret = -EBADE;
+ goto out_unlock;
}
}
- error = xfs_reflink_set_inode_flag(src, dest);
- if (error)
- goto out_error;
+ ret = xfs_reflink_set_inode_flag(src, dest);
+ if (ret)
+ goto out_unlock;
/*
* Invalidate the page cache so that we can clear any CoW mappings
* in the destination file.
*/
- truncate_inode_pages_range(&VFS_I(dest)->i_data, destoff,
- PAGE_ALIGN(destoff + len) - 1);
+ truncate_inode_pages_range(&inode_out->i_data, pos_out,
+ PAGE_ALIGN(pos_out + len) - 1);
- dfsbno = XFS_B_TO_FSBT(mp, destoff);
- sfsbno = XFS_B_TO_FSBT(mp, srcoff);
+ dfsbno = XFS_B_TO_FSBT(mp, pos_out);
+ sfsbno = XFS_B_TO_FSBT(mp, pos_in);
fsblen = XFS_B_TO_FSB(mp, len);
- error = xfs_reflink_remap_blocks(src, sfsbno, dest, dfsbno, fsblen,
- destoff + len);
- if (error)
- goto out_error;
+ ret = xfs_reflink_remap_blocks(src, sfsbno, dest, dfsbno, fsblen,
+ pos_out + len);
+ if (ret)
+ goto out_unlock;
/*
* Carry the cowextsize hint from src to dest if we're sharing the
@@ -1390,26 +1437,24 @@ xfs_reflink_remap_range(
* has a cowextsize hint, and the destination file does not.
*/
cowextsize = 0;
- if (srcoff == 0 && len == i_size_read(VFS_I(src)) &&
+ if (pos_in == 0 && len == i_size_read(inode_in) &&
(src->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE) &&
- destoff == 0 && len >= i_size_read(VFS_I(dest)) &&
+ pos_out == 0 && len >= i_size_read(inode_out) &&
!(dest->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE))
cowextsize = src->i_d.di_cowextsize;
- error = xfs_reflink_update_dest(dest, destoff + len, cowextsize);
- if (error)
- goto out_error;
+ ret = xfs_reflink_update_dest(dest, pos_out + len, cowextsize);
-out_error:
+out_unlock:
xfs_iunlock(src, XFS_MMAPLOCK_EXCL);
xfs_iunlock(src, XFS_IOLOCK_EXCL);
if (src->i_ino != dest->i_ino) {
xfs_iunlock(dest, XFS_MMAPLOCK_EXCL);
xfs_iunlock(dest, XFS_IOLOCK_EXCL);
}
- if (error)
- trace_xfs_reflink_remap_range_error(dest, error, _RET_IP_);
- return error;
+ if (ret)
+ trace_xfs_reflink_remap_range_error(dest, ret, _RET_IP_);
+ return ret;
}
/*
diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h
index 5dc3c8ac12aa..fad11607c9ad 100644
--- a/fs/xfs/xfs_reflink.h
+++ b/fs/xfs/xfs_reflink.h
@@ -26,8 +26,8 @@ extern int xfs_reflink_find_shared(struct xfs_mount *mp, xfs_agnumber_t agno,
extern int xfs_reflink_trim_around_shared(struct xfs_inode *ip,
struct xfs_bmbt_irec *irec, bool *shared, bool *trimmed);
-extern int xfs_reflink_reserve_cow_range(struct xfs_inode *ip,
- xfs_off_t offset, xfs_off_t count);
+extern int xfs_reflink_reserve_cow(struct xfs_inode *ip,
+ struct xfs_bmbt_irec *imap, bool *shared);
extern int xfs_reflink_allocate_cow_range(struct xfs_inode *ip,
xfs_off_t offset, xfs_off_t count);
extern bool xfs_reflink_find_cow_mapping(struct xfs_inode *ip, xfs_off_t offset,
@@ -43,11 +43,8 @@ extern int xfs_reflink_cancel_cow_range(struct xfs_inode *ip, xfs_off_t offset,
extern int xfs_reflink_end_cow(struct xfs_inode *ip, xfs_off_t offset,
xfs_off_t count);
extern int xfs_reflink_recover_cow(struct xfs_mount *mp);
-#define XFS_REFLINK_DEDUPE 1 /* only reflink if contents match */
-#define XFS_REFLINK_ALL (XFS_REFLINK_DEDUPE)
-extern int xfs_reflink_remap_range(struct xfs_inode *src, xfs_off_t srcoff,
- struct xfs_inode *dest, xfs_off_t destoff, xfs_off_t len,
- unsigned int flags);
+extern int xfs_reflink_remap_range(struct file *file_in, loff_t pos_in,
+ struct file *file_out, loff_t pos_out, u64 len, bool is_dedupe);
extern int xfs_reflink_clear_inode_flag(struct xfs_inode *ip,
struct xfs_trans **tpp);
extern int xfs_reflink_unshare(struct xfs_inode *ip, xfs_off_t offset,
diff --git a/fs/xfs/xfs_sysfs.c b/fs/xfs/xfs_sysfs.c
index 5f8d55d29a11..276d3023d60f 100644
--- a/fs/xfs/xfs_sysfs.c
+++ b/fs/xfs/xfs_sysfs.c
@@ -512,13 +512,13 @@ static struct attribute *xfs_error_attrs[] = {
};
-struct kobj_type xfs_error_cfg_ktype = {
+static struct kobj_type xfs_error_cfg_ktype = {
.release = xfs_sysfs_release,
.sysfs_ops = &xfs_sysfs_ops,
.default_attrs = xfs_error_attrs,
};
-struct kobj_type xfs_error_ktype = {
+static struct kobj_type xfs_error_ktype = {
.release = xfs_sysfs_release,
.sysfs_ops = &xfs_sysfs_ops,
};
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index ad188d3a83f3..0907752be62d 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -3346,7 +3346,7 @@ DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_alloc);
DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_found);
DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_enospc);
-DEFINE_RW_EVENT(xfs_reflink_reserve_cow_range);
+DEFINE_RW_EVENT(xfs_reflink_reserve_cow);
DEFINE_RW_EVENT(xfs_reflink_allocate_cow_range);
DEFINE_INODE_IREC_EVENT(xfs_reflink_bounce_dio_write);
@@ -3356,9 +3356,7 @@ DEFINE_INODE_IREC_EVENT(xfs_reflink_trim_irec);
DEFINE_SIMPLE_IO_EVENT(xfs_reflink_cancel_cow_range);
DEFINE_SIMPLE_IO_EVENT(xfs_reflink_end_cow);
DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_remap);
-DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_remap_piece);
-DEFINE_INODE_ERROR_EVENT(xfs_reflink_reserve_cow_range_error);
DEFINE_INODE_ERROR_EVENT(xfs_reflink_allocate_cow_range_error);
DEFINE_INODE_ERROR_EVENT(xfs_reflink_cancel_cow_range_error);
DEFINE_INODE_ERROR_EVENT(xfs_reflink_end_cow_error);