summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/btrfs/block-group.c44
-rw-r--r--fs/btrfs/ctree.h2
-rw-r--r--fs/btrfs/file.c46
-rw-r--r--fs/btrfs/inode.c39
-rw-r--r--fs/btrfs/ioctl.c2
-rw-r--r--fs/btrfs/tree-log.c5
-rw-r--r--fs/erofs/zdata.h20
-rw-r--r--fs/file_table.c2
-rw-r--r--fs/io_uring.c22
-rw-r--r--fs/nfs/direct.c13
-rw-r--r--fs/nfs/file.c1
-rw-r--r--fs/nfs/flexfilelayout/flexfilelayout.c11
-rw-r--r--fs/nfs/nfs4namespace.c1
-rw-r--r--fs/ocfs2/dlmglue.c17
-rw-r--r--fs/ocfs2/ocfs2.h1
-rw-r--r--fs/ocfs2/ocfs2_fs.h4
-rw-r--r--fs/ocfs2/suballoc.c9
17 files changed, 169 insertions, 70 deletions
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index 176e8a292fd1..c037ef514b64 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -940,7 +940,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
path = btrfs_alloc_path();
if (!path) {
ret = -ENOMEM;
- goto out_put_group;
+ goto out;
}
/*
@@ -978,7 +978,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
ret = btrfs_orphan_add(trans, BTRFS_I(inode));
if (ret) {
btrfs_add_delayed_iput(inode);
- goto out_put_group;
+ goto out;
}
clear_nlink(inode);
/* One for the block groups ref */
@@ -1001,13 +1001,13 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1);
if (ret < 0)
- goto out_put_group;
+ goto out;
if (ret > 0)
btrfs_release_path(path);
if (ret == 0) {
ret = btrfs_del_item(trans, tree_root, path);
if (ret)
- goto out_put_group;
+ goto out;
btrfs_release_path(path);
}
@@ -1016,6 +1016,9 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
&fs_info->block_group_cache_tree);
RB_CLEAR_NODE(&block_group->cache_node);
+ /* Once for the block groups rbtree */
+ btrfs_put_block_group(block_group);
+
if (fs_info->first_logical_byte == block_group->start)
fs_info->first_logical_byte = (u64)-1;
spin_unlock(&fs_info->block_group_cache_lock);
@@ -1089,6 +1092,25 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
spin_unlock(&block_group->space_info->lock);
+ /*
+ * Remove the free space for the block group from the free space tree
+ * and the block group's item from the extent tree before marking the
+ * block group as removed. This is to prevent races with tasks that
+ * freeze and unfreeze a block group, this task and another task
+ * allocating a new block group - the unfreeze task ends up removing
+ * the block group's extent map before the task calling this function
+ * deletes the block group item from the extent tree, allowing for
+ * another task to attempt to create another block group with the same
+ * item key (and failing with -EEXIST and a transaction abort).
+ */
+ ret = remove_block_group_free_space(trans, block_group);
+ if (ret)
+ goto out;
+
+ ret = remove_block_group_item(trans, path, block_group);
+ if (ret < 0)
+ goto out;
+
mutex_lock(&fs_info->chunk_mutex);
spin_lock(&block_group->lock);
block_group->removed = 1;
@@ -1123,17 +1145,6 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
mutex_unlock(&fs_info->chunk_mutex);
- ret = remove_block_group_free_space(trans, block_group);
- if (ret)
- goto out_put_group;
-
- /* Once for the block groups rbtree */
- btrfs_put_block_group(block_group);
-
- ret = remove_block_group_item(trans, path, block_group);
- if (ret < 0)
- goto out;
-
if (remove_em) {
struct extent_map_tree *em_tree;
@@ -1145,10 +1156,9 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
free_extent_map(em);
}
-out_put_group:
+out:
/* Once for the lookup reference */
btrfs_put_block_group(block_group);
-out:
if (remove_rsv)
btrfs_delayed_refs_rsv_release(fs_info, 1);
btrfs_free_path(path);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 30ce7039bc27..d404cce8ae40 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1009,6 +1009,8 @@ enum {
BTRFS_ROOT_DEAD_RELOC_TREE,
/* Mark dead root stored on device whose cleanup needs to be resumed */
BTRFS_ROOT_DEAD_TREE,
+ /* The root has a log tree. Used only for subvolume roots. */
+ BTRFS_ROOT_HAS_LOG_TREE,
};
/*
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 2c14312b05e8..2520605afc25 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1533,7 +1533,7 @@ lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct page **pages,
}
static noinline int check_can_nocow(struct btrfs_inode *inode, loff_t pos,
- size_t *write_bytes)
+ size_t *write_bytes, bool nowait)
{
struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct btrfs_root *root = inode->root;
@@ -1541,27 +1541,43 @@ static noinline int check_can_nocow(struct btrfs_inode *inode, loff_t pos,
u64 num_bytes;
int ret;
- if (!btrfs_drew_try_write_lock(&root->snapshot_lock))
+ if (!nowait && !btrfs_drew_try_write_lock(&root->snapshot_lock))
return -EAGAIN;
lockstart = round_down(pos, fs_info->sectorsize);
lockend = round_up(pos + *write_bytes,
fs_info->sectorsize) - 1;
+ num_bytes = lockend - lockstart + 1;
- btrfs_lock_and_flush_ordered_range(inode, lockstart,
- lockend, NULL);
+ if (nowait) {
+ struct btrfs_ordered_extent *ordered;
+
+ if (!try_lock_extent(&inode->io_tree, lockstart, lockend))
+ return -EAGAIN;
+
+ ordered = btrfs_lookup_ordered_range(inode, lockstart,
+ num_bytes);
+ if (ordered) {
+ btrfs_put_ordered_extent(ordered);
+ ret = -EAGAIN;
+ goto out_unlock;
+ }
+ } else {
+ btrfs_lock_and_flush_ordered_range(inode, lockstart,
+ lockend, NULL);
+ }
- num_bytes = lockend - lockstart + 1;
ret = can_nocow_extent(&inode->vfs_inode, lockstart, &num_bytes,
NULL, NULL, NULL);
if (ret <= 0) {
ret = 0;
- btrfs_drew_write_unlock(&root->snapshot_lock);
+ if (!nowait)
+ btrfs_drew_write_unlock(&root->snapshot_lock);
} else {
*write_bytes = min_t(size_t, *write_bytes ,
num_bytes - pos + lockstart);
}
-
+out_unlock:
unlock_extent(&inode->io_tree, lockstart, lockend);
return ret;
@@ -1633,7 +1649,7 @@ static noinline ssize_t btrfs_buffered_write(struct kiocb *iocb,
if ((BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW |
BTRFS_INODE_PREALLOC)) &&
check_can_nocow(BTRFS_I(inode), pos,
- &write_bytes) > 0) {
+ &write_bytes, false) > 0) {
/*
* For nodata cow case, no need to reserve
* data space.
@@ -1904,13 +1920,25 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
pos = iocb->ki_pos;
count = iov_iter_count(from);
if (iocb->ki_flags & IOCB_NOWAIT) {
+ size_t nocow_bytes = count;
+
/*
* We will allocate space in case nodatacow is not set,
* so bail
*/
if (!(BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW |
BTRFS_INODE_PREALLOC)) ||
- check_can_nocow(BTRFS_I(inode), pos, &count) <= 0) {
+ check_can_nocow(BTRFS_I(inode), pos, &nocow_bytes,
+ true) <= 0) {
+ inode_unlock(inode);
+ return -EAGAIN;
+ }
+ /*
+ * There are holes in the range or parts of the range that must
+ * be COWed (shared extents, RO block groups, etc), so just bail
+ * out.
+ */
+ if (nocow_bytes < count) {
inode_unlock(inode);
return -EAGAIN;
}
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index d04c82c88418..18d384f4af54 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -985,6 +985,7 @@ static noinline int cow_file_range(struct inode *inode,
u64 num_bytes;
unsigned long ram_size;
u64 cur_alloc_size = 0;
+ u64 min_alloc_size;
u64 blocksize = fs_info->sectorsize;
struct btrfs_key ins;
struct extent_map *em;
@@ -1035,10 +1036,26 @@ static noinline int cow_file_range(struct inode *inode,
btrfs_drop_extent_cache(BTRFS_I(inode), start,
start + num_bytes - 1, 0);
+ /*
+ * Relocation relies on the relocated extents to have exactly the same
+ * size as the original extents. Normally writeback for relocation data
+ * extents follows a NOCOW path because relocation preallocates the
+ * extents. However, due to an operation such as scrub turning a block
+ * group to RO mode, it may fallback to COW mode, so we must make sure
+ * an extent allocated during COW has exactly the requested size and can
+ * not be split into smaller extents, otherwise relocation breaks and
+ * fails during the stage where it updates the bytenr of file extent
+ * items.
+ */
+ if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
+ min_alloc_size = num_bytes;
+ else
+ min_alloc_size = fs_info->sectorsize;
+
while (num_bytes > 0) {
cur_alloc_size = num_bytes;
ret = btrfs_reserve_extent(root, cur_alloc_size, cur_alloc_size,
- fs_info->sectorsize, 0, alloc_hint,
+ min_alloc_size, 0, alloc_hint,
&ins, 1, 1);
if (ret < 0)
goto out_unlock;
@@ -1361,6 +1378,8 @@ static int fallback_to_cow(struct inode *inode, struct page *locked_page,
int *page_started, unsigned long *nr_written)
{
const bool is_space_ino = btrfs_is_free_space_inode(BTRFS_I(inode));
+ const bool is_reloc_ino = (BTRFS_I(inode)->root->root_key.objectid ==
+ BTRFS_DATA_RELOC_TREE_OBJECTID);
const u64 range_bytes = end + 1 - start;
struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
u64 range_start = start;
@@ -1391,18 +1410,23 @@ static int fallback_to_cow(struct inode *inode, struct page *locked_page,
* data space info, which we incremented in the step above.
*
* If we need to fallback to cow and the inode corresponds to a free
- * space cache inode, we must also increment bytes_may_use of the data
- * space_info for the same reason. Space caches always get a prealloc
+ * space cache inode or an inode of the data relocation tree, we must
+ * also increment bytes_may_use of the data space_info for the same
+ * reason. Space caches and relocated data extents always get a prealloc
* extent for them, however scrub or balance may have set the block
- * group that contains that extent to RO mode.
+ * group that contains that extent to RO mode and therefore force COW
+ * when starting writeback.
*/
count = count_range_bits(io_tree, &range_start, end, range_bytes,
EXTENT_NORESERVE, 0);
- if (count > 0 || is_space_ino) {
- const u64 bytes = is_space_ino ? range_bytes : count;
+ if (count > 0 || is_space_ino || is_reloc_ino) {
+ u64 bytes = count;
struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
struct btrfs_space_info *sinfo = fs_info->data_sinfo;
+ if (is_space_ino || is_reloc_ino)
+ bytes = range_bytes;
+
spin_lock(&sinfo->lock);
btrfs_space_info_update_bytes_may_use(fs_info, sinfo, bytes);
spin_unlock(&sinfo->lock);
@@ -7865,9 +7889,6 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
dio_data.overwrite = 1;
inode_unlock(inode);
relock = true;
- } else if (iocb->ki_flags & IOCB_NOWAIT) {
- ret = -EAGAIN;
- goto out;
}
ret = btrfs_delalloc_reserve_space(inode, &data_reserved,
offset, count);
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 168deb8ef68a..e8f7c5f00894 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -2692,7 +2692,7 @@ out:
btrfs_put_root(root);
out_free:
btrfs_free_path(path);
- kzfree(subvol_info);
+ kfree(subvol_info);
return ret;
}
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 920cee312f4e..cd5348f352dd 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -169,6 +169,7 @@ static int start_log_trans(struct btrfs_trans_handle *trans,
if (ret)
goto out;
+ set_bit(BTRFS_ROOT_HAS_LOG_TREE, &root->state);
clear_bit(BTRFS_ROOT_MULTI_LOG_TASKS, &root->state);
root->log_start_pid = current->pid;
}
@@ -195,6 +196,9 @@ static int join_running_log_trans(struct btrfs_root *root)
{
int ret = -ENOENT;
+ if (!test_bit(BTRFS_ROOT_HAS_LOG_TREE, &root->state))
+ return ret;
+
mutex_lock(&root->log_mutex);
if (root->log_root) {
ret = 0;
@@ -3303,6 +3307,7 @@ int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root)
if (root->log_root) {
free_log_tree(trans, root->log_root);
root->log_root = NULL;
+ clear_bit(BTRFS_ROOT_HAS_LOG_TREE, &root->state);
}
return 0;
}
diff --git a/fs/erofs/zdata.h b/fs/erofs/zdata.h
index 7824f5563a55..9b66c28b3ae9 100644
--- a/fs/erofs/zdata.h
+++ b/fs/erofs/zdata.h
@@ -144,22 +144,22 @@ static inline void z_erofs_onlinepage_init(struct page *page)
static inline void z_erofs_onlinepage_fixup(struct page *page,
uintptr_t index, bool down)
{
- unsigned long *p, o, v, id;
-repeat:
- p = &page_private(page);
- o = READ_ONCE(*p);
+ union z_erofs_onlinepage_converter u = { .v = &page_private(page) };
+ int orig, orig_index, val;
- id = o >> Z_EROFS_ONLINEPAGE_INDEX_SHIFT;
- if (id) {
+repeat:
+ orig = atomic_read(u.o);
+ orig_index = orig >> Z_EROFS_ONLINEPAGE_INDEX_SHIFT;
+ if (orig_index) {
if (!index)
return;
- DBG_BUGON(id != index);
+ DBG_BUGON(orig_index != index);
}
- v = (index << Z_EROFS_ONLINEPAGE_INDEX_SHIFT) |
- ((o & Z_EROFS_ONLINEPAGE_COUNT_MASK) + (unsigned int)down);
- if (cmpxchg(p, o, v) != o)
+ val = (index << Z_EROFS_ONLINEPAGE_INDEX_SHIFT) |
+ ((orig & Z_EROFS_ONLINEPAGE_COUNT_MASK) + (unsigned int)down);
+ if (atomic_cmpxchg(u.o, orig, val) != orig)
goto repeat;
}
diff --git a/fs/file_table.c b/fs/file_table.c
index 656647f9575a..65603502fed6 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -230,7 +230,7 @@ struct file *alloc_file_pseudo(struct inode *inode, struct vfsmount *mnt,
d_set_d_op(path.dentry, &anon_ops);
path.mnt = mntget(mnt);
d_instantiate(path.dentry, inode);
- file = alloc_file(&path, flags, fops);
+ file = alloc_file(&path, flags | FMODE_NONOTIFY, fops);
if (IS_ERR(file)) {
ihold(inode);
path_put(&path);
diff --git a/fs/io_uring.c b/fs/io_uring.c
index a78201b96179..e507737f044e 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -890,6 +890,7 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
struct io_uring_files_update *ip,
unsigned nr_args);
static int io_grab_files(struct io_kiocb *req);
+static void io_complete_rw_common(struct kiocb *kiocb, long res);
static void io_cleanup_req(struct io_kiocb *req);
static int io_file_get(struct io_submit_state *state, struct io_kiocb *req,
int fd, struct file **out_file, bool fixed);
@@ -1749,6 +1750,14 @@ static void io_iopoll_queue(struct list_head *again)
do {
req = list_first_entry(again, struct io_kiocb, list);
list_del(&req->list);
+
+ /* shouldn't happen unless io_uring is dying, cancel reqs */
+ if (unlikely(!current->mm)) {
+ io_complete_rw_common(&req->rw.kiocb, -EAGAIN);
+ io_put_req(req);
+ continue;
+ }
+
refcount_inc(&req->refs);
io_queue_async_work(req);
} while (!list_empty(again));
@@ -1994,10 +2003,8 @@ static void io_complete_rw_iopoll(struct kiocb *kiocb, long res, long res2)
WRITE_ONCE(req->result, res);
/* order with io_poll_complete() checking ->result */
- if (res != -EAGAIN) {
- smp_wmb();
- WRITE_ONCE(req->iopoll_completed, 1);
- }
+ smp_wmb();
+ WRITE_ONCE(req->iopoll_completed, 1);
}
/*
@@ -5353,9 +5360,6 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
if ((ctx->flags & IORING_SETUP_IOPOLL) && req->file) {
const bool in_async = io_wq_current_is_worker();
- if (req->result == -EAGAIN)
- return -EAGAIN;
-
/* workqueue context doesn't hold uring_lock, grab it now */
if (in_async)
mutex_lock(&ctx->uring_lock);
@@ -6011,7 +6015,7 @@ static int io_sq_thread(void *data)
* If submit got -EBUSY, flag us as needing the application
* to enter the kernel to reap and flush events.
*/
- if (!to_submit || ret == -EBUSY) {
+ if (!to_submit || ret == -EBUSY || need_resched()) {
/*
* Drop cur_mm before scheduling, we can't hold it for
* long periods (or over schedule()). Do this before
@@ -6027,7 +6031,7 @@ static int io_sq_thread(void *data)
* more IO, we should wait for the application to
* reap events and wake us up.
*/
- if (!list_empty(&ctx->poll_list) ||
+ if (!list_empty(&ctx->poll_list) || need_resched() ||
(!time_after(jiffies, timeout) && ret != -EBUSY &&
!percpu_ref_is_dying(&ctx->refs))) {
if (current->task_works)
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 1b79dd5cf661..3d113cf8908a 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -267,8 +267,6 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq)
{
struct inode *inode = dreq->inode;
- inode_dio_end(inode);
-
if (dreq->iocb) {
long res = (long) dreq->error;
if (dreq->count != 0) {
@@ -280,7 +278,10 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq)
complete(&dreq->completion);
+ igrab(inode);
nfs_direct_req_release(dreq);
+ inode_dio_end(inode);
+ iput(inode);
}
static void nfs_direct_read_completion(struct nfs_pgio_header *hdr)
@@ -410,8 +411,10 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
* generic layer handle the completion.
*/
if (requested_bytes == 0) {
- inode_dio_end(inode);
+ igrab(inode);
nfs_direct_req_release(dreq);
+ inode_dio_end(inode);
+ iput(inode);
return result < 0 ? result : -EIO;
}
@@ -864,8 +867,10 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
* generic layer handle the completion.
*/
if (requested_bytes == 0) {
- inode_dio_end(inode);
+ igrab(inode);
nfs_direct_req_release(dreq);
+ inode_dio_end(inode);
+ iput(inode);
return result < 0 ? result : -EIO;
}
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index f96367a2463e..ccd6c1637b27 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -83,6 +83,7 @@ nfs_file_release(struct inode *inode, struct file *filp)
dprintk("NFS: release(%pD2)\n", filp);
nfs_inc_stats(inode, NFSIOS_VFSRELEASE);
+ inode_dio_wait(inode);
nfs_file_clear_open_context(filp);
return 0;
}
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index 7d399f72ebbb..de03e440b7ee 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -907,9 +907,8 @@ retry:
goto out_mds;
/* Use a direct mapping of ds_idx to pgio mirror_idx */
- if (WARN_ON_ONCE(pgio->pg_mirror_count !=
- FF_LAYOUT_MIRROR_COUNT(pgio->pg_lseg)))
- goto out_mds;
+ if (pgio->pg_mirror_count != FF_LAYOUT_MIRROR_COUNT(pgio->pg_lseg))
+ goto out_eagain;
for (i = 0; i < pgio->pg_mirror_count; i++) {
mirror = FF_LAYOUT_COMP(pgio->pg_lseg, i);
@@ -931,7 +930,10 @@ retry:
(NFS_MOUNT_SOFT|NFS_MOUNT_SOFTERR))
pgio->pg_maxretrans = io_maxretrans;
return;
-
+out_eagain:
+ pnfs_generic_pg_cleanup(pgio);
+ pgio->pg_error = -EAGAIN;
+ return;
out_mds:
trace_pnfs_mds_fallback_pg_init_write(pgio->pg_inode,
0, NFS4_MAX_UINT64, IOMODE_RW,
@@ -941,6 +943,7 @@ out_mds:
pgio->pg_lseg = NULL;
pgio->pg_maxretrans = 0;
nfs_pageio_reset_write_mds(pgio);
+ pgio->pg_error = -EAGAIN;
}
static unsigned int
diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c
index a3ab6e219061..873342308dc0 100644
--- a/fs/nfs/nfs4namespace.c
+++ b/fs/nfs/nfs4namespace.c
@@ -308,6 +308,7 @@ static int try_location(struct fs_context *fc,
if (IS_ERR(export_path))
return PTR_ERR(export_path);
+ kfree(ctx->nfs_server.export_path);
ctx->nfs_server.export_path = export_path;
source = kmalloc(len + 1 + ctx->nfs_server.export_path_len + 1,
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 152a0fc4e905..751bc4dc7466 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -689,6 +689,12 @@ static void ocfs2_nfs_sync_lock_res_init(struct ocfs2_lock_res *res,
&ocfs2_nfs_sync_lops, osb);
}
+static void ocfs2_nfs_sync_lock_init(struct ocfs2_super *osb)
+{
+ ocfs2_nfs_sync_lock_res_init(&osb->osb_nfs_sync_lockres, osb);
+ init_rwsem(&osb->nfs_sync_rwlock);
+}
+
void ocfs2_trim_fs_lock_res_init(struct ocfs2_super *osb)
{
struct ocfs2_lock_res *lockres = &osb->osb_trim_fs_lockres;
@@ -2855,6 +2861,11 @@ int ocfs2_nfs_sync_lock(struct ocfs2_super *osb, int ex)
if (ocfs2_is_hard_readonly(osb))
return -EROFS;
+ if (ex)
+ down_write(&osb->nfs_sync_rwlock);
+ else
+ down_read(&osb->nfs_sync_rwlock);
+
if (ocfs2_mount_local(osb))
return 0;
@@ -2873,6 +2884,10 @@ void ocfs2_nfs_sync_unlock(struct ocfs2_super *osb, int ex)
if (!ocfs2_mount_local(osb))
ocfs2_cluster_unlock(osb, lockres,
ex ? LKM_EXMODE : LKM_PRMODE);
+ if (ex)
+ up_write(&osb->nfs_sync_rwlock);
+ else
+ up_read(&osb->nfs_sync_rwlock);
}
int ocfs2_trim_fs_lock(struct ocfs2_super *osb,
@@ -3340,7 +3355,7 @@ int ocfs2_dlm_init(struct ocfs2_super *osb)
local:
ocfs2_super_lock_res_init(&osb->osb_super_lockres, osb);
ocfs2_rename_lock_res_init(&osb->osb_rename_lockres, osb);
- ocfs2_nfs_sync_lock_res_init(&osb->osb_nfs_sync_lockres, osb);
+ ocfs2_nfs_sync_lock_init(osb);
ocfs2_orphan_scan_lock_res_init(&osb->osb_orphan_scan.os_lockres, osb);
osb->cconn = conn;
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index ee5d98516212..2dd71d626196 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -395,6 +395,7 @@ struct ocfs2_super
struct ocfs2_lock_res osb_super_lockres;
struct ocfs2_lock_res osb_rename_lockres;
struct ocfs2_lock_res osb_nfs_sync_lockres;
+ struct rw_semaphore nfs_sync_rwlock;
struct ocfs2_lock_res osb_trim_fs_lockres;
struct mutex obs_trim_fs_mutex;
struct ocfs2_dlm_debug *osb_dlm_debug;
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index 0dd8c41bafd4..19137c6d087b 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -290,7 +290,7 @@
#define OCFS2_MAX_SLOTS 255
/* Slot map indicator for an empty slot */
-#define OCFS2_INVALID_SLOT -1
+#define OCFS2_INVALID_SLOT ((u16)-1)
#define OCFS2_VOL_UUID_LEN 16
#define OCFS2_MAX_VOL_LABEL_LEN 64
@@ -326,8 +326,8 @@ struct ocfs2_system_inode_info {
enum {
BAD_BLOCK_SYSTEM_INODE = 0,
GLOBAL_INODE_ALLOC_SYSTEM_INODE,
+#define OCFS2_FIRST_ONLINE_SYSTEM_INODE GLOBAL_INODE_ALLOC_SYSTEM_INODE
SLOT_MAP_SYSTEM_INODE,
-#define OCFS2_FIRST_ONLINE_SYSTEM_INODE SLOT_MAP_SYSTEM_INODE
HEARTBEAT_SYSTEM_INODE,
GLOBAL_BITMAP_SYSTEM_INODE,
USER_QUOTA_SYSTEM_INODE,
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index 4836becb7578..45745cc3408a 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -2825,9 +2825,12 @@ int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res)
goto bail;
}
- inode_alloc_inode =
- ocfs2_get_system_file_inode(osb, INODE_ALLOC_SYSTEM_INODE,
- suballoc_slot);
+ if (suballoc_slot == (u16)OCFS2_INVALID_SLOT)
+ inode_alloc_inode = ocfs2_get_system_file_inode(osb,
+ GLOBAL_INODE_ALLOC_SYSTEM_INODE, suballoc_slot);
+ else
+ inode_alloc_inode = ocfs2_get_system_file_inode(osb,
+ INODE_ALLOC_SYSTEM_INODE, suballoc_slot);
if (!inode_alloc_inode) {
/* the error code could be inaccurate, but we are not able to
* get the correct one. */