diff options
Diffstat (limited to 'fs')
-rw-r--r-- | fs/btrfs/disk-io.c | 11 | ||||
-rw-r--r-- | fs/btrfs/file.c | 24 | ||||
-rw-r--r-- | fs/btrfs/qgroup.c | 3 | ||||
-rw-r--r-- | fs/btrfs/relocation.c | 1 | ||||
-rw-r--r-- | fs/btrfs/send.c | 11 | ||||
-rw-r--r-- | fs/btrfs/super.c | 1 | ||||
-rw-r--r-- | fs/dax.c | 60 | ||||
-rw-r--r-- | fs/nfs/callback_proc.c | 22 | ||||
-rw-r--r-- | fs/nfs/flexfilelayout/flexfilelayout.c | 21 | ||||
-rw-r--r-- | fs/nfs/flexfilelayout/flexfilelayout.h | 4 | ||||
-rw-r--r-- | fs/nfs/flexfilelayout/flexfilelayoutdev.c | 19 | ||||
-rw-r--r-- | fs/nfs/nfs42proc.c | 19 | ||||
-rw-r--r-- | fs/nfs/nfs4_fs.h | 2 | ||||
-rw-r--r-- | fs/nfs/nfs4state.c | 16 | ||||
-rw-r--r-- | fs/nilfs2/btnode.c | 4 |
15 files changed, 139 insertions, 79 deletions
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 3f0b6d1936e8..6d776717d8b3 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -477,9 +477,9 @@ static int btree_read_extent_buffer_pages(struct btrfs_fs_info *fs_info, int mirror_num = 0; int failed_mirror = 0; - clear_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags); io_tree = &BTRFS_I(fs_info->btree_inode)->io_tree; while (1) { + clear_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags); ret = read_extent_buffer_pages(io_tree, eb, WAIT_COMPLETE, mirror_num); if (!ret) { @@ -493,15 +493,6 @@ static int btree_read_extent_buffer_pages(struct btrfs_fs_info *fs_info, break; } - /* - * This buffer's crc is fine, but its contents are corrupted, so - * there is no reason to read the other copies, they won't be - * any less wrong. - */ - if (test_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags) || - ret == -EUCLEAN) - break; - num_copies = btrfs_num_copies(fs_info, eb->start, eb->len); if (num_copies == 1) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index a3c22e16509b..58e93bce3036 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -2089,6 +2089,30 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) atomic_inc(&root->log_batch); /* + * Before we acquired the inode's lock, someone may have dirtied more + * pages in the target range. We need to make sure that writeback for + * any such pages does not start while we are logging the inode, because + * if it does, any of the following might happen when we are not doing a + * full inode sync: + * + * 1) We log an extent after its writeback finishes but before its + * checksums are added to the csum tree, leading to -EIO errors + * when attempting to read the extent after a log replay. + * + * 2) We can end up logging an extent before its writeback finishes. + * Therefore after the log replay we will have a file extent item + * pointing to an unwritten extent (and no data checksums as well). + * + * So trigger writeback for any eventual new dirty pages and then we + * wait for all ordered extents to complete below. + */ + ret = start_ordered_ops(inode, start, end); + if (ret) { + inode_unlock(inode); + goto out; + } + + /* * We have to do this here to avoid the priority inversion of waiting on * IO of a lower priority task while holding a transaciton open. */ diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 45868fd76209..f70825af6438 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -2659,7 +2659,7 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid, int i; u64 *i_qgroups; struct btrfs_fs_info *fs_info = trans->fs_info; - struct btrfs_root *quota_root = fs_info->quota_root; + struct btrfs_root *quota_root; struct btrfs_qgroup *srcgroup; struct btrfs_qgroup *dstgroup; u32 level_size = 0; @@ -2669,6 +2669,7 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid, if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) goto out; + quota_root = fs_info->quota_root; if (!quota_root) { ret = -EINVAL; goto out; diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 924116f654a1..a3f75b8926d4 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -3959,6 +3959,7 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc) restart: if (update_backref_cache(trans, &rc->backref_cache)) { btrfs_end_transaction(trans); + trans = NULL; continue; } diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 094cc1444a90..5be83b5a1b43 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -3340,7 +3340,8 @@ static void free_pending_move(struct send_ctx *sctx, struct pending_dir_move *m) kfree(m); } -static void tail_append_pending_moves(struct pending_dir_move *moves, +static void tail_append_pending_moves(struct send_ctx *sctx, + struct pending_dir_move *moves, struct list_head *stack) { if (list_empty(&moves->list)) { @@ -3351,6 +3352,10 @@ static void tail_append_pending_moves(struct pending_dir_move *moves, list_add_tail(&moves->list, stack); list_splice_tail(&list, stack); } + if (!RB_EMPTY_NODE(&moves->node)) { + rb_erase(&moves->node, &sctx->pending_dir_moves); + RB_CLEAR_NODE(&moves->node); + } } static int apply_children_dir_moves(struct send_ctx *sctx) @@ -3365,7 +3370,7 @@ static int apply_children_dir_moves(struct send_ctx *sctx) return 0; INIT_LIST_HEAD(&stack); - tail_append_pending_moves(pm, &stack); + tail_append_pending_moves(sctx, pm, &stack); while (!list_empty(&stack)) { pm = list_first_entry(&stack, struct pending_dir_move, list); @@ -3376,7 +3381,7 @@ static int apply_children_dir_moves(struct send_ctx *sctx) goto out; pm = get_pending_dir_moves(sctx, parent_ino); if (pm) - tail_append_pending_moves(pm, &stack); + tail_append_pending_moves(sctx, pm, &stack); } return 0; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index cbc9d0d2c12d..645fc81e2a94 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -2237,6 +2237,7 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd, vol = memdup_user((void __user *)arg, sizeof(*vol)); if (IS_ERR(vol)) return PTR_ERR(vol); + vol->name[BTRFS_PATH_NAME_MAX] = '\0'; switch (cmd) { case BTRFS_IOC_SCAN_DEV: @@ -98,12 +98,6 @@ static void *dax_make_entry(pfn_t pfn, unsigned long flags) return xa_mk_value(flags | (pfn_t_to_pfn(pfn) << DAX_SHIFT)); } -static void *dax_make_page_entry(struct page *page) -{ - pfn_t pfn = page_to_pfn_t(page); - return dax_make_entry(pfn, PageHead(page) ? DAX_PMD : 0); -} - static bool dax_is_locked(void *entry) { return xa_to_value(entry) & DAX_LOCKED; @@ -116,12 +110,12 @@ static unsigned int dax_entry_order(void *entry) return 0; } -static int dax_is_pmd_entry(void *entry) +static unsigned long dax_is_pmd_entry(void *entry) { return xa_to_value(entry) & DAX_PMD; } -static int dax_is_pte_entry(void *entry) +static bool dax_is_pte_entry(void *entry) { return !(xa_to_value(entry) & DAX_PMD); } @@ -222,9 +216,8 @@ static void *get_unlocked_entry(struct xa_state *xas) ewait.wait.func = wake_exceptional_entry_func; for (;;) { - entry = xas_load(xas); - if (!entry || xa_is_internal(entry) || - WARN_ON_ONCE(!xa_is_value(entry)) || + entry = xas_find_conflict(xas); + if (!entry || WARN_ON_ONCE(!xa_is_value(entry)) || !dax_is_locked(entry)) return entry; @@ -255,6 +248,7 @@ static void dax_unlock_entry(struct xa_state *xas, void *entry) { void *old; + BUG_ON(dax_is_locked(entry)); xas_reset(xas); xas_lock_irq(xas); old = xas_store(xas, entry); @@ -352,16 +346,27 @@ static struct page *dax_busy_page(void *entry) return NULL; } +/* + * dax_lock_mapping_entry - Lock the DAX entry corresponding to a page + * @page: The page whose entry we want to lock + * + * Context: Process context. + * Return: %true if the entry was locked or does not need to be locked. + */ bool dax_lock_mapping_entry(struct page *page) { XA_STATE(xas, NULL, 0); void *entry; + bool locked; + /* Ensure page->mapping isn't freed while we look at it */ + rcu_read_lock(); for (;;) { struct address_space *mapping = READ_ONCE(page->mapping); + locked = false; if (!dax_mapping(mapping)) - return false; + break; /* * In the device-dax case there's no need to lock, a @@ -370,8 +375,9 @@ bool dax_lock_mapping_entry(struct page *page) * otherwise we would not have a valid pfn_to_page() * translation. */ + locked = true; if (S_ISCHR(mapping->host->i_mode)) - return true; + break; xas.xa = &mapping->i_pages; xas_lock_irq(&xas); @@ -382,28 +388,35 @@ bool dax_lock_mapping_entry(struct page *page) xas_set(&xas, page->index); entry = xas_load(&xas); if (dax_is_locked(entry)) { + rcu_read_unlock(); entry = get_unlocked_entry(&xas); - /* Did the page move while we slept? */ - if (dax_to_pfn(entry) != page_to_pfn(page)) { - xas_unlock_irq(&xas); - continue; - } + xas_unlock_irq(&xas); + put_unlocked_entry(&xas, entry); + rcu_read_lock(); + continue; } dax_lock_entry(&xas, entry); xas_unlock_irq(&xas); - return true; + break; } + rcu_read_unlock(); + return locked; } void dax_unlock_mapping_entry(struct page *page) { struct address_space *mapping = page->mapping; XA_STATE(xas, &mapping->i_pages, page->index); + void *entry; if (S_ISCHR(mapping->host->i_mode)) return; - dax_unlock_entry(&xas, dax_make_page_entry(page)); + rcu_read_lock(); + entry = xas_load(&xas); + rcu_read_unlock(); + entry = dax_make_entry(page_to_pfn_t(page), dax_is_pmd_entry(entry)); + dax_unlock_entry(&xas, entry); } /* @@ -445,11 +458,9 @@ static void *grab_mapping_entry(struct xa_state *xas, retry: xas_lock_irq(xas); entry = get_unlocked_entry(xas); - if (xa_is_internal(entry)) - goto fallback; if (entry) { - if (WARN_ON_ONCE(!xa_is_value(entry))) { + if (!xa_is_value(entry)) { xas_set_err(xas, EIO); goto out_unlock; } @@ -1628,8 +1639,7 @@ dax_insert_pfn_mkwrite(struct vm_fault *vmf, pfn_t pfn, unsigned int order) /* Did we race with someone splitting entry or so? */ if (!entry || (order == 0 && !dax_is_pte_entry(entry)) || - (order == PMD_ORDER && (xa_is_internal(entry) || - !dax_is_pmd_entry(entry)))) { + (order == PMD_ORDER && !dax_is_pmd_entry(entry))) { put_unlocked_entry(&xas, entry); xas_unlock_irq(&xas); trace_dax_insert_pfn_mkwrite_no_entry(mapping->host, vmf, diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 7b861bbc0b43..315967354954 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -686,20 +686,24 @@ __be32 nfs4_callback_offload(void *data, void *dummy, { struct cb_offloadargs *args = data; struct nfs_server *server; - struct nfs4_copy_state *copy; + struct nfs4_copy_state *copy, *tmp_copy; bool found = false; + copy = kzalloc(sizeof(struct nfs4_copy_state), GFP_NOFS); + if (!copy) + return htonl(NFS4ERR_SERVERFAULT); + spin_lock(&cps->clp->cl_lock); rcu_read_lock(); list_for_each_entry_rcu(server, &cps->clp->cl_superblocks, client_link) { - list_for_each_entry(copy, &server->ss_copies, copies) { + list_for_each_entry(tmp_copy, &server->ss_copies, copies) { if (memcmp(args->coa_stateid.other, - copy->stateid.other, + tmp_copy->stateid.other, sizeof(args->coa_stateid.other))) continue; - nfs4_copy_cb_args(copy, args); - complete(©->completion); + nfs4_copy_cb_args(tmp_copy, args); + complete(&tmp_copy->completion); found = true; goto out; } @@ -707,15 +711,11 @@ __be32 nfs4_callback_offload(void *data, void *dummy, out: rcu_read_unlock(); if (!found) { - copy = kzalloc(sizeof(struct nfs4_copy_state), GFP_NOFS); - if (!copy) { - spin_unlock(&cps->clp->cl_lock); - return htonl(NFS4ERR_SERVERFAULT); - } memcpy(©->stateid, &args->coa_stateid, NFS4_STATEID_SIZE); nfs4_copy_cb_args(copy, args); list_add_tail(©->copies, &cps->clp->pending_cb_stateids); - } + } else + kfree(copy); spin_unlock(&cps->clp->cl_lock); return 0; diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 86bcba40ca61..74b36ed883ca 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -1361,12 +1361,7 @@ static void ff_layout_read_prepare_v4(struct rpc_task *task, void *data) task)) return; - if (ff_layout_read_prepare_common(task, hdr)) - return; - - if (nfs4_set_rw_stateid(&hdr->args.stateid, hdr->args.context, - hdr->args.lock_context, FMODE_READ) == -EIO) - rpc_exit(task, -EIO); /* lost lock, terminate I/O */ + ff_layout_read_prepare_common(task, hdr); } static void ff_layout_read_call_done(struct rpc_task *task, void *data) @@ -1542,12 +1537,7 @@ static void ff_layout_write_prepare_v4(struct rpc_task *task, void *data) task)) return; - if (ff_layout_write_prepare_common(task, hdr)) - return; - - if (nfs4_set_rw_stateid(&hdr->args.stateid, hdr->args.context, - hdr->args.lock_context, FMODE_WRITE) == -EIO) - rpc_exit(task, -EIO); /* lost lock, terminate I/O */ + ff_layout_write_prepare_common(task, hdr); } static void ff_layout_write_call_done(struct rpc_task *task, void *data) @@ -1742,6 +1732,10 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr) fh = nfs4_ff_layout_select_ds_fh(lseg, idx); if (fh) hdr->args.fh = fh; + + if (!nfs4_ff_layout_select_ds_stateid(lseg, idx, &hdr->args.stateid)) + goto out_failed; + /* * Note that if we ever decide to split across DSes, * then we may need to handle dense-like offsets. @@ -1804,6 +1798,9 @@ ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync) if (fh) hdr->args.fh = fh; + if (!nfs4_ff_layout_select_ds_stateid(lseg, idx, &hdr->args.stateid)) + goto out_failed; + /* * Note that if we ever decide to split across DSes, * then we may need to handle dense-like offsets. diff --git a/fs/nfs/flexfilelayout/flexfilelayout.h b/fs/nfs/flexfilelayout/flexfilelayout.h index 411798346e48..de50a342d5a5 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.h +++ b/fs/nfs/flexfilelayout/flexfilelayout.h @@ -215,6 +215,10 @@ unsigned int ff_layout_fetch_ds_ioerr(struct pnfs_layout_hdr *lo, unsigned int maxnum); struct nfs_fh * nfs4_ff_layout_select_ds_fh(struct pnfs_layout_segment *lseg, u32 mirror_idx); +int +nfs4_ff_layout_select_ds_stateid(struct pnfs_layout_segment *lseg, + u32 mirror_idx, + nfs4_stateid *stateid); struct nfs4_pnfs_ds * nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx, diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c index 74d8d5352438..d23347389626 100644 --- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c +++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c @@ -370,6 +370,25 @@ out: return fh; } +int +nfs4_ff_layout_select_ds_stateid(struct pnfs_layout_segment *lseg, + u32 mirror_idx, + nfs4_stateid *stateid) +{ + struct nfs4_ff_layout_mirror *mirror = FF_LAYOUT_COMP(lseg, mirror_idx); + + if (!ff_layout_mirror_valid(lseg, mirror, false)) { + pr_err_ratelimited("NFS: %s: No data server for mirror offset index %d\n", + __func__, mirror_idx); + goto out; + } + + nfs4_stateid_copy(stateid, &mirror->stateid); + return 1; +out: + return 0; +} + /** * nfs4_ff_layout_prepare_ds - prepare a DS connection for an RPC call * @lseg: the layout segment we're operating on diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index ac5b784a1de0..fed06fd9998d 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -137,31 +137,32 @@ static int handle_async_copy(struct nfs42_copy_res *res, struct file *dst, nfs4_stateid *src_stateid) { - struct nfs4_copy_state *copy; + struct nfs4_copy_state *copy, *tmp_copy; int status = NFS4_OK; bool found_pending = false; struct nfs_open_context *ctx = nfs_file_open_context(dst); + copy = kzalloc(sizeof(struct nfs4_copy_state), GFP_NOFS); + if (!copy) + return -ENOMEM; + spin_lock(&server->nfs_client->cl_lock); - list_for_each_entry(copy, &server->nfs_client->pending_cb_stateids, + list_for_each_entry(tmp_copy, &server->nfs_client->pending_cb_stateids, copies) { - if (memcmp(&res->write_res.stateid, ©->stateid, + if (memcmp(&res->write_res.stateid, &tmp_copy->stateid, NFS4_STATEID_SIZE)) continue; found_pending = true; - list_del(©->copies); + list_del(&tmp_copy->copies); break; } if (found_pending) { spin_unlock(&server->nfs_client->cl_lock); + kfree(copy); + copy = tmp_copy; goto out; } - copy = kzalloc(sizeof(struct nfs4_copy_state), GFP_NOFS); - if (!copy) { - spin_unlock(&server->nfs_client->cl_lock); - return -ENOMEM; - } memcpy(©->stateid, &res->write_res.stateid, NFS4_STATEID_SIZE); init_completion(©->completion); copy->parent_state = ctx->state; diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 8d59c9655ec4..1b994b527518 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -41,6 +41,8 @@ enum nfs4_client_state { NFS4CLNT_MOVED, NFS4CLNT_LEASE_MOVED, NFS4CLNT_DELEGATION_EXPIRED, + NFS4CLNT_RUN_MANAGER, + NFS4CLNT_DELEGRETURN_RUNNING, }; #define NFS4_RENEW_TIMEOUT 0x01 diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index ffea57885394..d8decf2ec48f 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1210,6 +1210,7 @@ void nfs4_schedule_state_manager(struct nfs_client *clp) struct task_struct *task; char buf[INET6_ADDRSTRLEN + sizeof("-manager") + 1]; + set_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state); if (test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) != 0) return; __module_get(THIS_MODULE); @@ -2503,6 +2504,7 @@ static void nfs4_state_manager(struct nfs_client *clp) /* Ensure exclusive access to NFSv4 state */ do { + clear_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state); if (test_bit(NFS4CLNT_PURGE_STATE, &clp->cl_state)) { section = "purge state"; status = nfs4_purge_lease(clp); @@ -2593,14 +2595,18 @@ static void nfs4_state_manager(struct nfs_client *clp) } nfs4_end_drain_session(clp); - if (test_and_clear_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state)) { - nfs_client_return_marked_delegations(clp); - continue; + nfs4_clear_state_manager_bit(clp); + + if (!test_and_set_bit(NFS4CLNT_DELEGRETURN_RUNNING, &clp->cl_state)) { + if (test_and_clear_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state)) { + nfs_client_return_marked_delegations(clp); + set_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state); + } + clear_bit(NFS4CLNT_DELEGRETURN_RUNNING, &clp->cl_state); } - nfs4_clear_state_manager_bit(clp); /* Did we race with an attempt to give us more work? */ - if (clp->cl_state == 0) + if (!test_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state)) return; if (test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) != 0) return; diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c index de99db518571..f2129a5d9f23 100644 --- a/fs/nilfs2/btnode.c +++ b/fs/nilfs2/btnode.c @@ -266,9 +266,7 @@ void nilfs_btnode_abort_change_key(struct address_space *btnc, return; if (nbh == NULL) { /* blocksize == pagesize */ - xa_lock_irq(&btnc->i_pages); - __xa_erase(&btnc->i_pages, newkey); - xa_unlock_irq(&btnc->i_pages); + xa_erase_irq(&btnc->i_pages, newkey); unlock_page(ctxt->bh->b_page); } else brelse(nbh); |