From 59b5639490f51aa604d18064dcf0c2d72eb1decf Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 27 Jan 2020 13:07:26 -0500 Subject: NFSv4/pnfs: pnfs_set_layout_stateid() should update the layout cred If the cred assigned to the layout that we're updating differs from the one used to retrieve the new layout segment, then we need to update the layout plh_lc_cred field. Signed-off-by: Trond Myklebust --- fs/nfs/callback_proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/nfs/callback_proc.c') diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index cd4c6bc81cae..b6ffac9963c8 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -280,7 +280,7 @@ static u32 initiate_file_draining(struct nfs_client *clp, goto unlock; } - pnfs_set_layout_stateid(lo, &args->cbl_stateid, true); + pnfs_set_layout_stateid(lo, &args->cbl_stateid, NULL, true); switch (pnfs_mark_matching_lsegs_return(lo, &free_me_list, &args->cbl_range, be32_to_cpu(args->cbl_stateid.seqid))) { -- cgit v1.2.3-70-g09d2 From d911c57a19551c6bef116a3b55c6b089901aacb0 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 27 Feb 2020 11:01:12 -0500 Subject: NFSv4/pnfs: Return valid stateids in nfs_layout_find_inode_by_stateid() Make sure to test the stateid for validity so that we catch instances where the server may have been reusing stateids in nfs_layout_find_inode_by_stateid(). Fixes: 7b410d9ce460 ("pNFS: Delay getting the layout header in CB_LAYOUTRECALL handlers") Signed-off-by: Trond Myklebust --- fs/nfs/callback_proc.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/nfs/callback_proc.c') diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index b6ffac9963c8..eb9d035451a2 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -128,6 +128,8 @@ static struct inode *nfs_layout_find_inode_by_stateid(struct nfs_client *clp, list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { list_for_each_entry(lo, &server->layouts, plh_layouts) { + if (!pnfs_layout_is_valid(lo)) + continue; if (stateid != NULL && !nfs4_stateid_match_other(stateid, &lo->plh_stateid)) continue; -- cgit v1.2.3-70-g09d2 From 58ac3e59235f1fa174c6e9c5e69111a7b2fa2652 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 27 Feb 2020 11:24:06 -0500 Subject: NFSv4/pnfs: Clean up nfs_layout_find_inode() Now that we can rely on just the rcu_read_lock(), remove the clp->cl_lock and clean up. Signed-off-by: Trond Myklebust --- fs/nfs/callback_proc.c | 52 ++++++++++++++++++++------------------------------ 1 file changed, 21 insertions(+), 31 deletions(-) (limited to 'fs/nfs/callback_proc.c') diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index eb9d035451a2..97084804a953 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -121,33 +121,31 @@ out: */ static struct inode *nfs_layout_find_inode_by_stateid(struct nfs_client *clp, const nfs4_stateid *stateid) + __must_hold(RCU) { struct nfs_server *server; struct inode *inode; struct pnfs_layout_hdr *lo; + rcu_read_lock(); list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { - list_for_each_entry(lo, &server->layouts, plh_layouts) { + list_for_each_entry_rcu(lo, &server->layouts, plh_layouts) { if (!pnfs_layout_is_valid(lo)) continue; if (stateid != NULL && !nfs4_stateid_match_other(stateid, &lo->plh_stateid)) continue; + if (!nfs_sb_active(server->super)) + continue; inode = igrab(lo->plh_inode); - if (!inode) - return ERR_PTR(-EAGAIN); - if (!nfs_sb_active(inode->i_sb)) { - rcu_read_unlock(); - spin_unlock(&clp->cl_lock); - iput(inode); - spin_lock(&clp->cl_lock); - rcu_read_lock(); - return ERR_PTR(-EAGAIN); - } - return inode; + rcu_read_unlock(); + if (inode) + return inode; + nfs_sb_deactive(server->super); + return ERR_PTR(-EAGAIN); } } - + rcu_read_unlock(); return ERR_PTR(-ENOENT); } @@ -165,28 +163,25 @@ static struct inode *nfs_layout_find_inode_by_fh(struct nfs_client *clp, struct inode *inode; struct pnfs_layout_hdr *lo; + rcu_read_lock(); list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { - list_for_each_entry(lo, &server->layouts, plh_layouts) { + list_for_each_entry_rcu(lo, &server->layouts, plh_layouts) { nfsi = NFS_I(lo->plh_inode); if (nfs_compare_fh(fh, &nfsi->fh)) continue; if (nfsi->layout != lo) continue; + if (!nfs_sb_active(server->super)) + continue; inode = igrab(lo->plh_inode); - if (!inode) - return ERR_PTR(-EAGAIN); - if (!nfs_sb_active(inode->i_sb)) { - rcu_read_unlock(); - spin_unlock(&clp->cl_lock); - iput(inode); - spin_lock(&clp->cl_lock); - rcu_read_lock(); - return ERR_PTR(-EAGAIN); - } - return inode; + rcu_read_unlock(); + if (inode) + return inode; + nfs_sb_deactive(server->super); + return ERR_PTR(-EAGAIN); } } - + rcu_read_unlock(); return ERR_PTR(-ENOENT); } @@ -196,14 +191,9 @@ static struct inode *nfs_layout_find_inode(struct nfs_client *clp, { struct inode *inode; - spin_lock(&clp->cl_lock); - rcu_read_lock(); inode = nfs_layout_find_inode_by_stateid(clp, stateid); if (inode == ERR_PTR(-ENOENT)) inode = nfs_layout_find_inode_by_fh(clp, fh); - rcu_read_unlock(); - spin_unlock(&clp->cl_lock); - return inode; } -- cgit v1.2.3-70-g09d2 From b5fdf8418c370d69e8b2d3588e0cf2a375ab26c1 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 18 Feb 2020 15:58:31 -0500 Subject: NFSv4: Add support for CB_RECALL_ANY for flexfiles layouts When we receive a CB_RECALL_ANY that asks us to return flexfiles layouts, we iterate through all the layouts and look at whether or not there are active open file descriptors that might need them for I/O. If there are no such descriptors, we return the layouts. Signed-off-by: Trond Myklebust --- fs/nfs/callback.h | 4 +- fs/nfs/callback_proc.c | 13 +++++ fs/nfs/nfs4_fs.h | 4 +- fs/nfs/nfs4state.c | 24 +++++++- fs/nfs/nfs4trace.h | 8 ++- fs/nfs/pnfs.c | 148 +++++++++++++++++++++++++++++++++++++++++++++---- fs/nfs/pnfs.h | 3 + 7 files changed, 186 insertions(+), 18 deletions(-) (limited to 'fs/nfs/callback_proc.c') diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h index 549350259840..6a2033131c06 100644 --- a/fs/nfs/callback.h +++ b/fs/nfs/callback.h @@ -127,7 +127,9 @@ extern __be32 nfs4_callback_sequence(void *argp, void *resp, #define RCA4_TYPE_MASK_OBJ_LAYOUT_MAX 9 #define RCA4_TYPE_MASK_OTHER_LAYOUT_MIN 12 #define RCA4_TYPE_MASK_OTHER_LAYOUT_MAX 15 -#define RCA4_TYPE_MASK_ALL 0xf31f +#define PNFS_FF_RCA4_TYPE_MASK_READ 16 +#define PNFS_FF_RCA4_TYPE_MASK_RW 17 +#define RCA4_TYPE_MASK_ALL 0x3f31f struct cb_recallanyargs { uint32_t craa_objs_to_keep; diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 97084804a953..e61dbc9b86ae 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -597,6 +597,7 @@ __be32 nfs4_callback_recallany(void *argp, void *resp, struct cb_recallanyargs *args = argp; __be32 status; fmode_t flags = 0; + bool schedule_manager = false; status = cpu_to_be32(NFS4ERR_OP_NOT_IN_SESSION); if (!cps->clp) /* set in cb_sequence */ @@ -619,6 +620,18 @@ __be32 nfs4_callback_recallany(void *argp, void *resp, if (args->craa_type_mask & BIT(RCA4_TYPE_MASK_FILE_LAYOUT)) pnfs_recall_all_layouts(cps->clp); + + if (args->craa_type_mask & BIT(PNFS_FF_RCA4_TYPE_MASK_READ)) { + set_bit(NFS4CLNT_RECALL_ANY_LAYOUT_READ, &cps->clp->cl_state); + schedule_manager = true; + } + if (args->craa_type_mask & BIT(PNFS_FF_RCA4_TYPE_MASK_RW)) { + set_bit(NFS4CLNT_RECALL_ANY_LAYOUT_RW, &cps->clp->cl_state); + schedule_manager = true; + } + if (schedule_manager) + nfs4_schedule_state_manager(cps->clp); + out: dprintk("%s: exit with status = %d\n", __func__, ntohl(status)); return status; diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 8be1ba7c62bb..2b7f6dcd2eb8 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -42,7 +42,9 @@ enum nfs4_client_state { NFS4CLNT_LEASE_MOVED, NFS4CLNT_DELEGATION_EXPIRED, NFS4CLNT_RUN_MANAGER, - NFS4CLNT_DELEGRETURN_RUNNING, + NFS4CLNT_RECALL_RUNNING, + NFS4CLNT_RECALL_ANY_LAYOUT_READ, + NFS4CLNT_RECALL_ANY_LAYOUT_RW, }; #define NFS4_RENEW_TIMEOUT 0x01 diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index f7723d221945..ac93715c05a4 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -2524,6 +2524,21 @@ static int nfs4_bind_conn_to_session(struct nfs_client *clp) } return 0; } + +static void nfs4_layoutreturn_any_run(struct nfs_client *clp) +{ + int iomode = 0; + + if (test_and_clear_bit(NFS4CLNT_RECALL_ANY_LAYOUT_READ, &clp->cl_state)) + iomode += IOMODE_READ; + if (test_and_clear_bit(NFS4CLNT_RECALL_ANY_LAYOUT_RW, &clp->cl_state)) + iomode += IOMODE_RW; + /* Note: IOMODE_READ + IOMODE_RW == IOMODE_ANY */ + if (iomode) { + pnfs_layout_return_unused_byclid(clp, iomode); + set_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state); + } +} #else /* CONFIG_NFS_V4_1 */ static int nfs4_reset_session(struct nfs_client *clp) { return 0; } @@ -2531,6 +2546,10 @@ static int nfs4_bind_conn_to_session(struct nfs_client *clp) { return 0; } + +static void nfs4_layoutreturn_any_run(struct nfs_client *clp) +{ +} #endif /* CONFIG_NFS_V4_1 */ static void nfs4_state_manager(struct nfs_client *clp) @@ -2635,12 +2654,13 @@ static void nfs4_state_manager(struct nfs_client *clp) nfs4_end_drain_session(clp); nfs4_clear_state_manager_bit(clp); - if (!test_and_set_bit(NFS4CLNT_DELEGRETURN_RUNNING, &clp->cl_state)) { + if (!test_and_set_bit(NFS4CLNT_RECALL_RUNNING, &clp->cl_state)) { if (test_and_clear_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state)) { nfs_client_return_marked_delegations(clp); set_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state); } - clear_bit(NFS4CLNT_DELEGRETURN_RUNNING, &clp->cl_state); + nfs4_layoutreturn_any_run(clp); + clear_bit(NFS4CLNT_RECALL_RUNNING, &clp->cl_state); } /* Did we race with an attempt to give us more work? */ diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h index 1e97e5e04cb4..543541173a3d 100644 --- a/fs/nfs/nfs4trace.h +++ b/fs/nfs/nfs4trace.h @@ -584,7 +584,9 @@ TRACE_DEFINE_ENUM(NFS4CLNT_MOVED); TRACE_DEFINE_ENUM(NFS4CLNT_LEASE_MOVED); TRACE_DEFINE_ENUM(NFS4CLNT_DELEGATION_EXPIRED); TRACE_DEFINE_ENUM(NFS4CLNT_RUN_MANAGER); -TRACE_DEFINE_ENUM(NFS4CLNT_DELEGRETURN_RUNNING); +TRACE_DEFINE_ENUM(NFS4CLNT_RECALL_RUNNING); +TRACE_DEFINE_ENUM(NFS4CLNT_RECALL_ANY_LAYOUT_READ); +TRACE_DEFINE_ENUM(NFS4CLNT_RECALL_ANY_LAYOUT_RW); #define show_nfs4_clp_state(state) \ __print_flags(state, "|", \ @@ -605,7 +607,9 @@ TRACE_DEFINE_ENUM(NFS4CLNT_DELEGRETURN_RUNNING); { NFS4CLNT_LEASE_MOVED, "LEASE_MOVED" }, \ { NFS4CLNT_DELEGATION_EXPIRED, "DELEGATION_EXPIRED" }, \ { NFS4CLNT_RUN_MANAGER, "RUN_MANAGER" }, \ - { NFS4CLNT_DELEGRETURN_RUNNING, "DELEGRETURN_RUNNING" }) + { NFS4CLNT_RECALL_RUNNING, "RECALL_RUNNING" }, \ + { NFS4CLNT_RECALL_ANY_LAYOUT_READ, "RECALL_ANY_LAYOUT_READ" }, \ + { NFS4CLNT_RECALL_ANY_LAYOUT_RW, "RECALL_ANY_LAYOUT_RW" }) TRACE_EVENT(nfs4_state_mgr, TP_PROTO( diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 268e7b9ff54e..6b25117fca5f 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -309,6 +309,16 @@ pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo) } } +static struct inode * +pnfs_grab_inode_layout_hdr(struct pnfs_layout_hdr *lo) +{ + struct inode *inode = igrab(lo->plh_inode); + if (inode) + return inode; + set_bit(NFS_LAYOUT_INODE_FREEING, &lo->plh_flags); + return NULL; +} + static void pnfs_set_plh_return_info(struct pnfs_layout_hdr *lo, enum pnfs_iomode iomode, u32 seq) @@ -782,7 +792,7 @@ pnfs_layout_bulk_destroy_byserver_locked(struct nfs_client *clp, /* If the sb is being destroyed, just bail */ if (!nfs_sb_active(server->super)) break; - inode = igrab(lo->plh_inode); + inode = pnfs_grab_inode_layout_hdr(lo); if (inode != NULL) { if (test_and_clear_bit(NFS_LAYOUT_HASHED, &lo->plh_flags)) list_del_rcu(&lo->plh_layouts); @@ -795,7 +805,6 @@ pnfs_layout_bulk_destroy_byserver_locked(struct nfs_client *clp, } else { rcu_read_unlock(); spin_unlock(&clp->cl_lock); - set_bit(NFS_LAYOUT_INODE_FREEING, &lo->plh_flags); } nfs_sb_deactive(server->super); spin_lock(&clp->cl_lock); @@ -2434,29 +2443,26 @@ pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo, return -ENOENT; } -void pnfs_error_mark_layout_for_return(struct inode *inode, - struct pnfs_layout_segment *lseg) +static void +pnfs_mark_layout_for_return(struct inode *inode, + const struct pnfs_layout_range *range) { - struct pnfs_layout_hdr *lo = NFS_I(inode)->layout; - struct pnfs_layout_range range = { - .iomode = lseg->pls_range.iomode, - .offset = 0, - .length = NFS4_MAX_UINT64, - }; + struct pnfs_layout_hdr *lo; bool return_now = false; spin_lock(&inode->i_lock); + lo = NFS_I(inode)->layout; if (!pnfs_layout_is_valid(lo)) { spin_unlock(&inode->i_lock); return; } - pnfs_set_plh_return_info(lo, range.iomode, 0); + pnfs_set_plh_return_info(lo, range->iomode, 0); /* * mark all matching lsegs so that we are sure to have no live * segments at hand when sending layoutreturn. See pnfs_put_lseg() * for how it works. */ - if (pnfs_mark_matching_lsegs_return(lo, &lo->plh_return_segs, &range, 0) != -EBUSY) { + if (pnfs_mark_matching_lsegs_return(lo, &lo->plh_return_segs, range, 0) != -EBUSY) { nfs4_stateid stateid; enum pnfs_iomode iomode; @@ -2469,8 +2475,126 @@ void pnfs_error_mark_layout_for_return(struct inode *inode, nfs_commit_inode(inode, 0); } } + +void pnfs_error_mark_layout_for_return(struct inode *inode, + struct pnfs_layout_segment *lseg) +{ + struct pnfs_layout_range range = { + .iomode = lseg->pls_range.iomode, + .offset = 0, + .length = NFS4_MAX_UINT64, + }; + + pnfs_mark_layout_for_return(inode, &range); +} EXPORT_SYMBOL_GPL(pnfs_error_mark_layout_for_return); +static bool +pnfs_layout_can_be_returned(struct pnfs_layout_hdr *lo) +{ + return pnfs_layout_is_valid(lo) && + !test_bit(NFS_LAYOUT_INODE_FREEING, &lo->plh_flags) && + !test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags); +} + +static struct pnfs_layout_segment * +pnfs_find_first_lseg(struct pnfs_layout_hdr *lo, + const struct pnfs_layout_range *range, + enum pnfs_iomode iomode) +{ + struct pnfs_layout_segment *lseg; + + list_for_each_entry(lseg, &lo->plh_segs, pls_list) { + if (!test_bit(NFS_LSEG_VALID, &lseg->pls_flags)) + continue; + if (test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags)) + continue; + if (lseg->pls_range.iomode != iomode && iomode != IOMODE_ANY) + continue; + if (pnfs_lseg_range_intersecting(&lseg->pls_range, range)) + return lseg; + } + return NULL; +} + +/* Find open file states whose mode matches that of the range */ +static bool +pnfs_should_return_unused_layout(struct pnfs_layout_hdr *lo, + const struct pnfs_layout_range *range) +{ + struct list_head *head; + struct nfs_open_context *ctx; + fmode_t mode = 0; + + if (!pnfs_layout_can_be_returned(lo) || + !pnfs_find_first_lseg(lo, range, range->iomode)) + return false; + + head = &NFS_I(lo->plh_inode)->open_files; + list_for_each_entry_rcu(ctx, head, list) { + if (ctx->state) + mode |= ctx->state->state & (FMODE_READ|FMODE_WRITE); + } + + switch (range->iomode) { + default: + break; + case IOMODE_READ: + mode &= ~FMODE_WRITE; + break; + case IOMODE_RW: + if (pnfs_find_first_lseg(lo, range, IOMODE_READ)) + mode &= ~FMODE_READ; + } + return mode == 0; +} + +static int +pnfs_layout_return_unused_byserver(struct nfs_server *server, void *data) +{ + const struct pnfs_layout_range *range = data; + struct pnfs_layout_hdr *lo; + struct inode *inode; +restart: + rcu_read_lock(); + list_for_each_entry_rcu(lo, &server->layouts, plh_layouts) { + if (!pnfs_layout_can_be_returned(lo) || + test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags)) + continue; + inode = lo->plh_inode; + spin_lock(&inode->i_lock); + if (!pnfs_should_return_unused_layout(lo, range)) { + spin_unlock(&inode->i_lock); + continue; + } + spin_unlock(&inode->i_lock); + inode = pnfs_grab_inode_layout_hdr(lo); + if (!inode) + continue; + rcu_read_unlock(); + pnfs_mark_layout_for_return(inode, range); + iput(inode); + cond_resched(); + goto restart; + } + rcu_read_unlock(); + return 0; +} + +void +pnfs_layout_return_unused_byclid(struct nfs_client *clp, + enum pnfs_iomode iomode) +{ + struct pnfs_layout_range range = { + .iomode = iomode, + .offset = 0, + .length = NFS4_MAX_UINT64, + }; + + nfs_client_for_each_server(clp, pnfs_layout_return_unused_byserver, + &range); +} + void pnfs_generic_pg_check_layout(struct nfs_pageio_descriptor *pgio) { diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 8df9aa02d336..7bfb6970134a 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -329,6 +329,9 @@ int pnfs_write_done_resend_to_mds(struct nfs_pgio_header *); struct nfs4_threshold *pnfs_mdsthreshold_alloc(void); void pnfs_error_mark_layout_for_return(struct inode *inode, struct pnfs_layout_segment *lseg); +void pnfs_layout_return_unused_byclid(struct nfs_client *clp, + enum pnfs_iomode iomode); + /* nfs4_deviceid_flags */ enum { NFS_DEVICEID_INVALID = 0, /* set when MDS clientid recalled */ -- cgit v1.2.3-70-g09d2