From 7eac52648a4c24ad23a05f62db97867c92a5747b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 7 Feb 2020 19:11:12 -0500 Subject: SUNRPC: Add a flag to avoid reference counts on credentials Add a flag to signal to the RPC layer that the credential is already pinned for the duration of the RPC call. Signed-off-by: Trond Myklebust --- include/linux/sunrpc/sched.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index a6ef35184ef1..df696efdd675 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -132,6 +132,7 @@ struct rpc_task_setup { #define RPC_TASK_TIMEOUT 0x1000 /* fail with ETIMEDOUT on timeout */ #define RPC_TASK_NOCONNECT 0x2000 /* return ENOTCONN if not connected */ #define RPC_TASK_NO_RETRANS_TIMEOUT 0x4000 /* wait forever for a reply */ +#define RPC_TASK_CRED_NOREF 0x8000 /* No refcount on the credential */ #define RPC_IS_ASYNC(t) ((t)->tk_flags & RPC_TASK_ASYNC) #define RPC_IS_SWAPPER(t) ((t)->tk_flags & RPC_TASK_SWAPPER) -- cgit v1.2.3-70-g09d2 From 8d6bda7f23a9b3ef1d7e386f01924c37f18fe771 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 11 Mar 2020 11:21:12 -0400 Subject: SUNRPC: Remove xdr_buf_read_mic() Clean up: this function is no longer used. Signed-off-by: Chuck Lever Reviewed-by: Benjamin Coddington Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xdr.h | 1 - net/sunrpc/xdr.c | 55 ---------------------------------------------- 2 files changed, 56 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index b41f34977995..8a6dd5bd6748 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -184,7 +184,6 @@ xdr_adjust_iovec(struct kvec *iov, __be32 *p) extern void xdr_shift_buf(struct xdr_buf *, size_t); extern void xdr_buf_from_iov(struct kvec *, struct xdr_buf *); extern int xdr_buf_subsegment(struct xdr_buf *, struct xdr_buf *, unsigned int, unsigned int); -extern int xdr_buf_read_mic(struct xdr_buf *, struct xdr_netobj *, unsigned int); extern int read_bytes_from_xdr_buf(struct xdr_buf *, unsigned int, void *, unsigned int); extern int write_bytes_to_xdr_buf(struct xdr_buf *, unsigned int, void *, unsigned int); diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index e5497dc2475b..15b58c5144f9 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -1235,61 +1235,6 @@ xdr_encode_word(struct xdr_buf *buf, unsigned int base, u32 obj) } EXPORT_SYMBOL_GPL(xdr_encode_word); -/** - * xdr_buf_read_mic() - obtain the address of the GSS mic from xdr buf - * @buf: pointer to buffer containing a mic - * @mic: on success, returns the address of the mic - * @offset: the offset in buf where mic may be found - * - * This function may modify the xdr buf if the mic is found to be straddling - * a boundary between head, pages, and tail. On success the mic can be read - * from the address returned. There is no need to free the mic. - * - * Return: Success returns 0, otherwise an integer error. - */ -int xdr_buf_read_mic(struct xdr_buf *buf, struct xdr_netobj *mic, unsigned int offset) -{ - struct xdr_buf subbuf; - unsigned int boundary; - - if (xdr_decode_word(buf, offset, &mic->len)) - return -EFAULT; - offset += 4; - - /* Is the mic partially in the head? */ - boundary = buf->head[0].iov_len; - if (offset < boundary && (offset + mic->len) > boundary) - xdr_shift_buf(buf, boundary - offset); - - /* Is the mic partially in the pages? */ - boundary += buf->page_len; - if (offset < boundary && (offset + mic->len) > boundary) - xdr_shrink_pagelen(buf, boundary - offset); - - if (xdr_buf_subsegment(buf, &subbuf, offset, mic->len)) - return -EFAULT; - - /* Is the mic contained entirely in the head? */ - mic->data = subbuf.head[0].iov_base; - if (subbuf.head[0].iov_len == mic->len) - return 0; - /* ..or is the mic contained entirely in the tail? */ - mic->data = subbuf.tail[0].iov_base; - if (subbuf.tail[0].iov_len == mic->len) - return 0; - - /* Find a contiguous area in @buf to hold all of @mic */ - if (mic->len > buf->buflen - buf->len) - return -ENOMEM; - if (buf->tail[0].iov_len != 0) - mic->data = buf->tail[0].iov_base + buf->tail[0].iov_len; - else - mic->data = buf->head[0].iov_base + buf->head[0].iov_len; - __read_bytes_from_xdr_buf(&subbuf, mic->data, mic->len); - return 0; -} -EXPORT_SYMBOL_GPL(xdr_buf_read_mic); - /* Returns 0 on success, or else a negative error code. */ static int xdr_xcode_array2(struct xdr_buf *buf, unsigned int base, -- cgit v1.2.3-70-g09d2 From d7242c4641fba521a1ea9dbccb11a40cf38cd912 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 18 Mar 2020 17:22:47 -0400 Subject: pNFS: Add a helper to allocate the array of buckets Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.h | 3 +++ fs/nfs/pnfs_nfs.c | 31 +++++++++++++++++++++++++++++++ include/linux/nfs_xdr.h | 15 ++++++++++++--- 3 files changed, 46 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 7bfb6970134a..f6b1099aa151 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -366,6 +366,9 @@ bool nfs4_test_deviceid_unavailable(struct nfs4_deviceid_node *node); void nfs4_deviceid_purge_client(const struct nfs_client *); /* pnfs_nfs.c */ +struct pnfs_commit_array *pnfs_alloc_commit_array(size_t n, gfp_t gfp_flags); +void pnfs_free_commit_array(struct pnfs_commit_array *p); + void pnfs_generic_clear_request_commit(struct nfs_page *req, struct nfs_commit_info *cinfo); void pnfs_generic_commit_release(void *calldata); diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index 3d0942541618..c8518ce3a4ef 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -87,6 +87,37 @@ out: } EXPORT_SYMBOL_GPL(pnfs_generic_clear_request_commit); +struct pnfs_commit_array * +pnfs_alloc_commit_array(size_t n, gfp_t gfp_flags) +{ + struct pnfs_commit_array *p; + struct pnfs_commit_bucket *b; + + p = kmalloc(struct_size(p, buckets, n), gfp_flags); + if (!p) + return NULL; + p->nbuckets = n; + INIT_LIST_HEAD(&p->cinfo_list); + INIT_LIST_HEAD(&p->lseg_list); + p->lseg = NULL; + for (b = &p->buckets[0]; n != 0; b++, n--) { + INIT_LIST_HEAD(&b->written); + INIT_LIST_HEAD(&b->committing); + b->wlseg = NULL; + b->clseg = NULL; + b->direct_verf.committed = NFS_INVALID_STABLE_HOW; + } + return p; +} +EXPORT_SYMBOL_GPL(pnfs_alloc_commit_array); + +void +pnfs_free_commit_array(struct pnfs_commit_array *p) +{ + kfree_rcu(p, rcu); +} +EXPORT_SYMBOL_GPL(pnfs_free_commit_array); + static int pnfs_generic_scan_ds_commit_list(struct pnfs_commit_bucket *bucket, struct nfs_commit_info *cinfo, diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 94c77ed55ce1..e91c917c9c1c 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1270,10 +1270,19 @@ struct pnfs_commit_bucket { struct nfs_writeverf direct_verf; }; +struct pnfs_commit_array { + struct list_head cinfo_list; + struct list_head lseg_list; + struct pnfs_layout_segment *lseg; + struct rcu_head rcu; + unsigned int nbuckets; + struct pnfs_commit_bucket buckets[]; +}; + struct pnfs_ds_commit_info { - int nwritten; - int ncommitting; - int nbuckets; + unsigned int nwritten; + unsigned int ncommitting; + unsigned int nbuckets; struct pnfs_commit_bucket *buckets; }; -- cgit v1.2.3-70-g09d2 From c21e7168848d4ff4158120dbd4464f0d5cfb1456 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 19 Mar 2020 13:36:36 -0400 Subject: NFSv4/pnfs: Support a list of commit arrays in struct pnfs_ds_commit_info When we have multiple layout segments with different lists of mirrored data, we need to track the commits on a per layout segment basis. This patch adds a list to support this tracking in struct pnfs_ds_commit_info. Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 1 + fs/nfs/filelayout/filelayout.c | 5 ++++- fs/nfs/flexfilelayout/flexfilelayout.c | 1 + fs/nfs/pnfs.h | 11 +++++++++++ include/linux/nfs_xdr.h | 1 + 5 files changed, 18 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index ade2435551c8..f9a73febce02 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -305,6 +305,7 @@ static inline struct nfs_direct_req *nfs_direct_req_alloc(void) kref_get(&dreq->kref); init_completion(&dreq->completion); INIT_LIST_HEAD(&dreq->mds_cinfo.list); + pnfs_init_ds_commit_info(&dreq->ds_cinfo); dreq->verf.committed = NFS_INVALID_STABLE_HOW; /* not set yet */ INIT_WORK(&dreq->work, nfs_direct_write_schedule_work); spin_lock_init(&dreq->lock); diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c index bd234394a87c..b051d5d320ba 100644 --- a/fs/nfs/filelayout/filelayout.c +++ b/fs/nfs/filelayout/filelayout.c @@ -1140,7 +1140,10 @@ filelayout_alloc_layout_hdr(struct inode *inode, gfp_t gfp_flags) struct nfs4_filelayout *flo; flo = kzalloc(sizeof(*flo), gfp_flags); - return flo != NULL ? &flo->generic_hdr : NULL; + if (flo == NULL) + return NULL; + pnfs_init_ds_commit_info(&flo->commit_info); + return &flo->generic_hdr; } static void diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 19728206e9c6..c9e79c8e62cd 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -48,6 +48,7 @@ ff_layout_alloc_layout_hdr(struct inode *inode, gfp_t gfp_flags) ffl = kzalloc(sizeof(*ffl), gfp_flags); if (ffl) { + pnfs_init_ds_commit_info(&ffl->commit_info); INIT_LIST_HEAD(&ffl->error_list); INIT_LIST_HEAD(&ffl->mirrors); ffl->last_report_time = ktime_get(); diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index f6b1099aa151..b293afb48d04 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -462,6 +462,12 @@ pnfs_get_ds_info(struct inode *inode) return ld->get_ds_info(inode); } +static inline void +pnfs_init_ds_commit_info(struct pnfs_ds_commit_info *fl_cinfo) +{ + INIT_LIST_HEAD(&fl_cinfo->commits); +} + static inline void pnfs_generic_mark_devid_invalid(struct nfs4_deviceid_node *node) { @@ -759,6 +765,11 @@ pnfs_get_ds_info(struct inode *inode) return NULL; } +static inline void +pnfs_init_ds_commit_info(struct pnfs_ds_commit_info *fl_cinfo) +{ +} + static inline bool pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg, struct nfs_commit_info *cinfo, u32 ds_commit_idx) diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index e91c917c9c1c..9946787eda72 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1280,6 +1280,7 @@ struct pnfs_commit_array { }; struct pnfs_ds_commit_info { + struct list_head commits; unsigned int nwritten; unsigned int ncommitting; unsigned int nbuckets; -- cgit v1.2.3-70-g09d2 From a9901899b649dc80ef75c14d6d78059cae14def7 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 20 Mar 2020 16:04:06 -0400 Subject: pNFS: Add infrastructure for cleaning up per-layout commit structures Ensure that both the file and flexfiles layout types clean up when freeing the layout segments. Signed-off-by: Trond Myklebust --- fs/nfs/filelayout/filelayout.c | 16 +++++++ fs/nfs/flexfilelayout/flexfilelayout.c | 11 +++++ fs/nfs/internal.h | 4 +- fs/nfs/pnfs.c | 1 + fs/nfs/pnfs.h | 4 ++ fs/nfs/pnfs_nfs.c | 88 ++++++++++++++++++++++++++++++++-- include/linux/nfs_xdr.h | 1 + 7 files changed, 121 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c index b051d5d320ba..ffc5e2af1776 100644 --- a/fs/nfs/filelayout/filelayout.c +++ b/fs/nfs/filelayout/filelayout.c @@ -750,11 +750,16 @@ filelayout_free_lseg(struct pnfs_layout_segment *lseg) /* This assumes a single RW lseg */ if (lseg->pls_range.iomode == IOMODE_RW) { struct nfs4_filelayout *flo; + struct inode *inode; flo = FILELAYOUT_FROM_HDR(lseg->pls_layout); + inode = flo->generic_hdr.plh_inode; + spin_lock(&inode->i_lock); flo->commit_info.nbuckets = 0; kfree(flo->commit_info.buckets); flo->commit_info.buckets = NULL; + pnfs_generic_ds_cinfo_release_lseg(&flo->commit_info, lseg); + spin_unlock(&inode->i_lock); } _filelayout_free_lseg(fl); } @@ -1163,6 +1168,16 @@ filelayout_get_ds_info(struct inode *inode) return &FILELAYOUT_FROM_HDR(layout)->commit_info; } +static void +filelayout_release_ds_info(struct pnfs_ds_commit_info *fl_cinfo, + struct inode *inode) +{ + spin_lock(&inode->i_lock); + pnfs_generic_ds_cinfo_destroy(fl_cinfo); + spin_unlock(&inode->i_lock); +} + + static struct pnfs_layoutdriver_type filelayout_type = { .id = LAYOUT_NFSV4_1_FILES, .name = "LAYOUT_NFSV4_1_FILES", @@ -1176,6 +1191,7 @@ static struct pnfs_layoutdriver_type filelayout_type = { .pg_read_ops = &filelayout_pg_read_ops, .pg_write_ops = &filelayout_pg_write_ops, .get_ds_info = &filelayout_get_ds_info, + .release_ds_info = filelayout_release_ds_info, .mark_request_commit = filelayout_mark_request_commit, .clear_request_commit = pnfs_generic_clear_request_commit, .scan_commit_lists = pnfs_generic_scan_commit_lists, diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index c9e79c8e62cd..8e1393d75cbc 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -580,6 +580,7 @@ ff_layout_free_lseg(struct pnfs_layout_segment *lseg) kfree(ffl->commit_info.buckets); ffl->commit_info.buckets = NULL; } + pnfs_generic_ds_cinfo_release_lseg(&ffl->commit_info, lseg); spin_unlock(&inode->i_lock); } _ff_layout_free_lseg(fls); @@ -2003,6 +2004,15 @@ ff_layout_get_ds_info(struct inode *inode) return &FF_LAYOUT_FROM_HDR(layout)->commit_info; } +static void +ff_layout_release_ds_info(struct pnfs_ds_commit_info *fl_cinfo, + struct inode *inode) +{ + spin_lock(&inode->i_lock); + pnfs_generic_ds_cinfo_destroy(fl_cinfo); + spin_unlock(&inode->i_lock); +} + static void ff_layout_free_deviceid_node(struct nfs4_deviceid_node *d) { @@ -2503,6 +2513,7 @@ static struct pnfs_layoutdriver_type flexfilelayout_type = { .pg_read_ops = &ff_layout_pg_read_ops, .pg_write_ops = &ff_layout_pg_write_ops, .get_ds_info = ff_layout_get_ds_info, + .release_ds_info = ff_layout_release_ds_info, .free_deviceid_node = ff_layout_free_deviceid_node, .mark_request_commit = pnfs_layout_mark_request_commit, .clear_request_commit = pnfs_generic_clear_request_commit, diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 4a1adad3740f..683146a51599 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -534,9 +534,11 @@ void nfs_clear_pnfs_ds_commit_verifiers(struct pnfs_ds_commit_info *cinfo) pnfs_bucket_clear_pnfs_ds_commit_verifiers(cinfo->buckets, cinfo->nbuckets); - list_for_each_entry(array, &cinfo->commits, cinfo_list) + rcu_read_lock(); + list_for_each_entry_rcu(array, &cinfo->commits, cinfo_list) pnfs_bucket_clear_pnfs_ds_commit_verifiers(array->buckets, array->nbuckets); + rcu_read_unlock(); } #else static inline diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 6b25117fca5f..eba18f137fb0 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -506,6 +506,7 @@ pnfs_init_lseg(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg, { INIT_LIST_HEAD(&lseg->pls_list); INIT_LIST_HEAD(&lseg->pls_lc_list); + INIT_LIST_HEAD(&lseg->pls_commits); refcount_set(&lseg->pls_refcount, 1); set_bit(NFS_LSEG_VALID, &lseg->pls_flags); lseg->pls_layout = lo; diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 2ec97b419b56..6c48bd7b4640 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -66,6 +66,7 @@ struct nfs4_pnfs_ds { struct pnfs_layout_segment { struct list_head pls_list; struct list_head pls_lc_list; + struct list_head pls_commits; struct pnfs_layout_range pls_range; refcount_t pls_refcount; u32 pls_seq; @@ -370,6 +371,9 @@ void nfs4_deviceid_purge_client(const struct nfs_client *); /* pnfs_nfs.c */ struct pnfs_commit_array *pnfs_alloc_commit_array(size_t n, gfp_t gfp_flags); void pnfs_free_commit_array(struct pnfs_commit_array *p); +void pnfs_generic_ds_cinfo_release_lseg(struct pnfs_ds_commit_info *fl_cinfo, + struct pnfs_layout_segment *lseg); +void pnfs_generic_ds_cinfo_destroy(struct pnfs_ds_commit_info *fl_cinfo); void pnfs_generic_clear_request_commit(struct nfs_page *req, struct nfs_commit_info *cinfo); diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index f895a28b1e26..edad251a6a48 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -118,6 +118,67 @@ pnfs_free_commit_array(struct pnfs_commit_array *p) } EXPORT_SYMBOL_GPL(pnfs_free_commit_array); +static void +pnfs_release_commit_array_locked(struct pnfs_commit_array *array) +{ + list_del_rcu(&array->cinfo_list); + list_del(&array->lseg_list); + pnfs_free_commit_array(array); +} + +static void +pnfs_put_commit_array_locked(struct pnfs_commit_array *array) +{ + if (refcount_dec_and_test(&array->refcount)) + pnfs_release_commit_array_locked(array); +} + +static void +pnfs_put_commit_array(struct pnfs_commit_array *array, struct inode *inode) +{ + if (refcount_dec_and_lock(&array->refcount, &inode->i_lock)) { + pnfs_release_commit_array_locked(array); + spin_unlock(&inode->i_lock); + } +} + +static struct pnfs_commit_array * +pnfs_get_commit_array(struct pnfs_commit_array *array) +{ + if (refcount_inc_not_zero(&array->refcount)) + return array; + return NULL; +} + +static void +pnfs_remove_and_free_commit_array(struct pnfs_commit_array *array) +{ + array->lseg = NULL; + list_del_init(&array->lseg_list); + pnfs_put_commit_array_locked(array); +} + +void +pnfs_generic_ds_cinfo_release_lseg(struct pnfs_ds_commit_info *fl_cinfo, + struct pnfs_layout_segment *lseg) +{ + struct pnfs_commit_array *array, *tmp; + + list_for_each_entry_safe(array, tmp, &lseg->pls_commits, lseg_list) + pnfs_remove_and_free_commit_array(array); +} +EXPORT_SYMBOL_GPL(pnfs_generic_ds_cinfo_release_lseg); + +void +pnfs_generic_ds_cinfo_destroy(struct pnfs_ds_commit_info *fl_cinfo) +{ + struct pnfs_commit_array *array, *tmp; + + list_for_each_entry_safe(array, tmp, &fl_cinfo->commits, cinfo_list) + pnfs_remove_and_free_commit_array(array); +} +EXPORT_SYMBOL_GPL(pnfs_generic_ds_cinfo_destroy); + /* * Locks the nfs_page requests for commit and moves them to * @bucket->committing. @@ -177,14 +238,21 @@ int pnfs_generic_scan_commit_lists(struct nfs_commit_info *cinfo, int max) max -= cnt; if (!max) return rv; - list_for_each_entry(array, &fl_cinfo->commits, cinfo_list) { + rcu_read_lock(); + list_for_each_entry_rcu(array, &fl_cinfo->commits, cinfo_list) { + if (!array->lseg || !pnfs_get_commit_array(array)) + continue; + rcu_read_unlock(); cnt = pnfs_bucket_scan_array(cinfo, array->buckets, array->nbuckets, max); + rcu_read_lock(); + pnfs_put_commit_array(array, cinfo->inode); rv += cnt; max -= cnt; if (!max) break; } + rcu_read_unlock(); return rv; } EXPORT_SYMBOL_GPL(pnfs_generic_scan_commit_lists); @@ -230,13 +298,20 @@ void pnfs_generic_recover_commit_reqs(struct list_head *dst, fl_cinfo->nbuckets, cinfo); fl_cinfo->nwritten -= nwritten; - list_for_each_entry(array, &fl_cinfo->commits, cinfo_list) { + rcu_read_lock(); + list_for_each_entry_rcu(array, &fl_cinfo->commits, cinfo_list) { + if (!array->lseg || !pnfs_get_commit_array(array)) + continue; + rcu_read_unlock(); nwritten = pnfs_bucket_recover_commit_reqs(dst, array->buckets, array->nbuckets, cinfo); + rcu_read_lock(); + pnfs_put_commit_array(array, cinfo->inode); fl_cinfo->nwritten -= nwritten; } + rcu_read_unlock(); } EXPORT_SYMBOL_GPL(pnfs_generic_recover_commit_reqs); @@ -330,9 +405,16 @@ pnfs_alloc_ds_commits_list(struct list_head *list, struct pnfs_commit_array *array; unsigned int ret = 0; - list_for_each_entry(array, &fl_cinfo->commits, cinfo_list) + rcu_read_lock(); + list_for_each_entry_rcu(array, &fl_cinfo->commits, cinfo_list) { + if (!array->lseg || !pnfs_get_commit_array(array)) + continue; + rcu_read_unlock(); ret += pnfs_bucket_alloc_ds_commits(list, array->buckets, array->nbuckets, cinfo); + rcu_read_lock(); + pnfs_put_commit_array(array, cinfo->inode); + } return ret; } diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 9946787eda72..33be2ee2a248 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1275,6 +1275,7 @@ struct pnfs_commit_array { struct list_head lseg_list; struct pnfs_layout_segment *lseg; struct rcu_head rcu; + refcount_t refcount; unsigned int nbuckets; struct pnfs_commit_bucket buckets[]; }; -- cgit v1.2.3-70-g09d2 From 0aa647b7369dd29de0789c321111b2e4668c46b2 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 21 Mar 2020 09:50:05 -0400 Subject: NFS: Remove bucket array from struct pnfs_ds_commit_info Remove the unused bucket array in struct pnfs_ds_commit_info. Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 1 - fs/nfs/filelayout/filelayout.c | 75 +-------------------------------- fs/nfs/flexfilelayout/flexfilelayout.c | 76 ---------------------------------- fs/nfs/internal.h | 3 -- fs/nfs/pnfs_nfs.c | 18 -------- include/linux/nfs_xdr.h | 13 ------ 6 files changed, 1 insertion(+), 185 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 4ee26465b510..61f93a0fb0e0 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -217,7 +217,6 @@ static void nfs_direct_req_free(struct kref *kref) struct nfs_direct_req *dreq = container_of(kref, struct nfs_direct_req, kref); pnfs_release_ds_info(&dreq->ds_cinfo, dreq->inode); - nfs_free_pnfs_ds_cinfo(&dreq->ds_cinfo); if (dreq->l_ctx != NULL) nfs_put_lock_context(dreq->l_ctx); if (dreq->ctx != NULL) diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c index 795508054a4d..854f350e2599 100644 --- a/fs/nfs/filelayout/filelayout.c +++ b/fs/nfs/filelayout/filelayout.c @@ -755,72 +755,12 @@ filelayout_free_lseg(struct pnfs_layout_segment *lseg) flo = FILELAYOUT_FROM_HDR(lseg->pls_layout); inode = flo->generic_hdr.plh_inode; spin_lock(&inode->i_lock); - flo->commit_info.nbuckets = 0; - kfree(flo->commit_info.buckets); - flo->commit_info.buckets = NULL; pnfs_generic_ds_cinfo_release_lseg(&flo->commit_info, lseg); spin_unlock(&inode->i_lock); } _filelayout_free_lseg(fl); } -static int -filelayout_alloc_commit_info(struct pnfs_layout_segment *lseg, - struct nfs_commit_info *cinfo, - gfp_t gfp_flags) -{ - struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg); - struct pnfs_commit_bucket *buckets; - int size, i; - - if (fl->commit_through_mds) - return 0; - - size = (fl->stripe_type == STRIPE_SPARSE) ? - fl->dsaddr->ds_num : fl->dsaddr->stripe_count; - - if (cinfo->ds->nbuckets >= size) { - /* This assumes there is only one IOMODE_RW lseg. What - * we really want to do is have a layout_hdr level - * dictionary of keys, each - * associated with a struct list_head, populated by calls - * to filelayout_write_pagelist(). - * */ - return 0; - } - - buckets = kcalloc(size, sizeof(struct pnfs_commit_bucket), - gfp_flags); - if (!buckets) - return -ENOMEM; - for (i = 0; i < size; i++) { - INIT_LIST_HEAD(&buckets[i].written); - INIT_LIST_HEAD(&buckets[i].committing); - /* mark direct verifier as unset */ - buckets[i].direct_verf.committed = NFS_INVALID_STABLE_HOW; - } - - spin_lock(&cinfo->inode->i_lock); - if (cinfo->ds->nbuckets >= size) - goto out; - for (i = 0; i < cinfo->ds->nbuckets; i++) { - list_splice(&cinfo->ds->buckets[i].written, - &buckets[i].written); - list_splice(&cinfo->ds->buckets[i].committing, - &buckets[i].committing); - buckets[i].direct_verf.committed = - cinfo->ds->buckets[i].direct_verf.committed; - buckets[i].wlseg = cinfo->ds->buckets[i].wlseg; - buckets[i].clseg = cinfo->ds->buckets[i].clseg; - } - swap(cinfo->ds->buckets, buckets); - cinfo->ds->nbuckets = size; -out: - spin_unlock(&cinfo->inode->i_lock); - kfree(buckets); - return 0; -} - static struct pnfs_layout_segment * filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid, struct nfs4_layoutget_res *lgr, @@ -943,9 +883,6 @@ static void filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) { - struct nfs_commit_info cinfo; - int status; - pnfs_generic_pg_check_layout(pgio); if (!pgio->pg_lseg) { pgio->pg_lseg = fl_pnfs_update_layout(pgio->pg_inode, @@ -964,17 +901,7 @@ filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio, /* If no lseg, fall back to write through mds */ if (pgio->pg_lseg == NULL) - goto out_mds; - nfs_init_cinfo(&cinfo, pgio->pg_inode, pgio->pg_dreq); - status = filelayout_alloc_commit_info(pgio->pg_lseg, &cinfo, GFP_NOFS); - if (status < 0) { - pnfs_put_lseg(pgio->pg_lseg); - pgio->pg_lseg = NULL; - goto out_mds; - } - return; -out_mds: - nfs_pageio_reset_write_mds(pgio); + nfs_pageio_reset_write_mds(pgio); } static const struct nfs_pageio_ops filelayout_pg_read_ops = { diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index f343a241906a..1a4e36d07eab 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -550,17 +550,6 @@ out_err_free: goto out_free_page; } -static bool ff_layout_has_rw_segments(struct pnfs_layout_hdr *layout) -{ - struct pnfs_layout_segment *lseg; - - list_for_each_entry(lseg, &layout->plh_segs, pls_list) - if (lseg->pls_range.iomode == IOMODE_RW) - return true; - - return false; -} - static void ff_layout_free_lseg(struct pnfs_layout_segment *lseg) { @@ -575,24 +564,12 @@ ff_layout_free_lseg(struct pnfs_layout_segment *lseg) ffl = FF_LAYOUT_FROM_HDR(lseg->pls_layout); inode = ffl->generic_hdr.plh_inode; spin_lock(&inode->i_lock); - if (!ff_layout_has_rw_segments(lseg->pls_layout)) { - ffl->commit_info.nbuckets = 0; - kfree(ffl->commit_info.buckets); - ffl->commit_info.buckets = NULL; - } pnfs_generic_ds_cinfo_release_lseg(&ffl->commit_info, lseg); spin_unlock(&inode->i_lock); } _ff_layout_free_lseg(fls); } -/* Return 1 until we have multiple lsegs support */ -static int -ff_layout_get_lseg_count(struct nfs4_ff_layout_segment *fls) -{ - return 1; -} - static void nfs4_ff_start_busy_timer(struct nfs4_ff_busy_timer *timer, ktime_t now) { @@ -737,52 +714,6 @@ nfs4_ff_layout_stat_io_end_write(struct rpc_task *task, spin_unlock(&mirror->lock); } -static int -ff_layout_alloc_commit_info(struct pnfs_layout_segment *lseg, - struct nfs_commit_info *cinfo, - gfp_t gfp_flags) -{ - struct nfs4_ff_layout_segment *fls = FF_LAYOUT_LSEG(lseg); - struct pnfs_commit_bucket *buckets; - int size; - - if (cinfo->ds->nbuckets != 0) { - /* This assumes there is only one RW lseg per file. - * To support multiple lseg per file, we need to - * change struct pnfs_commit_bucket to allow dynamic - * increasing nbuckets. - */ - return 0; - } - - size = ff_layout_get_lseg_count(fls) * FF_LAYOUT_MIRROR_COUNT(lseg); - - buckets = kcalloc(size, sizeof(struct pnfs_commit_bucket), - gfp_flags); - if (!buckets) - return -ENOMEM; - else { - int i; - - spin_lock(&cinfo->inode->i_lock); - if (cinfo->ds->nbuckets != 0) - kfree(buckets); - else { - cinfo->ds->buckets = buckets; - cinfo->ds->nbuckets = size; - for (i = 0; i < size; i++) { - INIT_LIST_HEAD(&buckets[i].written); - INIT_LIST_HEAD(&buckets[i].committing); - /* mark direct verifier as unset */ - buckets[i].direct_verf.committed = - NFS_INVALID_STABLE_HOW; - } - } - spin_unlock(&cinfo->inode->i_lock); - return 0; - } -} - static void ff_layout_mark_ds_unreachable(struct pnfs_layout_segment *lseg, int idx) { @@ -944,10 +875,8 @@ ff_layout_pg_init_write(struct nfs_pageio_descriptor *pgio, { struct nfs4_ff_layout_mirror *mirror; struct nfs_pgio_mirror *pgm; - struct nfs_commit_info cinfo; struct nfs4_pnfs_ds *ds; int i; - int status; retry: pnfs_generic_pg_check_layout(pgio); @@ -969,11 +898,6 @@ retry: if (pgio->pg_lseg == NULL) goto out_mds; - nfs_init_cinfo(&cinfo, pgio->pg_inode, pgio->pg_dreq); - status = ff_layout_alloc_commit_info(pgio->pg_lseg, &cinfo, GFP_NOFS); - if (status < 0) - goto out_mds; - /* Use a direct mapping of ds_idx to pgio mirror_idx */ if (WARN_ON_ONCE(pgio->pg_mirror_count != FF_LAYOUT_MIRROR_COUNT(pgio->pg_lseg))) diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 683146a51599..78f317fac940 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -531,9 +531,6 @@ void nfs_clear_pnfs_ds_commit_verifiers(struct pnfs_ds_commit_info *cinfo) { struct pnfs_commit_array *array; - pnfs_bucket_clear_pnfs_ds_commit_verifiers(cinfo->buckets, - cinfo->nbuckets); - rcu_read_lock(); list_for_each_entry_rcu(array, &cinfo->commits, cinfo_list) pnfs_bucket_clear_pnfs_ds_commit_verifiers(array->buckets, diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index 9b55919e64ac..20f12f3cbe38 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -292,12 +292,6 @@ int pnfs_generic_scan_commit_lists(struct nfs_commit_info *cinfo, int max) struct pnfs_commit_array *array; int rv = 0, cnt; - cnt = pnfs_bucket_scan_array(cinfo, fl_cinfo->buckets, - fl_cinfo->nbuckets, max); - rv += cnt; - max -= cnt; - if (!max) - return rv; rcu_read_lock(); list_for_each_entry_rcu(array, &fl_cinfo->commits, cinfo_list) { if (!array->lseg || !pnfs_get_commit_array(array)) @@ -353,11 +347,6 @@ void pnfs_generic_recover_commit_reqs(struct list_head *dst, unsigned int nwritten; lockdep_assert_held(&NFS_I(cinfo->inode)->commit_mutex); - nwritten = pnfs_bucket_recover_commit_reqs(dst, - fl_cinfo->buckets, - fl_cinfo->nbuckets, - cinfo); - fl_cinfo->nwritten -= nwritten; rcu_read_lock(); list_for_each_entry_rcu(array, &fl_cinfo->commits, cinfo_list) { if (!array->lseg || !pnfs_get_commit_array(array)) @@ -412,10 +401,6 @@ pnfs_generic_search_commit_reqs(struct nfs_commit_info *cinfo, struct page *page struct pnfs_commit_array *array; struct nfs_page *req; - req = pnfs_bucket_search_commit_reqs(fl_cinfo->buckets, - fl_cinfo->nbuckets, page); - if (req) - return req; list_for_each_entry(array, &fl_cinfo->commits, cinfo_list) { req = pnfs_bucket_search_commit_reqs(array->buckets, array->nbuckets, page); @@ -550,9 +535,6 @@ pnfs_generic_commit_pagelist(struct inode *inode, struct list_head *mds_pages, nreq++; } - nreq += pnfs_bucket_alloc_ds_commits(&list, fl_cinfo->buckets, - fl_cinfo->nbuckets, cinfo); - nreq += pnfs_alloc_ds_commits_list(&list, fl_cinfo, cinfo); if (nreq == 0) goto out; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 33be2ee2a248..2903597ec88c 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1284,8 +1284,6 @@ struct pnfs_ds_commit_info { struct list_head commits; unsigned int nwritten; unsigned int ncommitting; - unsigned int nbuckets; - struct pnfs_commit_bucket *buckets; }; struct nfs41_state_protection { @@ -1396,22 +1394,11 @@ struct nfs41_free_stateid_res { unsigned int status; }; -static inline void -nfs_free_pnfs_ds_cinfo(struct pnfs_ds_commit_info *cinfo) -{ - kfree(cinfo->buckets); -} - #else struct pnfs_ds_commit_info { }; -static inline void -nfs_free_pnfs_ds_cinfo(struct pnfs_ds_commit_info *cinfo) -{ -} - #endif /* CONFIG_NFS_V4_1 */ #ifdef CONFIG_NFS_V4_2 -- cgit v1.2.3-70-g09d2 From 9c455a8c1e146dac3a6d1405fe6a7096177b9546 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 21 Mar 2020 11:13:05 -0400 Subject: NFS/pNFS: Clean up pNFS commit operations Move the pNFS commit related operations into a separate structure that can be carried by the pnfs_ds_commit_info. Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 6 +- fs/nfs/filelayout/filelayout.c | 20 +++--- fs/nfs/flexfilelayout/flexfilelayout.c | 19 +++--- fs/nfs/pnfs.h | 110 +++++++++++++++++++++------------ fs/nfs/pnfs_nfs.c | 13 +--- include/linux/nfs_xdr.h | 1 + 6 files changed, 98 insertions(+), 71 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 61f93a0fb0e0..51ab4627c4d6 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -511,10 +511,7 @@ nfs_direct_write_scan_commit_list(struct inode *inode, struct nfs_commit_info *cinfo) { mutex_lock(&NFS_I(cinfo->inode)->commit_mutex); -#ifdef CONFIG_NFS_V4_1 - if (cinfo->ds != NULL && cinfo->ds->nwritten != 0) - NFS_SERVER(inode)->pnfs_curr_ld->recover_commit_reqs(list, cinfo); -#endif + pnfs_recover_commit_reqs(list, cinfo); nfs_scan_commit_list(&cinfo->mds->list, list, cinfo, 0); mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex); } @@ -917,6 +914,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter) dreq->l_ctx = l_ctx; if (!is_sync_kiocb(iocb)) dreq->iocb = iocb; + pnfs_init_ds_commit_info_ops(&dreq->ds_cinfo, inode); nfs_start_io_direct(inode); diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c index 854f350e2599..a13e69009f19 100644 --- a/fs/nfs/filelayout/filelayout.c +++ b/fs/nfs/filelayout/filelayout.c @@ -49,6 +49,7 @@ MODULE_AUTHOR("Dean Hildebrand "); MODULE_DESCRIPTION("The NFSv4 file layout driver"); #define FILELAYOUT_POLL_RETRY_MAX (15*HZ) +static const struct pnfs_commit_ops filelayout_commit_ops; static loff_t filelayout_get_dense_offset(struct nfs4_filelayout_segment *flseg, @@ -1045,6 +1046,7 @@ filelayout_alloc_layout_hdr(struct inode *inode, gfp_t gfp_flags) if (flo == NULL) return NULL; pnfs_init_ds_commit_info(&flo->commit_info); + flo->commit_info.ops = &filelayout_commit_ops; return &flo->generic_hdr; } @@ -1094,6 +1096,16 @@ filelayout_release_ds_info(struct pnfs_ds_commit_info *fl_cinfo, spin_unlock(&inode->i_lock); } +static const struct pnfs_commit_ops filelayout_commit_ops = { + .setup_ds_info = filelayout_setup_ds_info, + .release_ds_info = filelayout_release_ds_info, + .mark_request_commit = filelayout_mark_request_commit, + .clear_request_commit = pnfs_generic_clear_request_commit, + .scan_commit_lists = pnfs_generic_scan_commit_lists, + .recover_commit_reqs = pnfs_generic_recover_commit_reqs, + .search_commit_reqs = pnfs_generic_search_commit_reqs, + .commit_pagelist = filelayout_commit_pagelist, +}; static struct pnfs_layoutdriver_type filelayout_type = { .id = LAYOUT_NFSV4_1_FILES, @@ -1108,14 +1120,6 @@ static struct pnfs_layoutdriver_type filelayout_type = { .pg_read_ops = &filelayout_pg_read_ops, .pg_write_ops = &filelayout_pg_write_ops, .get_ds_info = &filelayout_get_ds_info, - .setup_ds_info = filelayout_setup_ds_info, - .release_ds_info = filelayout_release_ds_info, - .mark_request_commit = filelayout_mark_request_commit, - .clear_request_commit = pnfs_generic_clear_request_commit, - .scan_commit_lists = pnfs_generic_scan_commit_lists, - .recover_commit_reqs = pnfs_generic_recover_commit_reqs, - .search_commit_reqs = pnfs_generic_search_commit_reqs, - .commit_pagelist = filelayout_commit_pagelist, .read_pagelist = filelayout_read_pagelist, .write_pagelist = filelayout_write_pagelist, .alloc_deviceid_node = filelayout_alloc_deviceid_node, diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 1a4e36d07eab..d37883a2b51f 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -32,6 +32,7 @@ static unsigned short io_maxretrans; +static const struct pnfs_commit_ops ff_layout_commit_ops; static void ff_layout_read_record_layoutstats_done(struct rpc_task *task, struct nfs_pgio_header *hdr); static int ff_layout_mirror_prepare_stats(struct pnfs_layout_hdr *lo, @@ -52,6 +53,7 @@ ff_layout_alloc_layout_hdr(struct inode *inode, gfp_t gfp_flags) INIT_LIST_HEAD(&ffl->error_list); INIT_LIST_HEAD(&ffl->mirrors); ffl->last_report_time = ktime_get(); + ffl->commit_info.ops = &ff_layout_commit_ops; return &ffl->generic_hdr; } else return NULL; @@ -2440,6 +2442,16 @@ ff_layout_set_layoutdriver(struct nfs_server *server, return 0; } +static const struct pnfs_commit_ops ff_layout_commit_ops = { + .setup_ds_info = ff_layout_setup_ds_info, + .release_ds_info = ff_layout_release_ds_info, + .mark_request_commit = pnfs_layout_mark_request_commit, + .clear_request_commit = pnfs_generic_clear_request_commit, + .scan_commit_lists = pnfs_generic_scan_commit_lists, + .recover_commit_reqs = pnfs_generic_recover_commit_reqs, + .commit_pagelist = ff_layout_commit_pagelist, +}; + static struct pnfs_layoutdriver_type flexfilelayout_type = { .id = LAYOUT_FLEX_FILES, .name = "LAYOUT_FLEX_FILES", @@ -2455,14 +2467,7 @@ static struct pnfs_layoutdriver_type flexfilelayout_type = { .pg_read_ops = &ff_layout_pg_read_ops, .pg_write_ops = &ff_layout_pg_write_ops, .get_ds_info = ff_layout_get_ds_info, - .setup_ds_info = ff_layout_setup_ds_info, - .release_ds_info = ff_layout_release_ds_info, .free_deviceid_node = ff_layout_free_deviceid_node, - .mark_request_commit = pnfs_layout_mark_request_commit, - .clear_request_commit = pnfs_generic_clear_request_commit, - .scan_commit_lists = pnfs_generic_scan_commit_lists, - .recover_commit_reqs = pnfs_generic_recover_commit_reqs, - .commit_pagelist = ff_layout_commit_pagelist, .read_pagelist = ff_layout_read_pagelist, .write_pagelist = ff_layout_write_pagelist, .alloc_deviceid_node = ff_layout_alloc_deviceid_node, diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index faed9be6e479..b32025553f26 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -150,26 +150,6 @@ struct pnfs_layoutdriver_type { const struct nfs_pageio_ops *pg_write_ops; struct pnfs_ds_commit_info *(*get_ds_info) (struct inode *inode); - void (*setup_ds_info)(struct pnfs_ds_commit_info *, - struct pnfs_layout_segment *); - void (*release_ds_info)(struct pnfs_ds_commit_info *, - struct inode *inode); - void (*mark_request_commit) (struct nfs_page *req, - struct pnfs_layout_segment *lseg, - struct nfs_commit_info *cinfo, - u32 ds_commit_idx); - void (*clear_request_commit) (struct nfs_page *req, - struct nfs_commit_info *cinfo); - int (*scan_commit_lists) (struct nfs_commit_info *cinfo, - int max); - void (*recover_commit_reqs) (struct list_head *list, - struct nfs_commit_info *cinfo); - struct nfs_page * (*search_commit_reqs)(struct nfs_commit_info *cinfo, - struct page *page); - int (*commit_pagelist)(struct inode *inode, - struct list_head *mds_pages, - int how, - struct nfs_commit_info *cinfo); int (*sync)(struct inode *inode, bool datasync); @@ -192,6 +172,29 @@ struct pnfs_layoutdriver_type { int (*prepare_layoutstats) (struct nfs42_layoutstat_args *args); }; +struct pnfs_commit_ops { + void (*setup_ds_info)(struct pnfs_ds_commit_info *, + struct pnfs_layout_segment *); + void (*release_ds_info)(struct pnfs_ds_commit_info *, + struct inode *inode); + int (*commit_pagelist)(struct inode *inode, + struct list_head *mds_pages, + int how, + struct nfs_commit_info *cinfo); + void (*mark_request_commit) (struct nfs_page *req, + struct pnfs_layout_segment *lseg, + struct nfs_commit_info *cinfo, + u32 ds_commit_idx); + void (*clear_request_commit) (struct nfs_page *req, + struct nfs_commit_info *cinfo); + int (*scan_commit_lists) (struct nfs_commit_info *cinfo, + int max); + void (*recover_commit_reqs) (struct list_head *list, + struct nfs_commit_info *cinfo); + struct nfs_page * (*search_commit_reqs)(struct nfs_commit_info *cinfo, + struct page *page); +}; + struct pnfs_layout_hdr { refcount_t plh_refcount; atomic_t plh_outstanding; /* number of RPCs out */ @@ -461,9 +464,11 @@ static inline int pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how, struct nfs_commit_info *cinfo) { - if (cinfo->ds == NULL || cinfo->ds->ncommitting == 0) + struct pnfs_ds_commit_info *fl_cinfo = cinfo->ds; + + if (fl_cinfo == NULL || fl_cinfo->ncommitting == 0) return PNFS_NOT_ATTEMPTED; - return NFS_SERVER(inode)->pnfs_curr_ld->commit_pagelist(inode, mds_pages, how, cinfo); + return fl_cinfo->ops->commit_pagelist(inode, mds_pages, how, cinfo); } static inline struct pnfs_ds_commit_info * @@ -476,19 +481,26 @@ pnfs_get_ds_info(struct inode *inode) return ld->get_ds_info(inode); } +static inline void +pnfs_init_ds_commit_info_ops(struct pnfs_ds_commit_info *fl_cinfo, struct inode *inode) +{ + struct pnfs_ds_commit_info *inode_cinfo = pnfs_get_ds_info(inode); + if (inode_cinfo != NULL) + fl_cinfo->ops = inode_cinfo->ops; +} + static inline void pnfs_init_ds_commit_info(struct pnfs_ds_commit_info *fl_cinfo) { INIT_LIST_HEAD(&fl_cinfo->commits); + fl_cinfo->ops = NULL; } static inline void pnfs_release_ds_info(struct pnfs_ds_commit_info *fl_cinfo, struct inode *inode) { - struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld; - - if (ld != NULL && ld->release_ds_info != NULL) - ld->release_ds_info(fl_cinfo, inode); + if (fl_cinfo->ops != NULL && fl_cinfo->ops->release_ds_info != NULL) + fl_cinfo->ops->release_ds_info(fl_cinfo, inode); } static inline void @@ -501,24 +513,22 @@ static inline bool pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg, struct nfs_commit_info *cinfo, u32 ds_commit_idx) { - struct inode *inode = d_inode(nfs_req_openctx(req)->dentry); - struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld; + struct pnfs_ds_commit_info *fl_cinfo = cinfo->ds; - if (lseg == NULL || ld->mark_request_commit == NULL) + if (!lseg || !fl_cinfo->ops->mark_request_commit) return false; - ld->mark_request_commit(req, lseg, cinfo, ds_commit_idx); + fl_cinfo->ops->mark_request_commit(req, lseg, cinfo, ds_commit_idx); return true; } static inline bool pnfs_clear_request_commit(struct nfs_page *req, struct nfs_commit_info *cinfo) { - struct inode *inode = d_inode(nfs_req_openctx(req)->dentry); - struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld; + struct pnfs_ds_commit_info *fl_cinfo = cinfo->ds; - if (ld == NULL || ld->clear_request_commit == NULL) + if (!fl_cinfo || !fl_cinfo->ops || !fl_cinfo->ops->clear_request_commit) return false; - ld->clear_request_commit(req, cinfo); + fl_cinfo->ops->clear_request_commit(req, cinfo); return true; } @@ -526,21 +536,31 @@ static inline int pnfs_scan_commit_lists(struct inode *inode, struct nfs_commit_info *cinfo, int max) { - if (cinfo->ds == NULL || cinfo->ds->nwritten == 0) + struct pnfs_ds_commit_info *fl_cinfo = cinfo->ds; + + if (!fl_cinfo || fl_cinfo->nwritten == 0) return 0; - else - return NFS_SERVER(inode)->pnfs_curr_ld->scan_commit_lists(cinfo, max); + return fl_cinfo->ops->scan_commit_lists(cinfo, max); +} + +static inline void +pnfs_recover_commit_reqs(struct list_head *head, struct nfs_commit_info *cinfo) +{ + struct pnfs_ds_commit_info *fl_cinfo = cinfo->ds; + + if (fl_cinfo && fl_cinfo->nwritten != 0) + fl_cinfo->ops->recover_commit_reqs(head, cinfo); } static inline struct nfs_page * pnfs_search_commit_reqs(struct inode *inode, struct nfs_commit_info *cinfo, struct page *page) { - struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld; + struct pnfs_ds_commit_info *fl_cinfo = cinfo->ds; - if (ld == NULL || ld->search_commit_reqs == NULL) + if (!fl_cinfo->ops || !fl_cinfo->ops->search_commit_reqs) return NULL; - return ld->search_commit_reqs(cinfo, page); + return fl_cinfo->ops->search_commit_reqs(cinfo, page); } /* Should the pNFS client commit and return the layout upon a setattr */ @@ -788,6 +808,11 @@ pnfs_get_ds_info(struct inode *inode) return NULL; } +static inline void +pnfs_init_ds_commit_info_ops(struct pnfs_ds_commit_info *fl_cinfo, struct inode *inode) +{ +} + static inline void pnfs_init_ds_commit_info(struct pnfs_ds_commit_info *fl_cinfo) { @@ -818,6 +843,11 @@ pnfs_scan_commit_lists(struct inode *inode, struct nfs_commit_info *cinfo, return 0; } +static inline void +pnfs_recover_commit_reqs(struct list_head *head, struct nfs_commit_info *cinfo) +{ +} + static inline struct nfs_page * pnfs_search_commit_reqs(struct inode *inode, struct nfs_commit_info *cinfo, struct page *page) diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index 20f12f3cbe38..06df2e6663dc 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -149,17 +149,6 @@ pnfs_add_commit_array(struct pnfs_ds_commit_info *fl_cinfo, } EXPORT_SYMBOL_GPL(pnfs_add_commit_array); -static void -pnfs_setup_ds_info(struct pnfs_ds_commit_info *fl_cinfo, - struct pnfs_layout_segment *lseg) -{ - struct inode *inode = lseg->pls_layout->plh_inode; - struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld; - - if (ld->setup_ds_info != NULL) - ld->setup_ds_info(fl_cinfo, lseg); -} - static struct pnfs_commit_array * pnfs_lookup_commit_array(struct pnfs_ds_commit_info *fl_cinfo, struct pnfs_layout_segment *lseg) @@ -170,7 +159,7 @@ pnfs_lookup_commit_array(struct pnfs_ds_commit_info *fl_cinfo, array = pnfs_find_commit_array_by_lseg(fl_cinfo, lseg); if (!array) { rcu_read_unlock(); - pnfs_setup_ds_info(fl_cinfo, lseg); + fl_cinfo->ops->setup_ds_info(fl_cinfo, lseg); rcu_read_lock(); array = pnfs_find_commit_array_by_lseg(fl_cinfo, lseg); } diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 2903597ec88c..adbbeae9ce5b 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1284,6 +1284,7 @@ struct pnfs_ds_commit_info { struct list_head commits; unsigned int nwritten; unsigned int ncommitting; + const struct pnfs_commit_ops *ops; }; struct nfs41_state_protection { -- cgit v1.2.3-70-g09d2 From c84bea59449aaa699a0600a50f59d441cc1d4501 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 22 Mar 2020 14:47:38 -0400 Subject: NFS/pNFS: Simplify bucket layout segment reference counting Signed-off-by: Trond Myklebust --- fs/nfs/pnfs_nfs.c | 39 ++++++++++++++++++++------------------- include/linux/nfs_xdr.h | 3 +-- 2 files changed, 21 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index 06df2e6663dc..abf16fc98346 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -59,6 +59,17 @@ void pnfs_generic_commit_release(void *calldata) } EXPORT_SYMBOL_GPL(pnfs_generic_commit_release); +static struct pnfs_layout_segment * +pnfs_free_bucket_lseg(struct pnfs_commit_bucket *bucket) +{ + if (list_empty(&bucket->committing) && list_empty(&bucket->written)) { + struct pnfs_layout_segment *freeme = bucket->lseg; + bucket->lseg = NULL; + return freeme; + } + return NULL; +} + /* The generic layer is about to remove the req from the commit list. * If this will make the bucket empty, it will need to put the lseg reference. * Note this must be called holding nfsi->commit_mutex @@ -78,8 +89,7 @@ pnfs_generic_clear_request_commit(struct nfs_page *req, bucket = list_first_entry(&req->wb_list, struct pnfs_commit_bucket, written); - freeme = bucket->wlseg; - bucket->wlseg = NULL; + freeme = pnfs_free_bucket_lseg(bucket); } out: nfs_request_remove_commit_list(req, cinfo); @@ -103,8 +113,7 @@ pnfs_alloc_commit_array(size_t n, gfp_t gfp_flags) for (b = &p->buckets[0]; n != 0; b++, n--) { INIT_LIST_HEAD(&b->written); INIT_LIST_HEAD(&b->committing); - b->wlseg = NULL; - b->clseg = NULL; + b->lseg = NULL; b->direct_verf.committed = NFS_INVALID_STABLE_HOW; } return p; @@ -246,12 +255,6 @@ pnfs_bucket_scan_ds_commit_list(struct pnfs_commit_bucket *bucket, if (ret) { cinfo->ds->nwritten -= ret; cinfo->ds->ncommitting += ret; - if (bucket->clseg == NULL) - bucket->clseg = pnfs_get_lseg(bucket->wlseg); - if (list_empty(src)) { - pnfs_put_lseg(bucket->wlseg); - bucket->wlseg = NULL; - } } return ret; } @@ -317,9 +320,8 @@ restart: if (!nwritten) continue; ret += nwritten; - if (list_empty(&b->written)) { - freeme = b->wlseg; - b->wlseg = NULL; + freeme = pnfs_free_bucket_lseg(b); + if (freeme) { pnfs_put_lseg(freeme); goto restart; } @@ -405,15 +407,12 @@ pnfs_bucket_get_committing(struct list_head *head, struct pnfs_commit_bucket *bucket, struct nfs_commit_info *cinfo) { - struct pnfs_layout_segment *freeme; struct list_head *pos; list_for_each(pos, &bucket->committing) cinfo->ds->ncommitting--; list_splice_init(&bucket->committing, head); - freeme = bucket->clseg; - bucket->clseg = NULL; - return freeme; + return pnfs_free_bucket_lseg(bucket); } static struct nfs_commit_data * @@ -425,6 +424,8 @@ pnfs_bucket_fetch_commitdata(struct pnfs_commit_bucket *bucket, if (!data) return NULL; data->lseg = pnfs_bucket_get_committing(&data->pages, bucket, cinfo); + if (!data->lseg) + data->lseg = pnfs_get_lseg(bucket->lseg); return data; } @@ -1182,8 +1183,8 @@ pnfs_layout_mark_request_commit(struct nfs_page *req, * off due to a rewrite, in which case it will be done in * pnfs_common_clear_request_commit */ - WARN_ON_ONCE(buckets[ds_commit_idx].wlseg != NULL); - buckets[ds_commit_idx].wlseg = pnfs_get_lseg(lseg); + if (!buckets[ds_commit_idx].lseg) + buckets[ds_commit_idx].lseg = pnfs_get_lseg(lseg); } set_bit(PG_COMMIT_TO_DS, &req->wb_flags); cinfo->ds->nwritten++; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index adbbeae9ce5b..7bbb1f6fc1b1 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1265,8 +1265,7 @@ struct nfstime4 { struct pnfs_commit_bucket { struct list_head written; struct list_head committing; - struct pnfs_layout_segment *wlseg; - struct pnfs_layout_segment *clseg; + struct pnfs_layout_segment *lseg; struct nfs_writeverf direct_verf; }; -- cgit v1.2.3-70-g09d2 From 08ca8b21f760c0ed5034a5c122092eec22ccf8f4 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 1 Apr 2020 13:04:49 -0400 Subject: NFS: Fix races nfs_page_group_destroy() vs nfs_destroy_unlinked_subrequests() When a subrequest is being detached from the subgroup, we want to ensure that it is not holding the group lock, or in the process of waiting for the group lock. Fixes: 5b2b5187fa85 ("NFS: Fix nfs_page_group_destroy() and nfs_lock_and_join_requests() race cases") Signed-off-by: Trond Myklebust --- fs/nfs/pagelist.c | 67 ++++++++++++++++++++++++++++++++---------------- fs/nfs/write.c | 10 ++++++-- include/linux/nfs_page.h | 2 ++ 3 files changed, 55 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index be5e209399ea..0e3f0f241d83 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -133,47 +133,70 @@ nfs_async_iocounter_wait(struct rpc_task *task, struct nfs_lock_context *l_ctx) EXPORT_SYMBOL_GPL(nfs_async_iocounter_wait); /* - * nfs_page_group_lock - lock the head of the page group - * @req - request in group that is to be locked + * nfs_page_set_headlock - set the request PG_HEADLOCK + * @req: request that is to be locked * - * this lock must be held when traversing or modifying the page - * group list + * this lock must be held when modifying req->wb_head * * return 0 on success, < 0 on error */ int -nfs_page_group_lock(struct nfs_page *req) +nfs_page_set_headlock(struct nfs_page *req) { - struct nfs_page *head = req->wb_head; - - WARN_ON_ONCE(head != head->wb_head); - - if (!test_and_set_bit(PG_HEADLOCK, &head->wb_flags)) + if (!test_and_set_bit(PG_HEADLOCK, &req->wb_flags)) return 0; - set_bit(PG_CONTENDED1, &head->wb_flags); + set_bit(PG_CONTENDED1, &req->wb_flags); smp_mb__after_atomic(); - return wait_on_bit_lock(&head->wb_flags, PG_HEADLOCK, + return wait_on_bit_lock(&req->wb_flags, PG_HEADLOCK, TASK_UNINTERRUPTIBLE); } /* - * nfs_page_group_unlock - unlock the head of the page group - * @req - request in group that is to be unlocked + * nfs_page_clear_headlock - clear the request PG_HEADLOCK + * @req: request that is to be locked */ void -nfs_page_group_unlock(struct nfs_page *req) +nfs_page_clear_headlock(struct nfs_page *req) { - struct nfs_page *head = req->wb_head; - - WARN_ON_ONCE(head != head->wb_head); - smp_mb__before_atomic(); - clear_bit(PG_HEADLOCK, &head->wb_flags); + clear_bit(PG_HEADLOCK, &req->wb_flags); smp_mb__after_atomic(); - if (!test_bit(PG_CONTENDED1, &head->wb_flags)) + if (!test_bit(PG_CONTENDED1, &req->wb_flags)) return; - wake_up_bit(&head->wb_flags, PG_HEADLOCK); + wake_up_bit(&req->wb_flags, PG_HEADLOCK); +} + +/* + * nfs_page_group_lock - lock the head of the page group + * @req: request in group that is to be locked + * + * this lock must be held when traversing or modifying the page + * group list + * + * return 0 on success, < 0 on error + */ +int +nfs_page_group_lock(struct nfs_page *req) +{ + int ret; + + ret = nfs_page_set_headlock(req); + if (ret || req->wb_head == req) + return ret; + return nfs_page_set_headlock(req->wb_head); +} + +/* + * nfs_page_group_unlock - unlock the head of the page group + * @req: request in group that is to be unlocked + */ +void +nfs_page_group_unlock(struct nfs_page *req) +{ + if (req != req->wb_head) + nfs_page_clear_headlock(req->wb_head); + nfs_page_clear_headlock(req); } /* diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 626e99cbb50e..a6d7926b0653 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -428,22 +428,28 @@ nfs_destroy_unlinked_subrequests(struct nfs_page *destroy_list, destroy_list = (subreq->wb_this_page == old_head) ? NULL : subreq->wb_this_page; + /* Note: lock subreq in order to change subreq->wb_head */ + nfs_page_set_headlock(subreq); WARN_ON_ONCE(old_head != subreq->wb_head); /* make sure old group is not used */ subreq->wb_this_page = subreq; + subreq->wb_head = subreq; clear_bit(PG_REMOVE, &subreq->wb_flags); /* Note: races with nfs_page_group_destroy() */ if (!kref_read(&subreq->wb_kref)) { /* Check if we raced with nfs_page_group_destroy() */ - if (test_and_clear_bit(PG_TEARDOWN, &subreq->wb_flags)) + if (test_and_clear_bit(PG_TEARDOWN, &subreq->wb_flags)) { + nfs_page_clear_headlock(subreq); nfs_free_request(subreq); + } else + nfs_page_clear_headlock(subreq); continue; } + nfs_page_clear_headlock(subreq); - subreq->wb_head = subreq; nfs_release_request(old_head); if (test_and_clear_bit(PG_INODE_REF, &subreq->wb_flags)) { diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 0bbd587fac6a..7e9419d74b86 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -142,6 +142,8 @@ extern void nfs_unlock_and_release_request(struct nfs_page *); extern int nfs_page_group_lock(struct nfs_page *); extern void nfs_page_group_unlock(struct nfs_page *); extern bool nfs_page_group_sync_on_bit(struct nfs_page *, unsigned int); +extern int nfs_page_set_headlock(struct nfs_page *req); +extern void nfs_page_clear_headlock(struct nfs_page *req); extern bool nfs_async_iocounter_wait(struct rpc_task *, struct nfs_lock_context *); /* -- cgit v1.2.3-70-g09d2 From a62f8e3bd836bf1abde1648a45e14afd050dbd23 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 30 Mar 2020 11:12:16 -0400 Subject: NFS: Clean up nfs_lock_and_join_requests() Clean up nfs_lock_and_join_requests() to simplify the calculation of the range covered by the page group, taking into account the presence of mirrors. Signed-off-by: Trond Myklebust --- fs/nfs/pagelist.c | 74 +++++++++++++++++++++++++++++++++++++++ fs/nfs/write.c | 91 +++++++++++------------------------------------- include/linux/nfs_page.h | 1 + 3 files changed, 95 insertions(+), 71 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index f535a92403bf..261236157e33 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -130,6 +130,80 @@ nfs_async_iocounter_wait(struct rpc_task *task, struct nfs_lock_context *l_ctx) } EXPORT_SYMBOL_GPL(nfs_async_iocounter_wait); +/* + * nfs_unroll_locks - unlock all newly locked reqs and wait on @req + * @head: head request of page group, must be holding head lock + * @req: request that couldn't lock and needs to wait on the req bit lock + * + * This is a helper function for nfs_lock_and_join_requests + * returns 0 on success, < 0 on error. + */ +static void +nfs_unroll_locks(struct nfs_page *head, struct nfs_page *req) +{ + struct nfs_page *tmp; + + /* relinquish all the locks successfully grabbed this run */ + for (tmp = head->wb_this_page ; tmp != req; tmp = tmp->wb_this_page) { + if (!kref_read(&tmp->wb_kref)) + continue; + nfs_unlock_and_release_request(tmp); + } +} + +/* + * nfs_page_group_lock_subreq - try to lock a subrequest + * @head: head request of page group + * @subreq: request to lock + * + * This is a helper function for nfs_lock_and_join_requests which + * must be called with the head request and page group both locked. + * On error, it returns with the page group unlocked. + */ +static int +nfs_page_group_lock_subreq(struct nfs_page *head, struct nfs_page *subreq) +{ + int ret; + + if (!kref_get_unless_zero(&subreq->wb_kref)) + return 0; + while (!nfs_lock_request(subreq)) { + nfs_page_group_unlock(head); + ret = nfs_wait_on_request(subreq); + if (!ret) + ret = nfs_page_group_lock(head); + if (ret < 0) { + nfs_unroll_locks(head, subreq); + nfs_release_request(subreq); + return ret; + } + } + return 0; +} + +/* + * nfs_page_group_lock_subrequests - try to lock the subrequests + * @head: head request of page group + * + * This is a helper function for nfs_lock_and_join_requests which + * must be called with the head request and page group both locked. + * On error, it returns with the page group unlocked. + */ +int nfs_page_group_lock_subrequests(struct nfs_page *head) +{ + struct nfs_page *subreq; + int ret; + + /* lock each request in the page group */ + for (subreq = head->wb_this_page; subreq != head; + subreq = subreq->wb_this_page) { + ret = nfs_page_group_lock_subreq(head, subreq); + if (ret < 0) + return ret; + } + return 0; +} + /* * nfs_page_set_headlock - set the request PG_HEADLOCK * @req: request that is to be locked diff --git a/fs/nfs/write.c b/fs/nfs/write.c index a6d7926b0653..832cf57ea442 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -379,34 +379,6 @@ static void nfs_end_page_writeback(struct nfs_page *req) clear_bdi_congested(inode_to_bdi(inode), BLK_RW_ASYNC); } -/* - * nfs_unroll_locks_and_wait - unlock all newly locked reqs and wait on @req - * - * this is a helper function for nfs_lock_and_join_requests - * - * @inode - inode associated with request page group, must be holding inode lock - * @head - head request of page group, must be holding head lock - * @req - request that couldn't lock and needs to wait on the req bit lock - * - * NOTE: this must be called holding page_group bit lock - * which will be released before returning. - * - * returns 0 on success, < 0 on error. - */ -static void -nfs_unroll_locks(struct inode *inode, struct nfs_page *head, - struct nfs_page *req) -{ - struct nfs_page *tmp; - - /* relinquish all the locks successfully grabbed this run */ - for (tmp = head->wb_this_page ; tmp != req; tmp = tmp->wb_this_page) { - if (!kref_read(&tmp->wb_kref)) - continue; - nfs_unlock_and_release_request(tmp); - } -} - /* * nfs_destroy_unlinked_subrequests - destroy recently unlinked subrequests * @@ -487,7 +459,7 @@ nfs_lock_and_join_requests(struct page *page) struct inode *inode = page_file_mapping(page)->host; struct nfs_page *head, *subreq; struct nfs_page *destroy_list = NULL; - unsigned int total_bytes; + unsigned int pgbase, off, bytes; int ret; try_again: @@ -520,49 +492,30 @@ try_again: goto release_request; /* lock each request in the page group */ - total_bytes = head->wb_bytes; + ret = nfs_page_group_lock_subrequests(head); + if (ret < 0) + goto release_request; + + pgbase = head->wb_pgbase; + bytes = head->wb_bytes; + off = head->wb_offset; for (subreq = head->wb_this_page; subreq != head; subreq = subreq->wb_this_page) { - - if (!kref_get_unless_zero(&subreq->wb_kref)) { - if (subreq->wb_offset == head->wb_offset + total_bytes) - total_bytes += subreq->wb_bytes; - continue; - } - - while (!nfs_lock_request(subreq)) { - /* - * Unlock page to allow nfs_page_group_sync_on_bit() - * to succeed - */ - nfs_page_group_unlock(head); - ret = nfs_wait_on_request(subreq); - if (!ret) - ret = nfs_page_group_lock(head); - if (ret < 0) { - nfs_unroll_locks(inode, head, subreq); - nfs_release_request(subreq); - goto release_request; - } - } - /* - * Subrequests are always contiguous, non overlapping - * and in order - but may be repeated (mirrored writes). - */ - if (subreq->wb_offset == (head->wb_offset + total_bytes)) { - /* keep track of how many bytes this group covers */ - total_bytes += subreq->wb_bytes; - } else if (WARN_ON_ONCE(subreq->wb_offset < head->wb_offset || - ((subreq->wb_offset + subreq->wb_bytes) > - (head->wb_offset + total_bytes)))) { - nfs_page_group_unlock(head); - nfs_unroll_locks(inode, head, subreq); - nfs_unlock_and_release_request(subreq); - ret = -EIO; - goto release_request; + /* Subrequests should always form a contiguous range */ + if (pgbase > subreq->wb_pgbase) { + off -= pgbase - subreq->wb_pgbase; + bytes += pgbase - subreq->wb_pgbase; + pgbase = subreq->wb_pgbase; } + bytes = max(subreq->wb_pgbase + subreq->wb_bytes + - pgbase, bytes); } + /* Set the head request's range to cover the former page group */ + head->wb_pgbase = pgbase; + head->wb_bytes = bytes; + head->wb_offset = off; + /* Now that all requests are locked, make sure they aren't on any list. * Commit list removal accounting is done after locks are dropped */ subreq = head; @@ -576,10 +529,6 @@ try_again: /* destroy list will be terminated by head */ destroy_list = head->wb_this_page; head->wb_this_page = head; - - /* change head request to cover whole range that - * the former page group covered */ - head->wb_bytes = total_bytes; } /* Postpone destruction of this request */ diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 7e9419d74b86..dd205bc6bc58 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -139,6 +139,7 @@ extern size_t nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, extern int nfs_wait_on_request(struct nfs_page *); extern void nfs_unlock_request(struct nfs_page *req); extern void nfs_unlock_and_release_request(struct nfs_page *); +extern int nfs_page_group_lock_subrequests(struct nfs_page *head); extern int nfs_page_group_lock(struct nfs_page *); extern void nfs_page_group_unlock(struct nfs_page *); extern bool nfs_page_group_sync_on_bit(struct nfs_page *, unsigned int); -- cgit v1.2.3-70-g09d2 From e00ed89d7bd59c4ae49d6aeeee567187b1357a4b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 30 Mar 2020 12:40:47 -0400 Subject: NFS: Refactor nfs_lock_and_join_requests() Refactor nfs_lock_and_join_requests() in order to separate out the subrequest merging into its own function nfs_lock_and_join_group() that can be used by O_DIRECT. Signed-off-by: Trond Myklebust --- fs/nfs/pagelist.c | 26 +++++++- fs/nfs/write.c | 164 ++++++++++++++++++++++++++++------------------- include/linux/nfs_page.h | 1 + 3 files changed, 123 insertions(+), 68 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index b9805d1dac75..f61f96603df7 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -130,6 +130,25 @@ nfs_async_iocounter_wait(struct rpc_task *task, struct nfs_lock_context *l_ctx) } EXPORT_SYMBOL_GPL(nfs_async_iocounter_wait); +/* + * nfs_page_lock_head_request - page lock the head of the page group + * @req: any member of the page group + */ +struct nfs_page * +nfs_page_group_lock_head(struct nfs_page *req) +{ + struct nfs_page *head = req->wb_head; + + while (!nfs_lock_request(head)) { + int ret = nfs_wait_on_request(head); + if (ret < 0) + return ERR_PTR(ret); + } + if (head != req) + kref_get(&head->wb_kref); + return head; +} + /* * nfs_unroll_locks - unlock all newly locked reqs and wait on @req * @head: head request of page group, must be holding head lock @@ -186,14 +205,16 @@ nfs_page_group_lock_subreq(struct nfs_page *head, struct nfs_page *subreq) * @head: head request of page group * * This is a helper function for nfs_lock_and_join_requests which - * must be called with the head request and page group both locked. - * On error, it returns with the page group unlocked. + * must be called with the head request locked. */ int nfs_page_group_lock_subrequests(struct nfs_page *head) { struct nfs_page *subreq; int ret; + ret = nfs_page_group_lock(head); + if (ret < 0) + return ret; /* lock each request in the page group */ for (subreq = head->wb_this_page; subreq != head; subreq = subreq->wb_this_page) { @@ -201,6 +222,7 @@ int nfs_page_group_lock_subrequests(struct nfs_page *head) if (ret < 0) return ret; } + nfs_page_group_unlock(head); return 0; } diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 832cf57ea442..63b64333c3ea 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -149,6 +149,31 @@ static void nfs_io_completion_put(struct nfs_io_completion *ioc) kref_put(&ioc->refcount, nfs_io_completion_release); } +static void +nfs_page_set_inode_ref(struct nfs_page *req, struct inode *inode) +{ + if (!test_and_set_bit(PG_INODE_REF, &req->wb_flags)) { + kref_get(&req->wb_kref); + atomic_long_inc(&NFS_I(inode)->nrequests); + } +} + +static int +nfs_cancel_remove_inode(struct nfs_page *req, struct inode *inode) +{ + int ret; + + if (!test_bit(PG_REMOVE, &req->wb_flags)) + return 0; + ret = nfs_page_group_lock(req); + if (ret) + return ret; + if (test_and_clear_bit(PG_REMOVE, &req->wb_flags)) + nfs_page_set_inode_ref(req, inode); + nfs_page_group_unlock(req); + return 0; +} + static struct nfs_page * nfs_page_private_request(struct page *page) { @@ -218,6 +243,36 @@ static struct nfs_page *nfs_page_find_head_request(struct page *page) return req; } +static struct nfs_page *nfs_find_and_lock_page_request(struct page *page) +{ + struct inode *inode = page_file_mapping(page)->host; + struct nfs_page *req, *head; + int ret; + + for (;;) { + req = nfs_page_find_head_request(page); + if (!req) + return req; + head = nfs_page_group_lock_head(req); + if (head != req) + nfs_release_request(req); + if (IS_ERR(head)) + return head; + ret = nfs_cancel_remove_inode(head, inode); + if (ret < 0) { + nfs_unlock_and_release_request(head); + return ERR_PTR(ret); + } + /* Ensure that nobody removed the request before we locked it */ + if (head == nfs_page_private_request(page)) + break; + if (PageSwapCache(page)) + break; + nfs_unlock_and_release_request(head); + } + return head; +} + /* Adjust the file length if we're writing beyond the end */ static void nfs_grow_file(struct page *page, unsigned int offset, unsigned int count) { @@ -436,65 +491,22 @@ nfs_destroy_unlinked_subrequests(struct nfs_page *destroy_list, } /* - * nfs_lock_and_join_requests - join all subreqs to the head req and return - * a locked reference, cancelling any pending - * operations for this page. - * - * @page - the page used to lookup the "page group" of nfs_page structures + * nfs_join_page_group - destroy subrequests of the head req + * @head: the page used to lookup the "page group" of nfs_page structures + * @inode: Inode to which the request belongs. * * This function joins all sub requests to the head request by first * locking all requests in the group, cancelling any pending operations * and finally updating the head request to cover the whole range covered by * the (former) group. All subrequests are removed from any write or commit * lists, unlinked from the group and destroyed. - * - * Returns a locked, referenced pointer to the head request - which after - * this call is guaranteed to be the only request associated with the page. - * Returns NULL if no requests are found for @page, or a ERR_PTR if an - * error was encountered. */ -static struct nfs_page * -nfs_lock_and_join_requests(struct page *page) +static void +nfs_join_page_group(struct nfs_page *head, struct inode *inode) { - struct inode *inode = page_file_mapping(page)->host; - struct nfs_page *head, *subreq; + struct nfs_page *subreq; struct nfs_page *destroy_list = NULL; unsigned int pgbase, off, bytes; - int ret; - -try_again: - /* - * A reference is taken only on the head request which acts as a - * reference to the whole page group - the group will not be destroyed - * until the head reference is released. - */ - head = nfs_page_find_head_request(page); - if (!head) - return NULL; - - /* lock the page head first in order to avoid an ABBA inefficiency */ - if (!nfs_lock_request(head)) { - ret = nfs_wait_on_request(head); - nfs_release_request(head); - if (ret < 0) - return ERR_PTR(ret); - goto try_again; - } - - /* Ensure that nobody removed the request before we locked it */ - if (head != nfs_page_private_request(page) && !PageSwapCache(page)) { - nfs_unlock_and_release_request(head); - goto try_again; - } - - ret = nfs_page_group_lock(head); - if (ret < 0) - goto release_request; - - /* lock each request in the page group */ - ret = nfs_page_group_lock_subrequests(head); - if (ret < 0) - goto release_request; pgbase = head->wb_pgbase; bytes = head->wb_bytes; @@ -531,30 +543,50 @@ try_again: head->wb_this_page = head; } - /* Postpone destruction of this request */ - if (test_and_clear_bit(PG_REMOVE, &head->wb_flags)) { - set_bit(PG_INODE_REF, &head->wb_flags); - kref_get(&head->wb_kref); - atomic_long_inc(&NFS_I(inode)->nrequests); - } + nfs_destroy_unlinked_subrequests(destroy_list, head, inode); +} - nfs_page_group_unlock(head); +/* + * nfs_lock_and_join_requests - join all subreqs to the head req + * @page: the page used to lookup the "page group" of nfs_page structures + * + * This function joins all sub requests to the head request by first + * locking all requests in the group, cancelling any pending operations + * and finally updating the head request to cover the whole range covered by + * the (former) group. All subrequests are removed from any write or commit + * lists, unlinked from the group and destroyed. + * + * Returns a locked, referenced pointer to the head request - which after + * this call is guaranteed to be the only request associated with the page. + * Returns NULL if no requests are found for @page, or a ERR_PTR if an + * error was encountered. + */ +static struct nfs_page * +nfs_lock_and_join_requests(struct page *page) +{ + struct inode *inode = page_file_mapping(page)->host; + struct nfs_page *head; + int ret; - nfs_destroy_unlinked_subrequests(destroy_list, head, inode); + /* + * A reference is taken only on the head request which acts as a + * reference to the whole page group - the group will not be destroyed + * until the head reference is released. + */ + head = nfs_find_and_lock_page_request(page); + if (IS_ERR_OR_NULL(head)) + return head; - /* Did we lose a race with nfs_inode_remove_request()? */ - if (!(PagePrivate(page) || PageSwapCache(page))) { + /* lock each request in the page group */ + ret = nfs_page_group_lock_subrequests(head); + if (ret < 0) { nfs_unlock_and_release_request(head); - return NULL; + return ERR_PTR(ret); } - /* still holds ref on head from nfs_page_find_head_request - * and still has lock on head from lock loop */ - return head; + nfs_join_page_group(head, inode); -release_request: - nfs_unlock_and_release_request(head); - return ERR_PTR(ret); + return head; } static void nfs_write_error(struct nfs_page *req, int error) diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index dd205bc6bc58..99198c039bd6 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -139,6 +139,7 @@ extern size_t nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, extern int nfs_wait_on_request(struct nfs_page *); extern void nfs_unlock_request(struct nfs_page *req); extern void nfs_unlock_and_release_request(struct nfs_page *); +extern struct nfs_page *nfs_page_group_lock_head(struct nfs_page *req); extern int nfs_page_group_lock_subrequests(struct nfs_page *head); extern int nfs_page_group_lock(struct nfs_page *); extern void nfs_page_group_unlock(struct nfs_page *); -- cgit v1.2.3-70-g09d2 From ed5d588fe47feef290f271022820e255d8371561 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 30 Mar 2020 20:57:49 -0400 Subject: NFS: Try to join page groups before an O_DIRECT retransmission If we have to retransmit requests, try to join their page groups first. Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 20 ++++++++++++++++++++ fs/nfs/write.c | 2 +- include/linux/nfs_page.h | 1 + 3 files changed, 22 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 8074304fd5b4..a57e7c72c7f4 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -505,6 +505,24 @@ out: return result; } +static void +nfs_direct_join_group(struct list_head *list, struct inode *inode) +{ + struct nfs_page *req, *next; + + list_for_each_entry(req, list, wb_list) { + if (req->wb_head != req || req->wb_this_page == req) + continue; + for (next = req->wb_this_page; + next != req->wb_head; + next = next->wb_this_page) { + nfs_list_remove_request(next); + nfs_release_request(next); + } + nfs_join_page_group(req, inode); + } +} + static void nfs_direct_write_scan_commit_list(struct inode *inode, struct list_head *list, @@ -527,6 +545,8 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) nfs_init_cinfo_from_dreq(&cinfo, dreq); nfs_direct_write_scan_commit_list(dreq->inode, &reqs, &cinfo); + nfs_direct_join_group(&reqs, dreq->inode); + dreq->count = 0; dreq->max_count = 0; list_for_each_entry(req, &reqs, wb_list) diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 63b64333c3ea..df4b87c30ac9 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -501,7 +501,7 @@ nfs_destroy_unlinked_subrequests(struct nfs_page *destroy_list, * the (former) group. All subrequests are removed from any write or commit * lists, unlinked from the group and destroyed. */ -static void +void nfs_join_page_group(struct nfs_page *head, struct inode *inode) { struct nfs_page *subreq; diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 99198c039bd6..c32c15216da3 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -141,6 +141,7 @@ extern void nfs_unlock_request(struct nfs_page *req); extern void nfs_unlock_and_release_request(struct nfs_page *); extern struct nfs_page *nfs_page_group_lock_head(struct nfs_page *req); extern int nfs_page_group_lock_subrequests(struct nfs_page *head); +extern void nfs_join_page_group(struct nfs_page *head, struct inode *inode); extern int nfs_page_group_lock(struct nfs_page *); extern void nfs_page_group_unlock(struct nfs_page *); extern bool nfs_page_group_sync_on_bit(struct nfs_page *, unsigned int); -- cgit v1.2.3-70-g09d2 From 93ce4af774bc3d8a72ce2271d03241c96383629d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 6 Apr 2020 13:39:29 -0400 Subject: NFS: Clean up process of marking inode stale. Instead of the various open coded calls to set the NFS_INO_STALE bit and call nfs_zap_caches(), consolidate them into a single function nfs_set_inode_stale(). Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 5 +++-- fs/nfs/inode.c | 18 +++++++++++++----- fs/nfs/nfstrace.h | 1 + fs/nfs/read.c | 2 +- include/linux/nfs_fs.h | 1 + 5 files changed, 19 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index f14184d0ba82..d729d8311c7e 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -2669,9 +2669,10 @@ static int nfs_do_access(struct inode *inode, const struct cred *cred, int mask) status = NFS_PROTO(inode)->access(inode, &cache); if (status != 0) { if (status == -ESTALE) { - nfs_zap_caches(inode); if (!S_ISDIR(inode->i_mode)) - set_bit(NFS_INO_STALE, &NFS_I(inode)->flags); + nfs_set_inode_stale(inode); + else + nfs_zap_caches(inode); } goto out; } diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index a10fb87c6ac3..b9d0921cb4fe 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -62,7 +62,6 @@ /* Default is to see 64-bit inode numbers */ static bool enable_ino64 = NFS_64_BIT_INODE_NUMBERS_ENABLED; -static void nfs_invalidate_inode(struct inode *); static int nfs_update_inode(struct inode *, struct nfs_fattr *); static struct kmem_cache * nfs_inode_cachep; @@ -284,10 +283,18 @@ EXPORT_SYMBOL_GPL(nfs_invalidate_atime); * Invalidate, but do not unhash, the inode. * NB: must be called with inode->i_lock held! */ -static void nfs_invalidate_inode(struct inode *inode) +static void nfs_set_inode_stale_locked(struct inode *inode) { set_bit(NFS_INO_STALE, &NFS_I(inode)->flags); nfs_zap_caches_locked(inode); + trace_nfs_set_inode_stale(inode); +} + +void nfs_set_inode_stale(struct inode *inode) +{ + spin_lock(&inode->i_lock); + nfs_set_inode_stale_locked(inode); + spin_unlock(&inode->i_lock); } struct nfs_find_desc { @@ -1163,9 +1170,10 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) status = 0; break; case -ESTALE: - nfs_zap_caches(inode); if (!S_ISDIR(inode->i_mode)) - set_bit(NFS_INO_STALE, &NFS_I(inode)->flags); + nfs_set_inode_stale(inode); + else + nfs_zap_caches(inode); } goto err_out; } @@ -2064,7 +2072,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) * lookup validation will know that the inode is bad. * (But we fall through to invalidate the caches.) */ - nfs_invalidate_inode(inode); + nfs_set_inode_stale_locked(inode); return -ESTALE; } diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h index a9588d19a5ae..7e7a97ae21ed 100644 --- a/fs/nfs/nfstrace.h +++ b/fs/nfs/nfstrace.h @@ -181,6 +181,7 @@ DECLARE_EVENT_CLASS(nfs_inode_event_done, int error \ ), \ TP_ARGS(inode, error)) +DEFINE_NFS_INODE_EVENT(nfs_set_inode_stale); DEFINE_NFS_INODE_EVENT(nfs_refresh_inode_enter); DEFINE_NFS_INODE_EVENT_DONE(nfs_refresh_inode_exit); DEFINE_NFS_INODE_EVENT(nfs_revalidate_inode_enter); diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 34bb9add2302..13b22e898116 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -250,7 +250,7 @@ static int nfs_readpage_done(struct rpc_task *task, trace_nfs_readpage_done(task, hdr); if (task->tk_status == -ESTALE) { - set_bit(NFS_INO_STALE, &NFS_I(inode)->flags); + nfs_set_inode_stale(inode); nfs_mark_for_revalidate(inode); } return 0; diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 5d5b91e54f73..73eda45f1cfd 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -354,6 +354,7 @@ static inline unsigned long nfs_save_change_attribute(struct inode *dir) extern int nfs_sync_mapping(struct address_space *mapping); extern void nfs_zap_mapping(struct inode *inode, struct address_space *mapping); extern void nfs_zap_caches(struct inode *); +extern void nfs_set_inode_stale(struct inode *inode); extern void nfs_invalidate_atime(struct inode *); extern struct inode *nfs_fhget(struct super_block *, struct nfs_fh *, struct nfs_fattr *, struct nfs4_label *); -- cgit v1.2.3-70-g09d2