From deed85e760c8c88cd984c5921dd8cb6b697b6134 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 15 Oct 2012 15:02:01 -0400 Subject: NFS: Remove BUG_ON() calls from the generic writeback code ...and ensure that we set the return value for nfs_page_async_flush() to zero! (Reported-by: Dros Adamson) Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) (limited to 'fs/nfs/write.c') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 9347ab7c9574..f5bc8e11713b 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -239,21 +239,18 @@ int nfs_congestion_kb; #define NFS_CONGESTION_OFF_THRESH \ (NFS_CONGESTION_ON_THRESH - (NFS_CONGESTION_ON_THRESH >> 2)) -static int nfs_set_page_writeback(struct page *page) +static void nfs_set_page_writeback(struct page *page) { + struct nfs_server *nfss = NFS_SERVER(page_file_mapping(page)->host); int ret = test_set_page_writeback(page); - if (!ret) { - struct inode *inode = page_file_mapping(page)->host; - struct nfs_server *nfss = NFS_SERVER(inode); + WARN_ON_ONCE(ret != 0); - if (atomic_long_inc_return(&nfss->writeback) > - NFS_CONGESTION_ON_THRESH) { - set_bdi_congested(&nfss->backing_dev_info, - BLK_RW_ASYNC); - } + if (atomic_long_inc_return(&nfss->writeback) > + NFS_CONGESTION_ON_THRESH) { + set_bdi_congested(&nfss->backing_dev_info, + BLK_RW_ASYNC); } - return ret; } static void nfs_end_page_writeback(struct page *page) @@ -315,10 +312,10 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, if (IS_ERR(req)) goto out; - ret = nfs_set_page_writeback(page); - BUG_ON(ret != 0); - BUG_ON(test_bit(PG_CLEAN, &req->wb_flags)); + nfs_set_page_writeback(page); + WARN_ON_ONCE(test_bit(PG_CLEAN, &req->wb_flags)); + ret = 0; if (!nfs_pageio_add_request(pgio, req)) { nfs_redirty_request(req); ret = pgio->pg_error; @@ -451,8 +448,6 @@ static void nfs_inode_remove_request(struct nfs_page *req) struct inode *inode = req->wb_context->dentry->d_inode; struct nfs_inode *nfsi = NFS_I(inode); - BUG_ON (!NFS_WBACK_BUSY(req)); - spin_lock(&inode->i_lock); if (likely(!PageSwapCache(req->wb_page))) { set_page_private(req->wb_page, 0); @@ -1727,7 +1722,6 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page) struct nfs_page *req; int ret = 0; - BUG_ON(!PageLocked(page)); for (;;) { wait_on_page_writeback(page); req = nfs_page_find_request(page); -- cgit v1.2.3-70-g09d2 From 4c1002100898d03c5c9142ffaf58351c841ab94a Mon Sep 17 00:00:00 2001 From: Yanchuan Nian Date: Mon, 12 Nov 2012 09:27:37 +0800 Subject: nfs: Fix wrong slab cache in nfs_commit_mempool The slab cache in nfs_commit_mempool is wrong, and I think it is just a slip. I tested it on a x86-32 machine, the size of nfs_write_header is 544, and the size of nfs_commit_data is 408, so it works fine. It is also true that sizeof(struct nfs_write_header) > sizeof(struct nfs_commit_data) on other platforms in my opinoin. Just fix it. Signed-off-by: Yanchuan Nian Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/nfs/write.c') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 9347ab7c9574..f710e39f6ba2 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1829,7 +1829,7 @@ int __init nfs_init_writepagecache(void) goto out_destroy_write_mempool; nfs_commit_mempool = mempool_create_slab_pool(MIN_POOL_COMMIT, - nfs_wdata_cachep); + nfs_cdata_cachep); if (nfs_commit_mempool == NULL) goto out_destroy_commit_cache; -- cgit v1.2.3-70-g09d2 From 81d9bce5309288086b58b4d97a644e495fef75f2 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 10 Dec 2012 09:25:48 -0500 Subject: nfs: don't extend writes to cover entire page if pagecache is invalid Jian reported that the following sequence would leave "testfile" with corrupt data: # mount localhost:/export /mnt/nfs/ -o vers=3 # echo abc > /mnt/nfs/testfile; echo def >> /export/testfile; echo ghi >> /mnt/nfs/testfile # cat -v /export/testfile abc ^@^@^@^@ghi While there's no locking involved here, the operations are serialized, so CTO should prevent corruption. The first write to the file is fine and writes 4 bytes. The file is then extended on the server. When it's reopened a GETATTR is issued and the size change is noticed. This causes NFS_INO_INVALID_DATA to be set on the file. Because the file is opened for write only, nfs_want_read_modify_write() returns 0 to nfs_write_begin(). nfs_updatepage then calls nfs_write_pageuptodate() to see if it should extend the nfs_page to cover the whole page. NFS_INO_INVALID_DATA is still set on the file at that point, but that flag is ignored and nfs_pageuptodate erroneously extends the write to cover the whole page, with the write done on the server side filled in with zeroes. This patch just has that function check for NFS_INO_INVALID_DATA in addition to NFS_INO_REVAL_PAGECACHE. This fixes the bug, but looking over the code, I wonder if we might have a similar bug in nfs_revalidate_size(). The difference between those two flags is very subtle, so it seems like we ought to be checking for NFS_INO_INVALID_DATA in most of the places that we look for NFS_INO_REVAL_PAGECACHE. I believe this is regression introduced by commit 8d197a568. The code did check for NFS_INO_INVALID_DATA prior to that patch. Original bug report is here: https://bugzilla.redhat.com/show_bug.cgi?id=885743 Cc: # 3.5+ Reported-by: Jian Li Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/nfs/write.c') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index f710e39f6ba2..eecd8b879afe 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -884,7 +884,7 @@ static bool nfs_write_pageuptodate(struct page *page, struct inode *inode) { if (nfs_have_delegated_attributes(inode)) goto out; - if (NFS_I(inode)->cache_validity & NFS_INO_REVAL_PAGECACHE) + if (NFS_I(inode)->cache_validity & (NFS_INO_INVALID_DATA|NFS_INO_REVAL_PAGECACHE)) return false; out: return PageUptodate(page) != 0; -- cgit v1.2.3-70-g09d2 From ada8e20d044c0fa5610e504ce6fb4578ebd3edd9 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 15 Dec 2012 17:12:14 -0500 Subject: NFS: Don't use SetPageError in the NFS writeback code The writeback code is already capable of passing errors back to user space by means of the open_context->error. In the case of ENOSPC, Neil Brown is reporting seeing 2 errors being returned. Neil writes: "e.g. if /mnt2/ if an nfs mounted filesystem that has no space then strace dd if=/dev/zero conv=fsync >> /mnt2/afile count=1 reported Input/output error and the relevant parts of the strace output are: write(1, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 512) = 512 fsync(1) = -1 EIO (Input/output error) close(1) = -1 ENOSPC (No space left on device)" Neil then shows that the duplication of error messages appears to be due to the use of the PageError() mechanism, which causes filemap_fdatawait_range to return the extra EIO. The regression was introduced by commit 7b281ee026552f10862b617a2a51acf49c829554 (NFS: fsync() must exit with an error if page writeback failed). Fix this by removing the call to SetPageError(), and just relying on open_context->error reporting the ENOSPC back to fsync(). Reported-by: Neil Brown Tested-by: Neil Brown Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org [3.6+] --- fs/nfs/write.c | 1 - 1 file changed, 1 deletion(-) (limited to 'fs/nfs/write.c') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index f608ca606b2b..5209916e1222 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -202,7 +202,6 @@ out: /* A writeback failed: mark the page as bad, and invalidate the page cache */ static void nfs_set_pageerror(struct page *page) { - SetPageError(page); nfs_zap_mapping(page_file_mapping(page)->host, page_file_mapping(page)); } -- cgit v1.2.3-70-g09d2