From 9776de96e51565286da25c74d6f631abc50c63ef Mon Sep 17 00:00:00 2001 From: Milosz Tanski Date: Wed, 13 Aug 2014 12:58:16 -0400 Subject: FS-Cache: Timeout for releasepage() This is meant to avoid a recursive hang caused by the underlying filesystem trying to grab a free page and causing a write-out. INFO: task kworker/u30:7:28375 blocked for more than 120 seconds. Not tainted 3.15.0-virtual #74 "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. kworker/u30:7 D 0000000000000000 0 28375 2 0x00000000 Workqueue: fscache_operation fscache_op_work_func [fscache] ffff88000b147148 0000000000000046 0000000000000000 ffff88000b1471c8 ffff8807aa031820 0000000000014040 ffff88000b147fd8 0000000000014040 ffff880f0c50c860 ffff8807aa031820 ffff88000b147158 ffff88007be59cd0 Call Trace: [] schedule+0x29/0x70 [] __fscache_wait_on_page_write+0x55/0x90 [fscache] [] ? __wake_up_sync+0x20/0x20 [] __fscache_maybe_release_page+0x65/0x1e0 [fscache] [] ceph_releasepage+0x83/0x100 [ceph] [] ? anon_vma_fork+0x130/0x130 [] try_to_release_page+0x32/0x50 [] shrink_page_list+0x7e6/0x9d0 [] ? isolate_lru_pages.isra.73+0x78/0x1e0 [] shrink_inactive_list+0x252/0x4c0 [] shrink_lruvec+0x3e1/0x670 [] shrink_zone+0x3f/0x110 [] do_try_to_free_pages+0x1d6/0x450 [] ? zone_statistics+0x99/0xc0 [] try_to_free_pages+0xc4/0x180 [] __alloc_pages_nodemask+0x6b2/0xa60 [] ? __find_get_block+0xbe/0x250 [] ? wake_up_bit+0x2e/0x40 [] alloc_pages_current+0xb3/0x180 [] __page_cache_alloc+0xb7/0xd0 [] grab_cache_page_write_begin+0x7c/0xe0 [] ? ext4_mark_inode_dirty+0x82/0x220 [] ext4_da_write_begin+0x89/0x2d0 [] generic_perform_write+0xbe/0x1d0 [] ? update_time+0x81/0xc0 [] ? mnt_clone_write+0x12/0x30 [] __generic_file_aio_write+0x1ce/0x3f0 [] generic_file_aio_write+0x5e/0xe0 [] ext4_file_write+0x9f/0x410 [] ? ext4_file_open+0x66/0x180 [] do_sync_write+0x5a/0x90 [] cachefiles_write_page+0x149/0x430 [cachefiles] [] ? 
radix_tree_gang_lookup_tag+0x89/0xd0 [] fscache_write_op+0x222/0x3b0 [fscache] [] fscache_op_work_func+0x3a/0x100 [fscache] [] process_one_work+0x179/0x4a0 [] worker_thread+0x11b/0x370 [] ? manage_workers.isra.21+0x2e0/0x2e0 [] kthread+0xc9/0xe0 [] ? ftrace_raw_event_xen_mmu_release_ptpage+0x70/0x90 [] ? flush_kthread_worker+0xb0/0xb0 [] ret_from_fork+0x7c/0xb0 [] ? flush_kthread_worker+0xb0/0xb0 Signed-off-by: Milosz Tanski Signed-off-by: David Howells --- fs/fscache/page.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/fscache/page.c b/fs/fscache/page.c index 85332b9d19d1..781ac7b0b53e 100644 --- a/fs/fscache/page.c +++ b/fs/fscache/page.c @@ -43,6 +43,19 @@ void __fscache_wait_on_page_write(struct fscache_cookie *cookie, struct page *pa } EXPORT_SYMBOL(__fscache_wait_on_page_write); +/* + * wait for a page to finish being written to the cache. Put a timeout here + * since we might be called recursively via parent fs. + */ +static +bool release_page_wait_timeout(struct fscache_cookie *cookie, struct page *page) +{ + wait_queue_head_t *wq = bit_waitqueue(&cookie->flags, 0); + + return wait_event_timeout(*wq, !__fscache_check_page_write(cookie, page), + HZ); +} + /* * decide whether a page can be released, possibly by cancelling a store to it * - we're allowed to sleep if __GFP_WAIT is flagged @@ -115,7 +128,10 @@ page_busy: } fscache_stat(&fscache_n_store_vmscan_wait); - __fscache_wait_on_page_write(cookie, page); + if (!release_page_wait_timeout(cookie, page)) + _debug("fscache writeout timeout page: %p{%lx}", + page, page->index); + gfp &= ~__GFP_WAIT; goto try_again; } -- cgit v1.2.3-70-g09d2 From 920bce20d74817bdd8bfcbc28ecb1179c9e01081 Mon Sep 17 00:00:00 2001 From: Milosz Tanski Date: Wed, 13 Aug 2014 12:58:21 -0400 Subject: FS-Cache: Reduce cookie ref count if submit fails. I've been seeing issues with disposing cookies under vma pressure. The symptom is that the refcount gets out of sync. 
In this case we fail to decrement the refcount if submit fails. I found this while auditing the error handling in and around cookie operations. Signed-off-by: Milosz Tanski Signed-off-by: David Howells --- fs/fscache/object.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/fscache/object.c b/fs/fscache/object.c index d3b4539f1651..da032daf0e0d 100644 --- a/fs/fscache/object.c +++ b/fs/fscache/object.c @@ -982,6 +982,7 @@ nomem: submit_op_failed: clear_bit(FSCACHE_OBJECT_IS_LIVE, &object->flags); spin_unlock(&cookie->lock); + fscache_unuse_cookie(object); kfree(op); _leave(" [EIO]"); return transit_to(KILL_OBJECT); -- cgit v1.2.3-70-g09d2 From 3e1199dcad004a40b55297a4736ccd1b9b81a952 Mon Sep 17 00:00:00 2001 From: Milosz Tanski Date: Wed, 13 Aug 2014 12:58:26 -0400 Subject: FS-Cache: refcount becomes corrupt under vma pressure. In rare cases under heavy VMA pressure the ref count for a fscache cookie becomes corrupt. In this case we decrement the ref count even if we fail before incrementing the refcount. FS-Cache: Assertion failed bnode-eca5f9c6/syslog 0 > 0 is false ------------[ cut here ]------------ kernel BUG at fs/fscache/cookie.c:519! invalid opcode: 0000 [#1] SMP Call Trace: [] __fscache_relinquish_cookie+0x50/0x220 [fscache] [] ceph_fscache_unregister_inode_cookie+0x3e/0x50 [ceph] [] ceph_destroy_inode+0x33/0x200 [ceph] [] ? __fsnotify_inode_delete+0xe/0x10 [] destroy_inode+0x3c/0x70 [] evict+0x111/0x180 [] iput+0x103/0x190 [] __dentry_kill+0x1c8/0x220 [] shrink_dentry_list+0xf1/0x250 [] prune_dcache_sb+0x4c/0x60 [] super_cache_scan+0xff/0x170 [] shrink_slab_node+0x140/0x2c0 [] shrink_slab+0x8a/0x130 [] balance_pgdat+0x3e2/0x5d0 [] kswapd+0x16a/0x4a0 [] ? __wake_up_sync+0x20/0x20 [] ? balance_pgdat+0x5d0/0x5d0 [] kthread+0xc9/0xe0 [] ? ftrace_raw_event_xen_mmu_release_ptpage+0x70/0x90 [] ? flush_kthread_worker+0xb0/0xb0 [] ret_from_fork+0x7c/0xb0 [] ? 
flush_kthread_worker+0xb0/0xb0 RIP [] __fscache_disable_cookie+0x1db/0x210 [fscache] RSP ---[ end trace 254d0d7c74a01f25 ]--- Signed-off-by: Milosz Tanski Signed-off-by: David Howells --- fs/fscache/page.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/fscache/page.c b/fs/fscache/page.c index 781ac7b0b53e..de33b3fccca6 100644 --- a/fs/fscache/page.c +++ b/fs/fscache/page.c @@ -198,7 +198,7 @@ int __fscache_attr_changed(struct fscache_cookie *cookie) { struct fscache_operation *op; struct fscache_object *object; - bool wake_cookie; + bool wake_cookie = false; _enter("%p", cookie); @@ -228,15 +228,16 @@ int __fscache_attr_changed(struct fscache_cookie *cookie) __fscache_use_cookie(cookie); if (fscache_submit_exclusive_op(object, op) < 0) - goto nobufs; + goto nobufs_dec; spin_unlock(&cookie->lock); fscache_stat(&fscache_n_attr_changed_ok); fscache_put_operation(op); _leave(" = 0"); return 0; -nobufs: +nobufs_dec: wake_cookie = __fscache_unuse_cookie(cookie); +nobufs: spin_unlock(&cookie->lock); kfree(op); if (wake_cookie) -- cgit v1.2.3-70-g09d2 From 696382f938d22597f4945865ed8e3f25e240cd41 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 27 Aug 2014 15:06:20 +0100 Subject: cachefiles: remove two unused pagevecs. These two have been unused since commit c4d6d8dbf335c7fa47341654a37c53a512b519bb CacheFiles: Fix the marking of cached pages in 3.8. 
Signed-off-by: NeilBrown Signed-off-by: David Howells --- fs/cachefiles/rdwr.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'fs') diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c index 4b1fb5ca65b8..25e745b8eb1b 100644 --- a/fs/cachefiles/rdwr.c +++ b/fs/cachefiles/rdwr.c @@ -151,7 +151,6 @@ static void cachefiles_read_copier(struct fscache_operation *_op) struct cachefiles_one_read *monitor; struct cachefiles_object *object; struct fscache_retrieval *op; - struct pagevec pagevec; int error, max; op = container_of(_op, struct fscache_retrieval, op); @@ -160,8 +159,6 @@ static void cachefiles_read_copier(struct fscache_operation *_op) _enter("{ino=%lu}", object->backer->d_inode->i_ino); - pagevec_init(&pagevec, 0); - max = 8; spin_lock_irq(&object->work_lock); @@ -396,7 +393,6 @@ int cachefiles_read_or_alloc_page(struct fscache_retrieval *op, { struct cachefiles_object *object; struct cachefiles_cache *cache; - struct pagevec pagevec; struct inode *inode; sector_t block0, block; unsigned shift; @@ -427,8 +423,6 @@ int cachefiles_read_or_alloc_page(struct fscache_retrieval *op, op->op.flags |= FSCACHE_OP_ASYNC; op->op.processor = cachefiles_read_copier; - pagevec_init(&pagevec, 0); - /* we assume the absence or presence of the first block is a good * enough indication for the page as a whole * - TODO: don't use bmap() for this as it is _not_ actually good -- cgit v1.2.3-70-g09d2 From e2cf1f1cc7636bd860e47cd0ad6194da8975f8b5 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 17 Sep 2014 23:28:38 +0100 Subject: CacheFiles: Handle rename2 Not all filesystems now provide the rename i_op - ext4 for one - but rather provide the rename2 i_op. CacheFiles checks that the filesystem has rename and so will reject ext4 now with EPERM: CacheFiles: Failed to register: -1 Fix this by checking for rename2 as an alternative. The call to vfs_rename() actually handles selection of the appropriate function, so we needn't worry about that. 
Turning on debugging shows: [cachef] ==> cachefiles_get_directory(,,cache) [cachef] subdir -> ffff88000b22b778 positive [cachef] <== cachefiles_get_directory() = -1 [check] where -1 is EPERM. Signed-off-by: David Howells --- fs/cachefiles/namei.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c index 5bf2b41e66d3..83e9c94ca2cf 100644 --- a/fs/cachefiles/namei.c +++ b/fs/cachefiles/namei.c @@ -779,7 +779,8 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache, !subdir->d_inode->i_op->lookup || !subdir->d_inode->i_op->mkdir || !subdir->d_inode->i_op->create || - !subdir->d_inode->i_op->rename || + (!subdir->d_inode->i_op->rename && + !subdir->d_inode->i_op->rename2) || !subdir->d_inode->i_op->rmdir || !subdir->d_inode->i_op->unlink) goto check_error; -- cgit v1.2.3-70-g09d2