From 691a7c6f28ac90cccd0dbcf81348ea90b211bdd0 Mon Sep 17 00:00:00 2001 From: hujianyang Date: Wed, 30 Apr 2014 14:06:06 +0800 Subject: UBIFS: fix an mmap and fsync race condition There is a race condition in UBIFS: Thread A (mmap) Thread B (fsync) ->__do_fault ->write_cache_pages -> ubifs_vm_page_mkwrite -> budget_space -> lock_page -> release/convert_page_budget -> SetPagePrivate -> TestSetPageDirty -> unlock_page -> lock_page -> TestClearPageDirty -> ubifs_writepage -> do_writepage -> release_budget -> ClearPagePrivate -> unlock_page -> !(ret & VM_FAULT_LOCKED) -> lock_page -> set_page_dirty -> ubifs_set_page_dirty -> TestSetPageDirty (set page dirty without budgeting) -> unlock_page This leads to a situation where we have a dirty page but no budget allocated for this page, so further write-back may fail with -ENOSPC. In this fix we return from page_mkwrite without performing unlock_page. We return VM_FAULT_LOCKED instead. After doing this, the race above will not happen. Signed-off-by: hujianyang Tested-by: Laurence Withers Cc: stable@vger.kernel.org Signed-off-by: Artem Bityutskiy --- fs/ubifs/file.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs/ubifs') diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index 4f34dbae823d..f7d48a08f443 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -1525,8 +1525,7 @@ static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, } wait_for_stable_page(page); - unlock_page(page); - return 0; + return VM_FAULT_LOCKED; out_unlock: unlock_page(page); -- cgit v1.2.3-70-g09d2 From 0da846f42ffa0b6597484d1d9ba92755bfc03018 Mon Sep 17 00:00:00 2001 From: hujianyang Date: Tue, 29 Apr 2014 09:44:44 +0800 Subject: UBIFS: Remove unused variables in ubifs_budget_space I found two variables in ubifs_budget_space that are declared but not used. This has been the case since the first commit 1e5176. So just remove them.
Signed-off-by: hujianyang Signed-off-by: Artem Bityutskiy --- fs/ubifs/budget.c | 1 - 1 file changed, 1 deletion(-) (limited to 'fs/ubifs') diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c index e8e01d74dc05..eb997e9c4ab0 100644 --- a/fs/ubifs/budget.c +++ b/fs/ubifs/budget.c @@ -437,7 +437,6 @@ static int calc_dd_growth(const struct ubifs_info *c, */ int ubifs_budget_space(struct ubifs_info *c, struct ubifs_budget_req *req) { - int uninitialized_var(cmt_retries), uninitialized_var(wb_retries); int err, idx_growth, data_growth, dd_growth, retried = 0; ubifs_assert(req->new_page <= 1); -- cgit v1.2.3-70-g09d2 From dac3698147655aba4d71a8e67d6dd46d7a86154f Mon Sep 17 00:00:00 2001 From: hujianyang Date: Wed, 21 May 2014 17:19:45 +0800 Subject: UBIFS: Fix dump messages in ubifs_dump_lprops Function ubifs_read_one_lp will not set @lp and returns an error when ubifs_read_one_lp failed. We should not perform ubifs_dump_lprop in this case because @lp is not initialized as we wanted. Signed-off-by: hujianyang Signed-off-by: Artem Bityutskiy --- fs/ubifs/debug.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs/ubifs') diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c index 5157b866a853..177b0152fef4 100644 --- a/fs/ubifs/debug.c +++ b/fs/ubifs/debug.c @@ -745,8 +745,10 @@ void ubifs_dump_lprops(struct ubifs_info *c) for (lnum = c->main_first; lnum < c->leb_cnt; lnum++) { err = ubifs_read_one_lp(c, lnum, &lp); - if (err) + if (err) { ubifs_err("cannot read lprops for LEB %d", lnum); + continue; + } ubifs_dump_lprop(c, &lp); } -- cgit v1.2.3-70-g09d2 From a0fd59511e0a0514d24044a29da0f6144f8600e5 Mon Sep 17 00:00:00 2001 From: Daniel Golle Date: Tue, 13 May 2014 22:27:58 +0200 Subject: UBIFS: add missing ui pointer in debugging code If UBIFS_DEBUG is defined an additional assertion of the ui_lock spinlock in do_writepage cannot compile because the ui pointer has not been previously declared. 
Fix this by declaring and initializing the ui pointer in case UBIFS_DEBUG is defined. Signed-off-by: Daniel Golle Signed-off-by: Artem Bityutskiy --- fs/ubifs/file.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/ubifs') diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index f7d48a08f443..727506b5e2ea 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -903,6 +903,7 @@ static int do_writepage(struct page *page, int len) struct ubifs_info *c = inode->i_sb->s_fs_info; #ifdef UBIFS_DEBUG + struct ubifs_inode *ui = ubifs_inode(inode); spin_lock(&ui->ui_lock); ubifs_assert(page->index <= ui->synced_i_size << PAGE_CACHE_SIZE); spin_unlock(&ui->ui_lock); -- cgit v1.2.3-70-g09d2 From ba6a7d55634b9ddf119216faef55f2463b17d60b Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Tue, 27 May 2014 15:24:39 +0300 Subject: UBIFS: fix debugging check The debugging check which verifies that we never write outside of the file length was incorrect, since it was multiplying file length by the page size, instead of dividing. Fix this. 
Spotted-by: hujianyang Signed-off-by: Artem Bityutskiy --- fs/ubifs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/ubifs') diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index 727506b5e2ea..0ab7f7dfb98b 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -905,7 +905,7 @@ static int do_writepage(struct page *page, int len) #ifdef UBIFS_DEBUG struct ubifs_inode *ui = ubifs_inode(inode); spin_lock(&ui->ui_lock); - ubifs_assert(page->index <= ui->synced_i_size << PAGE_CACHE_SIZE); + ubifs_assert(page->index <= ui->synced_i_size >> PAGE_CACHE_SHIFT); spin_unlock(&ui->ui_lock); #endif -- cgit v1.2.3-70-g09d2 From 72abc8f4b4e8574318189886de627a2bfe6cd0da Mon Sep 17 00:00:00 2001 From: hujianyang Date: Sat, 31 May 2014 11:39:32 +0800 Subject: UBIFS: Remove incorrect assertion in shrink_tnc() I hit the same assert failed as Dolev Raviv reported in Kernel v3.10 shows like this: [ 9641.164028] UBIFS assert failed in shrink_tnc at 131 (pid 13297) [ 9641.234078] CPU: 1 PID: 13297 Comm: mmap.test Tainted: G O 3.10.40 #1 [ 9641.234116] [] (unwind_backtrace+0x0/0x12c) from [] (show_stack+0x20/0x24) [ 9641.234137] [] (show_stack+0x20/0x24) from [] (dump_stack+0x20/0x28) [ 9641.234188] [] (dump_stack+0x20/0x28) from [] (shrink_tnc_trees+0x25c/0x350 [ubifs]) [ 9641.234265] [] (shrink_tnc_trees+0x25c/0x350 [ubifs]) from [] (ubifs_shrinker+0x25c/0x310 [ubifs]) [ 9641.234307] [] (ubifs_shrinker+0x25c/0x310 [ubifs]) from [] (shrink_slab+0x1d4/0x2f8) [ 9641.234327] [] (shrink_slab+0x1d4/0x2f8) from [] (do_try_to_free_pages+0x300/0x544) [ 9641.234344] [] (do_try_to_free_pages+0x300/0x544) from [] (try_to_free_pages+0x2d0/0x398) [ 9641.234363] [] (try_to_free_pages+0x2d0/0x398) from [] (__alloc_pages_nodemask+0x494/0x7e8) [ 9641.234382] [] (__alloc_pages_nodemask+0x494/0x7e8) from [] (new_slab+0x78/0x238) [ 9641.234400] [] (new_slab+0x78/0x238) from [] (__slab_alloc.constprop.42+0x1a4/0x50c) [ 9641.234419] [] (__slab_alloc.constprop.42+0x1a4/0x50c) from [] 
(kmem_cache_alloc_trace+0x54/0x188) [ 9641.234459] [] (kmem_cache_alloc_trace+0x54/0x188) from [] (do_readpage+0x168/0x468 [ubifs]) [ 9641.234553] [] (do_readpage+0x168/0x468 [ubifs]) from [] (ubifs_readpage+0x424/0x464 [ubifs]) [ 9641.234606] [] (ubifs_readpage+0x424/0x464 [ubifs]) from [] (filemap_fault+0x304/0x418) [ 9641.234638] [] (filemap_fault+0x304/0x418) from [] (__do_fault+0xd4/0x530) [ 9641.234665] [] (__do_fault+0xd4/0x530) from [] (handle_pte_fault+0x480/0xf54) [ 9641.234690] [] (handle_pte_fault+0x480/0xf54) from [] (handle_mm_fault+0x140/0x184) [ 9641.234716] [] (handle_mm_fault+0x140/0x184) from [] (do_page_fault+0x150/0x3ac) [ 9641.234737] [] (do_page_fault+0x150/0x3ac) from [] (do_DataAbort+0x3c/0xa0) [ 9641.234759] [] (do_DataAbort+0x3c/0xa0) from [] (__dabt_usr+0x38/0x40) After analyzing the code, I found a condition that may trigger this failure during correct operation. Thus, I think this assertion is wrong and should be removed. Suppose there are two clean znodes and one dirty znode in TNC. So the per-filesystem atomic_t @clean_zn_cnt is (2). If a commit starts, the dirty znode is marked with COW_ZNODE in get_znodes_to_commit() in case of potential operations on this znode. We clear the COW bit and the DIRTY bit in write_index() without @tnc_mutex locked. We don't increase @clean_zn_cnt in this place. As the comments in write_index() show, if another process holds @tnc_mutex and dirties this znode after we clean it, @clean_zn_cnt would be decreased to (1). We will increase @clean_zn_cnt to (2) with @tnc_mutex locked in free_obsolete_znodes() to keep it right. If shrink_tnc() runs between the decrease and the increase, it will release the other 2 clean znodes it holds and find that @clean_zn_cnt is less than zero (1 - 2 = -1), then hit the assertion. Because free_obsolete_znodes() will soon correct @clean_zn_cnt and there is no harm to the fs in this case, I think this assertion could be removed.
2 clean znodes and 1 dirty znode, @clean_zn_cnt == 2 Thread A (commit) Thread B (write or others) Thread C (shrinker) ->write_index ->clear_bit(DIRTY_NODE) ->clear_bit(COW_ZNODE) @clean_zn_cnt == 2 ->mutex_locked(&tnc_mutex) ->dirty_cow_znode ->!ubifs_zn_cow(znode) ->!test_and_set_bit(DIRTY_NODE) ->atomic_dec(&clean_zn_cnt) ->mutex_unlocked(&tnc_mutex) @clean_zn_cnt == 1 ->mutex_locked(&tnc_mutex) ->shrink_tnc ->destroy_tnc_subtree ->atomic_sub(&clean_zn_cnt, 2) ->ubifs_assert <- hit ->mutex_unlocked(&tnc_mutex) @clean_zn_cnt == -1 ->mutex_lock(&tnc_mutex) ->free_obsolete_znodes ->atomic_inc(&clean_zn_cnt) ->mutex_unlock(&tnc_mutex) @clean_zn_cnt == 0 (correct after shrink) Signed-off-by: hujianyang Cc: stable@vger.kernel.org Signed-off-by: Artem Bityutskiy --- fs/ubifs/shrinker.c | 1 - 1 file changed, 1 deletion(-) (limited to 'fs/ubifs') diff --git a/fs/ubifs/shrinker.c b/fs/ubifs/shrinker.c index f35135e28e96..9a9fb94a41c6 100644 --- a/fs/ubifs/shrinker.c +++ b/fs/ubifs/shrinker.c @@ -128,7 +128,6 @@ static int shrink_tnc(struct ubifs_info *c, int nr, int age, int *contention) freed = ubifs_destroy_tnc_subtree(znode); atomic_long_sub(freed, &ubifs_clean_zn_cnt); atomic_long_sub(freed, &c->clean_zn_cnt); - ubifs_assert(atomic_long_read(&c->clean_zn_cnt) >= 0); total_freed += freed; znode = zprev; } -- cgit v1.2.3-70-g09d2 From 90bea5a3f0bf680b87b90516f3c231997f4b8f3b Mon Sep 17 00:00:00 2001 From: Daniel Golle Date: Mon, 2 Jun 2014 15:51:10 +0200 Subject: UBIFS: respect MS_SILENT mount flag When attempting to mount a non-ubifs formatted volume, lots of error messages (including a stack dump) are thrown to the kernel log even if the MS_SILENT mount flag is set. Fix this by introducing an additional state variable in struct ubifs_info and suppressing error messages in ubifs_read_node if MS_SILENT is set.
Signed-off-by: Daniel Golle Signed-off-by: Artem Bityutskiy --- fs/ubifs/io.c | 18 ++++++++++-------- fs/ubifs/super.c | 5 +++++ fs/ubifs/ubifs.h | 11 +++++++++++ 3 files changed, 26 insertions(+), 8 deletions(-) (limited to 'fs/ubifs') diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c index e18b9889a51b..2290d5866725 100644 --- a/fs/ubifs/io.c +++ b/fs/ubifs/io.c @@ -988,30 +988,32 @@ int ubifs_read_node(const struct ubifs_info *c, void *buf, int type, int len, return err; if (type != ch->node_type) { - ubifs_err("bad node type (%d but expected %d)", - ch->node_type, type); + ubifs_errc(c, "bad node type (%d but expected %d)", + ch->node_type, type); goto out; } err = ubifs_check_node(c, buf, lnum, offs, 0, 0); if (err) { - ubifs_err("expected node type %d", type); + ubifs_errc(c, "expected node type %d", type); return err; } l = le32_to_cpu(ch->len); if (l != len) { - ubifs_err("bad node length %d, expected %d", l, len); + ubifs_errc(c, "bad node length %d, expected %d", l, len); goto out; } return 0; out: - ubifs_err("bad node at LEB %d:%d, LEB mapping status %d", lnum, offs, - ubi_is_mapped(c->ubi, lnum)); - ubifs_dump_node(c, buf); - dump_stack(); + ubifs_errc(c, "bad node at LEB %d:%d, LEB mapping status %d", lnum, + offs, ubi_is_mapped(c->ubi, lnum)); + if (!c->probing) { + ubifs_dump_node(c, buf); + dump_stack(); + } return -EINVAL; } diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index a81c7b556896..3904c8574ef9 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -1149,6 +1149,9 @@ static int mount_ubifs(struct ubifs_info *c) size_t sz; c->ro_mount = !!(c->vfs_sb->s_flags & MS_RDONLY); + /* Suppress error messages while probing if MS_SILENT is set */ + c->probing = !!(c->vfs_sb->s_flags & MS_SILENT); + err = init_constants_early(c); if (err) return err; @@ -1214,6 +1217,8 @@ static int mount_ubifs(struct ubifs_info *c) if (err) goto out_free; + c->probing = 0; + /* * Make sure the compressor which is set as default in the superblock * or overridden by mount 
options is actually compiled in. diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index e8c8cfe1435c..c1f71fe17cc0 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h @@ -51,6 +51,15 @@ #define ubifs_warn(fmt, ...) \ pr_warn("UBIFS warning (pid %d): %s: " fmt "\n", \ current->pid, __func__, ##__VA_ARGS__) +/* + * A variant of 'ubifs_err()' which takes the UBIFS file-sytem description + * object as an argument. + */ +#define ubifs_errc(c, fmt, ...) \ + do { \ + if (!(c)->probing) \ + ubifs_err(fmt, ##__VA_ARGS__); \ + } while (0) /* UBIFS file system VFS magic number */ #define UBIFS_SUPER_MAGIC 0x24051905 @@ -1209,6 +1218,7 @@ struct ubifs_debug_info; * @need_recovery: %1 if the file-system needs recovery * @replaying: %1 during journal replay * @mounting: %1 while mounting + * @probing: %1 while attempting to mount if MS_SILENT mount flag is set * @remounting_rw: %1 while re-mounting from R/O mode to R/W mode * @replay_list: temporary list used during journal replay * @replay_buds: list of buds to replay @@ -1441,6 +1451,7 @@ struct ubifs_info { unsigned int replaying:1; unsigned int mounting:1; unsigned int remounting_rw:1; + unsigned int probing:1; struct list_head replay_list; struct list_head replay_buds; unsigned long long cs_sqnum; -- cgit v1.2.3-70-g09d2 From 380347e9ca76828ee9bac63cfc338ca99cdee4f3 Mon Sep 17 00:00:00 2001 From: hujianyang Date: Tue, 3 Jun 2014 14:49:11 +0800 Subject: UBIFS: Add an assertion for clean_zn_cnt This patch adds a new ubifs_assert() in ubifs_tnc_close() to check if there are any leaks of per-filesystem @clean_zn_cnt. This new assert inspects whether the return value of ubifs_destroy_tnc_subtree() is equal to @clean_zn_cnt or not while umount. 
Artem: a minor amendment Signed-off-by: hujianyang Signed-off-by: Artem Bityutskiy --- fs/ubifs/tnc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs/ubifs') diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c index 9083bc7ed4ae..8a40cf9c02d7 100644 --- a/fs/ubifs/tnc.c +++ b/fs/ubifs/tnc.c @@ -2859,10 +2859,11 @@ void ubifs_tnc_close(struct ubifs_info *c) { tnc_destroy_cnext(c); if (c->zroot.znode) { - long n; + long n, freed; - ubifs_destroy_tnc_subtree(c->zroot.znode); n = atomic_long_read(&c->clean_zn_cnt); + freed = ubifs_destroy_tnc_subtree(c->zroot.znode); + ubifs_assert(freed == n); atomic_long_sub(n, &ubifs_clean_zn_cnt); } kfree(c->gap_lebs); -- cgit v1.2.3-70-g09d2