From bbd0f32721e28cc7097fa50afa96178896f9e33b Mon Sep 17 00:00:00 2001 From: Joseph Qi Date: Mon, 23 Sep 2019 15:33:08 -0700 Subject: ocfs2: use jbd2_inode dirty range scoping 6ba0e7dc64a5 ("jbd2: introduce jbd2_inode dirty range scoping") allow us scoping each of the inode dirty ranges associated with a given transaction, and ext4 already does this way. Now let's also use the newly introduced jbd2_inode dirty range scoping to prevent us from waiting forever when trying to complete a journal transaction in ocfs2. Link: http://lkml.kernel.org/r/1562977611-8412-1-git-send-email-joseph.qi@linux.alibaba.com Signed-off-by: Joseph Qi Reviewed-by: Ross Zwisler Reviewed-by: Changwei Ge Cc: "Theodore Ts'o" Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Joseph Qi Cc: Gang He Cc: Jun Piao Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/alloc.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'fs/ocfs2/alloc.c') diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 0c335b51043d..f5d2bd15e0ca 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -6792,6 +6792,8 @@ void ocfs2_map_and_dirty_page(struct inode *inode, handle_t *handle, struct page *page, int zero, u64 *phys) { int ret, partial = 0; + loff_t start_byte = ((loff_t)page->index << PAGE_SHIFT) + from; + loff_t length = to - from; ret = ocfs2_map_page_blocks(page, phys, inode, from, to, 0); if (ret) @@ -6811,7 +6813,8 @@ void ocfs2_map_and_dirty_page(struct inode *inode, handle_t *handle, if (ret < 0) mlog_errno(ret); else if (ocfs2_should_order_data(inode)) { - ret = ocfs2_jbd2_file_inode(handle, inode); + ret = ocfs2_jbd2_inode_add_write(handle, inode, + start_byte, length); if (ret < 0) mlog_errno(ret); } -- cgit v1.2.3-70-g09d2 From d7283b39dbf3de4fe54870f07d7975effa3188da Mon Sep 17 00:00:00 2001 From: Changwei Ge Date: Mon, 23 Sep 2019 15:33:40 -0700 Subject: ocfs2: checkpoint appending truncate log transaction before flushing Appending truncate log(TA) and and flushing truncate log(TF) are two separated transactions. They can be both committed but not checkpointed. If crash occurs then, both transaction will be replayed with several already released to global bitmap clusters. Then truncate log will be replayed resulting in cluster double free. To reproduce this issue, just crash the host while punching hole to files. Signed-off-by: Changwei Ge Reviewed-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/alloc.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'fs/ocfs2/alloc.c') diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index f5d2bd15e0ca..f9baefc76cf9 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -5993,6 +5993,7 @@ int __ocfs2_flush_truncate_log(struct ocfs2_super *osb) struct buffer_head *data_alloc_bh = NULL; struct ocfs2_dinode *di; struct ocfs2_truncate_log *tl; + struct ocfs2_journal *journal = osb->journal; BUG_ON(inode_trylock(tl_inode)); @@ -6013,6 +6014,20 @@ int __ocfs2_flush_truncate_log(struct ocfs2_super *osb) goto out; } + /* Appending truncate log(TA) and and flushing truncate log(TF) are + * two separated transactions. They can be both committed but not + * checkpointed. If crash occurs then, both two transaction will be + * replayed with several already released to global bitmap clusters. + * Then truncate log will be replayed resulting in cluster double free. + */ + jbd2_journal_lock_updates(journal->j_journal); + status = jbd2_journal_flush(journal->j_journal); + jbd2_journal_unlock_updates(journal->j_journal); + if (status < 0) { + mlog_errno(status); + goto out; + } + data_alloc_inode = ocfs2_get_system_file_inode(osb, GLOBAL_BITMAP_SYSTEM_INODE, OCFS2_INVALID_SLOT); -- cgit v1.2.3-70-g09d2