From 7534e854b930a021dedf9e16396ced5e70e1aba3 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 16 Oct 2013 08:26:08 -0400 Subject: ext4: fixup kerndoc annotation of mpage_map_and_submit_extent() Document give_up_on_write argument of mpage_map_and_submit_extent(). Signed-off-by: Jan Kara Signed-off-by: "Theodore Ts'o" --- fs/ext4/inode.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs/ext4/inode.c') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index e274e9c1171f..e7e5b3d8f002 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2178,6 +2178,9 @@ static int mpage_map_one_extent(handle_t *handle, struct mpage_da_data *mpd) * * @handle - handle for journal operations * @mpd - extent to map + * @give_up_on_write - we set this to true iff there is a fatal error and there + * is no hope of writing the data. The caller should discard + * dirty pages to avoid infinite loops. * * The function maps extent starting at mpd->lblk of length mpd->len. If it is * delayed, blocks are allocated, if it is unwritten, we may need to convert -- cgit v1.2.3-70-g09d2 From aeac589a74b91c4c07458272767e089810fbd23d Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 17 Oct 2013 18:56:16 -0400 Subject: ext4: fix performance regression in ext4_writepages Commit 4e7ea81db5(ext4: restructure writeback path) introduces another performance regression on random write: - one more page may be added to ext4 extent in mpage_prepare_extent_to_map, and will be submitted for I/O so nr_to_write will become -1 before 'done' is set - the worse thing is that dirty pages may still be retrieved from page cache after nr_to_write becomes negative, so lots of small chunks can be submitted to block device when page writeback is catching up with write path, and performance is hurted. On one arm A15 board with sata 3.0 SSD(CPU: 1.5GHz dura core, RAM: 2GB, SATA controller: 3.0Gbps), this patch can improve below test's result from 157MB/sec to 174MB/sec(>10%): dd if=/dev/zero of=./z.img bs=8K count=512K The above test is actually prototype of block write in bonnie++ utility. This patch makes sure no more pages than nr_to_write can be added to extent for mapping, so that nr_to_write won't become negative. Cc: linux-ext4@vger.kernel.org Acked-by: Jan Kara Signed-off-by: Ming Lei Signed-off-by: "Theodore Ts'o" --- fs/ext4/inode.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) (limited to 'fs/ext4/inode.c') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index e7e5b3d8f002..94aac67b55c9 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2298,6 +2298,7 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd) struct address_space *mapping = mpd->inode->i_mapping; struct pagevec pvec; unsigned int nr_pages; + long left = mpd->wbc->nr_to_write; pgoff_t index = mpd->first_page; pgoff_t end = mpd->last_page; int tag; @@ -2333,6 +2334,17 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd) if (page->index > end) goto out; + /* + * Accumulated enough dirty pages? This doesn't apply + * to WB_SYNC_ALL mode. For integrity sync we have to + * keep going because someone may be concurrently + * dirtying pages, and we might have synced a lot of + * newly appeared dirty pages, but have not synced all + * of the old dirty pages. + */ + if (mpd->wbc->sync_mode == WB_SYNC_NONE && left <= 0) + goto out; + /* If we can't merge this page, we are done. */ if (mpd->map.m_len > 0 && mpd->next_page != page->index) goto out; @@ -2367,19 +2379,7 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd) if (err <= 0) goto out; err = 0; - - /* - * Accumulated enough dirty pages? This doesn't apply - * to WB_SYNC_ALL mode. For integrity sync we have to - * keep going because someone may be concurrently - * dirtying pages, and we might have synced a lot of - * newly appeared dirty pages, but have not synced all - * of the old dirty pages. - */ - if (mpd->wbc->sync_mode == WB_SYNC_NONE && - mpd->next_page - mpd->first_page >= - mpd->wbc->nr_to_write) - goto out; + left--; } pagevec_release(&pvec); cond_resched(); -- cgit v1.2.3-70-g09d2 From bbf023c74dcf380769aec00aedd706a266b7d9ef Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 30 Oct 2013 07:27:16 -0400 Subject: ext4: pair trace_ext4_writepages & trace_ext4_writepages_result Pair the two trace events to make troubeshooting writepages easier, and it should be more convinient to write a simple script to parse the traces. Cc: linux-ext4@vger.kernel.org Cc: Jan Kara Signed-off-by: Ming Lei Signed-off-by: "Theodore Ts'o" --- fs/ext4/inode.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'fs/ext4/inode.c') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 94aac67b55c9..43015fa69c3a 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2423,16 +2423,15 @@ static int ext4_writepages(struct address_space *mapping, * because that could violate lock ordering on umount */ if (!mapping->nrpages || !mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) - return 0; + goto out_writepages; if (ext4_should_journal_data(inode)) { struct blk_plug plug; - int ret; blk_start_plug(&plug); ret = write_cache_pages(mapping, wbc, __writepage, mapping); blk_finish_plug(&plug); - return ret; + goto out_writepages; } /* @@ -2445,8 +2444,10 @@ static int ext4_writepages(struct address_space *mapping, * *never* be called, so if that ever happens, we would want * the stack trace. */ - if (unlikely(sbi->s_mount_flags & EXT4_MF_FS_ABORTED)) - return -EROFS; + if (unlikely(sbi->s_mount_flags & EXT4_MF_FS_ABORTED)) { + ret = -EROFS; + goto out_writepages; + } if (ext4_should_dioread_nolock(inode)) { /* -- cgit v1.2.3-70-g09d2 From 9206c561554c948111d3cf6fc563a0beaaf790b3 Mon Sep 17 00:00:00 2001 From: Andreas Dilger Date: Mon, 11 Nov 2013 22:38:12 -0500 Subject: ext4: return non-zero st_blocks for inline data Return a non-zero st_blocks to userspace for statfs() and friends. Some versions of tar will assume that files with st_blocks == 0 do not contain any data and will skip reading them entirely. Signed-off-by: Andreas Dilger Signed-off-by: "Theodore Ts'o" --- fs/ext4/inode.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'fs/ext4/inode.c') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 43015fa69c3a..075763474118 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4693,6 +4693,15 @@ int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, inode = dentry->d_inode; generic_fillattr(inode, stat); + /* + * If there is inline data in the inode, the inode will normally not + * have data blocks allocated (it may have an external xattr block). + * Report at least one sector for such files, so tools like tar, rsync, + * others doen't incorrectly think the file is completely sparse. + */ + if (unlikely(ext4_has_inline_data(inode))) + stat->blocks += (stat->size + 511) >> 9; + /* * We can't update i_blocks if the block allocation is delayed * otherwise in the case of system crash before the real block @@ -4704,9 +4713,8 @@ int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, * blocks for this file. */ delalloc_blocks = EXT4_C2B(EXT4_SB(inode->i_sb), - EXT4_I(inode)->i_reserved_data_blocks); - - stat->blocks += delalloc_blocks << (inode->i_sb->s_blocksize_bits-9); + EXT4_I(inode)->i_reserved_data_blocks); + stat->blocks += delalloc_blocks << (inode->i_sb->s_blocksize_bits - 9); return 0; } -- cgit v1.2.3-70-g09d2