From 78fb9cdf035d88a39a5e5f83bb8788e4fe7c1f72 Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Mon, 27 May 2013 23:32:35 -0400 Subject: ext4: remove unused code from ext4_remove_blocks() The "head removal" branch in the condition is never used in any code path in ext4 since the function only caller ext4_ext_rm_leaf() will make sure that the extent is properly split before removing blocks. Note that there is a bug in this branch anyway. This commit removes the unused code completely and makes use of ext4_error() instead of printk if dubious range is provided. Signed-off-by: Lukas Czerner Reviewed-by: Jan Kara Signed-off-by: Theodore Ts'o --- fs/ext4/extents.c | 21 ++++----------------- 1 file changed, 4 insertions(+), 17 deletions(-) (limited to 'fs/ext4/extents.c') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index bc0f1910b9cf..18c3f1a131b3 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -2432,23 +2432,10 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode, *partial_cluster = EXT4_B2C(sbi, pblk); else *partial_cluster = 0; - } else if (from == le32_to_cpu(ex->ee_block) - && to <= le32_to_cpu(ex->ee_block) + ee_len - 1) { - /* head removal */ - ext4_lblk_t num; - ext4_fsblk_t start; - - num = to - from; - start = ext4_ext_pblock(ex); - - ext_debug("free first %u blocks starting %llu\n", num, start); - ext4_free_blocks(handle, inode, NULL, start, num, flags); - - } else { - printk(KERN_INFO "strange request: removal(2) " - "%u-%u from %u:%u\n", - from, to, le32_to_cpu(ex->ee_block), ee_len); - } + } else + ext4_error(sbi->s_sb, "strange request: removal(2) " + "%u-%u from %u:%u\n", + from, to, le32_to_cpu(ex->ee_block), ee_len); return 0; } -- cgit v1.2.3-70-g09d2 From 61801325f790ea15ba0630a7a26bd80a0741813f Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Mon, 27 May 2013 23:32:35 -0400 Subject: ext4: update ext4_ext_remove_space trace point Add "end" variable. Signed-off-by: Lukas Czerner Reviewed-by: Jan Kara Signed-off-by: Theodore Ts'o --- fs/ext4/extents.c | 6 +++--- include/trace/events/ext4.h | 21 ++++++++++++++------- 2 files changed, 17 insertions(+), 10 deletions(-) (limited to 'fs/ext4/extents.c') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 18c3f1a131b3..fb9b41483c86 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -2663,7 +2663,7 @@ int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start, return PTR_ERR(handle); again: - trace_ext4_ext_remove_space(inode, start, depth); + trace_ext4_ext_remove_space(inode, start, end, depth); /* * Check if we are removing extents inside the extent tree. If that @@ -2831,8 +2831,8 @@ again: } } - trace_ext4_ext_remove_space_done(inode, start, depth, partial_cluster, - path->p_hdr->eh_entries); + trace_ext4_ext_remove_space_done(inode, start, end, depth, + partial_cluster, path->p_hdr->eh_entries); /* If we still have something in the partial cluster and we have removed * even the first extent, then we should free the blocks in the partial diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index dcfce96c08a8..bcb5a021945c 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -2027,14 +2027,16 @@ TRACE_EVENT(ext4_ext_rm_idx, ); TRACE_EVENT(ext4_ext_remove_space, - TP_PROTO(struct inode *inode, ext4_lblk_t start, int depth), + TP_PROTO(struct inode *inode, ext4_lblk_t start, + ext4_lblk_t end, int depth), - TP_ARGS(inode, start, depth), + TP_ARGS(inode, start, end, depth), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( ext4_lblk_t, start ) + __field( ext4_lblk_t, end ) __field( int, depth ) ), @@ -2042,26 +2044,29 @@ TRACE_EVENT(ext4_ext_remove_space, __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->start = start; + __entry->end = end; __entry->depth = depth; ), - TP_printk("dev %d,%d ino %lu since %u depth %d", + TP_printk("dev %d,%d ino %lu since %u end %u depth %d", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, (unsigned) __entry->start, + (unsigned) __entry->end, __entry->depth) ); TRACE_EVENT(ext4_ext_remove_space_done, - TP_PROTO(struct inode *inode, ext4_lblk_t start, int depth, - ext4_lblk_t partial, __le16 eh_entries), + TP_PROTO(struct inode *inode, ext4_lblk_t start, ext4_lblk_t end, + int depth, ext4_lblk_t partial, __le16 eh_entries), - TP_ARGS(inode, start, depth, partial, eh_entries), + TP_ARGS(inode, start, end, depth, partial, eh_entries), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( ext4_lblk_t, start ) + __field( ext4_lblk_t, end ) __field( int, depth ) __field( ext4_lblk_t, partial ) __field( unsigned short, eh_entries ) @@ -2071,16 +2076,18 @@ TRACE_EVENT(ext4_ext_remove_space_done, __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->start = start; + __entry->end = end; __entry->depth = depth; __entry->partial = partial; __entry->eh_entries = le16_to_cpu(eh_entries); ), - TP_printk("dev %d,%d ino %lu since %u depth %d partial %u " + TP_printk("dev %d,%d ino %lu since %u end %u depth %d partial %u " "remaining_entries %u", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, (unsigned) __entry->start, + (unsigned) __entry->end, __entry->depth, (unsigned) __entry->partial, (unsigned short) __entry->eh_entries) -- cgit v1.2.3-70-g09d2 From d23142c6271c499d913d0d5e668b5a4eb6dafcb0 Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Mon, 27 May 2013 23:33:35 -0400 Subject: ext4: make punch hole code path work with bigalloc Currently punch hole is disabled in file systems with bigalloc feature enabled. However the recent changes in punch hole patch should make it easier to support punching holes on bigalloc enabled file systems. This commit changes partial_cluster handling in ext4_remove_blocks(), ext4_ext_rm_leaf() and ext4_ext_remove_space(). Currently partial_cluster is unsigned long long type and it makes sure that we will free the partial cluster if all extents has been released from that cluster. However it has been specifically designed only for truncate. With punch hole we can be freeing just some extents in the cluster leaving the rest untouched. So we have to make sure that we will notice cluster which still has some extents. To do this I've changed partial_cluster to be signed long long type. The only scenario where this could be a problem is when cluster_size == block size, however in that case there would not be any partial clusters so we're safe. For bigger clusters the signed type is enough. Now we use the negative value in partial_cluster to mark such cluster used, hence we know that we must not free it even if all other extents has been freed from such cluster. This scenario can be described in simple diagram: |FFF...FF..FF.UUU| ^----------^ punch hole . - free space | - cluster boundary F - freed extent U - used extent Also update respective tracepoints to use signed long long type for partial_cluster. Signed-off-by: Lukas Czerner Reviewed-by: Jan Kara Signed-off-by: Theodore Ts'o --- fs/ext4/extents.c | 69 +++++++++++++++++++++++++++++++++------------ include/trace/events/ext4.h | 25 ++++++++-------- 2 files changed, 64 insertions(+), 30 deletions(-) (limited to 'fs/ext4/extents.c') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index fb9b41483c86..214e68a5e79f 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -2359,7 +2359,7 @@ int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks, int chunk) static int ext4_remove_blocks(handle_t *handle, struct inode *inode, struct ext4_extent *ex, - ext4_fsblk_t *partial_cluster, + long long *partial_cluster, ext4_lblk_t from, ext4_lblk_t to) { struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); @@ -2388,7 +2388,8 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode, * partial cluster here. */ pblk = ext4_ext_pblock(ex) + ee_len - 1; - if (*partial_cluster && (EXT4_B2C(sbi, pblk) != *partial_cluster)) { + if ((*partial_cluster > 0) && + (EXT4_B2C(sbi, pblk) != *partial_cluster)) { ext4_free_blocks(handle, inode, NULL, EXT4_C2B(sbi, *partial_cluster), sbi->s_cluster_ratio, flags); @@ -2414,23 +2415,41 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode, && to == le32_to_cpu(ex->ee_block) + ee_len - 1) { /* tail removal */ ext4_lblk_t num; + unsigned int unaligned; num = le32_to_cpu(ex->ee_block) + ee_len - from; pblk = ext4_ext_pblock(ex) + ee_len - num; - ext_debug("free last %u blocks starting %llu\n", num, pblk); + /* + * Usually we want to free partial cluster at the end of the + * extent, except for the situation when the cluster is still + * used by any other extent (partial_cluster is negative). + */ + if (*partial_cluster < 0 && + -(*partial_cluster) == EXT4_B2C(sbi, pblk + num - 1)) + flags |= EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER; + + ext_debug("free last %u blocks starting %llu partial %lld\n", + num, pblk, *partial_cluster); ext4_free_blocks(handle, inode, NULL, pblk, num, flags); /* * If the block range to be freed didn't start at the * beginning of a cluster, and we removed the entire - * extent, save the partial cluster here, since we - * might need to delete if we determine that the - * truncate operation has removed all of the blocks in - * the cluster. + * extent and the cluster is not used by any other extent, + * save the partial cluster here, since we might need to + * delete if we determine that the truncate operation has + * removed all of the blocks in the cluster. + * + * On the other hand, if we did not manage to free the whole + * extent, we have to mark the cluster as used (store negative + * cluster number in partial_cluster). */ - if (pblk & (sbi->s_cluster_ratio - 1) && - (ee_len == num)) + unaligned = pblk & (sbi->s_cluster_ratio - 1); + if (unaligned && (ee_len == num) && + (*partial_cluster != -((long long)EXT4_B2C(sbi, pblk)))) *partial_cluster = EXT4_B2C(sbi, pblk); - else + else if (unaligned) + *partial_cluster = -((long long)EXT4_B2C(sbi, pblk)); + else if (*partial_cluster > 0) *partial_cluster = 0; } else ext4_error(sbi->s_sb, "strange request: removal(2) " @@ -2448,12 +2467,16 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode, * @handle: The journal handle * @inode: The files inode * @path: The path to the leaf + * @partial_cluster: The cluster which we'll have to free if all extents + * has been released from it. It gets negative in case + * that the cluster is still used. * @start: The first block to remove * @end: The last block to remove */ static int ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, - struct ext4_ext_path *path, ext4_fsblk_t *partial_cluster, + struct ext4_ext_path *path, + long long *partial_cluster, ext4_lblk_t start, ext4_lblk_t end) { struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); @@ -2466,6 +2489,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, unsigned short ex_ee_len; unsigned uninitialized = 0; struct ext4_extent *ex; + ext4_fsblk_t pblk; /* the header must be checked already in ext4_ext_remove_space() */ ext_debug("truncate since %u in leaf to %u\n", start, end); @@ -2504,6 +2528,16 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, /* If this extent is beyond the end of the hole, skip it */ if (end < ex_ee_block) { + /* + * We're going to skip this extent and move to another, + * so if this extent is not cluster aligned we have + * to mark the current cluster as used to avoid + * accidentally freeing it later on + */ + pblk = ext4_ext_pblock(ex); + if (pblk & (sbi->s_cluster_ratio - 1)) + *partial_cluster = + -((long long)EXT4_B2C(sbi, pblk)); ex--; ex_ee_block = le32_to_cpu(ex->ee_block); ex_ee_len = ext4_ext_get_actual_len(ex); @@ -2579,7 +2613,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, sizeof(struct ext4_extent)); } le16_add_cpu(&eh->eh_entries, -1); - } else + } else if (*partial_cluster > 0) *partial_cluster = 0; err = ext4_ext_dirty(handle, inode, path + depth); @@ -2597,11 +2631,10 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, err = ext4_ext_correct_indexes(handle, inode, path); /* - * If there is still a entry in the leaf node, check to see if - * it references the partial cluster. This is the only place - * where it could; if it doesn't, we can free the cluster. + * Free the partial cluster only if the current extent does not + * reference it. Otherwise we might free used cluster. */ - if (*partial_cluster && ex >= EXT_FIRST_EXTENT(eh) && + if (*partial_cluster > 0 && (EXT4_B2C(sbi, ext4_ext_pblock(ex) + ex_ee_len - 1) != *partial_cluster)) { int flags = EXT4_FREE_BLOCKS_FORGET; @@ -2651,7 +2684,7 @@ int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start, struct super_block *sb = inode->i_sb; int depth = ext_depth(inode); struct ext4_ext_path *path = NULL; - ext4_fsblk_t partial_cluster = 0; + long long partial_cluster = 0; handle_t *handle; int i = 0, err = 0; @@ -2837,7 +2870,7 @@ again: /* If we still have something in the partial cluster and we have removed * even the first extent, then we should free the blocks in the partial * cluster as well. */ - if (partial_cluster && path->p_hdr->eh_entries == 0) { + if (partial_cluster > 0 && path->p_hdr->eh_entries == 0) { int flags = EXT4_FREE_BLOCKS_FORGET; if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index bcb5a021945c..e23b2188110a 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -1928,7 +1928,7 @@ TRACE_EVENT(ext4_ext_show_extent, TRACE_EVENT(ext4_remove_blocks, TP_PROTO(struct inode *inode, struct ext4_extent *ex, ext4_lblk_t from, ext4_fsblk_t to, - ext4_fsblk_t partial_cluster), + long long partial_cluster), TP_ARGS(inode, ex, from, to, partial_cluster), @@ -1937,7 +1937,7 @@ TRACE_EVENT(ext4_remove_blocks, __field( ino_t, ino ) __field( ext4_lblk_t, from ) __field( ext4_lblk_t, to ) - __field( ext4_fsblk_t, partial ) + __field( long long, partial ) __field( ext4_fsblk_t, ee_pblk ) __field( ext4_lblk_t, ee_lblk ) __field( unsigned short, ee_len ) @@ -1955,7 +1955,7 @@ TRACE_EVENT(ext4_remove_blocks, ), TP_printk("dev %d,%d ino %lu extent [%u(%llu), %u]" - "from %u to %u partial_cluster %u", + "from %u to %u partial_cluster %lld", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, (unsigned) __entry->ee_lblk, @@ -1963,19 +1963,20 @@ TRACE_EVENT(ext4_remove_blocks, (unsigned short) __entry->ee_len, (unsigned) __entry->from, (unsigned) __entry->to, - (unsigned) __entry->partial) + (long long) __entry->partial) ); TRACE_EVENT(ext4_ext_rm_leaf, TP_PROTO(struct inode *inode, ext4_lblk_t start, - struct ext4_extent *ex, ext4_fsblk_t partial_cluster), + struct ext4_extent *ex, + long long partial_cluster), TP_ARGS(inode, start, ex, partial_cluster), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) - __field( ext4_fsblk_t, partial ) + __field( long long, partial ) __field( ext4_lblk_t, start ) __field( ext4_lblk_t, ee_lblk ) __field( ext4_fsblk_t, ee_pblk ) @@ -1993,14 +1994,14 @@ TRACE_EVENT(ext4_ext_rm_leaf, ), TP_printk("dev %d,%d ino %lu start_lblk %u last_extent [%u(%llu), %u]" - "partial_cluster %u", + "partial_cluster %lld", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, (unsigned) __entry->start, (unsigned) __entry->ee_lblk, (unsigned long long) __entry->ee_pblk, (unsigned short) __entry->ee_len, - (unsigned) __entry->partial) + (long long) __entry->partial) ); TRACE_EVENT(ext4_ext_rm_idx, @@ -2058,7 +2059,7 @@ TRACE_EVENT(ext4_ext_remove_space, TRACE_EVENT(ext4_ext_remove_space_done, TP_PROTO(struct inode *inode, ext4_lblk_t start, ext4_lblk_t end, - int depth, ext4_lblk_t partial, __le16 eh_entries), + int depth, long long partial, __le16 eh_entries), TP_ARGS(inode, start, end, depth, partial, eh_entries), @@ -2068,7 +2069,7 @@ TRACE_EVENT(ext4_ext_remove_space_done, __field( ext4_lblk_t, start ) __field( ext4_lblk_t, end ) __field( int, depth ) - __field( ext4_lblk_t, partial ) + __field( long long, partial ) __field( unsigned short, eh_entries ) ), @@ -2082,14 +2083,14 @@ TRACE_EVENT(ext4_ext_remove_space_done, __entry->eh_entries = le16_to_cpu(eh_entries); ), - TP_printk("dev %d,%d ino %lu since %u end %u depth %d partial %u " + TP_printk("dev %d,%d ino %lu since %u end %u depth %d partial %lld " "remaining_entries %u", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, (unsigned) __entry->start, (unsigned) __entry->end, __entry->depth, - (unsigned) __entry->partial, + (long long) __entry->partial, (unsigned short) __entry->eh_entries) ); -- cgit v1.2.3-70-g09d2 From a60697f411eb365fb09e639e6f183fe33d1eb796 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 31 May 2013 19:38:56 -0400 Subject: ext4: fix data offset overflow in ext4_xattr_fiemap() on 32-bit archs On 32-bit architectures with 32-bit sector_t computation of data offset in ext4_xattr_fiemap() can overflow resulting in reporting bogus data location. Fix the problem by typing block number to proper type before shifting. Signed-off-by: Jan Kara Signed-off-by: Theodore Ts'o --- fs/ext4/extents.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/ext4/extents.c') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 214e68a5e79f..299ee9df546f 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -4679,7 +4679,7 @@ static int ext4_xattr_fiemap(struct inode *inode, error = ext4_get_inode_loc(inode, &iloc); if (error) return error; - physical = iloc.bh->b_blocknr << blockbits; + physical = (__u64)iloc.bh->b_blocknr << blockbits; offset = EXT4_GOOD_OLD_INODE_SIZE + EXT4_I(inode)->i_extra_isize; physical += offset; @@ -4687,7 +4687,7 @@ static int ext4_xattr_fiemap(struct inode *inode, flags |= FIEMAP_EXTENT_DATA_INLINE; brelse(iloc.bh); } else { /* external block */ - physical = EXT4_I(inode)->i_file_acl << blockbits; + physical = (__u64)EXT4_I(inode)->i_file_acl << blockbits; length = inode->i_sb->s_blocksize; } -- cgit v1.2.3-70-g09d2 From fffb273997cc52f255bde5f18e7f6b4686c914fb Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 4 Jun 2013 13:01:11 -0400 Subject: ext4: better estimate credits needed for ext4_da_writepages() We limit the number of blocks written in a single loop of ext4_da_writepages() to 64 when inode uses indirect blocks. That is unnecessary as credit estimates for mapping logically continguous run of blocks is rather low even for inode with indirect blocks. So just lift this limitation and properly calculate the number of necessary credits. This better credit estimate will also later allow us to always write at least a single page in one iteration. Reviewed-by: Zheng Liu Signed-off-by: Jan Kara Signed-off-by: "Theodore Ts'o" --- fs/ext4/ext4.h | 3 +-- fs/ext4/extents.c | 16 +++++++------- fs/ext4/inode.c | 62 ++++++++++++++++++++++++------------------------------- 3 files changed, 35 insertions(+), 46 deletions(-) (limited to 'fs/ext4/extents.c') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 25e261da871f..2ebfcde5a156 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2596,8 +2596,7 @@ struct ext4_extent; extern int ext4_ext_tree_init(handle_t *handle, struct inode *); extern int ext4_ext_writepage_trans_blocks(struct inode *, int); -extern int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks, - int chunk); +extern int ext4_ext_index_trans_blocks(struct inode *inode, int extents); extern int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, struct ext4_map_blocks *map, int flags); extern void ext4_ext_truncate(handle_t *, struct inode *); diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 299ee9df546f..94283d06cace 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -2328,17 +2328,15 @@ int ext4_ext_calc_credits_for_single_extent(struct inode *inode, int nrblocks, } /* - * How many index/leaf blocks need to change/allocate to modify nrblocks? + * How many index/leaf blocks need to change/allocate to add @extents extents? * - * if nrblocks are fit in a single extent (chunk flag is 1), then - * in the worse case, each tree level index/leaf need to be changed - * if the tree split due to insert a new extent, then the old tree - * index/leaf need to be updated too + * If we add a single extent, then in the worse case, each tree level + * index/leaf need to be changed in case of the tree split. * - * If the nrblocks are discontiguous, they could cause - * the whole tree split more than once, but this is really rare. + * If more extents are inserted, they could cause the whole tree split more + * than once, but this is really rare. */ -int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks, int chunk) +int ext4_ext_index_trans_blocks(struct inode *inode, int extents) { int index; int depth; @@ -2349,7 +2347,7 @@ int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks, int chunk) depth = ext_depth(inode); - if (chunk) + if (extents <= 1) index = depth * 2; else index = depth * 3; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 23d9a44d721a..2b777e51b677 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -136,6 +136,8 @@ static void ext4_invalidatepage(struct page *page, unsigned int offset, unsigned int length); static int __ext4_journalled_writepage(struct page *page, unsigned int len); static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh); +static int ext4_meta_trans_blocks(struct inode *inode, int lblocks, + int pextents); /* * Test whether an inode is a fast symlink. @@ -2203,28 +2205,25 @@ static int ext4_writepage(struct page *page, } /* - * This is called via ext4_da_writepages() to - * calculate the total number of credits to reserve to fit - * a single extent allocation into a single transaction, - * ext4_da_writpeages() will loop calling this before - * the block allocation. + * mballoc gives us at most this number of blocks... + * XXX: That seems to be only a limitation of ext4_mb_normalize_request(). + * The rest of mballoc seems to handle chunks upto full group size. */ +#define MAX_WRITEPAGES_EXTENT_LEN 2048 +/* + * Calculate the total number of credits to reserve for one writepages + * iteration. This is called from ext4_da_writepages(). We map an extent of + * upto MAX_WRITEPAGES_EXTENT_LEN blocks and then we go on and finish mapping + * the last partial page. So in total we can map MAX_WRITEPAGES_EXTENT_LEN + + * bpp - 1 blocks in bpp different extents. + */ static int ext4_da_writepages_trans_blocks(struct inode *inode) { - int max_blocks = EXT4_I(inode)->i_reserved_data_blocks; - - /* - * With non-extent format the journal credit needed to - * insert nrblocks contiguous block is dependent on - * number of contiguous block. So we will limit - * number of contiguous block to a sane value - */ - if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) && - (max_blocks > EXT4_MAX_TRANS_DATA)) - max_blocks = EXT4_MAX_TRANS_DATA; + int bpp = ext4_journal_blocks_per_page(inode); - return ext4_chunk_trans_blocks(inode, max_blocks); + return ext4_meta_trans_blocks(inode, + MAX_WRITEPAGES_EXTENT_LEN + bpp - 1, bpp); } /* @@ -4650,11 +4649,12 @@ int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, return 0; } -static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk) +static int ext4_index_trans_blocks(struct inode *inode, int lblocks, + int pextents) { if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) - return ext4_ind_trans_blocks(inode, nrblocks); - return ext4_ext_index_trans_blocks(inode, nrblocks, chunk); + return ext4_ind_trans_blocks(inode, lblocks); + return ext4_ext_index_trans_blocks(inode, pextents); } /* @@ -4668,7 +4668,8 @@ static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk) * * Also account for superblock, inode, quota and xattr blocks */ -static int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk) +static int ext4_meta_trans_blocks(struct inode *inode, int lblocks, + int pextents) { ext4_group_t groups, ngroups = ext4_get_groups_count(inode->i_sb); int gdpblocks; @@ -4676,14 +4677,10 @@ static int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk) int ret = 0; /* - * How many index blocks need to touch to modify nrblocks? - * The "Chunk" flag indicating whether the nrblocks is - * physically contiguous on disk - * - * For Direct IO and fallocate, they calls get_block to allocate - * one single extent at a time, so they could set the "Chunk" flag + * How many index blocks need to touch to map @lblocks logical blocks + * to @pextents physical extents? */ - idxblocks = ext4_index_trans_blocks(inode, nrblocks, chunk); + idxblocks = ext4_index_trans_blocks(inode, lblocks, pextents); ret = idxblocks; @@ -4691,12 +4688,7 @@ static int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk) * Now let's see how many group bitmaps and group descriptors need * to account */ - groups = idxblocks; - if (chunk) - groups += 1; - else - groups += nrblocks; - + groups = idxblocks + pextents; gdpblocks = groups; if (groups > ngroups) groups = ngroups; @@ -4727,7 +4719,7 @@ int ext4_writepage_trans_blocks(struct inode *inode) int bpp = ext4_journal_blocks_per_page(inode); int ret; - ret = ext4_meta_trans_blocks(inode, bpp, 0); + ret = ext4_meta_trans_blocks(inode, bpp, bpp); /* Account for data blocks for journalled mode */ if (ext4_should_journal_data(inode)) -- cgit v1.2.3-70-g09d2 From 6b523df4fb5ae281ddbc817f40504b33e6226554 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 4 Jun 2013 13:21:11 -0400 Subject: ext4: use transaction reservation for extent conversion in ext4_end_io Later we would like to clear PageWriteback bit only after extent conversion from unwritten to written extents is performed. However it is not possible to start a transaction after PageWriteback is set because that violates lock ordering (and is easy to deadlock). So we have to reserve a transaction before locking pages and sending them for IO and later we use the transaction for extent conversion from ext4_end_io(). Reviewed-by: Zheng Liu Signed-off-by: Jan Kara Signed-off-by: "Theodore Ts'o" --- fs/ext4/ext4.h | 12 +++++++++--- fs/ext4/ext4_jbd2.h | 5 +++-- fs/ext4/extents.c | 40 +++++++++++++++++++++++++++++----------- fs/ext4/inode.c | 25 ++++++++++++++++++++----- fs/ext4/page-io.c | 11 ++++++++--- 5 files changed, 69 insertions(+), 24 deletions(-) (limited to 'fs/ext4/extents.c') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 0a9b729f991b..8de219b758fb 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -184,10 +184,13 @@ struct ext4_map_blocks { #define EXT4_IO_END_DIRECT 0x0004 /* - * For converting uninitialized extents on a work queue. + * For converting uninitialized extents on a work queue. 'handle' is used for + * buffered writeback. */ typedef struct ext4_io_end { struct list_head list; /* per-file finished IO list */ + handle_t *handle; /* handle reserved for extent + * conversion */ struct inode *inode; /* file being written to */ unsigned int flag; /* unwritten or not */ loff_t offset; /* offset in the file */ @@ -1322,6 +1325,9 @@ static inline void ext4_set_io_unwritten_flag(struct inode *inode, struct ext4_io_end *io_end) { if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) { + /* Writeback has to have coversion transaction reserved */ + WARN_ON(EXT4_SB(inode->i_sb)->s_journal && !io_end->handle && + !(io_end->flag & EXT4_IO_END_DIRECT)); io_end->flag |= EXT4_IO_END_UNWRITTEN; atomic_inc(&EXT4_I(inode)->i_unwritten); } @@ -2591,8 +2597,8 @@ extern void ext4_ext_init(struct super_block *); extern void ext4_ext_release(struct super_block *); extern long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len); -extern int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset, - ssize_t len); +extern int ext4_convert_unwritten_extents(handle_t *handle, struct inode *inode, + loff_t offset, ssize_t len); extern int ext4_map_blocks(handle_t *handle, struct inode *inode, struct ext4_map_blocks *map, int flags); extern int ext4_ext_calc_metadata_amount(struct inode *inode, diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h index fdd865eb1879..2877258d9497 100644 --- a/fs/ext4/ext4_jbd2.h +++ b/fs/ext4/ext4_jbd2.h @@ -134,7 +134,8 @@ static inline int ext4_jbd2_credits_xattr(struct inode *inode) #define EXT4_HT_MIGRATE 8 #define EXT4_HT_MOVE_EXTENTS 9 #define EXT4_HT_XATTR 10 -#define EXT4_HT_MAX 11 +#define EXT4_HT_EXT_CONVERT 11 +#define EXT4_HT_MAX 12 /** * struct ext4_journal_cb_entry - Base structure for callback information. @@ -319,7 +320,7 @@ static inline handle_t *__ext4_journal_start(struct inode *inode, #define ext4_journal_stop(handle) \ __ext4_journal_stop(__func__, __LINE__, (handle)) -#define ext4_journal_start_reserve(handle, type) \ +#define ext4_journal_start_reserved(handle, type) \ __ext4_journal_start_reserved((handle), __LINE__, (type)) handle_t *__ext4_journal_start_reserved(handle_t *handle, unsigned int line, diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 94283d06cace..208f664f9ee0 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -4566,10 +4566,9 @@ retry: * function, to convert the fallocated extents after IO is completed. * Returns 0 on success. */ -int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset, - ssize_t len) +int ext4_convert_unwritten_extents(handle_t *handle, struct inode *inode, + loff_t offset, ssize_t len) { - handle_t *handle; unsigned int max_blocks; int ret = 0; int ret2 = 0; @@ -4584,16 +4583,32 @@ int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset, max_blocks = ((EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits) - map.m_lblk); /* - * credits to insert 1 extent into extent tree + * This is somewhat ugly but the idea is clear: When transaction is + * reserved, everything goes into it. Otherwise we rather start several + * smaller transactions for conversion of each extent separately. */ - credits = ext4_chunk_trans_blocks(inode, max_blocks); + if (handle) { + handle = ext4_journal_start_reserved(handle, + EXT4_HT_EXT_CONVERT); + if (IS_ERR(handle)) + return PTR_ERR(handle); + credits = 0; + } else { + /* + * credits to insert 1 extent into extent tree + */ + credits = ext4_chunk_trans_blocks(inode, max_blocks); + } while (ret >= 0 && ret < max_blocks) { map.m_lblk += ret; map.m_len = (max_blocks -= ret); - handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS, credits); - if (IS_ERR(handle)) { - ret = PTR_ERR(handle); - break; + if (credits) { + handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS, + credits); + if (IS_ERR(handle)) { + ret = PTR_ERR(handle); + break; + } } ret = ext4_map_blocks(handle, inode, &map, EXT4_GET_BLOCKS_IO_CONVERT_EXT); @@ -4604,10 +4619,13 @@ int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset, inode->i_ino, map.m_lblk, map.m_len, ret); ext4_mark_inode_dirty(handle, inode); - ret2 = ext4_journal_stop(handle); - if (ret <= 0 || ret2 ) + if (credits) + ret2 = ext4_journal_stop(handle); + if (ret <= 0 || ret2) break; } + if (!credits) + ret2 = ext4_journal_stop(handle); return ret > 0 ? ret2 : ret; } diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 736d164dc2ba..510dba785db4 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1410,6 +1410,7 @@ static void ext4_da_page_release_reservation(struct page *page, struct mpage_da_data { struct inode *inode; struct writeback_control *wbc; + pgoff_t first_page; /* The first page to write */ pgoff_t next_page; /* Current page to examine */ pgoff_t last_page; /* Last page to examine */ @@ -2108,8 +2109,14 @@ static int mpage_map_one_extent(handle_t *handle, struct mpage_da_data *mpd) err = ext4_map_blocks(handle, inode, map, get_blocks_flags); if (err < 0) return err; - if (map->m_flags & EXT4_MAP_UNINIT) + if (map->m_flags & EXT4_MAP_UNINIT) { + if (!mpd->io_submit.io_end->handle && + ext4_handle_valid(handle)) { + mpd->io_submit.io_end->handle = handle->h_rsv_handle; + handle->h_rsv_handle = NULL; + } ext4_set_io_unwritten_flag(inode, mpd->io_submit.io_end); + } BUG_ON(map->m_len == 0); if (map->m_flags & EXT4_MAP_NEW) { @@ -2351,7 +2358,7 @@ static int ext4_da_writepages(struct address_space *mapping, handle_t *handle = NULL; struct mpage_da_data mpd; struct inode *inode = mapping->host; - int needed_blocks, ret = 0; + int needed_blocks, rsv_blocks = 0, ret = 0; struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); bool done; struct blk_plug plug; @@ -2379,6 +2386,14 @@ static int ext4_da_writepages(struct address_space *mapping, if (unlikely(sbi->s_mount_flags & EXT4_MF_FS_ABORTED)) return -EROFS; + if (ext4_should_dioread_nolock(inode)) { + /* + * We may need to convert upto one extent per block in + * the page and we may dirty the inode. + */ + rsv_blocks = 1 + (PAGE_CACHE_SIZE >> inode->i_blkbits); + } + /* * If we have inline data and arrive here, it means that * we will soon create the block for the 1st page, so @@ -2438,8 +2453,8 @@ retry: needed_blocks = ext4_da_writepages_trans_blocks(inode); /* start a new transaction */ - handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, - needed_blocks); + handle = ext4_journal_start_with_reserve(inode, + EXT4_HT_WRITE_PAGE, needed_blocks, rsv_blocks); if (IS_ERR(handle)) { ret = PTR_ERR(handle); ext4_msg(inode->i_sb, KERN_CRIT, "%s: jbd2_start: " @@ -3120,7 +3135,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, * for non AIO case, since the IO is already * completed, we could do the conversion right here */ - err = ext4_convert_unwritten_extents(inode, + err = ext4_convert_unwritten_extents(NULL, inode, offset, ret); if (err < 0) ret = err; diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index de6860c7836e..5f20bc481041 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -66,6 +66,7 @@ static void ext4_release_io_end(ext4_io_end_t *io_end) { BUG_ON(!list_empty(&io_end->list)); BUG_ON(io_end->flag & EXT4_IO_END_UNWRITTEN); + WARN_ON(io_end->handle); if (atomic_dec_and_test(&EXT4_I(io_end->inode)->i_ioend_count)) wake_up_all(ext4_ioend_wq(io_end->inode)); @@ -92,13 +93,15 @@ static int ext4_end_io(ext4_io_end_t *io) struct inode *inode = io->inode; loff_t offset = io->offset; ssize_t size = io->size; + handle_t *handle = io->handle; int ret = 0; ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p," "list->prev 0x%p\n", io, inode->i_ino, io->list.next, io->list.prev); - ret = ext4_convert_unwritten_extents(inode, offset, size); + io->handle = NULL; /* Following call will use up the handle */ + ret = ext4_convert_unwritten_extents(handle, inode, offset, size); if (ret < 0) { ext4_msg(inode->i_sb, KERN_EMERG, "failed to convert unwritten extents to written " @@ -228,8 +231,10 @@ int ext4_put_io_end(ext4_io_end_t *io_end) if (atomic_dec_and_test(&io_end->count)) { if (io_end->flag & EXT4_IO_END_UNWRITTEN) { - err = ext4_convert_unwritten_extents(io_end->inode, - io_end->offset, io_end->size); + err = ext4_convert_unwritten_extents(io_end->handle, + io_end->inode, io_end->offset, + io_end->size); + io_end->handle = NULL; ext4_clear_io_unwritten_flag(io_end); } ext4_release_io_end(io_end); -- cgit v1.2.3-70-g09d2 From 981250ca89261f98bdfd2d6be1fcccb96cbbc00d Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 12 Jun 2013 11:48:29 -0400 Subject: ext4: don't use EXT4_FREE_BLOCKS_FORGET unnecessarily Commit 18888cf0883c: "ext4: speed up truncate/unlink by not using bforget() unless needed" removed the use of EXT4_FREE_BLOCKS_FORGET in the most important codepath for file systems using extents, but a similar optimization also can be done for file systems using indirect blocks, and for the two special cases in the ext4 extents code. Cc: Andrey Sidorov Signed-off-by: "Theodore Ts'o" --- fs/ext4/extents.c | 26 ++++++++++++-------------- fs/ext4/indirect.c | 6 ++++-- 2 files changed, 16 insertions(+), 16 deletions(-) (limited to 'fs/ext4/extents.c') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 208f664f9ee0..d04f40936397 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -2355,6 +2355,15 @@ int ext4_ext_index_trans_blocks(struct inode *inode, int extents) return index; } +static inline int get_default_free_blocks_flags(struct inode *inode) +{ + if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) + return EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET; + else if (ext4_should_journal_data(inode)) + return EXT4_FREE_BLOCKS_FORGET; + return 0; +} + static int ext4_remove_blocks(handle_t *handle, struct inode *inode, struct ext4_extent *ex, long long *partial_cluster, @@ -2363,12 +2372,7 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode, struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); unsigned short ee_len = ext4_ext_get_actual_len(ex); ext4_fsblk_t pblk; - int flags = 0; - - if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) - flags |= EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET; - else if (ext4_should_journal_data(inode)) - flags |= EXT4_FREE_BLOCKS_FORGET; + int flags = get_default_free_blocks_flags(inode); /* * For bigalloc file systems, we never free a partial cluster @@ -2635,10 +2639,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, if (*partial_cluster > 0 && (EXT4_B2C(sbi, ext4_ext_pblock(ex) + ex_ee_len - 1) != *partial_cluster)) { - int flags = EXT4_FREE_BLOCKS_FORGET; - - if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) - flags |= EXT4_FREE_BLOCKS_METADATA; + int flags = get_default_free_blocks_flags(inode); ext4_free_blocks(handle, inode, NULL, EXT4_C2B(sbi, *partial_cluster), @@ -2869,10 +2870,7 @@ again: * even the first extent, then we should free the blocks in the partial * cluster as well. */ if (partial_cluster > 0 && path->p_hdr->eh_entries == 0) { - int flags = EXT4_FREE_BLOCKS_FORGET; - - if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) - flags |= EXT4_FREE_BLOCKS_METADATA; + int flags = get_default_free_blocks_flags(inode); ext4_free_blocks(handle, inode, NULL, EXT4_C2B(EXT4_SB(sb), partial_cluster), diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c index 2a1f8a577e08..963d23dc1028 100644 --- a/fs/ext4/indirect.c +++ b/fs/ext4/indirect.c @@ -926,11 +926,13 @@ static int ext4_clear_blocks(handle_t *handle, struct inode *inode, __le32 *last) { __le32 *p; - int flags = EXT4_FREE_BLOCKS_FORGET | EXT4_FREE_BLOCKS_VALIDATED; + int flags = EXT4_FREE_BLOCKS_VALIDATED; int err; if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) - flags |= EXT4_FREE_BLOCKS_METADATA; + flags |= EXT4_FREE_BLOCKS_FORGET | EXT4_FREE_BLOCKS_METADATA; + else if (ext4_should_journal_data(inode)) + flags |= EXT4_FREE_BLOCKS_FORGET; if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), block_to_free, count)) { -- cgit v1.2.3-70-g09d2 From 72dac95d4403ee7231e094e1735ecb96032e6117 Mon Sep 17 00:00:00 2001 From: Jie Liu Date: Wed, 12 Jun 2013 23:13:59 -0400 Subject: ext4: return FIEMAP_EXTENT_UNKNOWN for delalloc extents Return the FIEMAP_EXTENT_UNKNOWN flag as well except the FIEMAP_EXTENT_DELALLOC because the data location of an delayed allocation extent is unknown. Signed-off-by: Jie Liu --- fs/ext4/extents.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/ext4/extents.c') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index d04f40936397..51e41687f51e 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -2125,7 +2125,8 @@ static int ext4_fill_fiemap_extents(struct inode *inode, next_del = ext4_find_delayed_extent(inode, &es); if (!exists && next_del) { exists = 1; - flags |= FIEMAP_EXTENT_DELALLOC; + flags |= (FIEMAP_EXTENT_DELALLOC | + FIEMAP_EXTENT_UNKNOWN); } up_read(&EXT4_I(inode)->i_data_sem); -- cgit v1.2.3-70-g09d2 From aeb2817a4ea99f62532adf3377be3b282d3bda12 Mon Sep 17 00:00:00 2001 From: Ashish Sangwan Date: Mon, 1 Jul 2013 08:12:38 -0400 Subject: ext4: pass inode pointer instead of file pointer to punch hole No need to pass file pointer when we can directly pass inode pointer. Signed-off-by: Ashish Sangwan Signed-off-by: Namjae Jeon Signed-off-by: "Theodore Ts'o" --- fs/ext4/ext4.h | 2 +- fs/ext4/extents.c | 2 +- fs/ext4/inode.c | 3 +-- 3 files changed, 3 insertions(+), 4 deletions(-) (limited to 'fs/ext4/extents.c') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 7cc6b18230ec..6ed348d8d3eb 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2088,7 +2088,7 @@ extern int ext4_change_inode_journal_flag(struct inode *, int); extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *); extern int ext4_can_truncate(struct inode *inode); extern void ext4_truncate(struct inode *); -extern int ext4_punch_hole(struct file *file, loff_t offset, loff_t length); +extern int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length); extern int ext4_truncate_restart_trans(handle_t *, struct inode *, int nblocks); extern void ext4_set_inode_flags(struct inode *); extern void ext4_get_inode_flags(struct ext4_inode_info *); diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 51e41687f51e..937593e2f006 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -4463,7 +4463,7 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) return -EOPNOTSUPP; if (mode & FALLOC_FL_PUNCH_HOLE) - return ext4_punch_hole(file, offset, len); + return ext4_punch_hole(inode, offset, len); ret = ext4_convert_inline_data(inode); if (ret) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index f9ba51f68777..753c15683809 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -3481,9 +3481,8 @@ int ext4_can_truncate(struct inode *inode) * Returns: 0 on success or negative on failure */ -int ext4_punch_hole(struct file *file, loff_t offset, loff_t length) +int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length) { - struct inode *inode = file_inode(file); struct super_block *sb = inode->i_sb; ext4_lblk_t first_block, stop_block; struct address_space *mapping = inode->i_mapping; -- cgit v1.2.3-70-g09d2 From 21ddd568c133024196d394c43923f55cad1e7bd0 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 1 Jul 2013 08:12:40 -0400 Subject: ext4: translate flag bits to strings in tracepoints Translate the bitfields used in various flags argument to strings to make the tracepoint output more human-readable. Signed-off-by: "Theodore Ts'o" --- fs/ext4/extents.c | 2 +- fs/ext4/indirect.c | 2 +- include/trace/events/ext4.h | 135 +++++++++++++++++++++++++++++++------------- 3 files changed, 98 insertions(+), 41 deletions(-) (limited to 'fs/ext4/extents.c') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 937593e2f006..575faa090253 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -4380,7 +4380,7 @@ out2: } out3: - trace_ext4_ext_map_blocks_exit(inode, map, err ? err : allocated); + trace_ext4_ext_map_blocks_exit(inode, flags, map, err ? err : allocated); return err ? err : allocated; } diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c index 963d23dc1028..87b30cd357e7 100644 --- a/fs/ext4/indirect.c +++ b/fs/ext4/indirect.c @@ -624,7 +624,7 @@ cleanup: partial--; } out: - trace_ext4_ind_map_blocks_exit(inode, map, err); + trace_ext4_ind_map_blocks_exit(inode, flags, map, err); return err; } diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index 72f523eb82e0..2068db241f22 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -19,6 +19,57 @@ struct extent_status; #define EXT4_I(inode) (container_of(inode, struct ext4_inode_info, vfs_inode)) +#define show_mballoc_flags(flags) __print_flags(flags, "|", \ + { EXT4_MB_HINT_MERGE, "HINT_MERGE" }, \ + { EXT4_MB_HINT_RESERVED, "HINT_RESV" }, \ + { EXT4_MB_HINT_METADATA, "HINT_MDATA" }, \ + { EXT4_MB_HINT_FIRST, "HINT_FIRST" }, \ + { EXT4_MB_HINT_BEST, "HINT_BEST" }, \ + { EXT4_MB_HINT_DATA, "HINT_DATA" }, \ + { EXT4_MB_HINT_NOPREALLOC, "HINT_NOPREALLOC" }, \ + { EXT4_MB_HINT_GROUP_ALLOC, "HINT_GRP_ALLOC" }, \ + { EXT4_MB_HINT_GOAL_ONLY, "HINT_GOAL_ONLY" }, \ + { EXT4_MB_HINT_TRY_GOAL, "HINT_TRY_GOAL" }, \ + { EXT4_MB_DELALLOC_RESERVED, "DELALLOC_RESV" }, \ + { EXT4_MB_STREAM_ALLOC, "STREAM_ALLOC" }, \ + { EXT4_MB_USE_ROOT_BLOCKS, "USE_ROOT_BLKS" }, \ + { EXT4_MB_USE_RESERVED, "USE_RESV" }) + +#define show_map_flags(flags) __print_flags(flags, "|", \ + { EXT4_GET_BLOCKS_CREATE, "CREATE" }, \ + { EXT4_GET_BLOCKS_UNINIT_EXT, "UNINIT" }, \ + { EXT4_GET_BLOCKS_DELALLOC_RESERVE, "DELALLOC" }, \ + { EXT4_GET_BLOCKS_PRE_IO, "PRE_IO" }, \ + { EXT4_GET_BLOCKS_CONVERT, "CONVERT" }, \ + { EXT4_GET_BLOCKS_METADATA_NOFAIL, "METADATA_NOFAIL" }, \ + { EXT4_GET_BLOCKS_NO_NORMALIZE, "NO_NORMALIZE" }, \ + { EXT4_GET_BLOCKS_KEEP_SIZE, "KEEP_SIZE" }, \ + { EXT4_GET_BLOCKS_NO_LOCK, "NO_LOCK" }, \ + { EXT4_GET_BLOCKS_NO_PUT_HOLE, "NO_PUT_HOLE" }) + +#define show_mflags(flags) __print_flags(flags, "", \ + { EXT4_MAP_NEW, "N" }, \ + { EXT4_MAP_MAPPED, "M" }, \ + { EXT4_MAP_UNWRITTEN, "U" }, \ + { EXT4_MAP_BOUNDARY, "B" }, \ + { EXT4_MAP_UNINIT, "u" }, \ + { EXT4_MAP_FROM_CLUSTER, "C" }) + +#define show_free_flags(flags) __print_flags(flags, "|", \ + { EXT4_FREE_BLOCKS_METADATA, "METADATA" }, \ + { EXT4_FREE_BLOCKS_FORGET, "FORGET" }, \ + { EXT4_FREE_BLOCKS_VALIDATED, "VALIDATED" }, \ + { EXT4_FREE_BLOCKS_NO_QUOT_UPDATE, "NO_QUOTA" }, \ + { EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER,"1ST_CLUSTER" },\ + { EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER, "LAST_CLUSTER" }) + +#define show_extent_status(status) __print_flags(status, "", \ + { (1 << 3), "W" }, \ + { (1 << 2), "U" }, \ + { (1 << 1), "D" }, \ + { (1 << 0), "H" }) + + TRACE_EVENT(ext4_free_inode, TP_PROTO(struct inode *inode), @@ -373,10 +424,10 @@ TRACE_EVENT(ext4_da_write_pages_extent, __entry->flags = map->m_flags; ), - TP_printk("dev %d,%d ino %lu lblk %llu len %u flags 0x%04x", + TP_printk("dev %d,%d ino %lu lblk %llu len %u flags %s", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->lblk, __entry->len, - __entry->flags) + show_mflags(__entry->flags)) ); TRACE_EVENT(ext4_writepages_result, @@ -691,10 +742,10 @@ TRACE_EVENT(ext4_request_blocks, __entry->flags = ar->flags; ), - TP_printk("dev %d,%d ino %lu flags %u len %u lblk %u goal %llu " + TP_printk("dev %d,%d ino %lu flags %s len %u lblk %u goal %llu " "lleft %u lright %u pleft %llu pright %llu ", MAJOR(__entry->dev), MINOR(__entry->dev), - (unsigned long) __entry->ino, __entry->flags, + (unsigned long) __entry->ino, show_mballoc_flags(__entry->flags), __entry->len, __entry->logical, __entry->goal, __entry->lleft, __entry->lright, __entry->pleft, __entry->pright) @@ -733,10 +784,10 @@ TRACE_EVENT(ext4_allocate_blocks, __entry->flags = ar->flags; ), - TP_printk("dev %d,%d ino %lu flags %u len %u block %llu lblk %u " + TP_printk("dev %d,%d ino %lu flags %s len %u block %llu lblk %u " "goal %llu lleft %u lright %u pleft %llu pright %llu", MAJOR(__entry->dev), MINOR(__entry->dev), - (unsigned long) __entry->ino, __entry->flags, + (unsigned long) __entry->ino, show_mballoc_flags(__entry->flags), __entry->len, __entry->block, __entry->logical, __entry->goal, __entry->lleft, __entry->lright, __entry->pleft, __entry->pright) @@ -766,11 +817,11 @@ TRACE_EVENT(ext4_free_blocks, __entry->mode = inode->i_mode; ), - TP_printk("dev %d,%d ino %lu mode 0%o block %llu count %lu flags %d", + TP_printk("dev %d,%d ino %lu mode 0%o block %llu count %lu flags %s", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->mode, __entry->block, __entry->count, - __entry->flags) + show_free_flags(__entry->flags)) ); TRACE_EVENT(ext4_sync_file_enter, @@ -921,7 +972,7 @@ TRACE_EVENT(ext4_mballoc_alloc, ), TP_printk("dev %d,%d inode %lu orig %u/%d/%u@%u goal %u/%d/%u@%u " - "result %u/%d/%u@%u blks %u grps %u cr %u flags 0x%04x " + "result %u/%d/%u@%u blks %u grps %u cr %u flags %s " "tail %u broken %u", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, @@ -932,7 +983,7 @@ TRACE_EVENT(ext4_mballoc_alloc, __entry->result_group, __entry->result_start, __entry->result_len, __entry->result_logical, __entry->found, __entry->groups, __entry->cr, - __entry->flags, __entry->tail, + show_mballoc_flags(__entry->flags), __entry->tail, __entry->buddy ? 1 << __entry->buddy : 0) ); @@ -1546,10 +1597,10 @@ DECLARE_EVENT_CLASS(ext4__map_blocks_enter, __entry->flags = flags; ), - TP_printk("dev %d,%d ino %lu lblk %u len %u flags %u", + TP_printk("dev %d,%d ino %lu lblk %u len %u flags %s", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, - __entry->lblk, __entry->len, __entry->flags) + __entry->lblk, __entry->len, show_map_flags(__entry->flags)) ); DEFINE_EVENT(ext4__map_blocks_enter, ext4_ext_map_blocks_enter, @@ -1567,47 +1618,53 @@ DEFINE_EVENT(ext4__map_blocks_enter, ext4_ind_map_blocks_enter, ); DECLARE_EVENT_CLASS(ext4__map_blocks_exit, - TP_PROTO(struct inode *inode, struct ext4_map_blocks *map, int ret), + TP_PROTO(struct inode *inode, unsigned flags, struct ext4_map_blocks *map, + int ret), - TP_ARGS(inode, map, ret), + TP_ARGS(inode, flags, map, ret), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) + __field( unsigned int, flags ) __field( ext4_fsblk_t, pblk ) __field( ext4_lblk_t, lblk ) __field( unsigned int, len ) - __field( unsigned int, flags ) + __field( unsigned int, mflags ) __field( int, ret ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; + __entry->flags = flags; __entry->pblk = map->m_pblk; __entry->lblk = map->m_lblk; __entry->len = map->m_len; - __entry->flags = map->m_flags; + __entry->mflags = map->m_flags; __entry->ret = ret; ), - TP_printk("dev %d,%d ino %lu lblk %u pblk %llu len %u flags %x ret %d", + TP_printk("dev %d,%d ino %lu flags %s lblk %u pblk %llu len %u " + "mflags %s ret %d", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, - __entry->lblk, __entry->pblk, - __entry->len, __entry->flags, __entry->ret) + show_map_flags(__entry->flags), __entry->lblk, __entry->pblk, + __entry->len, show_mflags(__entry->mflags), __entry->ret) ); DEFINE_EVENT(ext4__map_blocks_exit, ext4_ext_map_blocks_exit, - TP_PROTO(struct inode *inode, struct ext4_map_blocks *map, int ret), + TP_PROTO(struct inode *inode, unsigned flags, + struct ext4_map_blocks *map, int ret), - TP_ARGS(inode, map, ret) + TP_ARGS(inode, flags, map, ret) ); DEFINE_EVENT(ext4__map_blocks_exit, ext4_ind_map_blocks_exit, - TP_PROTO(struct inode *inode, struct ext4_map_blocks *map, int ret), + TP_PROTO(struct inode *inode, unsigned flags, + struct ext4_map_blocks *map, int ret), - TP_ARGS(inode, map, ret) + TP_ARGS(inode, flags, map, ret) ); TRACE_EVENT(ext4_ext_load_extent, @@ -1779,12 +1836,12 @@ TRACE_EVENT(ext4_ext_handle_uninitialized_extents, __entry->newblk = newblock; ), - TP_printk("dev %d,%d ino %lu m_lblk %u m_pblk %llu m_len %u flags %x " + TP_printk("dev %d,%d ino %lu m_lblk %u m_pblk %llu m_len %u flags %s " "allocated %d newblock %llu", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, (unsigned) __entry->lblk, (unsigned long long) __entry->pblk, - __entry->len, __entry->flags, + __entry->len, show_map_flags(__entry->flags), (unsigned int) __entry->allocated, (unsigned long long) __entry->newblk) ); @@ -1812,10 +1869,10 @@ TRACE_EVENT(ext4_get_implied_cluster_alloc_exit, __entry->ret = ret; ), - TP_printk("dev %d,%d m_lblk %u m_pblk %llu m_len %u m_flags %u ret %d", + TP_printk("dev %d,%d m_lblk %u m_pblk %llu m_len %u m_flags %s ret %d", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->lblk, (unsigned long long) __entry->pblk, - __entry->len, __entry->flags, __entry->ret) + __entry->len, show_mflags(__entry->flags), __entry->ret) ); TRACE_EVENT(ext4_ext_put_in_cache, @@ -2146,7 +2203,7 @@ TRACE_EVENT(ext4_es_insert_extent, __field( ext4_lblk_t, lblk ) __field( ext4_lblk_t, len ) __field( ext4_fsblk_t, pblk ) - __field( unsigned long long, status ) + __field( char, status ) ), TP_fast_assign( @@ -2155,14 +2212,14 @@ TRACE_EVENT(ext4_es_insert_extent, __entry->lblk = es->es_lblk; __entry->len = es->es_len; __entry->pblk = ext4_es_pblock(es); - __entry->status = ext4_es_status(es); + __entry->status = ext4_es_status(es) >> 60; ), - TP_printk("dev %d,%d ino %lu es [%u/%u) mapped %llu status %llx", + TP_printk("dev %d,%d ino %lu es [%u/%u) mapped %llu status %s", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->lblk, __entry->len, - __entry->pblk, __entry->status) + __entry->pblk, show_extent_status(__entry->status)) ); TRACE_EVENT(ext4_es_remove_extent, @@ -2223,7 +2280,7 @@ TRACE_EVENT(ext4_es_find_delayed_extent_range_exit, __field( ext4_lblk_t, lblk ) __field( ext4_lblk_t, len ) __field( ext4_fsblk_t, pblk ) - __field( unsigned long long, status ) + __field( char, status ) ), TP_fast_assign( @@ -2232,14 +2289,14 @@ TRACE_EVENT(ext4_es_find_delayed_extent_range_exit, __entry->lblk = es->es_lblk; __entry->len = es->es_len; __entry->pblk = ext4_es_pblock(es); - __entry->status = ext4_es_status(es); + __entry->status = ext4_es_status(es) >> 60; ), - TP_printk("dev %d,%d ino %lu es [%u/%u) mapped %llu status %llx", + TP_printk("dev %d,%d ino %lu es [%u/%u) mapped %llu status %s", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->lblk, __entry->len, - __entry->pblk, __entry->status) + __entry->pblk, show_extent_status(__entry->status)) ); TRACE_EVENT(ext4_es_lookup_extent_enter, @@ -2276,7 +2333,7 @@ TRACE_EVENT(ext4_es_lookup_extent_exit, __field( ext4_lblk_t, lblk ) __field( ext4_lblk_t, len ) __field( ext4_fsblk_t, pblk ) - __field( unsigned long long, status ) + __field( char, status ) __field( int, found ) ), @@ -2286,16 +2343,16 @@ TRACE_EVENT(ext4_es_lookup_extent_exit, __entry->lblk = es->es_lblk; __entry->len = es->es_len; __entry->pblk = ext4_es_pblock(es); - __entry->status = ext4_es_status(es); + __entry->status = ext4_es_status(es) >> 60; __entry->found = found; ), - TP_printk("dev %d,%d ino %lu found %d [%u/%u) %llu %llx", + TP_printk("dev %d,%d ino %lu found %d [%u/%u) %llu %s", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->found, __entry->lblk, __entry->len, __entry->found ? __entry->pblk : 0, - __entry->found ? __entry->status : 0) + show_extent_status(__entry->found ? __entry->status : 0)) ); TRACE_EVENT(ext4_es_shrink_enter, -- cgit v1.2.3-70-g09d2 From 6ae06ff51eab5dcbbf959b05ce0f11003a305ba5 Mon Sep 17 00:00:00 2001 From: Ashish Sangwan Date: Mon, 1 Jul 2013 08:12:41 -0400 Subject: ext4: optimize starting extent in ext4_ext_rm_leaf() Both hole punch and truncate use ext4_ext_rm_leaf() for removing blocks. Currently we choose the last extent as the starting point for removing blocks: ex = EXT_LAST_EXTENT(eh); This is OK for truncate but for hole punch we can optimize the extent selection as the path is already initialized. We could use this information to select proper starting extent. The code change in this patch will not affect truncate as for truncate path[depth].p_ext will always be NULL. Signed-off-by: Ashish Sangwan Signed-off-by: Namjae Jeon Signed-off-by: "Theodore Ts'o" --- fs/ext4/extents.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs/ext4/extents.c') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 575faa090253..7097b0f680e6 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -2504,7 +2504,9 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, return -EIO; } /* find where to start removing */ - ex = EXT_LAST_EXTENT(eh); + ex = path[depth].p_ext; + if (!ex) + ex = EXT_LAST_EXTENT(eh); ex_ee_block = le32_to_cpu(ex->ee_block); ex_ee_len = ext4_ext_get_actual_len(ex); -- cgit v1.2.3-70-g09d2 From 8acd5e9b1217e58a57124d9e225afa12efeae20d Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 15 Jul 2013 00:09:19 -0400 Subject: ext4: fix error handling in ext4_ext_truncate() Previously ext4_ext_truncate() was ignoring potential error returns from ext4_es_remove_extent() and ext4_ext_remove_space(). This can lead to the on-diks extent tree and the extent status tree cache getting out of sync, which is particuarlly bad, and can lead to file system corruption and potential data loss. Signed-off-by: "Theodore Ts'o" Cc: stable@vger.kernel.org --- fs/ext4/extents.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'fs/ext4/extents.c') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 7097b0f680e6..f57cc0e7f1bc 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -4405,9 +4405,20 @@ void ext4_ext_truncate(handle_t *handle, struct inode *inode) last_block = (inode->i_size + sb->s_blocksize - 1) >> EXT4_BLOCK_SIZE_BITS(sb); +retry: err = ext4_es_remove_extent(inode, last_block, EXT_MAX_BLOCKS - last_block); + if (err == ENOMEM) { + cond_resched(); + congestion_wait(BLK_RW_ASYNC, HZ/50); + goto retry; + } + if (err) { + ext4_std_error(inode->i_sb, err); + return; + } err = ext4_ext_remove_space(inode, last_block, EXT_MAX_BLOCKS - 1); + ext4_std_error(inode->i_sb, err); } static void ext4_falloc_update_inode(struct inode *inode, -- cgit v1.2.3-70-g09d2 From c8e15130e1636f68d5165aa2605b8e9cba0f644c Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 15 Jul 2013 00:09:37 -0400 Subject: ext4: simplify calculation of blocks to free on error In ext4_ext_map_blocks(), if we have successfully allocated the data blocks, but then run into trouble inserting the extent into the extent tree, most likely due to an ENOSPC condition, determine the arguments to ext4_free_blocks() in a simpler way which is easier to prove to be correct. Signed-off-by: "Theodore Ts'o" --- fs/ext4/extents.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/ext4/extents.c') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index f57cc0e7f1bc..593091537e76 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -4261,8 +4261,8 @@ got_allocated_blocks: /* not a good idea to call discard here directly, * but otherwise we'd need to call it every free() */ ext4_discard_preallocations(inode); - ext4_free_blocks(handle, inode, NULL, ext4_ext_pblock(&newex), - ext4_ext_get_actual_len(&newex), fb_flags); + ext4_free_blocks(handle, inode, NULL, newblock, + EXT4_C2B(sbi, allocated_clusters), fb_flags); goto out2; } -- cgit v1.2.3-70-g09d2 From 76828c882630ced08b5ddce22cc0095b05de9bc5 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 15 Jul 2013 12:27:47 -0400 Subject: ext4: yield during large unlinks During large unlink operations on files with extents, we can use a lot of CPU time. This adds a cond_resched() call when starting to examine the next level of a multi-level extent tree. Multi-level extent trees are rare in the first place, and this should rarely be executed. Signed-off-by: "Theodore Ts'o" --- fs/ext4/extents.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs/ext4/extents.c') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 593091537e76..cfdc51e30257 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -2835,6 +2835,9 @@ again: err = -EIO; break; } + /* Yield here to deal with large extent trees. + * Should be a no-op if we did IO above. */ + cond_resched(); if (WARN_ON(i + 1 > depth)) { err = -EIO; break; -- cgit v1.2.3-70-g09d2 From 63b999685cb372e24eb73f255cd73547026370fd Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Tue, 16 Jul 2013 10:28:47 -0400 Subject: ext4: call ext4_es_lru_add() after handling cache miss If there are no items in the extent status tree, ext4_es_lru_add() is a no-op. So it is not sufficient to call ext4_es_lru_add() before we try to lookup an entry in the extent status tree. We also need to call it at the end of ext4_ext_map_blocks(), after items have been added to the extent status tree. This could lead to inodes with that have extent status trees but which are not in the LRU list, which means they won't get considered for eviction by the es_shrinker. Signed-off-by: "Theodore Ts'o" Cc: Zheng Liu Cc: stable@vger.kernel.org --- fs/ext4/extents.c | 5 +++-- fs/ext4/inode.c | 7 ++----- 2 files changed, 5 insertions(+), 7 deletions(-) (limited to 'fs/ext4/extents.c') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index cfdc51e30257..a61873808f76 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -4385,8 +4385,9 @@ out2: } out3: - trace_ext4_ext_map_blocks_exit(inode, flags, map, err ? err : allocated); - + trace_ext4_ext_map_blocks_exit(inode, flags, map, + err ? err : allocated); + ext4_es_lru_add(inode); return err ? err : allocated; } diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 98b9bff92a8a..ba33c67d6e48 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -514,10 +514,9 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, "logical block %lu\n", inode->i_ino, flags, map->m_len, (unsigned long) map->m_lblk); - ext4_es_lru_add(inode); - /* Lookup extent status tree firstly */ if (ext4_es_lookup_extent(inode, map->m_lblk, &es)) { + ext4_es_lru_add(inode); if (ext4_es_is_written(&es) || ext4_es_is_unwritten(&es)) { map->m_pblk = ext4_es_pblock(&es) + map->m_lblk - es.es_lblk; @@ -1529,11 +1528,9 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, "logical block %lu\n", inode->i_ino, map->m_len, (unsigned long) map->m_lblk); - ext4_es_lru_add(inode); - /* Lookup extent status tree firstly */ if (ext4_es_lookup_extent(inode, iblock, &es)) { - + ext4_es_lru_add(inode); if (ext4_es_is_hole(&es)) { retval = 0; down_read((&EXT4_I(inode)->i_data_sem)); -- cgit v1.2.3-70-g09d2 From 94eec0fc3520c759831763d866421b4d60b599b4 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 29 Jul 2013 12:12:56 -0400 Subject: ext4: fix retry handling in ext4_ext_truncate() We tested for ENOMEM instead of -ENOMEM. Oops. Signed-off-by: "Theodore Ts'o" Cc: stable@vger.kernel.org --- fs/ext4/extents.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/ext4/extents.c') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index a61873808f76..72ba4705d4fa 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -4412,7 +4412,7 @@ void ext4_ext_truncate(handle_t *handle, struct inode *inode) retry: err = ext4_es_remove_extent(inode, last_block, EXT_MAX_BLOCKS - last_block); - if (err == ENOMEM) { + if (err == -ENOMEM) { cond_resched(); congestion_wait(BLK_RW_ASYNC, HZ/50); goto retry; -- cgit v1.2.3-70-g09d2