diff options
Diffstat (limited to 'fs/ext4/extents.c')
| -rw-r--r-- | fs/ext4/extents.c | 417 | 
1 files changed, 228 insertions, 189 deletions
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 236b834b4ca8..377309c1af65 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -107,11 +107,8 @@ static int ext4_ext_truncate_extend_restart(handle_t *handle,  	if (err <= 0)  		return err;  	err = ext4_truncate_restart_trans(handle, inode, needed); -	/* -	 * We have dropped i_data_sem so someone might have cached again -	 * an extent we are going to truncate. -	 */ -	ext4_ext_invalidate_cache(inode); +	if (err == 0) +		err = -EAGAIN;  	return err;  } @@ -185,10 +182,10 @@ static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,  	if (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) {  		/*  		 * If there are at least EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME -		 * block groups per flexgroup, reserve the first block  -		 * group for directories and special files.  Regular  +		 * block groups per flexgroup, reserve the first block +		 * group for directories and special files.  Regular  		 * files will start at the second block group.  This -		 * tends to speed up directory access and improves  +		 * tends to speed up directory access and improves  		 * fsck times.  		 */  		block_group &= ~(flex_size-1); @@ -439,10 +436,10 @@ static int __ext4_ext_check(const char *function, struct inode *inode,  	return 0;  corrupted: -	__ext4_error(inode->i_sb, function, -			"bad header/extent in inode #%lu: %s - magic %x, " +	ext4_error_inode(function, inode, +			"bad header/extent: %s - magic %x, "  			"entries %u, max %u(%u), depth %u(%u)", -			inode->i_ino, error_msg, le16_to_cpu(eh->eh_magic), +			error_msg, le16_to_cpu(eh->eh_magic),  			le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max),  			max, le16_to_cpu(eh->eh_depth), depth); @@ -1622,9 +1619,7 @@ int ext4_ext_try_to_merge(struct inode *inode,  		merge_done = 1;  		WARN_ON(eh->eh_entries == 0);  		if (!eh->eh_entries) -			ext4_error(inode->i_sb, -				   "inode#%lu, eh->eh_entries = 0!", -				   inode->i_ino); +			EXT4_ERROR_INODE(inode, "eh->eh_entries = 0!");  	}  	return merge_done; @@ -2039,7 +2034,7 @@ ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block,  	struct ext4_ext_cache *cex;  	int ret = EXT4_EXT_CACHE_NO; -	/*  +	/*  	 * We borrow i_block_reservation_lock to protect i_cached_extent  	 */  	spin_lock(&EXT4_I(inode)->i_block_reservation_lock); @@ -2361,7 +2356,7 @@ static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start)  	int depth = ext_depth(inode);  	struct ext4_ext_path *path;  	handle_t *handle; -	int i = 0, err = 0; +	int i, err;  	ext_debug("truncate since %u\n", start); @@ -2370,23 +2365,26 @@ static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start)  	if (IS_ERR(handle))  		return PTR_ERR(handle); +again:  	ext4_ext_invalidate_cache(inode);  	/*  	 * We start scanning from right side, freeing all the blocks  	 * after i_size and walking into the tree depth-wise.  	 */ +	depth = ext_depth(inode);  	path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 1), GFP_NOFS);  	if (path == NULL) {  		ext4_journal_stop(handle);  		return -ENOMEM;  	} +	path[0].p_depth = depth;  	path[0].p_hdr = ext_inode_hdr(inode);  	if (ext4_ext_check(inode, path[0].p_hdr, depth)) {  		err = -EIO;  		goto out;  	} -	path[0].p_depth = depth; +	i = err = 0;  	while (i >= 0 && err == 0) {  		if (i == depth) { @@ -2480,6 +2478,8 @@ static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start)  out:  	ext4_ext_drop_refs(path);  	kfree(path); +	if (err == -EAGAIN) +		goto again;  	ext4_journal_stop(handle);  	return err; @@ -2544,7 +2544,7 @@ static void bi_complete(struct bio *bio, int error)  /* FIXME!! we need to try to merge to left or right after zero-out  */  static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)  { -	int ret = -EIO; +	int ret;  	struct bio *bio;  	int blkbits, blocksize;  	sector_t ee_pblock; @@ -2568,6 +2568,9 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)  			len = ee_len;  		bio = bio_alloc(GFP_NOIO, len); +		if (!bio) +			return -ENOMEM; +  		bio->bi_sector = ee_pblock;  		bio->bi_bdev   = inode->i_sb->s_bdev; @@ -2595,22 +2598,20 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)  		submit_bio(WRITE, bio);  		wait_for_completion(&event); -		if (test_bit(BIO_UPTODATE, &bio->bi_flags)) -			ret = 0; -		else { -			ret = -EIO; -			break; +		if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) { +			bio_put(bio); +			return -EIO;  		}  		bio_put(bio);  		ee_len    -= done;  		ee_pblock += done  << (blkbits - 9);  	} -	return ret; +	return 0;  }  #define EXT4_EXT_ZERO_LEN 7  /* - * This function is called by ext4_ext_get_blocks() if someone tries to write + * This function is called by ext4_ext_map_blocks() if someone tries to write   * to an uninitialized extent. It may result in splitting the uninitialized   * extent into multiple extents (upto three - one initialized and two   * uninitialized). @@ -2620,39 +2621,55 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)   *   c> Splits in three extents: Somone is writing in middle of the extent   */  static int ext4_ext_convert_to_initialized(handle_t *handle, -						struct inode *inode, -						struct ext4_ext_path *path, -						ext4_lblk_t iblock, -						unsigned int max_blocks) +					   struct inode *inode, +					   struct ext4_map_blocks *map, +					   struct ext4_ext_path *path)  {  	struct ext4_extent *ex, newex, orig_ex;  	struct ext4_extent *ex1 = NULL;  	struct ext4_extent *ex2 = NULL;  	struct ext4_extent *ex3 = NULL;  	struct ext4_extent_header *eh; -	ext4_lblk_t ee_block; +	ext4_lblk_t ee_block, eof_block;  	unsigned int allocated, ee_len, depth;  	ext4_fsblk_t newblock;  	int err = 0;  	int ret = 0; +	int may_zeroout; + +	ext_debug("ext4_ext_convert_to_initialized: inode %lu, logical" +		"block %llu, max_blocks %u\n", inode->i_ino, +		(unsigned long long)map->m_lblk, map->m_len); + +	eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >> +		inode->i_sb->s_blocksize_bits; +	if (eof_block < map->m_lblk + map->m_len) +		eof_block = map->m_lblk + map->m_len;  	depth = ext_depth(inode);  	eh = path[depth].p_hdr;  	ex = path[depth].p_ext;  	ee_block = le32_to_cpu(ex->ee_block);  	ee_len = ext4_ext_get_actual_len(ex); -	allocated = ee_len - (iblock - ee_block); -	newblock = iblock - ee_block + ext_pblock(ex); +	allocated = ee_len - (map->m_lblk - ee_block); +	newblock = map->m_lblk - ee_block + ext_pblock(ex); +  	ex2 = ex;  	orig_ex.ee_block = ex->ee_block;  	orig_ex.ee_len   = cpu_to_le16(ee_len);  	ext4_ext_store_pblock(&orig_ex, ext_pblock(ex)); +	/* +	 * It is safe to convert extent to initialized via explicit +	 * zeroout only if extent is fully insde i_size or new_size. +	 */ +	may_zeroout = ee_block + ee_len <= eof_block; +  	err = ext4_ext_get_access(handle, inode, path + depth);  	if (err)  		goto out;  	/* If extent has less than 2*EXT4_EXT_ZERO_LEN zerout directly */ -	if (ee_len <= 2*EXT4_EXT_ZERO_LEN) { +	if (ee_len <= 2*EXT4_EXT_ZERO_LEN && may_zeroout) {  		err =  ext4_ext_zeroout(inode, &orig_ex);  		if (err)  			goto fix_extent_len; @@ -2665,10 +2682,10 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,  		return allocated;  	} -	/* ex1: ee_block to iblock - 1 : uninitialized */ -	if (iblock > ee_block) { +	/* ex1: ee_block to map->m_lblk - 1 : uninitialized */ +	if (map->m_lblk > ee_block) {  		ex1 = ex; -		ex1->ee_len = cpu_to_le16(iblock - ee_block); +		ex1->ee_len = cpu_to_le16(map->m_lblk - ee_block);  		ext4_ext_mark_uninitialized(ex1);  		ex2 = &newex;  	} @@ -2677,15 +2694,15 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,  	 * we insert ex3, if ex1 is NULL. This is to avoid temporary  	 * overlap of blocks.  	 */ -	if (!ex1 && allocated > max_blocks) -		ex2->ee_len = cpu_to_le16(max_blocks); +	if (!ex1 && allocated > map->m_len) +		ex2->ee_len = cpu_to_le16(map->m_len);  	/* ex3: to ee_block + ee_len : uninitialised */ -	if (allocated > max_blocks) { +	if (allocated > map->m_len) {  		unsigned int newdepth;  		/* If extent has less than EXT4_EXT_ZERO_LEN zerout directly */ -		if (allocated <= EXT4_EXT_ZERO_LEN) { +		if (allocated <= EXT4_EXT_ZERO_LEN && may_zeroout) {  			/* -			 * iblock == ee_block is handled by the zerouout +			 * map->m_lblk == ee_block is handled by the zerouout  			 * at the beginning.  			 * Mark first half uninitialized.  			 * Mark second half initialized and zero out the @@ -2698,7 +2715,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,  			ext4_ext_dirty(handle, inode, path + depth);  			ex3 = &newex; -			ex3->ee_block = cpu_to_le32(iblock); +			ex3->ee_block = cpu_to_le32(map->m_lblk);  			ext4_ext_store_pblock(ex3, newblock);  			ex3->ee_len = cpu_to_le16(allocated);  			err = ext4_ext_insert_extent(handle, inode, path, @@ -2711,7 +2728,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,  				ex->ee_len   = orig_ex.ee_len;  				ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));  				ext4_ext_dirty(handle, inode, path + depth); -				/* blocks available from iblock */ +				/* blocks available from map->m_lblk */  				return allocated;  			} else if (err) @@ -2733,8 +2750,8 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,  				 */  				depth = ext_depth(inode);  				ext4_ext_drop_refs(path); -				path = ext4_ext_find_extent(inode, -								iblock, path); +				path = ext4_ext_find_extent(inode, map->m_lblk, +							    path);  				if (IS_ERR(path)) {  					err = PTR_ERR(path);  					return err; @@ -2754,12 +2771,12 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,  			return allocated;  		}  		ex3 = &newex; -		ex3->ee_block = cpu_to_le32(iblock + max_blocks); -		ext4_ext_store_pblock(ex3, newblock + max_blocks); -		ex3->ee_len = cpu_to_le16(allocated - max_blocks); +		ex3->ee_block = cpu_to_le32(map->m_lblk + map->m_len); +		ext4_ext_store_pblock(ex3, newblock + map->m_len); +		ex3->ee_len = cpu_to_le16(allocated - map->m_len);  		ext4_ext_mark_uninitialized(ex3);  		err = ext4_ext_insert_extent(handle, inode, path, ex3, 0); -		if (err == -ENOSPC) { +		if (err == -ENOSPC && may_zeroout) {  			err =  ext4_ext_zeroout(inode, &orig_ex);  			if (err)  				goto fix_extent_len; @@ -2769,7 +2786,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,  			ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));  			ext4_ext_dirty(handle, inode, path + depth);  			/* zeroed the full extent */ -			/* blocks available from iblock */ +			/* blocks available from map->m_lblk */  			return allocated;  		} else if (err) @@ -2783,11 +2800,13 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,  		 * update the extent length after successful insert of the  		 * split extent  		 */ -		orig_ex.ee_len = cpu_to_le16(ee_len - -						ext4_ext_get_actual_len(ex3)); +		ee_len -= ext4_ext_get_actual_len(ex3); +		orig_ex.ee_len = cpu_to_le16(ee_len); +		may_zeroout = ee_block + ee_len <= eof_block; +  		depth = newdepth;  		ext4_ext_drop_refs(path); -		path = ext4_ext_find_extent(inode, iblock, path); +		path = ext4_ext_find_extent(inode, map->m_lblk, path);  		if (IS_ERR(path)) {  			err = PTR_ERR(path);  			goto out; @@ -2801,14 +2820,14 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,  		if (err)  			goto out; -		allocated = max_blocks; +		allocated = map->m_len;  		/* If extent has less than EXT4_EXT_ZERO_LEN and we are trying  		 * to insert a extent in the middle zerout directly  		 * otherwise give the extent a chance to merge to left  		 */  		if (le16_to_cpu(orig_ex.ee_len) <= EXT4_EXT_ZERO_LEN && -							iblock != ee_block) { +			map->m_lblk != ee_block && may_zeroout) {  			err =  ext4_ext_zeroout(inode, &orig_ex);  			if (err)  				goto fix_extent_len; @@ -2818,7 +2837,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,  			ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));  			ext4_ext_dirty(handle, inode, path + depth);  			/* zero out the first half */ -			/* blocks available from iblock */ +			/* blocks available from map->m_lblk */  			return allocated;  		}  	} @@ -2829,12 +2848,12 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,  	 */  	if (ex1 && ex1 != ex) {  		ex1 = ex; -		ex1->ee_len = cpu_to_le16(iblock - ee_block); +		ex1->ee_len = cpu_to_le16(map->m_lblk - ee_block);  		ext4_ext_mark_uninitialized(ex1);  		ex2 = &newex;  	} -	/* ex2: iblock to iblock + maxblocks-1 : initialised */ -	ex2->ee_block = cpu_to_le32(iblock); +	/* ex2: map->m_lblk to map->m_lblk + maxblocks-1 : initialised */ +	ex2->ee_block = cpu_to_le32(map->m_lblk);  	ext4_ext_store_pblock(ex2, newblock);  	ex2->ee_len = cpu_to_le16(allocated);  	if (ex2 != ex) @@ -2877,7 +2896,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,  	goto out;  insert:  	err = ext4_ext_insert_extent(handle, inode, path, &newex, 0); -	if (err == -ENOSPC) { +	if (err == -ENOSPC && may_zeroout) {  		err =  ext4_ext_zeroout(inode, &orig_ex);  		if (err)  			goto fix_extent_len; @@ -2904,7 +2923,7 @@ fix_extent_len:  }  /* - * This function is called by ext4_ext_get_blocks() from + * This function is called by ext4_ext_map_blocks() from   * ext4_get_blocks_dio_write() when DIO to write   * to an uninitialized extent.   * @@ -2927,9 +2946,8 @@ fix_extent_len:   */  static int ext4_split_unwritten_extents(handle_t *handle,  					struct inode *inode, +					struct ext4_map_blocks *map,  					struct ext4_ext_path *path, -					ext4_lblk_t iblock, -					unsigned int max_blocks,  					int flags)  {  	struct ext4_extent *ex, newex, orig_ex; @@ -2937,41 +2955,55 @@ static int ext4_split_unwritten_extents(handle_t *handle,  	struct ext4_extent *ex2 = NULL;  	struct ext4_extent *ex3 = NULL;  	struct ext4_extent_header *eh; -	ext4_lblk_t ee_block; +	ext4_lblk_t ee_block, eof_block;  	unsigned int allocated, ee_len, depth;  	ext4_fsblk_t newblock;  	int err = 0; +	int may_zeroout; + +	ext_debug("ext4_split_unwritten_extents: inode %lu, logical" +		"block %llu, max_blocks %u\n", inode->i_ino, +		(unsigned long long)map->m_lblk, map->m_len); + +	eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >> +		inode->i_sb->s_blocksize_bits; +	if (eof_block < map->m_lblk + map->m_len) +		eof_block = map->m_lblk + map->m_len; -	ext_debug("ext4_split_unwritten_extents: inode %lu," -		  "iblock %llu, max_blocks %u\n", inode->i_ino, -		  (unsigned long long)iblock, max_blocks);  	depth = ext_depth(inode);  	eh = path[depth].p_hdr;  	ex = path[depth].p_ext;  	ee_block = le32_to_cpu(ex->ee_block);  	ee_len = ext4_ext_get_actual_len(ex); -	allocated = ee_len - (iblock - ee_block); -	newblock = iblock - ee_block + ext_pblock(ex); +	allocated = ee_len - (map->m_lblk - ee_block); +	newblock = map->m_lblk - ee_block + ext_pblock(ex); +  	ex2 = ex;  	orig_ex.ee_block = ex->ee_block;  	orig_ex.ee_len   = cpu_to_le16(ee_len);  	ext4_ext_store_pblock(&orig_ex, ext_pblock(ex));  	/* +	 * It is safe to convert extent to initialized via explicit +	 * zeroout only if extent is fully insde i_size or new_size. +	 */ +	may_zeroout = ee_block + ee_len <= eof_block; + +	/*   	 * If the uninitialized extent begins at the same logical   	 * block where the write begins, and the write completely   	 * covers the extent, then we don't need to split it.   	 */ -	if ((iblock == ee_block) && (allocated <= max_blocks)) +	if ((map->m_lblk == ee_block) && (allocated <= map->m_len))  		return allocated;  	err = ext4_ext_get_access(handle, inode, path + depth);  	if (err)  		goto out; -	/* ex1: ee_block to iblock - 1 : uninitialized */ -	if (iblock > ee_block) { +	/* ex1: ee_block to map->m_lblk - 1 : uninitialized */ +	if (map->m_lblk > ee_block) {  		ex1 = ex; -		ex1->ee_len = cpu_to_le16(iblock - ee_block); +		ex1->ee_len = cpu_to_le16(map->m_lblk - ee_block);  		ext4_ext_mark_uninitialized(ex1);  		ex2 = &newex;  	} @@ -2980,18 +3012,18 @@ static int ext4_split_unwritten_extents(handle_t *handle,  	 * we insert ex3, if ex1 is NULL. This is to avoid temporary  	 * overlap of blocks.  	 */ -	if (!ex1 && allocated > max_blocks) -		ex2->ee_len = cpu_to_le16(max_blocks); +	if (!ex1 && allocated > map->m_len) +		ex2->ee_len = cpu_to_le16(map->m_len);  	/* ex3: to ee_block + ee_len : uninitialised */ -	if (allocated > max_blocks) { +	if (allocated > map->m_len) {  		unsigned int newdepth;  		ex3 = &newex; -		ex3->ee_block = cpu_to_le32(iblock + max_blocks); -		ext4_ext_store_pblock(ex3, newblock + max_blocks); -		ex3->ee_len = cpu_to_le16(allocated - max_blocks); +		ex3->ee_block = cpu_to_le32(map->m_lblk + map->m_len); +		ext4_ext_store_pblock(ex3, newblock + map->m_len); +		ex3->ee_len = cpu_to_le16(allocated - map->m_len);  		ext4_ext_mark_uninitialized(ex3);  		err = ext4_ext_insert_extent(handle, inode, path, ex3, flags); -		if (err == -ENOSPC) { +		if (err == -ENOSPC && may_zeroout) {  			err =  ext4_ext_zeroout(inode, &orig_ex);  			if (err)  				goto fix_extent_len; @@ -3001,7 +3033,7 @@ static int ext4_split_unwritten_extents(handle_t *handle,  			ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));  			ext4_ext_dirty(handle, inode, path + depth);  			/* zeroed the full extent */ -			/* blocks available from iblock */ +			/* blocks available from map->m_lblk */  			return allocated;  		} else if (err) @@ -3015,11 +3047,13 @@ static int ext4_split_unwritten_extents(handle_t *handle,  		 * update the extent length after successful insert of the  		 * split extent  		 */ -		orig_ex.ee_len = cpu_to_le16(ee_len - -						ext4_ext_get_actual_len(ex3)); +		ee_len -= ext4_ext_get_actual_len(ex3); +		orig_ex.ee_len = cpu_to_le16(ee_len); +		may_zeroout = ee_block + ee_len <= eof_block; +  		depth = newdepth;  		ext4_ext_drop_refs(path); -		path = ext4_ext_find_extent(inode, iblock, path); +		path = ext4_ext_find_extent(inode, map->m_lblk, path);  		if (IS_ERR(path)) {  			err = PTR_ERR(path);  			goto out; @@ -3033,7 +3067,7 @@ static int ext4_split_unwritten_extents(handle_t *handle,  		if (err)  			goto out; -		allocated = max_blocks; +		allocated = map->m_len;  	}  	/*  	 * If there was a change of depth as part of the @@ -3042,15 +3076,15 @@ static int ext4_split_unwritten_extents(handle_t *handle,  	 */  	if (ex1 && ex1 != ex) {  		ex1 = ex; -		ex1->ee_len = cpu_to_le16(iblock - ee_block); +		ex1->ee_len = cpu_to_le16(map->m_lblk - ee_block);  		ext4_ext_mark_uninitialized(ex1);  		ex2 = &newex;  	}  	/* -	 * ex2: iblock to iblock + maxblocks-1 : to be direct IO written, -	 * uninitialised still. +	 * ex2: map->m_lblk to map->m_lblk + map->m_len-1 : to be written +	 * using direct I/O, uninitialised still.  	 */ -	ex2->ee_block = cpu_to_le32(iblock); +	ex2->ee_block = cpu_to_le32(map->m_lblk);  	ext4_ext_store_pblock(ex2, newblock);  	ex2->ee_len = cpu_to_le16(allocated);  	ext4_ext_mark_uninitialized(ex2); @@ -3062,7 +3096,7 @@ static int ext4_split_unwritten_extents(handle_t *handle,  	goto out;  insert:  	err = ext4_ext_insert_extent(handle, inode, path, &newex, flags); -	if (err == -ENOSPC) { +	if (err == -ENOSPC && may_zeroout) {  		err =  ext4_ext_zeroout(inode, &orig_ex);  		if (err)  			goto fix_extent_len; @@ -3152,10 +3186,9 @@ static void unmap_underlying_metadata_blocks(struct block_device *bdev,  static int  ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, -			ext4_lblk_t iblock, unsigned int max_blocks, +			struct ext4_map_blocks *map,  			struct ext4_ext_path *path, int flags, -			unsigned int allocated, struct buffer_head *bh_result, -			ext4_fsblk_t newblock) +			unsigned int allocated, ext4_fsblk_t newblock)  {  	int ret = 0;  	int err = 0; @@ -3163,15 +3196,14 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,  	ext_debug("ext4_ext_handle_uninitialized_extents: inode %lu, logical"  		  "block %llu, max_blocks %u, flags %d, allocated %u", -		  inode->i_ino, (unsigned long long)iblock, max_blocks, +		  inode->i_ino, (unsigned long long)map->m_lblk, map->m_len,  		  flags, allocated);  	ext4_ext_show_leaf(inode, path);  	/* get_block() before submit the IO, split the extent */  	if ((flags & EXT4_GET_BLOCKS_PRE_IO)) { -		ret = ext4_split_unwritten_extents(handle, -						inode, path, iblock, -						max_blocks, flags); +		ret = ext4_split_unwritten_extents(handle, inode, map, +						   path, flags);  		/*  		 * Flag the inode(non aio case) or end_io struct (aio case)  		 * that this IO needs to convertion to written when IO is @@ -3182,7 +3214,7 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,  		else  			ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN);  		if (ext4_should_dioread_nolock(inode)) -			set_buffer_uninit(bh_result); +			map->m_flags |= EXT4_MAP_UNINIT;  		goto out;  	}  	/* IO end_io complete, convert the filled extent to written */ @@ -3210,14 +3242,12 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,  		 * the buffer head will be unmapped so that  		 * a read from the block returns 0s.  		 */ -		set_buffer_unwritten(bh_result); +		map->m_flags |= EXT4_MAP_UNWRITTEN;  		goto out1;  	}  	/* buffered write, writepage time, convert*/ -	ret = ext4_ext_convert_to_initialized(handle, inode, -						path, iblock, -						max_blocks); +	ret = ext4_ext_convert_to_initialized(handle, inode, map, path);  	if (ret >= 0)  		ext4_update_inode_fsync_trans(handle, inode, 1);  out: @@ -3226,7 +3256,7 @@ out:  		goto out2;  	} else  		allocated = ret; -	set_buffer_new(bh_result); +	map->m_flags |= EXT4_MAP_NEW;  	/*  	 * if we allocated more blocks than requested  	 * we need to make sure we unmap the extra block @@ -3234,11 +3264,11 @@ out:  	 * unmapped later when we find the buffer_head marked  	 * new.  	 */ -	if (allocated > max_blocks) { +	if (allocated > map->m_len) {  		unmap_underlying_metadata_blocks(inode->i_sb->s_bdev, -					newblock + max_blocks, -					allocated - max_blocks); -		allocated = max_blocks; +					newblock + map->m_len, +					allocated - map->m_len); +		allocated = map->m_len;  	}  	/* @@ -3252,13 +3282,13 @@ out:  		ext4_da_update_reserve_space(inode, allocated, 0);  map_out: -	set_buffer_mapped(bh_result); +	map->m_flags |= EXT4_MAP_MAPPED;  out1: -	if (allocated > max_blocks) -		allocated = max_blocks; +	if (allocated > map->m_len) +		allocated = map->m_len;  	ext4_ext_show_leaf(inode, path); -	bh_result->b_bdev = inode->i_sb->s_bdev; -	bh_result->b_blocknr = newblock; +	map->m_pblk = newblock; +	map->m_len = allocated;  out2:  	if (path) {  		ext4_ext_drop_refs(path); @@ -3284,26 +3314,23 @@ out2:   *   * return < 0, error case.   */ -int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, -			ext4_lblk_t iblock, -			unsigned int max_blocks, struct buffer_head *bh_result, -			int flags) +int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, +			struct ext4_map_blocks *map, int flags)  {  	struct ext4_ext_path *path = NULL;  	struct ext4_extent_header *eh;  	struct ext4_extent newex, *ex, *last_ex;  	ext4_fsblk_t newblock; -	int err = 0, depth, ret, cache_type; +	int i, err = 0, depth, ret, cache_type;  	unsigned int allocated = 0;  	struct ext4_allocation_request ar;  	ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio; -	__clear_bit(BH_New, &bh_result->b_state);  	ext_debug("blocks %u/%u requested for inode %lu\n", -			iblock, max_blocks, inode->i_ino); +		  map->m_lblk, map->m_len, inode->i_ino);  	/* check in cache */ -	cache_type = ext4_ext_in_cache(inode, iblock, &newex); +	cache_type = ext4_ext_in_cache(inode, map->m_lblk, &newex);  	if (cache_type) {  		if (cache_type == EXT4_EXT_CACHE_GAP) {  			if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) { @@ -3316,12 +3343,12 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,  			/* we should allocate requested block */  		} else if (cache_type == EXT4_EXT_CACHE_EXTENT) {  			/* block is already allocated */ -			newblock = iblock +			newblock = map->m_lblk  				   - le32_to_cpu(newex.ee_block)  				   + ext_pblock(&newex);  			/* number of remaining blocks in the extent */  			allocated = ext4_ext_get_actual_len(&newex) - -					(iblock - le32_to_cpu(newex.ee_block)); +				(map->m_lblk - le32_to_cpu(newex.ee_block));  			goto out;  		} else {  			BUG(); @@ -3329,7 +3356,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,  	}  	/* find extent for this block */ -	path = ext4_ext_find_extent(inode, iblock, NULL); +	path = ext4_ext_find_extent(inode, map->m_lblk, NULL);  	if (IS_ERR(path)) {  		err = PTR_ERR(path);  		path = NULL; @@ -3345,8 +3372,9 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,  	 */  	if (unlikely(path[depth].p_ext == NULL && depth != 0)) {  		EXT4_ERROR_INODE(inode, "bad extent address " -				 "iblock: %d, depth: %d pblock %lld", -				 iblock, depth, path[depth].p_block); +				 "lblock: %lu, depth: %d pblock %lld", +				 (unsigned long) map->m_lblk, depth, +				 path[depth].p_block);  		err = -EIO;  		goto out2;  	} @@ -3364,12 +3392,12 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,  		 */  		ee_len = ext4_ext_get_actual_len(ex);  		/* if found extent covers block, simply return it */ -		if (in_range(iblock, ee_block, ee_len)) { -			newblock = iblock - ee_block + ee_start; +		if (in_range(map->m_lblk, ee_block, ee_len)) { +			newblock = map->m_lblk - ee_block + ee_start;  			/* number of remaining blocks in the extent */ -			allocated = ee_len - (iblock - ee_block); -			ext_debug("%u fit into %u:%d -> %llu\n", iblock, -					ee_block, ee_len, newblock); +			allocated = ee_len - (map->m_lblk - ee_block); +			ext_debug("%u fit into %u:%d -> %llu\n", map->m_lblk, +				  ee_block, ee_len, newblock);  			/* Do not put uninitialized extent in the cache */  			if (!ext4_ext_is_uninitialized(ex)) { @@ -3379,8 +3407,8 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,  				goto out;  			}  			ret = ext4_ext_handle_uninitialized_extents(handle, -					inode, iblock, max_blocks, path, -					flags, allocated, bh_result, newblock); +					inode, map, path, flags, allocated, +					newblock);  			return ret;  		}  	} @@ -3394,7 +3422,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,  		 * put just found gap into cache to speed up  		 * subsequent requests  		 */ -		ext4_ext_put_gap_in_cache(inode, path, iblock); +		ext4_ext_put_gap_in_cache(inode, path, map->m_lblk);  		goto out2;  	}  	/* @@ -3402,11 +3430,11 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,  	 */  	/* find neighbour allocated blocks */ -	ar.lleft = iblock; +	ar.lleft = map->m_lblk;  	err = ext4_ext_search_left(inode, path, &ar.lleft, &ar.pleft);  	if (err)  		goto out2; -	ar.lright = iblock; +	ar.lright = map->m_lblk;  	err = ext4_ext_search_right(inode, path, &ar.lright, &ar.pright);  	if (err)  		goto out2; @@ -3417,26 +3445,26 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,  	 * EXT_INIT_MAX_LEN and for an uninitialized extent this limit is  	 * EXT_UNINIT_MAX_LEN.  	 */ -	if (max_blocks > EXT_INIT_MAX_LEN && +	if (map->m_len > EXT_INIT_MAX_LEN &&  	    !(flags & EXT4_GET_BLOCKS_UNINIT_EXT)) -		max_blocks = EXT_INIT_MAX_LEN; -	else if (max_blocks > EXT_UNINIT_MAX_LEN && +		map->m_len = EXT_INIT_MAX_LEN; +	else if (map->m_len > EXT_UNINIT_MAX_LEN &&  		 (flags & EXT4_GET_BLOCKS_UNINIT_EXT)) -		max_blocks = EXT_UNINIT_MAX_LEN; +		map->m_len = EXT_UNINIT_MAX_LEN; -	/* Check if we can really insert (iblock)::(iblock+max_blocks) extent */ -	newex.ee_block = cpu_to_le32(iblock); -	newex.ee_len = cpu_to_le16(max_blocks); +	/* Check if we can really insert (m_lblk)::(m_lblk + m_len) extent */ +	newex.ee_block = cpu_to_le32(map->m_lblk); +	newex.ee_len = cpu_to_le16(map->m_len);  	err = ext4_ext_check_overlap(inode, &newex, path);  	if (err)  		allocated = ext4_ext_get_actual_len(&newex);  	else -		allocated = max_blocks; +		allocated = map->m_len;  	/* allocate new block */  	ar.inode = inode; -	ar.goal = ext4_ext_find_goal(inode, path, iblock); -	ar.logical = iblock; +	ar.goal = ext4_ext_find_goal(inode, path, map->m_lblk); +	ar.logical = map->m_lblk;  	ar.len = allocated;  	if (S_ISREG(inode->i_mode))  		ar.flags = EXT4_MB_HINT_DATA; @@ -3470,21 +3498,33 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,  						     EXT4_STATE_DIO_UNWRITTEN);  		}  		if (ext4_should_dioread_nolock(inode)) -			set_buffer_uninit(bh_result); +			map->m_flags |= EXT4_MAP_UNINIT;  	} -	if (unlikely(EXT4_I(inode)->i_flags & EXT4_EOFBLOCKS_FL)) { +	if (unlikely(ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS))) {  		if (unlikely(!eh->eh_entries)) {  			EXT4_ERROR_INODE(inode, -					 "eh->eh_entries == 0 ee_block %d", -					 ex->ee_block); +					 "eh->eh_entries == 0 and " +					 "EOFBLOCKS_FL set");  			err = -EIO;  			goto out2;  		}  		last_ex = EXT_LAST_EXTENT(eh); -		if (iblock + ar.len > le32_to_cpu(last_ex->ee_block) -		    + ext4_ext_get_actual_len(last_ex)) -			EXT4_I(inode)->i_flags &= ~EXT4_EOFBLOCKS_FL; +		/* +		 * If the current leaf block was reached by looking at +		 * the last index block all the way down the tree, and +		 * we are extending the inode beyond the last extent +		 * in the current leaf block, then clear the +		 * EOFBLOCKS_FL flag. +		 */ +		for (i = depth-1; i >= 0; i--) { +			if (path[i].p_idx != EXT_LAST_INDEX(path[i].p_hdr)) +				break; +		} +		if ((i < 0) && +		    (map->m_lblk + ar.len > le32_to_cpu(last_ex->ee_block) + +		     ext4_ext_get_actual_len(last_ex))) +			ext4_clear_inode_flag(inode, EXT4_INODE_EOFBLOCKS);  	}  	err = ext4_ext_insert_extent(handle, inode, path, &newex, flags);  	if (err) { @@ -3500,9 +3540,9 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,  	/* previous routine could use block we allocated */  	newblock = ext_pblock(&newex);  	allocated = ext4_ext_get_actual_len(&newex); -	if (allocated > max_blocks) -		allocated = max_blocks; -	set_buffer_new(bh_result); +	if (allocated > map->m_len) +		allocated = map->m_len; +	map->m_flags |= EXT4_MAP_NEW;  	/*  	 * Update reserved blocks/metadata blocks after successful @@ -3516,18 +3556,18 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,  	 * when it is _not_ an uninitialized extent.  	 */  	if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0) { -		ext4_ext_put_in_cache(inode, iblock, allocated, newblock, +		ext4_ext_put_in_cache(inode, map->m_lblk, allocated, newblock,  						EXT4_EXT_CACHE_EXTENT);  		ext4_update_inode_fsync_trans(handle, inode, 1);  	} else  		ext4_update_inode_fsync_trans(handle, inode, 0);  out: -	if (allocated > max_blocks) -		allocated = max_blocks; +	if (allocated > map->m_len) +		allocated = map->m_len;  	ext4_ext_show_leaf(inode, path); -	set_buffer_mapped(bh_result); -	bh_result->b_bdev = inode->i_sb->s_bdev; -	bh_result->b_blocknr = newblock; +	map->m_flags |= EXT4_MAP_MAPPED; +	map->m_pblk = newblock; +	map->m_len = allocated;  out2:  	if (path) {  		ext4_ext_drop_refs(path); @@ -3625,7 +3665,7 @@ static void ext4_falloc_update_inode(struct inode *inode,  		 * can proceed even if the new size is the same as i_size.  		 */  		if (new_size > i_size_read(inode)) -			EXT4_I(inode)->i_flags |= EXT4_EOFBLOCKS_FL; +			ext4_set_inode_flag(inode, EXT4_INODE_EOFBLOCKS);  	}  } @@ -3640,55 +3680,57 @@ static void ext4_falloc_update_inode(struct inode *inode,  long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len)  {  	handle_t *handle; -	ext4_lblk_t block;  	loff_t new_size;  	unsigned int max_blocks;  	int ret = 0;  	int ret2 = 0;  	int retries = 0; -	struct buffer_head map_bh; +	struct ext4_map_blocks map;  	unsigned int credits, blkbits = inode->i_blkbits;  	/*  	 * currently supporting (pre)allocate mode for extent-based  	 * files _only_  	 */ -	if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) +	if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))  		return -EOPNOTSUPP;  	/* preallocation to directories is currently not supported */  	if (S_ISDIR(inode->i_mode))  		return -ENODEV; -	block = offset >> blkbits; +	map.m_lblk = offset >> blkbits;  	/*  	 * We can't just convert len to max_blocks because  	 * If blocksize = 4096 offset = 3072 and len = 2048  	 */  	max_blocks = (EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits) -							- block; +		- map.m_lblk;  	/*  	 * credits to insert 1 extent into extent tree  	 */  	credits = ext4_chunk_trans_blocks(inode, max_blocks);  	mutex_lock(&inode->i_mutex); +	ret = inode_newsize_ok(inode, (len + offset)); +	if (ret) { +		mutex_unlock(&inode->i_mutex); +		return ret; +	}  retry:  	while (ret >= 0 && ret < max_blocks) { -		block = block + ret; -		max_blocks = max_blocks - ret; +		map.m_lblk = map.m_lblk + ret; +		map.m_len = max_blocks = max_blocks - ret;  		handle = ext4_journal_start(inode, credits);  		if (IS_ERR(handle)) {  			ret = PTR_ERR(handle);  			break;  		} -		map_bh.b_state = 0; -		ret = ext4_get_blocks(handle, inode, block, -				      max_blocks, &map_bh, +		ret = ext4_map_blocks(handle, inode, &map,  				      EXT4_GET_BLOCKS_CREATE_UNINIT_EXT);  		if (ret <= 0) {  #ifdef EXT4FS_DEBUG  			WARN_ON(ret <= 0); -			printk(KERN_ERR "%s: ext4_ext_get_blocks " +			printk(KERN_ERR "%s: ext4_ext_map_blocks "  				    "returned error inode#%lu, block=%u, "  				    "max_blocks=%u", __func__,  				    inode->i_ino, block, max_blocks); @@ -3697,14 +3739,14 @@ retry:  			ret2 = ext4_journal_stop(handle);  			break;  		} -		if ((block + ret) >= (EXT4_BLOCK_ALIGN(offset + len, +		if ((map.m_lblk + ret) >= (EXT4_BLOCK_ALIGN(offset + len,  						blkbits) >> blkbits))  			new_size = offset + len;  		else -			new_size = (block + ret) << blkbits; +			new_size = (map.m_lblk + ret) << blkbits;  		ext4_falloc_update_inode(inode, mode, new_size, -						buffer_new(&map_bh)); +					 (map.m_flags & EXT4_MAP_NEW));  		ext4_mark_inode_dirty(handle, inode);  		ret2 = ext4_journal_stop(handle);  		if (ret2) @@ -3733,42 +3775,39 @@ int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset,  				    ssize_t len)  {  	handle_t *handle; -	ext4_lblk_t block;  	unsigned int max_blocks;  	int ret = 0;  	int ret2 = 0; -	struct buffer_head map_bh; +	struct ext4_map_blocks map;  	unsigned int credits, blkbits = inode->i_blkbits; -	block = offset >> blkbits; +	map.m_lblk = offset >> blkbits;  	/*  	 * We can't just convert len to max_blocks because  	 * If blocksize = 4096 offset = 3072 and len = 2048  	 */ -	max_blocks = (EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits) -							- block; +	max_blocks = ((EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits) - +		      map.m_lblk);  	/*  	 * credits to insert 1 extent into extent tree  	 */  	credits = ext4_chunk_trans_blocks(inode, max_blocks);  	while (ret >= 0 && ret < max_blocks) { -		block = block + ret; -		max_blocks = max_blocks - ret; +		map.m_lblk += ret; +		map.m_len = (max_blocks -= ret);  		handle = ext4_journal_start(inode, credits);  		if (IS_ERR(handle)) {  			ret = PTR_ERR(handle);  			break;  		} -		map_bh.b_state = 0; -		ret = ext4_get_blocks(handle, inode, block, -				      max_blocks, &map_bh, +		ret = ext4_map_blocks(handle, inode, &map,  				      EXT4_GET_BLOCKS_IO_CONVERT_EXT);  		if (ret <= 0) {  			WARN_ON(ret <= 0); -			printk(KERN_ERR "%s: ext4_ext_get_blocks " +			printk(KERN_ERR "%s: ext4_ext_map_blocks "  				    "returned error inode#%lu, block=%u, "  				    "max_blocks=%u", __func__, -				    inode->i_ino, block, max_blocks); +				    inode->i_ino, map.m_lblk, map.m_len);  		}  		ext4_mark_inode_dirty(handle, inode);  		ret2 = ext4_journal_stop(handle); @@ -3898,7 +3937,7 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,  	int error = 0;  	/* fallback to generic here if not in extents fmt */ -	if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) +	if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))  		return generic_block_fiemap(inode, fieinfo, start, len,  			ext4_get_block);  | 
