From d47992f86b307985b3215bcf141d56d1849d71df Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Tue, 21 May 2013 23:17:23 -0400 Subject: mm: change invalidatepage prototype to accept length Currently there is no way to truncate partial page where the end truncate point is not at the end of the page. This is because it was not needed and the functionality was enough for file system truncate operation to work properly. However more file systems now support punch hole feature and it can benefit from mm supporting truncating page just up to the certain point. Specifically, with this functionality truncate_inode_pages_range() can be changed so it supports truncating partial page at the end of the range (currently it will BUG_ON() if 'end' is not at the end of the page). This commit changes the invalidatepage() address space operation prototype to accept range to be invalidated and update all the instances for it. We also change the block_invalidatepage() in the same way and actually make a use of the new length argument implementing range invalidation. Actual file system implementations will follow except the file systems where the changes are really simple and should not change the behaviour in any way .Implementation for truncate_page_range() which will be able to accept page unaligned ranges will follow as well. Signed-off-by: Lukas Czerner Cc: Andrew Morton Cc: Hugh Dickins --- fs/btrfs/extent_io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/extent_io.c') diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index e7e7afb4a872..6bca9472f313 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2957,7 +2957,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, pg_offset = i_size & (PAGE_CACHE_SIZE - 1); if (page->index > end_index || (page->index == end_index && !pg_offset)) { - page->mapping->a_ops->invalidatepage(page, 0); + page->mapping->a_ops->invalidatepage(page, 0, PAGE_CACHE_SIZE); unlock_page(page); return 0; } -- cgit v1.2.3-70-g09d2 From 8d599ae1bfc035c38660cdc1a756ae4150d25e01 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Tue, 30 Apr 2013 15:22:23 +0000 Subject: btrfs: add debug check for extent_io range alignment The 'end' value must exactly cover the end of the interval, which means one byte less than the expected block alignment, or in case of a file smaller than one block, one byte less than the inode size. Signed-off-by: David Sterba Signed-off-by: Josef Bacik --- fs/btrfs/extent_io.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'fs/btrfs/extent_io.c') diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index b306b3a88fc7..03ca3ab95898 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -77,10 +77,29 @@ void btrfs_leak_debug_check(void) kmem_cache_free(extent_buffer_cache, eb); } } + +#define btrfs_debug_check_extent_io_range(inode, start, end) \ + __btrfs_debug_check_extent_io_range(__func__, (inode), (start), (end)) +static inline void __btrfs_debug_check_extent_io_range(const char *caller, + struct inode *inode, u64 start, u64 end) +{ + u64 isize = i_size_read(inode); + + if (end >= PAGE_SIZE && (end % 2) == 0 && end != isize - 1) { + printk_ratelimited(KERN_DEBUG + "btrfs: %s: ino %llu isize %llu odd range [%llu,%llu]\n", + caller, + (unsigned long long)btrfs_ino(inode), + (unsigned long long)isize, + (unsigned long long)start, + (unsigned long long)end); + } +} #else #define btrfs_leak_debug_add(new, head) do {} while (0) #define btrfs_leak_debug_del(entry) do {} while (0) #define btrfs_leak_debug_check() do {} while (0) +#define btrfs_debug_check_extent_io_range(c, s, e) do {} while (0) #endif #define BUFFER_LRU_MAX 64 @@ -522,6 +541,8 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int err; int clear = 0; + btrfs_debug_check_extent_io_range(tree->mapping->host, start, end); + if (delete) bits |= ~EXTENT_CTLBITS; bits |= EXTENT_FIRST_DELALLOC; @@ -677,6 +698,8 @@ static void wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, struct extent_state *state; struct rb_node *node; + btrfs_debug_check_extent_io_range(tree->mapping->host, start, end); + spin_lock(&tree->lock); again: while (1) { @@ -769,6 +792,8 @@ __set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, u64 last_start; u64 last_end; + btrfs_debug_check_extent_io_range(tree->mapping->host, start, end); + bits |= EXTENT_FIRST_DELALLOC; again: if (!prealloc && (mask & __GFP_WAIT)) { @@ -989,6 +1014,8 @@ int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, u64 last_start; u64 last_end; + btrfs_debug_check_extent_io_range(tree->mapping->host, start, end); + again: if (!prealloc && (mask & __GFP_WAIT)) { prealloc = alloc_extent_state(mask); -- cgit v1.2.3-70-g09d2 From a71754fc68f740b7ed46bb83123c63fbbc130611 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 17 Jun 2013 17:14:39 -0400 Subject: Btrfs: move btrfs_truncate_page to btrfs_cont_expand instead of btrfs_truncate This has plagued us forever and I'm so over working around it. When we truncate down to a non-page aligned offset we will call btrfs_truncate_page to zero out the end of the page and write it back to disk, this will keep us from exposing stale data if we truncate back up from that point. The problem with this is it requires data space to do this, and people don't really expect to get ENOSPC from truncate() for these sort of things. This also tends to bite the orphan cleanup stuff too which keeps people from mounting. To get around this we can just move this into btrfs_cont_expand() to make sure if we are truncating up from a non-page size aligned i_size we will zero out the rest of this page so that we don't expose stale data. This will give ENOSPC if you try to truncate() up or if you try to write past the end of isize, which is much more reasonable. This fixes xfstests generic/083 failing to mount because of the orphan cleanup failing. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/extent_io.c | 11 ++++++++++- fs/btrfs/file.c | 21 +++++++++++++++------ fs/btrfs/inode.c | 15 ++++++++++----- 3 files changed, 35 insertions(+), 12 deletions(-) (limited to 'fs/btrfs/extent_io.c') diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 03ca3ab95898..a83d7019ede9 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2477,11 +2477,12 @@ static void end_bio_extent_readpage(struct bio *bio, int err) struct extent_state *cached = NULL; struct extent_state *state; struct btrfs_io_bio *io_bio = btrfs_io_bio(bio); + struct inode *inode = page->mapping->host; pr_debug("end_bio_extent_readpage: bi_sector=%llu, err=%d, " "mirror=%lu\n", (u64)bio->bi_sector, err, io_bio->mirror_num); - tree = &BTRFS_I(page->mapping->host)->io_tree; + tree = &BTRFS_I(inode)->io_tree; /* We always issue full-page reads, but if some block * in a page fails to read, blk_update_request() will @@ -2555,6 +2556,14 @@ static void end_bio_extent_readpage(struct bio *bio, int err) unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC); if (uptodate) { + loff_t i_size = i_size_read(inode); + pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT; + unsigned offset; + + /* Zero out the end if this page straddles i_size */ + offset = i_size & (PAGE_CACHE_SIZE-1); + if (page->index == end_index && offset) + zero_user_segment(page, offset, PAGE_CACHE_SIZE); SetPageUptodate(page); } else { ClearPageUptodate(page); diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 185af15ad9e4..5ffde5603686 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -2173,12 +2173,6 @@ static long btrfs_fallocate(struct file *file, int mode, goto out_reserve_fail; } - /* - * wait for ordered IO before we have any locks. We'll loop again - * below with the locks held. - */ - btrfs_wait_ordered_range(inode, alloc_start, alloc_end - alloc_start); - mutex_lock(&inode->i_mutex); ret = inode_newsize_ok(inode, alloc_end); if (ret) @@ -2189,8 +2183,23 @@ static long btrfs_fallocate(struct file *file, int mode, alloc_start); if (ret) goto out; + } else { + /* + * If we are fallocating from the end of the file onward we + * need to zero out the end of the page if i_size lands in the + * middle of a page. + */ + ret = btrfs_truncate_page(inode, inode->i_size, 0, 0); + if (ret) + goto out; } + /* + * wait for ordered IO before we have any locks. We'll loop again + * below with the locks held. + */ + btrfs_wait_ordered_range(inode, alloc_start, alloc_end - alloc_start); + locked_end = alloc_end - 1; while (1) { struct btrfs_ordered_extent *ordered; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 509112da6118..b7fa96f72ecd 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4253,6 +4253,15 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size) u64 hole_size; int err = 0; + /* + * If our size started in the middle of a page we need to zero out the + * rest of the page before we expand the i_size, otherwise we could + * expose stale data. + */ + err = btrfs_truncate_page(inode, oldsize, 0, 0); + if (err) + return err; + if (size <= hole_start) return 0; @@ -7565,16 +7574,12 @@ static int btrfs_truncate(struct inode *inode) { struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_block_rsv *rsv; - int ret; + int ret = 0; int err = 0; struct btrfs_trans_handle *trans; u64 mask = root->sectorsize - 1; u64 min_size = btrfs_calc_trunc_metadata_size(root, 1); - ret = btrfs_truncate_page(inode, inode->i_size, 0, 0); - if (ret) - return ret; - btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1); btrfs_ordered_update_i_size(inode, inode->i_size, NULL); -- cgit v1.2.3-70-g09d2 From 7ee9e4405f264e9eda808aa5ca4522746a1af9c1 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 21 Jun 2013 16:37:03 -0400 Subject: Btrfs: check if we can nocow if we don't have data space We always just try and reserve data space when we write, but if we are out of space but have prealloc'ed extents we should still successfully write. This patch will try and see if we can write to prealloc'ed space and if we can go ahead and allow the write to continue. With this patch we now pass xfstests generic/274. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/ctree.h | 4 ++ fs/btrfs/extent-tree.c | 1 + fs/btrfs/extent_io.c | 3 ++ fs/btrfs/extent_io.h | 1 + fs/btrfs/file.c | 125 ++++++++++++++++++++++++++++++++++++++++++++----- fs/btrfs/inode.c | 40 ++++++++++------ 6 files changed, 148 insertions(+), 26 deletions(-) (limited to 'fs/btrfs/extent_io.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index b528a5509cb8..e795bf135e80 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3552,6 +3552,10 @@ void btrfs_wait_and_free_delalloc_work(struct btrfs_delalloc_work *work); struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *page, size_t pg_offset, u64 start, u64 len, int create); +noinline int can_nocow_extent(struct btrfs_trans_handle *trans, + struct inode *inode, u64 offset, u64 *len, + u64 *orig_start, u64 *orig_block_len, + u64 *ram_bytes); /* RHEL and EL kernels have a patch that renames PG_checked to FsMisc */ #if defined(ClearPageFsMisc) && !defined(ClearPageChecked) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 5154b91f6380..11ba82e43e8b 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3666,6 +3666,7 @@ void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes) data_sinfo = root->fs_info->data_sinfo; spin_lock(&data_sinfo->lock); + WARN_ON(data_sinfo->bytes_may_use < bytes); data_sinfo->bytes_may_use -= bytes; trace_btrfs_space_reservation(root->fs_info, "space_info", data_sinfo->flags, bytes, 0); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index a83d7019ede9..f8586a957a02 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -543,6 +543,9 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, btrfs_debug_check_extent_io_range(tree->mapping->host, start, end); + if (bits & EXTENT_DELALLOC) + bits |= EXTENT_NORESERVE; + if (delete) bits |= ~EXTENT_CTLBITS; bits |= EXTENT_FIRST_DELALLOC; diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 41fb81e7ec53..3b8c4e26e1da 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -19,6 +19,7 @@ #define EXTENT_FIRST_DELALLOC (1 << 12) #define EXTENT_NEED_WAIT (1 << 13) #define EXTENT_DAMAGED (1 << 14) +#define EXTENT_NORESERVE (1 << 15) #define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) #define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING | EXTENT_FIRST_DELALLOC) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 5ffde5603686..2d70849cec92 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1312,6 +1312,56 @@ fail: } +static noinline int check_can_nocow(struct inode *inode, loff_t pos, + size_t *write_bytes) +{ + struct btrfs_trans_handle *trans; + struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_ordered_extent *ordered; + u64 lockstart, lockend; + u64 num_bytes; + int ret; + + lockstart = round_down(pos, root->sectorsize); + lockend = lockstart + round_up(*write_bytes, root->sectorsize) - 1; + + while (1) { + lock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend); + ordered = btrfs_lookup_ordered_range(inode, lockstart, + lockend - lockstart + 1); + if (!ordered) { + break; + } + unlock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend); + btrfs_start_ordered_extent(inode, ordered, 1); + btrfs_put_ordered_extent(ordered); + } + + trans = btrfs_join_transaction(root); + if (IS_ERR(trans)) { + unlock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend); + return PTR_ERR(trans); + } + + num_bytes = lockend - lockstart + 1; + ret = can_nocow_extent(trans, inode, lockstart, &num_bytes, NULL, NULL, + NULL); + btrfs_end_transaction(trans, root); + if (ret <= 0) { + ret = 0; + } else { + clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend, + EXTENT_DIRTY | EXTENT_DELALLOC | + EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 0, 0, + NULL, GFP_NOFS); + *write_bytes = min_t(size_t, *write_bytes, num_bytes); + } + + unlock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend); + + return ret; +} + static noinline ssize_t __btrfs_buffered_write(struct file *file, struct iov_iter *i, loff_t pos) @@ -1319,10 +1369,12 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, struct inode *inode = file_inode(file); struct btrfs_root *root = BTRFS_I(inode)->root; struct page **pages = NULL; + u64 release_bytes = 0; unsigned long first_index; size_t num_written = 0; int nrptrs; int ret = 0; + bool only_release_metadata = false; bool force_page_uptodate = false; nrptrs = min((iov_iter_count(i) + PAGE_CACHE_SIZE - 1) / @@ -1343,6 +1395,7 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, offset); size_t num_pages = (write_bytes + offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + size_t reserve_bytes; size_t dirty_pages; size_t copied; @@ -1357,11 +1410,41 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, break; } - ret = btrfs_delalloc_reserve_space(inode, - num_pages << PAGE_CACHE_SHIFT); + reserve_bytes = num_pages << PAGE_CACHE_SHIFT; + ret = btrfs_check_data_free_space(inode, reserve_bytes); + if (ret == -ENOSPC && + (BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW | + BTRFS_INODE_PREALLOC))) { + ret = check_can_nocow(inode, pos, &write_bytes); + if (ret > 0) { + only_release_metadata = true; + /* + * our prealloc extent may be smaller than + * write_bytes, so scale down. + */ + num_pages = (write_bytes + offset + + PAGE_CACHE_SIZE - 1) >> + PAGE_CACHE_SHIFT; + reserve_bytes = num_pages << PAGE_CACHE_SHIFT; + ret = 0; + } else { + ret = -ENOSPC; + } + } + if (ret) break; + ret = btrfs_delalloc_reserve_metadata(inode, reserve_bytes); + if (ret) { + if (!only_release_metadata) + btrfs_free_reserved_data_space(inode, + reserve_bytes); + break; + } + + release_bytes = reserve_bytes; + /* * This is going to setup the pages array with the number of * pages we want, so we don't really need to worry about the @@ -1370,11 +1453,8 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, ret = prepare_pages(root, file, pages, num_pages, pos, first_index, write_bytes, force_page_uptodate); - if (ret) { - btrfs_delalloc_release_space(inode, - num_pages << PAGE_CACHE_SHIFT); + if (ret) break; - } copied = btrfs_copy_from_user(pos, num_pages, write_bytes, pages, i); @@ -1404,30 +1484,46 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, * managed to copy. */ if (num_pages > dirty_pages) { + release_bytes = (num_pages - dirty_pages) << + PAGE_CACHE_SHIFT; if (copied > 0) { spin_lock(&BTRFS_I(inode)->lock); BTRFS_I(inode)->outstanding_extents++; spin_unlock(&BTRFS_I(inode)->lock); } - btrfs_delalloc_release_space(inode, - (num_pages - dirty_pages) << - PAGE_CACHE_SHIFT); + if (only_release_metadata) + btrfs_delalloc_release_metadata(inode, + release_bytes); + else + btrfs_delalloc_release_space(inode, + release_bytes); } + release_bytes = dirty_pages << PAGE_CACHE_SHIFT; if (copied > 0) { ret = btrfs_dirty_pages(root, inode, pages, dirty_pages, pos, copied, NULL); if (ret) { - btrfs_delalloc_release_space(inode, - dirty_pages << PAGE_CACHE_SHIFT); btrfs_drop_pages(pages, num_pages); break; } } + release_bytes = 0; btrfs_drop_pages(pages, num_pages); + if (only_release_metadata && copied > 0) { + u64 lockstart = round_down(pos, root->sectorsize); + u64 lockend = lockstart + + (dirty_pages << PAGE_CACHE_SHIFT) - 1; + + set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, + lockend, EXTENT_NORESERVE, NULL, + NULL, GFP_NOFS); + only_release_metadata = false; + } + cond_resched(); balance_dirty_pages_ratelimited(inode->i_mapping); @@ -1440,6 +1536,13 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, kfree(pages); + if (release_bytes) { + if (only_release_metadata) + btrfs_delalloc_release_metadata(inode, release_bytes); + else + btrfs_delalloc_release_space(inode, release_bytes); + } + return num_written ? num_written : ret; } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 8edcdf6910f7..4d7c02258390 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1641,7 +1641,7 @@ static void btrfs_clear_bit_hook(struct inode *inode, btrfs_delalloc_release_metadata(inode, len); if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID - && do_list) + && do_list && !(state->state & EXTENT_NORESERVE)) btrfs_free_reserved_data_space(inode, len); __percpu_counter_add(&root->fs_info->delalloc_bytes, -len, @@ -6396,10 +6396,10 @@ out: * returns 1 when the nocow is safe, < 1 on error, 0 if the * block must be cow'd */ -static noinline int can_nocow_odirect(struct btrfs_trans_handle *trans, - struct inode *inode, u64 offset, u64 *len, - u64 *orig_start, u64 *orig_block_len, - u64 *ram_bytes) +noinline int can_nocow_extent(struct btrfs_trans_handle *trans, + struct inode *inode, u64 offset, u64 *len, + u64 *orig_start, u64 *orig_block_len, + u64 *ram_bytes) { struct btrfs_path *path; int ret; @@ -6413,7 +6413,7 @@ static noinline int can_nocow_odirect(struct btrfs_trans_handle *trans, u64 num_bytes; int slot; int found_type; - + bool nocow = (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW); path = btrfs_alloc_path(); if (!path) return -ENOMEM; @@ -6453,18 +6453,28 @@ static noinline int can_nocow_odirect(struct btrfs_trans_handle *trans, /* not a regular extent, must cow */ goto out; } + + if (!nocow && found_type == BTRFS_FILE_EXTENT_REG) + goto out; + disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); + if (disk_bytenr == 0) + goto out; + + if (btrfs_file_extent_compression(leaf, fi) || + btrfs_file_extent_encryption(leaf, fi) || + btrfs_file_extent_other_encoding(leaf, fi)) + goto out; + backref_offset = btrfs_file_extent_offset(leaf, fi); - *orig_start = key.offset - backref_offset; - *orig_block_len = btrfs_file_extent_disk_num_bytes(leaf, fi); - *ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi); + if (orig_start) { + *orig_start = key.offset - backref_offset; + *orig_block_len = btrfs_file_extent_disk_num_bytes(leaf, fi); + *ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi); + } extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi); - if (extent_end < offset + *len) { - /* extent doesn't include our full range, must cow */ - goto out; - } if (btrfs_extent_readonly(root, disk_bytenr)) goto out; @@ -6708,8 +6718,8 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, if (IS_ERR(trans)) goto must_cow; - if (can_nocow_odirect(trans, inode, start, &len, &orig_start, - &orig_block_len, &ram_bytes) == 1) { + if (can_nocow_extent(trans, inode, start, &len, &orig_start, + &orig_block_len, &ram_bytes) == 1) { if (type == BTRFS_ORDERED_PREALLOC) { free_extent_map(em); em = create_pinned_em(inode, start, len, -- cgit v1.2.3-70-g09d2 From b76bb70136375c32d3b0bbbe2ebef738913d5b90 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 5 Jul 2013 13:52:51 -0400 Subject: Btrfs: do not offset physical if we're compressed xfstest btrfs/276 was freaking out on slower boxes partly because fiemap was offsetting the physical based on the extent offset. This is perfectly fine with uncompressed extents, however the extent offset is into the uncompressed area, not the compressed. So we can return a physical value that isn't at all within the area we have allocated on disk. Fix this by returning the start of the extent if it is compressed no matter what the offset. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/extent_io.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'fs/btrfs/extent_io.c') diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 583d98bd065e..fe443fece851 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -4048,7 +4048,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, } while (!end) { - u64 offset_in_extent; + u64 offset_in_extent = 0; /* break if the extent we found is outside the range */ if (em->start >= max || extent_map_end(em) < off) @@ -4064,9 +4064,12 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, /* * record the offset from the start of the extent - * for adjusting the disk offset below + * for adjusting the disk offset below. Only do this if the + * extent isn't compressed since our in ram offset may be past + * what we have actually allocated on disk. */ - offset_in_extent = em_start - em->start; + if (!test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) + offset_in_extent = em_start - em->start; em_end = extent_map_end(em); em_len = em_end - em_start; emflags = em->flags; -- cgit v1.2.3-70-g09d2