diff options
-rw-r--r-- | fs/btrfs/extent_io.c | 217 | ||||
-rw-r--r-- | fs/btrfs/extent_io.h | 11 | ||||
-rw-r--r-- | fs/btrfs/inode.c | 20 |
3 files changed, 168 insertions, 80 deletions
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 74ba2e1a3927..848dd54535f6 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2449,7 +2449,7 @@ void btrfs_free_io_failure_record(struct btrfs_inode *inode, u64 start, u64 end) } static struct io_failure_record *btrfs_get_io_failure_record(struct inode *inode, - u64 start, u64 end) + u64 start) { struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); struct io_failure_record *failrec; @@ -2457,6 +2457,7 @@ static struct io_failure_record *btrfs_get_io_failure_record(struct inode *inode struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree; struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree; struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; + const u32 sectorsize = fs_info->sectorsize; int ret; u64 logical; @@ -2480,7 +2481,7 @@ static struct io_failure_record *btrfs_get_io_failure_record(struct inode *inode return ERR_PTR(-ENOMEM); failrec->start = start; - failrec->len = end - start + 1; + failrec->len = sectorsize; failrec->this_mirror = 0; failrec->bio_flags = 0; failrec->in_validation = 0; @@ -2519,12 +2520,13 @@ static struct io_failure_record *btrfs_get_io_failure_record(struct inode *inode free_extent_map(em); /* Set the bits in the private failure tree */ - ret = set_extent_bits(failure_tree, start, end, + ret = set_extent_bits(failure_tree, start, start + sectorsize - 1, EXTENT_LOCKED | EXTENT_DIRTY); if (ret >= 0) { ret = set_state_failrec(failure_tree, start, failrec); /* Set the bits in the inode's tree */ - ret = set_extent_bits(tree, start, end, EXTENT_DAMAGED); + ret = set_extent_bits(tree, start, start + sectorsize - 1, + EXTENT_DAMAGED); } else if (ret < 0) { kfree(failrec); return ERR_PTR(ret); @@ -2639,11 +2641,11 @@ static bool btrfs_io_needs_validation(struct inode *inode, struct bio *bio) return false; } -blk_status_t btrfs_submit_read_repair(struct inode *inode, - struct bio *failed_bio, u32 bio_offset, - struct page *page, unsigned int pgoff, - u64 start, u64 end, int failed_mirror, - submit_bio_hook_t *submit_bio_hook) +int btrfs_repair_one_sector(struct inode *inode, + struct bio *failed_bio, u32 bio_offset, + struct page *page, unsigned int pgoff, + u64 start, int failed_mirror, + submit_bio_hook_t *submit_bio_hook) { struct io_failure_record *failrec; struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); @@ -2661,16 +2663,22 @@ blk_status_t btrfs_submit_read_repair(struct inode *inode, BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE); - failrec = btrfs_get_io_failure_record(inode, start, end); + failrec = btrfs_get_io_failure_record(inode, start); if (IS_ERR(failrec)) - return errno_to_blk_status(PTR_ERR(failrec)); - - need_validation = btrfs_io_needs_validation(inode, failed_bio); + return PTR_ERR(failrec); + /* + * We will only submit repair for one sector, thus we don't need + * extra validation anymore. + * + * TODO: All those extra validation related code will be cleaned up + * later. + */ + need_validation = false; if (!btrfs_check_repairable(inode, need_validation, failrec, failed_mirror)) { free_io_failure(failure_tree, tree, failrec); - return BLK_STS_IOERR; + return -EIO; } repair_bio = btrfs_io_bio_alloc(1); @@ -2704,7 +2712,120 @@ blk_status_t btrfs_submit_read_repair(struct inode *inode, free_io_failure(failure_tree, tree, failrec); bio_put(repair_bio); } - return status; + return blk_status_to_errno(status); +} + +static void end_page_read(struct page *page, bool uptodate, u64 start, u32 len) +{ + struct btrfs_fs_info *fs_info = btrfs_sb(page->mapping->host->i_sb); + + ASSERT(page_offset(page) <= start && + start + len <= page_offset(page) + PAGE_SIZE); + + /* + * For subapge metadata case, all btrfs_page_* helpers need page to + * have page::private populated. + * But we can have rare case where the last eb in the page is only + * referred by the IO, and it gets released immedately after it's + * read and verified. + * + * This can detach the page private completely. + * In that case, we can just skip the page status update completely, + * as the page has no eb anymore. + */ + if (fs_info->sectorsize < PAGE_SIZE && unlikely(!PagePrivate(page))) { + ASSERT(!is_data_inode(page->mapping->host)); + return; + } + if (uptodate) { + btrfs_page_set_uptodate(fs_info, page, start, len); + } else { + btrfs_page_clear_uptodate(fs_info, page, start, len); + btrfs_page_set_error(fs_info, page, start, len); + } + + if (fs_info->sectorsize == PAGE_SIZE) + unlock_page(page); + else if (is_data_inode(page->mapping->host)) + /* + * For subpage data, unlock the page if we're the last reader. + * For subpage metadata, page lock is not utilized for read. + */ + btrfs_subpage_end_reader(fs_info, page, start, len); +} + +static blk_status_t submit_read_repair(struct inode *inode, + struct bio *failed_bio, u32 bio_offset, + struct page *page, unsigned int pgoff, + u64 start, u64 end, int failed_mirror, + unsigned int error_bitmap, + submit_bio_hook_t *submit_bio_hook) +{ + struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); + const u32 sectorsize = fs_info->sectorsize; + const int nr_bits = (end + 1 - start) >> fs_info->sectorsize_bits; + int error = 0; + int i; + + BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE); + + /* We're here because we had some read errors or csum mismatch */ + ASSERT(error_bitmap); + + /* + * We only get called on buffered IO, thus page must be mapped and bio + * must not be cloned. + */ + ASSERT(page->mapping && !bio_flagged(failed_bio, BIO_CLONED)); + + /* Iterate through all the sectors in the range */ + for (i = 0; i < nr_bits; i++) { + const unsigned int offset = i * sectorsize; + struct extent_state *cached = NULL; + bool uptodate = false; + int ret; + + if (!(error_bitmap & (1U << i))) { + /* + * This sector has no error, just end the page read + * and unlock the range. + */ + uptodate = true; + goto next; + } + + ret = btrfs_repair_one_sector(inode, failed_bio, + bio_offset + offset, + page, pgoff + offset, start + offset, + failed_mirror, submit_bio_hook); + if (!ret) { + /* + * We have submitted the read repair, the page release + * will be handled by the endio function of the + * submitted repair bio. + * Thus we don't need to do any thing here. + */ + continue; + } + /* + * Repair failed, just record the error but still continue. + * Or the remaining sectors will not be properly unlocked. + */ + if (!error) + error = ret; +next: + end_page_read(page, uptodate, start + offset, sectorsize); + if (uptodate) + set_extent_uptodate(&BTRFS_I(inode)->io_tree, + start + offset, + start + offset + sectorsize - 1, + &cached, GFP_ATOMIC); + unlock_extent_cached_atomic(&BTRFS_I(inode)->io_tree, + start + offset, + start + offset + sectorsize - 1, + &cached); + } + return errno_to_blk_status(error); } /* lots and lots of room for performance fixes in the end_bio funcs */ @@ -2862,30 +2983,6 @@ static void begin_page_read(struct btrfs_fs_info *fs_info, struct page *page) btrfs_subpage_start_reader(fs_info, page, page_offset(page), PAGE_SIZE); } -static void end_page_read(struct page *page, bool uptodate, u64 start, u32 len) -{ - struct btrfs_fs_info *fs_info = btrfs_sb(page->mapping->host->i_sb); - - ASSERT(page_offset(page) <= start && - start + len <= page_offset(page) + PAGE_SIZE); - - if (uptodate) { - btrfs_page_set_uptodate(fs_info, page, start, len); - } else { - btrfs_page_clear_uptodate(fs_info, page, start, len); - btrfs_page_set_error(fs_info, page, start, len); - } - - if (fs_info->sectorsize == PAGE_SIZE) - unlock_page(page); - else if (is_data_inode(page->mapping->host)) - /* - * For subpage data, unlock the page if we're the last reader. - * For subpage metadata, page lock is not utilized for read. - */ - btrfs_subpage_end_reader(fs_info, page, start, len); -} - /* * Find extent buffer for a givne bytenr. * @@ -2929,7 +3026,6 @@ static struct extent_buffer *find_extent_buffer_readpage( static void end_bio_extent_readpage(struct bio *bio) { struct bio_vec *bvec; - int uptodate = !bio->bi_status; struct btrfs_io_bio *io_bio = btrfs_io_bio(bio); struct extent_io_tree *tree, *failure_tree; struct processed_extent processed = { 0 }; @@ -2944,10 +3040,12 @@ static void end_bio_extent_readpage(struct bio *bio) ASSERT(!bio_flagged(bio, BIO_CLONED)); bio_for_each_segment_all(bvec, bio, iter_all) { + bool uptodate = !bio->bi_status; struct page *page = bvec->bv_page; struct inode *inode = page->mapping->host; struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); const u32 sectorsize = fs_info->sectorsize; + unsigned int error_bitmap = (unsigned int)-1; u64 start; u64 end; u32 len; @@ -2982,14 +3080,16 @@ static void end_bio_extent_readpage(struct bio *bio) mirror = io_bio->mirror_num; if (likely(uptodate)) { - if (is_data_inode(inode)) - ret = btrfs_verify_data_csum(io_bio, + if (is_data_inode(inode)) { + error_bitmap = btrfs_verify_data_csum(io_bio, bio_offset, page, start, end); - else + ret = error_bitmap; + } else { ret = btrfs_validate_metadata_buffer(io_bio, page, start, end, mirror); + } if (ret) - uptodate = 0; + uptodate = false; else clean_io_failure(BTRFS_I(inode)->root->fs_info, failure_tree, tree, start, @@ -3001,27 +3101,18 @@ static void end_bio_extent_readpage(struct bio *bio) goto readpage_ok; if (is_data_inode(inode)) { - /* - * The generic bio_readpage_error handles errors the - * following way: If possible, new read requests are - * created and submitted and will end up in - * end_bio_extent_readpage as well (if we're lucky, - * not in the !uptodate case). In that case it returns - * 0 and we just go on with the next page in our bio. - * If it can't handle the error it will return -EIO and - * we remain responsible for that page. + * btrfs_submit_read_repair() will handle all the good + * and bad sectors, we just continue to the next bvec. */ - if (!btrfs_submit_read_repair(inode, bio, bio_offset, - page, - start - page_offset(page), - start, end, mirror, - btrfs_submit_data_bio)) { - uptodate = !bio->bi_status; - ASSERT(bio_offset + len > bio_offset); - bio_offset += len; - continue; - } + submit_read_repair(inode, bio, bio_offset, page, + start - page_offset(page), start, + end, mirror, error_bitmap, + btrfs_submit_data_bio); + + ASSERT(bio_offset + len > bio_offset); + bio_offset += len; + continue; } else { struct extent_buffer *eb; diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 227215a5722c..bbb4ef4d0934 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -296,12 +296,11 @@ struct io_failure_record { int in_validation; }; - -blk_status_t btrfs_submit_read_repair(struct inode *inode, - struct bio *failed_bio, u32 bio_offset, - struct page *page, unsigned int pgoff, - u64 start, u64 end, int failed_mirror, - submit_bio_hook_t *submit_bio_hook); +int btrfs_repair_one_sector(struct inode *inode, + struct bio *failed_bio, u32 bio_offset, + struct page *page, unsigned int pgoff, + u64 start, int failed_mirror, + submit_bio_hook_t *submit_bio_hook); #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS bool find_lock_delalloc_range(struct inode *inode, diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 3211469eb90f..c70ae0a5df92 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -7945,19 +7945,17 @@ static blk_status_t btrfs_check_read_dio_bio(struct inode *inode, btrfs_ino(BTRFS_I(inode)), pgoff); } else { - blk_status_t status; + int ret; ASSERT((start - io_bio->logical) < UINT_MAX); - status = btrfs_submit_read_repair(inode, - &io_bio->bio, - start - io_bio->logical, - bvec.bv_page, pgoff, - start, - start + sectorsize - 1, - io_bio->mirror_num, - submit_dio_repair_bio); - if (status) - err = status; + ret = btrfs_repair_one_sector(inode, + &io_bio->bio, + start - io_bio->logical, + bvec.bv_page, pgoff, + start, io_bio->mirror_num, + submit_dio_repair_bio); + if (ret) + err = errno_to_blk_status(ret); } start += sectorsize; ASSERT(bio_offset + sectorsize > bio_offset); |