diff options
-rw-r--r-- | MAINTAINERS | 6 | ||||
-rw-r--r-- | fs/btrfs/backref.c | 84 | ||||
-rw-r--r-- | fs/btrfs/backref.h | 1 | ||||
-rw-r--r-- | fs/btrfs/block-group.c | 2 | ||||
-rw-r--r-- | fs/btrfs/extent-io-tree.c | 15 | ||||
-rw-r--r-- | fs/btrfs/send.c | 5 | ||||
-rw-r--r-- | fs/btrfs/send.h | 5 | ||||
-rw-r--r-- | fs/erofs/fscache.c | 3 | ||||
-rw-r--r-- | fs/erofs/zdata.c | 17 | ||||
-rw-r--r-- | fs/erofs/zdata.h | 6 | ||||
-rw-r--r-- | fs/erofs/zmap.c | 22 |
11 files changed, 114 insertions, 52 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index 07938eab80dd..ca063a504026 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4459,13 +4459,15 @@ M: Josef Bacik <josef@toxicpanda.com> M: David Sterba <dsterba@suse.com> L: linux-btrfs@vger.kernel.org S: Maintained -W: http://btrfs.wiki.kernel.org/ -Q: http://patchwork.kernel.org/project/linux-btrfs/list/ +W: https://btrfs.readthedocs.io +W: https://btrfs.wiki.kernel.org/ +Q: https://patchwork.kernel.org/project/linux-btrfs/list/ C: irc://irc.libera.chat/btrfs T: git git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux.git F: Documentation/filesystems/btrfs.rst F: fs/btrfs/ F: include/linux/btrfs* +F: include/trace/events/btrfs.h F: include/uapi/linux/btrfs* BTTV VIDEO4LINUX DRIVER diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index dce3a16996b9..4ec18ceb2f21 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -138,6 +138,7 @@ struct share_check { u64 root_objectid; u64 inum; int share_count; + bool have_delayed_delete_refs; }; static inline int extent_is_shared(struct share_check *sc) @@ -820,16 +821,11 @@ static int add_delayed_refs(const struct btrfs_fs_info *fs_info, struct preftrees *preftrees, struct share_check *sc) { struct btrfs_delayed_ref_node *node; - struct btrfs_delayed_extent_op *extent_op = head->extent_op; struct btrfs_key key; - struct btrfs_key tmp_op_key; struct rb_node *n; int count; int ret = 0; - if (extent_op && extent_op->update_key) - btrfs_disk_key_to_cpu(&tmp_op_key, &extent_op->key); - spin_lock(&head->lock); for (n = rb_first_cached(&head->ref_tree); n; n = rb_next(n)) { node = rb_entry(n, struct btrfs_delayed_ref_node, @@ -855,10 +851,16 @@ static int add_delayed_refs(const struct btrfs_fs_info *fs_info, case BTRFS_TREE_BLOCK_REF_KEY: { /* NORMAL INDIRECT METADATA backref */ struct btrfs_delayed_tree_ref *ref; + struct btrfs_key *key_ptr = NULL; + + if (head->extent_op && head->extent_op->update_key) { + btrfs_disk_key_to_cpu(&key, &head->extent_op->key); + key_ptr = &key; + } ref = btrfs_delayed_node_to_tree_ref(node); ret = add_indirect_ref(fs_info, preftrees, ref->root, - &tmp_op_key, ref->level + 1, + key_ptr, ref->level + 1, node->bytenr, count, sc, GFP_ATOMIC); break; @@ -884,13 +886,22 @@ static int add_delayed_refs(const struct btrfs_fs_info *fs_info, key.offset = ref->offset; /* - * Found a inum that doesn't match our known inum, we - * know it's shared. + * If we have a share check context and a reference for + * another inode, we can't exit immediately. This is + * because even if this is a BTRFS_ADD_DELAYED_REF + * reference we may find next a BTRFS_DROP_DELAYED_REF + * which cancels out this ADD reference. + * + * If this is a DROP reference and there was no previous + * ADD reference, then we need to signal that when we + * process references from the extent tree (through + * add_inline_refs() and add_keyed_refs()), we should + * not exit early if we find a reference for another + * inode, because one of the delayed DROP references + * may cancel that reference in the extent tree. */ - if (sc && sc->inum && ref->objectid != sc->inum) { - ret = BACKREF_FOUND_SHARED; - goto out; - } + if (sc && count < 0) + sc->have_delayed_delete_refs = true; ret = add_indirect_ref(fs_info, preftrees, ref->root, &key, 0, node->bytenr, count, sc, @@ -920,7 +931,7 @@ static int add_delayed_refs(const struct btrfs_fs_info *fs_info, } if (!ret) ret = extent_is_shared(sc); -out: + spin_unlock(&head->lock); return ret; } @@ -1023,7 +1034,8 @@ static int add_inline_refs(const struct btrfs_fs_info *fs_info, key.type = BTRFS_EXTENT_DATA_KEY; key.offset = btrfs_extent_data_ref_offset(leaf, dref); - if (sc && sc->inum && key.objectid != sc->inum) { + if (sc && sc->inum && key.objectid != sc->inum && + !sc->have_delayed_delete_refs) { ret = BACKREF_FOUND_SHARED; break; } @@ -1033,6 +1045,7 @@ static int add_inline_refs(const struct btrfs_fs_info *fs_info, ret = add_indirect_ref(fs_info, preftrees, root, &key, 0, bytenr, count, sc, GFP_NOFS); + break; } default: @@ -1122,7 +1135,8 @@ static int add_keyed_refs(struct btrfs_root *extent_root, key.type = BTRFS_EXTENT_DATA_KEY; key.offset = btrfs_extent_data_ref_offset(leaf, dref); - if (sc && sc->inum && key.objectid != sc->inum) { + if (sc && sc->inum && key.objectid != sc->inum && + !sc->have_delayed_delete_refs) { ret = BACKREF_FOUND_SHARED; break; } @@ -1522,6 +1536,9 @@ static bool lookup_backref_shared_cache(struct btrfs_backref_shared_cache *cache { struct btrfs_backref_shared_cache_entry *entry; + if (!cache->use_cache) + return false; + if (WARN_ON_ONCE(level >= BTRFS_MAX_LEVEL)) return false; @@ -1557,6 +1574,19 @@ static bool lookup_backref_shared_cache(struct btrfs_backref_shared_cache *cache return false; *is_shared = entry->is_shared; + /* + * If the node at this level is shared, than all nodes below are also + * shared. Currently some of the nodes below may be marked as not shared + * because we have just switched from one leaf to another, and switched + * also other nodes above the leaf and below the current level, so mark + * them as shared. + */ + if (*is_shared) { + for (int i = 0; i < level; i++) { + cache->entries[i].is_shared = true; + cache->entries[i].gen = entry->gen; + } + } return true; } @@ -1573,6 +1603,9 @@ static void store_backref_shared_cache(struct btrfs_backref_shared_cache *cache, struct btrfs_backref_shared_cache_entry *entry; u64 gen; + if (!cache->use_cache) + return; + if (WARN_ON_ONCE(level >= BTRFS_MAX_LEVEL)) return; @@ -1648,6 +1681,7 @@ int btrfs_is_data_extent_shared(struct btrfs_root *root, u64 inum, u64 bytenr, .root_objectid = root->root_key.objectid, .inum = inum, .share_count = 0, + .have_delayed_delete_refs = false, }; int level; @@ -1669,6 +1703,7 @@ int btrfs_is_data_extent_shared(struct btrfs_root *root, u64 inum, u64 bytenr, /* -1 means we are in the bytenr of the data extent. */ level = -1; ULIST_ITER_INIT(&uiter); + cache->use_cache = true; while (1) { bool is_shared; bool cached; @@ -1698,6 +1733,24 @@ int btrfs_is_data_extent_shared(struct btrfs_root *root, u64 inum, u64 bytenr, extent_gen > btrfs_root_last_snapshot(&root->root_item)) break; + /* + * If our data extent was not directly shared (without multiple + * reference items), than it might have a single reference item + * with a count > 1 for the same offset, which means there are 2 + * (or more) file extent items that point to the data extent - + * this happens when a file extent item needs to be split and + * then one item gets moved to another leaf due to a b+tree leaf + * split when inserting some item. In this case the file extent + * items may be located in different leaves and therefore some + * of the leaves may be referenced through shared subtrees while + * others are not. Since our extent buffer cache only works for + * a single path (by far the most common case and simpler to + * deal with), we can not use it if we have multiple leaves + * (which implies multiple paths). + */ + if (level == -1 && tmp->nnodes > 1) + cache->use_cache = false; + if (level >= 0) store_backref_shared_cache(cache, root, bytenr, level, false); @@ -1713,6 +1766,7 @@ int btrfs_is_data_extent_shared(struct btrfs_root *root, u64 inum, u64 bytenr, break; } shared.share_count = 0; + shared.have_delayed_delete_refs = false; cond_resched(); } diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h index 52ae6957b414..8e69584d538d 100644 --- a/fs/btrfs/backref.h +++ b/fs/btrfs/backref.h @@ -29,6 +29,7 @@ struct btrfs_backref_shared_cache { * a given data extent should never exceed the maximum b+tree height. */ struct btrfs_backref_shared_cache_entry entries[BTRFS_MAX_LEVEL]; + bool use_cache; }; typedef int (iterate_extent_inodes_t)(u64 inum, u64 offset, u64 root, diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index 32c415cfbdfe..deebc8ddbd93 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -774,10 +774,8 @@ int btrfs_cache_block_group(struct btrfs_block_group *cache, bool wait) btrfs_queue_work(fs_info->caching_workers, &caching_ctl->work); out: - /* REVIEW */ if (wait && caching_ctl) ret = btrfs_caching_ctl_wait_done(cache, caching_ctl); - /* wait_event(caching_ctl->wait, space_cache_v1_done(cache)); */ if (caching_ctl) btrfs_put_caching_control(caching_ctl); diff --git a/fs/btrfs/extent-io-tree.c b/fs/btrfs/extent-io-tree.c index 618275af19c4..83cb0378096f 100644 --- a/fs/btrfs/extent-io-tree.c +++ b/fs/btrfs/extent-io-tree.c @@ -1641,16 +1641,17 @@ int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, int err; u64 failed_start; - while (1) { + err = __set_extent_bit(tree, start, end, EXTENT_LOCKED, &failed_start, + cached_state, NULL, GFP_NOFS); + while (err == -EEXIST) { + if (failed_start != start) + clear_extent_bit(tree, start, failed_start - 1, + EXTENT_LOCKED, cached_state); + + wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED); err = __set_extent_bit(tree, start, end, EXTENT_LOCKED, &failed_start, cached_state, NULL, GFP_NOFS); - if (err == -EEXIST) { - wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED); - start = failed_start; - } else - break; - WARN_ON(start > end); } return err; } diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 4ef4167072b8..ec6e1752af2c 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -348,6 +348,7 @@ static bool proto_cmd_ok(const struct send_ctx *sctx, int cmd) switch (sctx->proto) { case 1: return cmd <= BTRFS_SEND_C_MAX_V1; case 2: return cmd <= BTRFS_SEND_C_MAX_V2; + case 3: return cmd <= BTRFS_SEND_C_MAX_V3; default: return false; } } @@ -6469,7 +6470,9 @@ static int finish_inode_if_needed(struct send_ctx *sctx, int at_end) if (ret < 0) goto out; } - if (sctx->cur_inode_needs_verity) { + + if (proto_cmd_ok(sctx, BTRFS_SEND_C_ENABLE_VERITY) + && sctx->cur_inode_needs_verity) { ret = process_verity(sctx); if (ret < 0) goto out; diff --git a/fs/btrfs/send.h b/fs/btrfs/send.h index 0a4537775e0c..f7585cfa7e52 100644 --- a/fs/btrfs/send.h +++ b/fs/btrfs/send.h @@ -10,7 +10,12 @@ #include <linux/types.h> #define BTRFS_SEND_STREAM_MAGIC "btrfs-stream" +/* Conditional support for the upcoming protocol version. */ +#ifdef CONFIG_BTRFS_DEBUG +#define BTRFS_SEND_STREAM_VERSION 3 +#else #define BTRFS_SEND_STREAM_VERSION 2 +#endif /* * In send stream v1, no command is larger than 64K. In send stream v2, no limit diff --git a/fs/erofs/fscache.c b/fs/erofs/fscache.c index 998cd26a1b3b..fe05bc51f9f2 100644 --- a/fs/erofs/fscache.c +++ b/fs/erofs/fscache.c @@ -590,14 +590,17 @@ struct erofs_fscache *erofs_domain_register_cookie(struct super_block *sb, struct super_block *psb = erofs_pseudo_mnt->mnt_sb; mutex_lock(&erofs_domain_cookies_lock); + spin_lock(&psb->s_inode_list_lock); list_for_each_entry(inode, &psb->s_inodes, i_sb_list) { ctx = inode->i_private; if (!ctx || ctx->domain != domain || strcmp(ctx->name, name)) continue; igrab(inode); + spin_unlock(&psb->s_inode_list_lock); mutex_unlock(&erofs_domain_cookies_lock); return ctx; } + spin_unlock(&psb->s_inode_list_lock); ctx = erofs_fscache_domain_init_cookie(sb, name, need_inode); mutex_unlock(&erofs_domain_cookies_lock); return ctx; diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c index 559380a535af..c7f24fc7efd5 100644 --- a/fs/erofs/zdata.c +++ b/fs/erofs/zdata.c @@ -813,15 +813,14 @@ retry: ++spiltted; if (fe->pcl->pageofs_out != (map->m_la & ~PAGE_MASK)) fe->pcl->multibases = true; - - if ((map->m_flags & EROFS_MAP_FULL_MAPPED) && - !(map->m_flags & EROFS_MAP_PARTIAL_REF) && - fe->pcl->length == map->m_llen) - fe->pcl->partial = false; if (fe->pcl->length < offset + end - map->m_la) { fe->pcl->length = offset + end - map->m_la; fe->pcl->pageofs_out = map->m_la & ~PAGE_MASK; } + if ((map->m_flags & EROFS_MAP_FULL_MAPPED) && + !(map->m_flags & EROFS_MAP_PARTIAL_REF) && + fe->pcl->length == map->m_llen) + fe->pcl->partial = false; next_part: /* shorten the remaining extent to update progress */ map->m_llen = offset + cur - map->m_la; @@ -888,15 +887,13 @@ static void z_erofs_do_decompressed_bvec(struct z_erofs_decompress_backend *be, if (!((bvec->offset + be->pcl->pageofs_out) & ~PAGE_MASK)) { unsigned int pgnr; - struct page *oldpage; pgnr = (bvec->offset + be->pcl->pageofs_out) >> PAGE_SHIFT; DBG_BUGON(pgnr >= be->nr_pages); - oldpage = be->decompressed_pages[pgnr]; - be->decompressed_pages[pgnr] = bvec->page; - - if (!oldpage) + if (!be->decompressed_pages[pgnr]) { + be->decompressed_pages[pgnr] = bvec->page; return; + } } /* (cold path) one pcluster is requested multiple times */ diff --git a/fs/erofs/zdata.h b/fs/erofs/zdata.h index e7f04c4fbb81..d98c95212985 100644 --- a/fs/erofs/zdata.h +++ b/fs/erofs/zdata.h @@ -126,10 +126,10 @@ static inline unsigned int z_erofs_pclusterpages(struct z_erofs_pcluster *pcl) } /* - * bit 31: I/O error occurred on this page - * bit 0 - 30: remaining parts to complete this page + * bit 30: I/O error occurred on this page + * bit 0 - 29: remaining parts to complete this page */ -#define Z_EROFS_PAGE_EIO (1 << 31) +#define Z_EROFS_PAGE_EIO (1 << 30) static inline void z_erofs_onlinepage_init(struct page *page) { diff --git a/fs/erofs/zmap.c b/fs/erofs/zmap.c index 44c27ef39c43..0bb66927e3d0 100644 --- a/fs/erofs/zmap.c +++ b/fs/erofs/zmap.c @@ -57,8 +57,7 @@ static int z_erofs_fill_inode_lazy(struct inode *inode) pos = ALIGN(iloc(EROFS_SB(sb), vi->nid) + vi->inode_isize + vi->xattr_isize, 8); - kaddr = erofs_read_metabuf(&buf, sb, erofs_blknr(pos), - EROFS_KMAP_ATOMIC); + kaddr = erofs_read_metabuf(&buf, sb, erofs_blknr(pos), EROFS_KMAP); if (IS_ERR(kaddr)) { err = PTR_ERR(kaddr); goto out_unlock; @@ -73,7 +72,7 @@ static int z_erofs_fill_inode_lazy(struct inode *inode) vi->z_advise = Z_EROFS_ADVISE_FRAGMENT_PCLUSTER; vi->z_fragmentoff = le64_to_cpu(*(__le64 *)h) ^ (1ULL << 63); vi->z_tailextent_headlcn = 0; - goto unmap_done; + goto done; } vi->z_advise = le16_to_cpu(h->h_advise); vi->z_algorithmtype[0] = h->h_algorithmtype & 15; @@ -85,7 +84,7 @@ static int z_erofs_fill_inode_lazy(struct inode *inode) erofs_err(sb, "unknown HEAD%u format %u for nid %llu, please upgrade kernel", headnr + 1, vi->z_algorithmtype[headnr], vi->nid); err = -EOPNOTSUPP; - goto unmap_done; + goto out_put_metabuf; } vi->z_logical_clusterbits = LOG_BLOCK_SIZE + (h->h_clusterbits & 7); @@ -95,7 +94,7 @@ static int z_erofs_fill_inode_lazy(struct inode *inode) erofs_err(sb, "per-inode big pcluster without sb feature for nid %llu", vi->nid); err = -EFSCORRUPTED; - goto unmap_done; + goto out_put_metabuf; } if (vi->datalayout == EROFS_INODE_FLAT_COMPRESSION && !(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1) ^ @@ -103,12 +102,8 @@ static int z_erofs_fill_inode_lazy(struct inode *inode) erofs_err(sb, "big pcluster head1/2 of compact indexes should be consistent for nid %llu", vi->nid); err = -EFSCORRUPTED; - goto unmap_done; + goto out_put_metabuf; } -unmap_done: - erofs_put_metabuf(&buf); - if (err) - goto out_unlock; if (vi->z_advise & Z_EROFS_ADVISE_INLINE_PCLUSTER) { struct erofs_map_blocks map = { @@ -127,7 +122,7 @@ unmap_done: err = -EFSCORRUPTED; } if (err < 0) - goto out_unlock; + goto out_put_metabuf; } if (vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER && @@ -141,11 +136,14 @@ unmap_done: EROFS_GET_BLOCKS_FINDTAIL); erofs_put_metabuf(&map.buf); if (err < 0) - goto out_unlock; + goto out_put_metabuf; } +done: /* paired with smp_mb() at the beginning of the function */ smp_mb(); set_bit(EROFS_I_Z_INITED_BIT, &vi->flags); +out_put_metabuf: + erofs_put_metabuf(&buf); out_unlock: clear_and_wake_up_bit(EROFS_I_BL_Z_BIT, &vi->flags); return err; |