diff options
author | Kent Overstreet <kent.overstreet@linux.dev> | 2023-09-04 05:38:30 -0400 |
---|---|---|
committer | Kent Overstreet <kent.overstreet@linux.dev> | 2023-10-22 17:10:12 -0400 |
commit | 5902cc283c060f0a006ee9b2f2a64855a09399b4 (patch) | |
tree | c7d2e909fa088255d845fe205394397581ed4bb3 | |
parent | 1809b8cba756d32bd6e976ed4ee64efdf66c6d94 (diff) |
bcachefs: New io_misc.c helpers
This pulls the non-VFS-specific parts of truncate and finsert/fcollapse
out of fs-io.c and moves them to io_misc.c.
This is prep work for logging these operations, to make them atomic in
the event of a crash.
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
-rw-r--r-- | fs/bcachefs/fs-io.c | 209 | ||||
-rw-r--r-- | fs/bcachefs/fs-io.h | 2 | ||||
-rw-r--r-- | fs/bcachefs/fs.c | 2 | ||||
-rw-r--r-- | fs/bcachefs/io_misc.c | 226 | ||||
-rw-r--r-- | fs/bcachefs/io_misc.h | 3 |
5 files changed, 250 insertions, 192 deletions
diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c index 0b0b3b0d6c7d..b36513eb3d16 100644 --- a/fs/bcachefs/fs-io.c +++ b/fs/bcachefs/fs-io.c @@ -391,33 +391,12 @@ static int bch2_extend(struct mnt_idmap *idmap, return bch2_setattr_nonsize(idmap, inode, iattr); } -static int bch2_truncate_finish_fn(struct btree_trans *trans, - struct bch_inode_info *inode, - struct bch_inode_unpacked *bi, - void *p) -{ - bi->bi_flags &= ~BCH_INODE_I_SIZE_DIRTY; - return 0; -} - -static int bch2_truncate_start_fn(struct btree_trans *trans, - struct bch_inode_info *inode, - struct bch_inode_unpacked *bi, void *p) -{ - u64 *new_i_size = p; - - bi->bi_flags |= BCH_INODE_I_SIZE_DIRTY; - bi->bi_size = *new_i_size; - return 0; -} - -int bch2_truncate(struct mnt_idmap *idmap, +int bchfs_truncate(struct mnt_idmap *idmap, struct bch_inode_info *inode, struct iattr *iattr) { struct bch_fs *c = inode->v.i_sb->s_fs_info; struct address_space *mapping = inode->v.i_mapping; struct bch_inode_unpacked inode_u; - u64 new_i_size = iattr->ia_size; s64 i_sectors_delta = 0; int ret = 0; @@ -466,6 +445,8 @@ int bch2_truncate(struct mnt_idmap *idmap, if (unlikely(ret < 0)) goto err; + truncate_setsize(&inode->v, iattr->ia_size); + /* * When extending, we're going to write the new i_size to disk * immediately so we need to flush anything above the current on disk @@ -487,32 +468,22 @@ int bch2_truncate(struct mnt_idmap *idmap, if (ret) goto err; - mutex_lock(&inode->ei_update_lock); - ret = bch2_write_inode(c, inode, bch2_truncate_start_fn, - &new_i_size, 0); - mutex_unlock(&inode->ei_update_lock); + ret = bch2_truncate(c, inode_inum(inode), iattr->ia_size, &i_sectors_delta); + bch2_i_sectors_acct(c, inode, NULL, i_sectors_delta); - if (unlikely(ret)) + if (unlikely(ret)) { + /* + * If we error here, VFS caches are now inconsistent with btree + */ + set_bit(EI_INODE_ERROR, &inode->ei_flags); goto err; - - truncate_setsize(&inode->v, iattr->ia_size); - - ret = bch2_fpunch(c, inode_inum(inode), - 
round_up(iattr->ia_size, block_bytes(c)) >> 9, - U64_MAX, &i_sectors_delta); - bch2_i_sectors_acct(c, inode, NULL, i_sectors_delta); + } bch2_fs_inconsistent_on(!inode->v.i_size && inode->v.i_blocks && !bch2_journal_error(&c->journal), c, "inode %lu truncated to 0 but i_blocks %llu (ondisk %lli)", inode->v.i_ino, (u64) inode->v.i_blocks, inode->ei_inode.bi_sectors); - if (unlikely(ret)) - goto err; - - mutex_lock(&inode->ei_update_lock); - ret = bch2_write_inode(c, inode, bch2_truncate_finish_fn, NULL, 0); - mutex_unlock(&inode->ei_update_lock); ret = bch2_setattr_nonsize(idmap, inode, iattr); err: @@ -577,175 +548,33 @@ static long bchfs_fcollapse_finsert(struct bch_inode_info *inode, { struct bch_fs *c = inode->v.i_sb->s_fs_info; struct address_space *mapping = inode->v.i_mapping; - struct bkey_buf copy; - struct btree_trans trans; - struct btree_iter src, dst, del; - loff_t shift, new_size; - u64 src_start; + s64 i_sectors_delta = 0; int ret = 0; if ((offset | len) & (block_bytes(c) - 1)) return -EINVAL; if (insert) { - if (inode->v.i_sb->s_maxbytes - inode->v.i_size < len) - return -EFBIG; - if (offset >= inode->v.i_size) return -EINVAL; - - src_start = U64_MAX; - shift = len; } else { if (offset + len >= inode->v.i_size) return -EINVAL; - - src_start = offset + len; - shift = -len; } - new_size = inode->v.i_size + shift; - ret = bch2_write_invalidate_inode_pages_range(mapping, offset, LLONG_MAX); if (ret) return ret; - if (insert) { - i_size_write(&inode->v, new_size); - mutex_lock(&inode->ei_update_lock); - ret = bch2_write_inode_size(c, inode, new_size, - ATTR_MTIME|ATTR_CTIME); - mutex_unlock(&inode->ei_update_lock); - } else { - s64 i_sectors_delta = 0; - - ret = bch2_fpunch(c, inode_inum(inode), - offset >> 9, (offset + len) >> 9, - &i_sectors_delta); - bch2_i_sectors_acct(c, inode, NULL, i_sectors_delta); + if (insert) + i_size_write(&inode->v, inode->v.i_size + len); - if (ret) - return ret; - } - - bch2_bkey_buf_init(&copy); - bch2_trans_init(&trans, c, 
BTREE_ITER_MAX, 1024); - bch2_trans_iter_init(&trans, &src, BTREE_ID_extents, - POS(inode->v.i_ino, src_start >> 9), - BTREE_ITER_INTENT); - bch2_trans_copy_iter(&dst, &src); - bch2_trans_copy_iter(&del, &src); - - while (ret == 0 || - bch2_err_matches(ret, BCH_ERR_transaction_restart)) { - struct disk_reservation disk_res = - bch2_disk_reservation_init(c, 0); - struct bkey_i delete; - struct bkey_s_c k; - struct bpos next_pos; - struct bpos move_pos = POS(inode->v.i_ino, offset >> 9); - struct bpos atomic_end; - unsigned trigger_flags = 0; - u32 snapshot; - - bch2_trans_begin(&trans); - - ret = bch2_subvolume_get_snapshot(&trans, - inode->ei_subvol, &snapshot); - if (ret) - continue; - - bch2_btree_iter_set_snapshot(&src, snapshot); - bch2_btree_iter_set_snapshot(&dst, snapshot); - bch2_btree_iter_set_snapshot(&del, snapshot); - - bch2_trans_begin(&trans); - - k = insert - ? bch2_btree_iter_peek_prev(&src) - : bch2_btree_iter_peek_upto(&src, POS(inode->v.i_ino, U64_MAX)); - if ((ret = bkey_err(k))) - continue; - - if (!k.k || k.k->p.inode != inode->v.i_ino) - break; - - if (insert && - bkey_le(k.k->p, POS(inode->v.i_ino, offset >> 9))) - break; -reassemble: - bch2_bkey_buf_reassemble(&copy, c, k); - - if (insert && - bkey_lt(bkey_start_pos(k.k), move_pos)) - bch2_cut_front(move_pos, copy.k); - - copy.k->k.p.offset += shift >> 9; - bch2_btree_iter_set_pos(&dst, bkey_start_pos(&copy.k->k)); - - ret = bch2_extent_atomic_end(&trans, &dst, copy.k, &atomic_end); - if (ret) - continue; - - if (!bkey_eq(atomic_end, copy.k->k.p)) { - if (insert) { - move_pos = atomic_end; - move_pos.offset -= shift >> 9; - goto reassemble; - } else { - bch2_cut_back(atomic_end, copy.k); - } - } - - bkey_init(&delete.k); - delete.k.p = copy.k->k.p; - delete.k.size = copy.k->k.size; - delete.k.p.offset -= shift >> 9; - bch2_btree_iter_set_pos(&del, bkey_start_pos(&delete.k)); - - next_pos = insert ? 
bkey_start_pos(&delete.k) : delete.k.p; - - if (copy.k->k.size != k.k->size) { - /* We might end up splitting compressed extents: */ - unsigned nr_ptrs = - bch2_bkey_nr_ptrs_allocated(bkey_i_to_s_c(copy.k)); - - ret = bch2_disk_reservation_get(c, &disk_res, - copy.k->k.size, nr_ptrs, - BCH_DISK_RESERVATION_NOFAIL); - BUG_ON(ret); - } - - ret = bch2_btree_iter_traverse(&del) ?: - bch2_trans_update(&trans, &del, &delete, trigger_flags) ?: - bch2_trans_update(&trans, &dst, copy.k, trigger_flags) ?: - bch2_trans_commit(&trans, &disk_res, NULL, - BTREE_INSERT_NOFAIL); - bch2_disk_reservation_put(c, &disk_res); - - if (!ret) - bch2_btree_iter_set_pos(&src, next_pos); - } - bch2_trans_iter_exit(&trans, &del); - bch2_trans_iter_exit(&trans, &dst); - bch2_trans_iter_exit(&trans, &src); - bch2_trans_exit(&trans); - bch2_bkey_buf_exit(&copy, c); - - if (ret) - return ret; + ret = bch2_fcollapse_finsert(c, inode_inum(inode), offset >> 9, len >> 9, + insert, &i_sectors_delta); + if (!ret && !insert) + i_size_write(&inode->v, inode->v.i_size - len); + bch2_i_sectors_acct(c, inode, NULL, i_sectors_delta); - mutex_lock(&inode->ei_update_lock); - if (!insert) { - i_size_write(&inode->v, new_size); - ret = bch2_write_inode_size(c, inode, new_size, - ATTR_MTIME|ATTR_CTIME); - } else { - /* We need an inode update to update bi_journal_seq for fsync: */ - ret = bch2_write_inode(c, inode, inode_update_times_fn, NULL, - ATTR_MTIME|ATTR_CTIME); - } - mutex_unlock(&inode->ei_update_lock); return ret; } diff --git a/fs/bcachefs/fs-io.h b/fs/bcachefs/fs-io.h index bc6e8439d40b..ca70346e68dc 100644 --- a/fs/bcachefs/fs-io.h +++ b/fs/bcachefs/fs-io.h @@ -165,7 +165,7 @@ int __must_check bch2_write_inode_size(struct bch_fs *, int bch2_fsync(struct file *, loff_t, loff_t, int); -int bch2_truncate(struct mnt_idmap *, +int bchfs_truncate(struct mnt_idmap *, struct bch_inode_info *, struct iattr *); long bch2_fallocate_dispatch(struct file *, int, loff_t, loff_t); diff --git a/fs/bcachefs/fs.c 
b/fs/bcachefs/fs.c index 0648874d54f3..0def3a57bd6d 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -798,7 +798,7 @@ static int bch2_setattr(struct mnt_idmap *idmap, return ret; return iattr->ia_valid & ATTR_SIZE - ? bch2_truncate(idmap, inode, iattr) + ? bchfs_truncate(idmap, inode, iattr) : bch2_setattr_nonsize(idmap, inode, iattr); } diff --git a/fs/bcachefs/io_misc.c b/fs/bcachefs/io_misc.c index c04e5dacfc8d..1afea613df4a 100644 --- a/fs/bcachefs/io_misc.c +++ b/fs/bcachefs/io_misc.c @@ -9,7 +9,10 @@ #include "btree_update.h" #include "buckets.h" #include "clock.h" +#include "error.h" #include "extents.h" +#include "extent_update.h" +#include "inode.h" #include "io_misc.h" #include "io_write.h" #include "subvolume.h" @@ -213,3 +216,226 @@ int bch2_fpunch(struct bch_fs *c, subvol_inum inum, u64 start, u64 end, return ret; } + +static int truncate_set_isize(struct btree_trans *trans, + subvol_inum inum, + u64 new_i_size) +{ + struct btree_iter iter = { NULL }; + struct bch_inode_unpacked inode_u; + int ret; + + ret = bch2_inode_peek(trans, &iter, &inode_u, inum, BTREE_ITER_INTENT) ?: + (inode_u.bi_size = new_i_size, 0) ?: + bch2_inode_write(trans, &iter, &inode_u); + + bch2_trans_iter_exit(trans, &iter); + return ret; +} + +int bch2_truncate(struct bch_fs *c, subvol_inum inum, u64 new_i_size, u64 *i_sectors_delta) +{ + struct btree_trans trans; + struct btree_iter fpunch_iter; + int ret; + + bch2_trans_init(&trans, c, BTREE_ITER_MAX, 1024); + bch2_trans_iter_init(&trans, &fpunch_iter, BTREE_ID_extents, + POS(inum.inum, round_up(new_i_size, block_bytes(c)) >> 9), + BTREE_ITER_INTENT); + + ret = commit_do(&trans, NULL, NULL, BTREE_INSERT_NOFAIL, + truncate_set_isize(&trans, inum, new_i_size)); + if (ret) + goto err; + + ret = bch2_fpunch_at(&trans, &fpunch_iter, inum, U64_MAX, i_sectors_delta); + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) + ret = 0; + if (ret) + goto err; +err: + bch2_trans_iter_exit(&trans, &fpunch_iter); + 
bch2_trans_exit(&trans); + + bch2_fs_fatal_err_on(ret, c, "%s: error truncating %u:%llu: %s", + __func__, inum.subvol, inum.inum, bch2_err_str(ret)); + return ret; +} + +static int adjust_i_size(struct btree_trans *trans, subvol_inum inum, u64 offset, s64 len) +{ + struct btree_iter iter; + struct bch_inode_unpacked inode_u; + int ret; + + offset <<= 9; + len <<= 9; + + ret = bch2_inode_peek(trans, &iter, &inode_u, inum, BTREE_ITER_INTENT); + if (ret) + return ret; + + if (len > 0) { + if (MAX_LFS_FILESIZE - inode_u.bi_size < len) { + ret = -EFBIG; + goto err; + } + + if (offset >= inode_u.bi_size) { + ret = -EINVAL; + goto err; + } + } + + inode_u.bi_size += len; + inode_u.bi_mtime = inode_u.bi_ctime = bch2_current_time(trans->c); + + ret = bch2_inode_write(trans, &iter, &inode_u); +err: + bch2_trans_iter_exit(trans, &iter); + return ret; +} + +int bch2_fcollapse_finsert(struct bch_fs *c, subvol_inum inum, + u64 offset, u64 len, bool insert, + s64 *i_sectors_delta) +{ + struct bkey_buf copy; + struct btree_trans trans; + struct btree_iter src = { NULL }, dst = { NULL }, del = { NULL }; + s64 shift = insert ? 
len : -len; + int ret = 0; + + bch2_bkey_buf_init(&copy); + bch2_trans_init(&trans, c, 0, 1024); + + bch2_trans_iter_init(&trans, &src, BTREE_ID_extents, + POS(inum.inum, U64_MAX), + BTREE_ITER_INTENT); + bch2_trans_copy_iter(&dst, &src); + bch2_trans_copy_iter(&del, &src); + + if (insert) { + ret = commit_do(&trans, NULL, NULL, BTREE_INSERT_NOFAIL, + adjust_i_size(&trans, inum, offset, len)); + if (ret) + goto err; + } else { + bch2_btree_iter_set_pos(&src, POS(inum.inum, offset)); + + ret = bch2_fpunch_at(&trans, &src, inum, offset + len, i_sectors_delta); + if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart)) + goto err; + + bch2_btree_iter_set_pos(&src, POS(inum.inum, offset + len)); + } + + while (ret == 0 || bch2_err_matches(ret, BCH_ERR_transaction_restart)) { + struct disk_reservation disk_res = + bch2_disk_reservation_init(c, 0); + struct bkey_i delete; + struct bkey_s_c k; + struct bpos next_pos; + struct bpos move_pos = POS(inum.inum, offset); + struct bpos atomic_end; + unsigned trigger_flags = 0; + u32 snapshot; + + bch2_trans_begin(&trans); + + ret = bch2_subvolume_get_snapshot(&trans, inum.subvol, &snapshot); + if (ret) + continue; + + bch2_btree_iter_set_snapshot(&src, snapshot); + bch2_btree_iter_set_snapshot(&dst, snapshot); + bch2_btree_iter_set_snapshot(&del, snapshot); + + bch2_trans_begin(&trans); + + k = insert + ? 
bch2_btree_iter_peek_prev(&src) + : bch2_btree_iter_peek_upto(&src, POS(inum.inum, U64_MAX)); + if ((ret = bkey_err(k))) + continue; + + if (!k.k || k.k->p.inode != inum.inum) + break; + + if (insert && + bkey_le(k.k->p, POS(inum.inum, offset))) + break; +reassemble: + bch2_bkey_buf_reassemble(&copy, c, k); + + if (insert && + bkey_lt(bkey_start_pos(k.k), move_pos)) + bch2_cut_front(move_pos, copy.k); + + copy.k->k.p.offset += shift; + bch2_btree_iter_set_pos(&dst, bkey_start_pos(&copy.k->k)); + + ret = bch2_extent_atomic_end(&trans, &dst, copy.k, &atomic_end); + if (ret) + continue; + + if (!bkey_eq(atomic_end, copy.k->k.p)) { + if (insert) { + move_pos = atomic_end; + move_pos.offset -= shift; + goto reassemble; + } else { + bch2_cut_back(atomic_end, copy.k); + } + } + + bkey_init(&delete.k); + delete.k.p = copy.k->k.p; + delete.k.size = copy.k->k.size; + delete.k.p.offset -= shift; + bch2_btree_iter_set_pos(&del, bkey_start_pos(&delete.k)); + + next_pos = insert ? bkey_start_pos(&delete.k) : delete.k.p; + + if (copy.k->k.size != k.k->size) { + /* We might end up splitting compressed extents: */ + unsigned nr_ptrs = + bch2_bkey_nr_ptrs_allocated(bkey_i_to_s_c(copy.k)); + + ret = bch2_disk_reservation_get(c, &disk_res, + copy.k->k.size, nr_ptrs, + BCH_DISK_RESERVATION_NOFAIL); + BUG_ON(ret); + } + + ret = bch2_btree_iter_traverse(&del) ?: + bch2_trans_update(&trans, &del, &delete, trigger_flags) ?: + bch2_trans_update(&trans, &dst, copy.k, trigger_flags) ?: + bch2_trans_commit(&trans, &disk_res, NULL, + BTREE_INSERT_NOFAIL); + bch2_disk_reservation_put(c, &disk_res); + + if (!ret) + bch2_btree_iter_set_pos(&src, next_pos); + } + + if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart)) + goto err; + + if (!insert) { + ret = commit_do(&trans, NULL, NULL, BTREE_INSERT_NOFAIL, + adjust_i_size(&trans, inum, offset, -len)); + } else { + /* We need an inode update to update bi_journal_seq for fsync: */ + ret = commit_do(&trans, NULL, NULL, BTREE_INSERT_NOFAIL, + 
adjust_i_size(&trans, inum, 0, 0)); + } +err: + bch2_trans_iter_exit(&trans, &del); + bch2_trans_iter_exit(&trans, &dst); + bch2_trans_iter_exit(&trans, &src); + bch2_trans_exit(&trans); + bch2_bkey_buf_exit(&copy, c); + return ret; +} diff --git a/fs/bcachefs/io_misc.h b/fs/bcachefs/io_misc.h index 46e9ce3251d6..894a7a04ba4b 100644 --- a/fs/bcachefs/io_misc.h +++ b/fs/bcachefs/io_misc.h @@ -9,4 +9,7 @@ int bch2_fpunch_at(struct btree_trans *, struct btree_iter *, subvol_inum, u64, s64 *); int bch2_fpunch(struct bch_fs *c, subvol_inum, u64, u64, s64 *); +int bch2_truncate(struct bch_fs *, subvol_inum, u64, u64 *); +int bch2_fcollapse_finsert(struct bch_fs *, subvol_inum, u64, u64, bool, s64 *); + #endif /* _BCACHEFS_IO_MISC_H */ |