diff options
author | Jonathan Cameron <Jonathan.Cameron@huawei.com> | 2024-10-12 11:22:25 +0100 |
---|---|---|
committer | Jonathan Cameron <Jonathan.Cameron@huawei.com> | 2024-10-12 11:43:29 +0100 |
commit | 220c71dafaa28cbb75fd785670cf68a758347026 (patch) | |
tree | 47a974cd3ea71ba59116037b56a509c41a430b63 /fs | |
parent | 6a9262edff8ea44e9968b6b271c36d81c6a1f841 (diff) | |
parent | 8cf0b93919e13d1e8d4466eb4080a4c4d9d66d7b (diff) |
Merge tag 'v6.12-rc2' into test2
Linux 6.12-rc2
Resolved movement of asm/unaligned.h to linux/unaligned.h
Diffstat (limited to 'fs')
119 files changed, 994 insertions, 657 deletions
diff --git a/fs/adfs/map.c b/fs/adfs/map.c index a81de80c45c1..a0ce272b4098 100644 --- a/fs/adfs/map.c +++ b/fs/adfs/map.c @@ -6,7 +6,7 @@ */ #include <linux/slab.h> #include <linux/statfs.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include "adfs.h" /* diff --git a/fs/afs/afs_vl.h b/fs/afs/afs_vl.h index 9c65ffb8a523..a06296c8827d 100644 --- a/fs/afs/afs_vl.h +++ b/fs/afs/afs_vl.h @@ -134,13 +134,4 @@ struct afs_uvldbentry__xdr { __be32 spares9; }; -struct afs_address_list { - refcount_t usage; - unsigned int version; - unsigned int nr_addrs; - struct sockaddr_rxrpc addrs[]; -}; - -extern void afs_put_address_list(struct afs_address_list *alist); - #endif /* AFS_VL_H */ diff --git a/fs/afs/file.c b/fs/afs/file.c index 492d857a3fa0..6762eff97517 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c @@ -420,6 +420,7 @@ const struct netfs_request_ops afs_req_ops = { .begin_writeback = afs_begin_writeback, .prepare_write = afs_prepare_write, .issue_write = afs_issue_write, + .retry_request = afs_retry_request, }; static void afs_add_open_mmap(struct afs_vnode *vnode) diff --git a/fs/afs/fs_operation.c b/fs/afs/fs_operation.c index 3546b087e791..428721bbe4f6 100644 --- a/fs/afs/fs_operation.c +++ b/fs/afs/fs_operation.c @@ -201,7 +201,7 @@ void afs_wait_for_operation(struct afs_operation *op) } } - if (op->call_responded) + if (op->call_responded && op->server) set_bit(AFS_SERVER_FL_RESPONDING, &op->server->flags); if (!afs_op_error(op)) { diff --git a/fs/afs/fs_probe.c b/fs/afs/fs_probe.c index 580de4adaaf6..b516d05b0fef 100644 --- a/fs/afs/fs_probe.c +++ b/fs/afs/fs_probe.c @@ -506,10 +506,10 @@ int afs_wait_for_one_fs_probe(struct afs_server *server, struct afs_endpoint_sta finish_wait(&server->probe_wq, &wait); dont_wait: - if (estate->responsive_set & ~exclude) - return 1; if (test_bit(AFS_ESTATE_SUPERSEDED, &estate->flags)) return 0; + if (estate->responsive_set & ~exclude) + return 1; if (is_intr && signal_pending(current)) return -ERESTARTSYS; if (timo == 0) diff --git a/fs/afs/rotate.c b/fs/afs/rotate.c index ed09d4d4c211..d612983d6f38 100644 --- a/fs/afs/rotate.c +++ b/fs/afs/rotate.c @@ -632,8 +632,10 @@ iterate_address: wait_for_more_probe_results: error = afs_wait_for_one_fs_probe(op->server, op->estate, op->addr_tried, !(op->flags & AFS_OPERATION_UNINTR)); - if (!error) + if (error == 1) goto iterate_address; + if (!error) + goto restart_from_beginning; /* We've now had a failure to respond on all of a server's addresses - * immediately probe them again and consider retrying the server. @@ -644,10 +646,13 @@ wait_for_more_probe_results: error = afs_wait_for_one_fs_probe(op->server, op->estate, op->addr_tried, !(op->flags & AFS_OPERATION_UNINTR)); switch (error) { - case 0: + case 1: op->flags &= ~AFS_OPERATION_RETRY_SERVER; - trace_afs_rotate(op, afs_rotate_trace_retry_server, 0); + trace_afs_rotate(op, afs_rotate_trace_retry_server, 1); goto retry_server; + case 0: + trace_afs_rotate(op, afs_rotate_trace_retry_server, 0); + goto restart_from_beginning; case -ERESTARTSYS: afs_op_set_error(op, error); goto failed; diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index 645b5ed4babb..4e4a448f6931 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -332,7 +332,6 @@ void bch2_alloc_v4_swab(struct bkey_s k) a->io_time[1] = swab64(a->io_time[1]); a->stripe = swab32(a->stripe); a->nr_external_backpointers = swab32(a->nr_external_backpointers); - a->fragmentation_lru = swab64(a->fragmentation_lru); a->stripe_sectors = swab32(a->stripe_sectors); bps = alloc_v4_backpointers(a); @@ -347,6 +346,7 @@ void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c { struct bch_alloc_v4 _a; const struct bch_alloc_v4 *a = bch2_alloc_to_v4(k, &_a); + struct bch_dev *ca = c ? bch2_dev_bucket_tryget_noerror(c, k.k->p) : NULL; prt_newline(out); printbuf_indent_add(out, 2); @@ -364,9 +364,13 @@ void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c prt_printf(out, "stripe_redundancy %u\n", a->stripe_redundancy); prt_printf(out, "io_time[READ] %llu\n", a->io_time[READ]); prt_printf(out, "io_time[WRITE] %llu\n", a->io_time[WRITE]); - prt_printf(out, "fragmentation %llu\n", a->fragmentation_lru); + + if (ca) + prt_printf(out, "fragmentation %llu\n", alloc_lru_idx_fragmentation(*a, ca)); prt_printf(out, "bp_start %llu\n", BCH_ALLOC_V4_BACKPOINTERS_START(a)); printbuf_indent_sub(out, 2); + + bch2_dev_put(ca); } void __bch2_alloc_to_v4(struct bkey_s_c k, struct bch_alloc_v4 *out) @@ -882,12 +886,13 @@ int bch2_trigger_alloc(struct btree_trans *trans, goto err; } - new_a->fragmentation_lru = alloc_lru_idx_fragmentation(*new_a, ca); - if (old_a->fragmentation_lru != new_a->fragmentation_lru) { + old_lru = alloc_lru_idx_fragmentation(*old_a, ca); + new_lru = alloc_lru_idx_fragmentation(*new_a, ca); + if (old_lru != new_lru) { ret = bch2_lru_change(trans, BCH_LRU_FRAGMENTATION_START, bucket_to_u64(new.k->p), - old_a->fragmentation_lru, new_a->fragmentation_lru); + old_lru, new_lru); if (ret) goto err; } @@ -1629,18 +1634,22 @@ static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans, if (ret) return ret; + struct bch_dev *ca = bch2_dev_tryget_noerror(c, alloc_k.k->p.inode); + if (!ca) + return 0; + a = bch2_alloc_to_v4(alloc_k, &a_convert); - if (a->fragmentation_lru) { + u64 lru_idx = alloc_lru_idx_fragmentation(*a, ca); + if (lru_idx) { ret = bch2_lru_check_set(trans, BCH_LRU_FRAGMENTATION_START, - a->fragmentation_lru, - alloc_k, last_flushed); + lru_idx, alloc_k, last_flushed); if (ret) - return ret; + goto err; } if (a->data_type != BCH_DATA_cached) - return 0; + goto err; if (fsck_err_on(!a->io_time[READ], trans, alloc_key_cached_but_read_time_zero, @@ -1669,6 +1678,7 @@ static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans, goto err; err: fsck_err: + bch2_dev_put(ca); printbuf_exit(&buf); return ret; } diff --git a/fs/bcachefs/alloc_background_format.h b/fs/bcachefs/alloc_background_format.h index f754a2951d8a..befdaa95c515 100644 --- a/fs/bcachefs/alloc_background_format.h +++ b/fs/bcachefs/alloc_background_format.h @@ -70,7 +70,7 @@ struct bch_alloc_v4 { __u32 stripe; __u32 nr_external_backpointers; /* end of fields in original version of alloc_v4 */ - __u64 fragmentation_lru; + __u64 _fragmentation_lru; /* obsolete */ __u32 stripe_sectors; __u32 pad; } __packed __aligned(8); diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index 203ee627cab5..84832c2d4df9 100644 --- a/fs/bcachefs/bcachefs_format.h +++ b/fs/bcachefs/bcachefs_format.h @@ -223,7 +223,7 @@ struct bkey { #elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ struct bpos p; __u32 size; /* extent size, in sectors */ - struct bversion version; + struct bversion bversion; __u8 pad[1]; #endif diff --git a/fs/bcachefs/bset.c b/fs/bcachefs/bset.c index d1f6092624d8..9a4a83d6fd2d 100644 --- a/fs/bcachefs/bset.c +++ b/fs/bcachefs/bset.c @@ -13,7 +13,7 @@ #include "trace.h" #include "util.h" -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include <linux/console.h> #include <linux/random.h> #include <linux/prefetch.h> diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index 660d2fa02da2..771154e3a291 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -828,8 +828,6 @@ static int bch2_alloc_write_key(struct btree_trans *trans, return ret; } - gc.fragmentation_lru = alloc_lru_idx_fragmentation(gc, ca); - if (fsck_err_on(new.data_type != gc.data_type, trans, alloc_key_data_type_wrong, "bucket %llu:%llu gen %u has wrong data_type" @@ -857,7 +855,6 @@ static int bch2_alloc_write_key(struct btree_trans *trans, copy_bucket_field(alloc_key_cached_sectors_wrong, cached_sectors); copy_bucket_field(alloc_key_stripe_wrong, stripe); copy_bucket_field(alloc_key_stripe_redundancy_wrong, stripe_redundancy); - copy_bucket_field(alloc_key_fragmentation_lru_wrong, fragmentation_lru); #undef copy_bucket_field if (!bch2_alloc_v4_cmp(*old, new)) diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c index 1a74a1a252ee..9bf471fa4361 100644 --- a/fs/bcachefs/btree_trans_commit.c +++ b/fs/bcachefs/btree_trans_commit.c @@ -832,7 +832,8 @@ revert_fs_usage: for (struct jset_entry *entry2 = trans->journal_entries; entry2 != entry; entry2 = vstruct_next(entry2)) - if (jset_entry_is_key(entry2) && entry2->start->k.type == KEY_TYPE_accounting) { + if (entry2->type == BCH_JSET_ENTRY_write_buffer_keys && + entry2->start->k.type == KEY_TYPE_accounting) { struct bkey_s_accounting a = bkey_i_to_s_accounting(entry2->start); bch2_accounting_neg(a); diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c index 3a16b535b6c3..7a79f695ba2e 100644 --- a/fs/bcachefs/error.c +++ b/fs/bcachefs/error.c @@ -393,6 +393,14 @@ int __bch2_fsck_err(struct bch_fs *c, !(flags & FSCK_CAN_IGNORE))) ret = -BCH_ERR_fsck_errors_not_fixed; + bool exiting = + test_bit(BCH_FS_fsck_running, &c->flags) && + (ret != -BCH_ERR_fsck_fix && + ret != -BCH_ERR_fsck_ignore); + + if (exiting) + print = true; + if (print) { if (bch2_fs_stdio_redirect(c)) bch2_print(c, "%s\n", out->buf); @@ -400,9 +408,7 @@ int __bch2_fsck_err(struct bch_fs *c, bch2_print_string_as_lines(KERN_ERR, out->buf); } - if (test_bit(BCH_FS_fsck_running, &c->flags) && - (ret != -BCH_ERR_fsck_fix && - ret != -BCH_ERR_fsck_ignore)) + if (exiting) bch_err(c, "Unable to continue, halting"); else if (suppressing) bch_err(c, "Ratelimiting new instances of previous error"); @@ -430,10 +436,17 @@ err: int __bch2_bkey_fsck_err(struct bch_fs *c, struct bkey_s_c k, - enum bch_fsck_flags flags, + enum bch_validate_flags validate_flags, enum bch_sb_error_id err, const char *fmt, ...) { + if (validate_flags & BCH_VALIDATE_silent) + return -BCH_ERR_fsck_delete_bkey; + + unsigned fsck_flags = 0; + if (!(validate_flags & (BCH_VALIDATE_write|BCH_VALIDATE_commit))) + fsck_flags |= FSCK_AUTOFIX|FSCK_CAN_FIX; + struct printbuf buf = PRINTBUF; va_list args; @@ -445,7 +458,7 @@ int __bch2_bkey_fsck_err(struct bch_fs *c, va_end(args); prt_str(&buf, ": delete?"); - int ret = __bch2_fsck_err(c, NULL, flags, err, "%s", buf.buf); + int ret = __bch2_fsck_err(c, NULL, fsck_flags, err, "%s", buf.buf); printbuf_exit(&buf); return ret; } diff --git a/fs/bcachefs/error.h b/fs/bcachefs/error.h index 21ee7211b03e..6551ada926b6 100644 --- a/fs/bcachefs/error.h +++ b/fs/bcachefs/error.h @@ -167,10 +167,11 @@ void bch2_flush_fsck_errs(struct bch_fs *); #define fsck_err_on(cond, c, _err_type, ...) \ __fsck_err_on(cond, c, FSCK_CAN_FIX|FSCK_CAN_IGNORE, _err_type, __VA_ARGS__) +enum bch_validate_flags; __printf(5, 6) int __bch2_bkey_fsck_err(struct bch_fs *, struct bkey_s_c, - enum bch_fsck_flags, + enum bch_validate_flags, enum bch_sb_error_id, const char *, ...); @@ -180,11 +181,7 @@ int __bch2_bkey_fsck_err(struct bch_fs *, */ #define bkey_fsck_err(c, _err_type, _err_msg, ...) \ do { \ - if ((flags & BCH_VALIDATE_silent)) { \ - ret = -BCH_ERR_fsck_delete_bkey; \ - goto fsck_err; \ - } \ - int _ret = __bch2_bkey_fsck_err(c, k, FSCK_CAN_FIX|FSCK_AUTOFIX,\ + int _ret = __bch2_bkey_fsck_err(c, k, flags, \ BCH_FSCK_ERR_##_err_type, \ _err_msg, ##__VA_ARGS__); \ if (_ret != -BCH_ERR_fsck_fix && \ diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 4a1bb07a2574..5bfc26d58270 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -174,20 +174,24 @@ static const struct rhashtable_params bch2_vfs_inodes_params = { .automatic_shrinking = true, }; -static void __wait_on_freeing_inode(struct inode *inode) +struct bch_inode_info *__bch2_inode_hash_find(struct bch_fs *c, subvol_inum inum) { - wait_queue_head_t *wq; - DEFINE_WAIT_BIT(wait, &inode->i_state, __I_NEW); - wq = bit_waitqueue(&inode->i_state, __I_NEW); - prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE); - spin_unlock(&inode->i_lock); - schedule(); - finish_wait(wq, &wait.wq_entry); + return rhashtable_lookup_fast(&c->vfs_inodes_table, &inum, bch2_vfs_inodes_params); } -struct bch_inode_info *__bch2_inode_hash_find(struct bch_fs *c, subvol_inum inum) +static void __wait_on_freeing_inode(struct bch_fs *c, + struct bch_inode_info *inode, + subvol_inum inum) { - return rhashtable_lookup_fast(&c->vfs_inodes_table, &inum, bch2_vfs_inodes_params); + wait_queue_head_t *wq; + DEFINE_WAIT_BIT(wait, &inode->v.i_state, __I_NEW); + wq = inode_bit_waitqueue(&wait, &inode->v, __I_NEW); + prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE); + spin_unlock(&inode->v.i_lock); + + if (__bch2_inode_hash_find(c, inum) == inode) + schedule_timeout(HZ * 10); + finish_wait(wq, &wait.wq_entry); } static struct bch_inode_info *bch2_inode_hash_find(struct bch_fs *c, struct btree_trans *trans, @@ -204,10 +208,10 @@ repeat: } if ((inode->v.i_state & (I_FREEING|I_WILL_FREE))) { if (!trans) { - __wait_on_freeing_inode(&inode->v); + __wait_on_freeing_inode(c, inode, inum); } else { bch2_trans_unlock(trans); - __wait_on_freeing_inode(&inode->v); + __wait_on_freeing_inode(c, inode, inum); int ret = bch2_trans_relock(trans); if (ret) return ERR_PTR(ret); @@ -232,6 +236,11 @@ static void bch2_inode_hash_remove(struct bch_fs *c, struct bch_inode_info *inod &inode->hash, bch2_vfs_inodes_params); BUG_ON(ret); inode->v.i_hash.pprev = NULL; + /* + * This pairs with the bch2_inode_hash_find() -> + * __wait_on_freeing_inode() path + */ + inode_wake_up_bit(&inode->v, __I_NEW); } } diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index 0d8b782b63fb..b8a6ceb0cc7a 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -28,8 +28,8 @@ static bool inode_points_to_dirent(struct bch_inode_unpacked *inode, inode->bi_dir_offset == d.k->p.offset; } -static bool dirent_points_to_inode_nowarn(struct bkey_s_c_dirent d, - struct bch_inode_unpacked *inode) +static int dirent_points_to_inode_nowarn(struct bkey_s_c_dirent d, + struct bch_inode_unpacked *inode) { if (d.v->d_type == DT_SUBVOL ? le32_to_cpu(d.v->d_child_subvol) == inode->bi_subvol @@ -137,16 +137,15 @@ found: return ret; } -static int lookup_inode(struct btree_trans *trans, u64 inode_nr, - struct bch_inode_unpacked *inode, - u32 *snapshot) +static int lookup_inode(struct btree_trans *trans, u64 inode_nr, u32 snapshot, + struct bch_inode_unpacked *inode) { struct btree_iter iter; struct bkey_s_c k; int ret; k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes, - SPOS(0, inode_nr, *snapshot), 0); + SPOS(0, inode_nr, snapshot), 0); ret = bkey_err(k); if (ret) goto err; @@ -154,8 +153,6 @@ static int lookup_inode(struct btree_trans *trans, u64 inode_nr, ret = bkey_is_inode(k.k) ? bch2_inode_unpack(k, inode) : -BCH_ERR_ENOENT_inode; - if (!ret) - *snapshot = iter.pos.snapshot; err: bch2_trans_iter_exit(trans, &iter); return ret; @@ -250,8 +247,7 @@ static int lookup_lostfound(struct btree_trans *trans, u32 snapshot, struct bch_inode_unpacked root_inode; struct bch_hash_info root_hash_info; - u32 root_inode_snapshot = snapshot; - ret = lookup_inode(trans, root_inum.inum, &root_inode, &root_inode_snapshot); + ret = lookup_inode(trans, root_inum.inum, snapshot, &root_inode); bch_err_msg(c, ret, "looking up root inode %llu for subvol %u", root_inum.inum, le32_to_cpu(st.master_subvol)); if (ret) @@ -277,17 +273,23 @@ static int lookup_lostfound(struct btree_trans *trans, u32 snapshot, * The bch2_check_dirents pass has already run, dangling dirents * shouldn't exist here: */ - ret = lookup_inode(trans, inum, lostfound, &snapshot); + ret = lookup_inode(trans, inum, snapshot, lostfound); bch_err_msg(c, ret, "looking up lost+found %llu:%u in (root inode %llu, snapshot root %u)", inum, snapshot, root_inum.inum, bch2_snapshot_root(c, snapshot)); return ret; create_lostfound: /* + * we always create lost+found in the root snapshot; we don't want + * different branches of the snapshot tree to have different lost+found + */ + snapshot = le32_to_cpu(st.root_snapshot); + /* * XXX: we could have a nicer log message here if we had a nice way to * walk backpointers to print a path */ - bch_notice(c, "creating lost+found in snapshot %u", le32_to_cpu(st.root_snapshot)); + bch_notice(c, "creating lost+found in subvol %llu snapshot %u", + root_inum.subvol, le32_to_cpu(st.root_snapshot)); u64 now = bch2_current_time(c); struct btree_iter lostfound_iter = { NULL }; @@ -296,6 +298,7 @@ create_lostfound: bch2_inode_init_early(c, lostfound); bch2_inode_init_late(lostfound, now, 0, 0, S_IFDIR|0700, 0, &root_inode); lostfound->bi_dir = root_inode.bi_inum; + lostfound->bi_snapshot = le32_to_cpu(st.root_snapshot); root_inode.bi_nlink++; @@ -323,9 +326,7 @@ err: return ret; } -static int reattach_inode(struct btree_trans *trans, - struct bch_inode_unpacked *inode, - u32 inode_snapshot) +static int reattach_inode(struct btree_trans *trans, struct bch_inode_unpacked *inode) { struct bch_fs *c = trans->c; struct bch_hash_info dir_hash; @@ -333,7 +334,7 @@ static int reattach_inode(struct btree_trans *trans, char name_buf[20]; struct qstr name; u64 dir_offset = 0; - u32 dirent_snapshot = inode_snapshot; + u32 dirent_snapshot = inode->bi_snapshot; int ret; if (inode->bi_subvol) { @@ -354,14 +355,18 @@ static int reattach_inode(struct btree_trans *trans, if (ret) return ret; - if (S_ISDIR(inode->bi_mode)) { - lostfound.bi_nlink++; + lostfound.bi_nlink += S_ISDIR(inode->bi_mode); - ret = __bch2_fsck_write_inode(trans, &lostfound, U32_MAX); - if (ret) - return ret; + /* ensure lost+found inode is also present in inode snapshot */ + if (!inode->bi_subvol) { + BUG_ON(!bch2_snapshot_is_ancestor(c, inode->bi_snapshot, lostfound.bi_snapshot)); + lostfound.bi_snapshot = inode->bi_snapshot; } + ret = __bch2_fsck_write_inode(trans, &lostfound); + if (ret) + return ret; + dir_hash = bch2_hash_info_init(c, &lostfound); name = (struct qstr) QSTR(name_buf); @@ -383,7 +388,7 @@ static int reattach_inode(struct btree_trans *trans, inode->bi_dir = lostfound.bi_inum; inode->bi_dir_offset = dir_offset; - return __bch2_fsck_write_inode(trans, inode, inode_snapshot); + return __bch2_fsck_write_inode(trans, inode); } static int remove_backpointer(struct btree_trans *trans, @@ -422,7 +427,7 @@ static int reattach_subvol(struct btree_trans *trans, struct bkey_s_c_subvolume if (ret) return ret; - ret = reattach_inode(trans, &inode, le32_to_cpu(s.v->snapshot)); + ret = reattach_inode(trans, &inode); bch_err_msg(c, ret, "reattaching inode %llu", inode.bi_inum); return ret; } @@ -540,8 +545,9 @@ static int reconstruct_inode(struct btree_trans *trans, enum btree_id btree, u32 bch2_inode_init_late(&new_inode, bch2_current_time(c), 0, 0, i_mode|0600, 0, NULL); new_inode.bi_size = i_size; new_inode.bi_inum = inum; + new_inode.bi_snapshot = snapshot; - return __bch2_fsck_write_inode(trans, &new_inode, snapshot); + return __bch2_fsck_write_inode(trans, &new_inode); } struct snapshots_seen { @@ -1024,6 +1030,7 @@ static int check_inode(struct btree_trans *trans, bool full) { struct bch_fs *c = trans->c; + struct printbuf buf = PRINTBUF; struct bch_inode_unpacked u; bool do_update = false; int ret; @@ -1057,7 +1064,41 @@ static int check_inode(struct btree_trans *trans, trans, inode_snapshot_mismatch, "inodes in different snapshots don't match")) { bch_err(c, "repair not implemented yet"); - return -BCH_ERR_fsck_repair_unimplemented; + ret = -BCH_ERR_fsck_repair_unimplemented; + goto err_noprint; + } + + if (u.bi_dir || u.bi_dir_offset) { + ret = check_inode_dirent_inode(trans, &u, &do_update); + if (ret) + goto err; + } + + if (fsck_err_on(u.bi_dir && (u.bi_flags & BCH_INODE_unlinked), + trans, inode_unlinked_but_has_dirent, + "inode unlinked but has dirent\n%s", + (printbuf_reset(&buf), + bch2_inode_unpacked_to_text(&buf, &u), + buf.buf))) { + u.bi_flags &= ~BCH_INODE_unlinked; + do_update = true; + } + + if (S_ISDIR(u.bi_mode) && (u.bi_flags & BCH_INODE_unlinked)) { + /* Check for this early so that check_unreachable_inode() will reattach it */ + + ret = bch2_empty_dir_snapshot(trans, k.k->p.offset, 0, k.k->p.snapshot); + if (ret && ret != -BCH_ERR_ENOTEMPTY_dir_not_empty) + goto err; + + fsck_err_on(ret, trans, inode_dir_unlinked_but_not_empty, + "dir unlinked but not empty\n%s", + (printbuf_reset(&buf), + bch2_inode_unpacked_to_text(&buf, &u), + buf.buf)); + u.bi_flags &= ~BCH_INODE_unlinked; + do_update = true; + ret = 0; } if ((u.bi_flags & (BCH_INODE_i_size_dirty|BCH_INODE_unlinked)) && @@ -1070,15 +1111,15 @@ static int check_inode(struct btree_trans *trans, u.bi_flags &= ~BCH_INODE_i_size_dirty|BCH_INODE_unlinked; - ret = __bch2_fsck_write_inode(trans, &u, iter->pos.snapshot); + ret = __bch2_fsck_write_inode(trans, &u); bch_err_msg(c, ret, "in fsck updating inode"); if (ret) - return ret; + goto err_noprint; if (!bpos_eq(new_min_pos, POS_MIN)) bch2_btree_iter_set_pos(iter, bpos_predecessor(new_min_pos)); - return 0; + goto err_noprint; } if (u.bi_flags & BCH_INODE_unlinked) { @@ -1095,7 +1136,7 @@ static int check_inode(struct btree_trans *trans, */ ret = check_inode_deleted_list(trans, k.k->p); if (ret < 0) - return ret; + goto err_noprint; fsck_err_on(!ret, trans, unlinked_inode_not_on_deleted_list, @@ -1106,13 +1147,13 @@ static int check_inode(struct btree_trans *trans, if (ret) goto err; } else { - if (fsck_err_on(bch2_inode_is_open(c, k.k->p), + if (fsck_err_on(!bch2_inode_is_open(c, k.k->p), trans, inode_unlinked_and_not_open, "inode %llu%u unlinked and not open", u.bi_inum, u.bi_snapshot)) { ret = bch2_inode_rm_snapshot(trans, u.bi_inum, iter->pos.snapshot); bch_err_msg(c, ret, "in fsck deleting inode"); - return ret; + goto err_noprint; } } } @@ -1177,12 +1218,6 @@ static int check_inode(struct btree_trans *trans, do_update = true; } - if (u.bi_dir || u.bi_dir_offset) { - ret = check_inode_dirent_inode(trans, &u, &do_update); - if (ret) - goto err; - } - if (fsck_err_on(u.bi_parent_subvol && (u.bi_subvol == 0 || u.bi_subvol == BCACHEFS_ROOT_SUBVOL), @@ -1224,14 +1259,16 @@ static int check_inode(struct btree_trans *trans, } do_update: if (do_update) { - ret = __bch2_fsck_write_inode(trans, &u, iter->pos.snapshot); + ret = __bch2_fsck_write_inode(trans, &u); bch_err_msg(c, ret, "in fsck updating inode"); if (ret) - return ret; + goto err_noprint; } err: fsck_err: bch_err_fn(c, ret); +err_noprint: + printbuf_exit(&buf); return ret; } @@ -1347,7 +1384,7 @@ static int check_i_sectors_notnested(struct btree_trans *trans, struct inode_wal w->last_pos.inode, i->snapshot, i->inode.bi_sectors, i->count)) { i->inode.bi_sectors = i->count; - ret = bch2_fsck_write_inode(trans, &i->inode, i->snapshot); + ret = bch2_fsck_write_inode(trans, &i->inode); if (ret) break; } @@ -1789,7 +1826,7 @@ static int check_subdir_count_notnested(struct btree_trans *trans, struct inode_ "directory %llu:%u with wrong i_nlink: got %u, should be %llu", w->last_pos.inode, i->snapshot, i->inode.bi_nlink, i->count)) { i->inode.bi_nlink = i->count; - ret = bch2_fsck_write_inode(trans, &i->inode, i->snapshot); + ret = bch2_fsck_write_inode(trans, &i->inode); if (ret) break; } @@ -1810,8 +1847,7 @@ noinline_for_stack static int check_dirent_inode_dirent(struct btree_trans *trans, struct btree_iter *iter, struct bkey_s_c_dirent d, - struct bch_inode_unpacked *target, - u32 target_snapshot) + struct bch_inode_unpacked *target) { struct bch_fs *c = trans->c; struct printbuf buf = PRINTBUF; @@ -1821,6 +1857,32 @@ static int check_dirent_inode_dirent(struct btree_trans *trans, if (inode_points_to_dirent(target, d)) return 0; + if (!target->bi_dir && + !target->bi_dir_offset) { + fsck_err_on(S_ISDIR(target->bi_mode), + trans, inode_dir_missing_backpointer, + "directory with missing backpointer\n%s", + (printbuf_reset(&buf), + bch2_bkey_val_to_text(&buf, c, d.s_c), + prt_printf(&buf, "\n"), + bch2_inode_unpacked_to_text(&buf, target), + buf.buf)); + + fsck_err_on(target->bi_flags & BCH_INODE_unlinked, + trans, inode_unlinked_but_has_dirent, + "inode unlinked but has dirent\n%s", + (printbuf_reset(&buf), + bch2_bkey_val_to_text(&buf, c, d.s_c), + prt_printf(&buf, "\n"), + bch2_inode_unpacked_to_text(&buf, target), + buf.buf)); + + target->bi_flags &= ~BCH_INODE_unlinked; + target->bi_dir = d.k->p.inode; + target->bi_dir_offset = d.k->p.offset; + return __bch2_fsck_write_inode(trans, target); + } + if (bch2_inode_should_have_bp(target) && !fsck_err(trans, inode_wrong_backpointer, "dirent points to inode that does not point back:\n %s", @@ -1830,15 +1892,8 @@ static int check_dirent_inode_dirent(struct btree_trans *trans, buf.buf))) goto err; - if (!target->bi_dir && - !target->bi_dir_offset) { - target->bi_dir = d.k->p.inode; - target->bi_dir_offset = d.k->p.offset; - return __bch2_fsck_write_inode(trans, target, target_snapshot); - } - struct bkey_s_c_dirent bp_dirent = dirent_get_by_pos(trans, &bp_iter, - SPOS(target->bi_dir, target->bi_dir_offset, target_snapshot)); + SPOS(target->bi_dir, target->bi_dir_offset, target->bi_snapshot)); ret = bkey_err(bp_dirent); if (ret && !bch2_err_matches(ret, ENOENT)) goto err; @@ -1851,14 +1906,14 @@ static int check_dirent_inode_dirent(struct btree_trans *trans, "inode %llu:%u has wrong backpointer:\n" "got %llu:%llu\n" "should be %llu:%llu", - target->bi_inum, target_snapshot, + target->bi_inum, target->bi_snapshot, target->bi_dir, target->bi_dir_offset, d.k->p.inode, d.k->p.offset)) { target->bi_dir = d.k->p.inode; target->bi_dir_offset = d.k->p.offset; - ret = __bch2_fsck_write_inode(trans, target, target_snapshot); + ret = __bch2_fsck_write_inode(trans, target); goto out; } @@ -1873,7 +1928,7 @@ static int check_dirent_inode_dirent(struct btree_trans *trans, trans, inode_dir_multiple_links, "%s %llu:%u with multiple links\n%s", S_ISDIR(target->bi_mode) ? "directory" : "subvolume", - target->bi_inum, target_snapshot, buf.buf)) { + target->bi_inum, target->bi_snapshot, buf.buf)) { ret = __remove_dirent(trans, d.k->p); goto out; } @@ -1886,10 +1941,10 @@ static int check_dirent_inode_dirent(struct btree_trans *trans, if (fsck_err_on(backpointer_exists && !target->bi_nlink, trans, inode_multiple_links_but_nlink_0, "inode %llu:%u type %s has multiple links but i_nlink 0\n%s", - target->bi_inum, target_snapshot, bch2_d_types[d.v->d_type], buf.buf)) { + target->bi_inum, target->bi_snapshot, bch2_d_types[d.v->d_type], buf.buf)) { target->bi_nlink++; target->bi_flags &= ~BCH_INODE_unlinked; - ret = __bch2_fsck_write_inode(trans, target, target_snapshot); + ret = __bch2_fsck_write_inode(trans, target); if (ret) goto err; } @@ -1906,15 +1961,14 @@ noinline_for_stack static int check_dirent_target(struct btree_trans *trans, struct btree_iter *iter, struct bkey_s_c_dirent d, - struct bch_inode_unpacked *target, - u32 target_snapshot) + struct bch_inode_unpacked *target) { struct bch_fs *c = trans->c; struct bkey_i_dirent *n; struct printbuf buf = PRINTBUF; int ret = 0; - ret = check_dirent_inode_dirent(trans, iter, d, target, target_snapshot); + ret = check_dirent_inode_dirent(trans, iter, d, target); if (ret) goto err; @@ -2073,7 +2127,7 @@ static int check_dirent_to_subvol(struct btree_trans *trans, struct btree_iter * u64 target_inum = le64_to_cpu(s.v->inode); u32 target_snapshot = le32_to_cpu(s.v->snapshot); - ret = lookup_inode(trans, target_inum, &subvol_root, &target_snapshot); + ret = lookup_inode(trans, target_inum, target_snapshot, &subvol_root); if (ret && !bch2_err_matches(ret, ENOENT)) goto err; @@ -2089,13 +2143,13 @@ static int check_dirent_to_subvol(struct btree_trans *trans, struct btree_iter * target_inum, subvol_root.bi_parent_subvol, parent_subvol)) { subvol_root.bi_parent_subvol = parent_subvol; - ret = __bch2_fsck_write_inode(trans, &subvol_root, target_snapshot); + subvol_root.bi_snapshot = le32_to_cpu(s.v->snapshot); + ret = __bch2_fsck_write_inode(trans, &subvol_root); if (ret) goto err; } - ret = check_dirent_target(trans, iter, d, &subvol_root, - target_snapshot); + ret = check_dirent_target(trans, iter, d, &subvol_root); if (ret) goto err; out: @@ -2188,8 +2242,7 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, } darray_for_each(target->inodes, i) { - ret = check_dirent_target(trans, iter, d, - &i->inode, i->snapshot); + ret = check_dirent_target(trans, iter, d, &i->inode); if (ret) goto err; } @@ -2330,7 +2383,7 @@ static int check_root_trans(struct btree_trans *trans) goto err; } - ret = lookup_inode(trans, BCACHEFS_ROOT_INO, &root_inode, &snapshot); + ret = lookup_inode(trans, BCACHEFS_ROOT_INO, snapshot, &root_inode); if (ret && !bch2_err_matches(ret, ENOENT)) return ret; @@ -2343,8 +2396,9 @@ static int check_root_trans(struct btree_trans *trans) bch2_inode_init(c, &root_inode, 0, 0, S_IFDIR|0755, 0, NULL); root_inode.bi_inum = inum; + root_inode.bi_snapshot = snapshot; - ret = __bch2_fsck_write_inode(trans, &root_inode, snapshot); + ret = __bch2_fsck_write_inode(trans, &root_inode); bch_err_msg(c, ret, "writing root inode"); } err: @@ -2511,7 +2565,7 @@ static int check_path(struct btree_trans *trans, pathbuf *p, struct bkey_s_c ino (printbuf_reset(&buf), bch2_bkey_val_to_text(&buf, c, inode_k), buf.buf))) - ret = reattach_inode(trans, &inode, snapshot); + ret = reattach_inode(trans, &inode); goto out; } @@ -2557,7 +2611,7 @@ static int check_path(struct btree_trans *trans, pathbuf *p, struct bkey_s_c ino if (ret) break; - ret = reattach_inode(trans, &inode, snapshot); + ret = reattach_inode(trans, &inode); bch_err_msg(c, ret, "reattaching inode %llu", inode.bi_inum); } break; @@ -2787,7 +2841,7 @@ static int check_nlinks_update_inode(struct btree_trans *trans, struct btree_ite u.bi_inum, bch2_d_types[mode_to_type(u.bi_mode)], bch2_inode_nlink_get(&u), link->count)) { bch2_inode_nlink_set(&u, link->count); - ret = __bch2_fsck_write_inode(trans, &u, k.k->p.snapshot); + ret = __bch2_fsck_write_inode(trans, &u); } fsck_err: return ret; diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c index 753c208896c3..3e5bc01961b8 100644 --- a/fs/bcachefs/inode.c +++ b/fs/bcachefs/inode.c @@ -20,7 +20,7 @@ #include <linux/random.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> #define x(name, ...) #name, const char * const bch2_inode_opts[] = { @@ -327,22 +327,20 @@ int bch2_inode_unpack(struct bkey_s_c k, : bch2_inode_unpack_slowpath(k, unpacked); } -int bch2_inode_peek_nowarn(struct btree_trans *trans, - struct btree_iter *iter, - struct bch_inode_unpacked *inode, - subvol_inum inum, unsigned flags) +int __bch2_inode_peek(struct btree_trans *trans, + struct btree_iter *iter, + struct bch_inode_unpacked *inode, + subvol_inum inum, unsigned flags, + bool warn) { - struct bkey_s_c k; u32 snapshot; - int ret; - - ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot); + int ret = __bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot, warn); if (ret) return ret; - k = bch2_bkey_get_iter(trans, iter, BTREE_ID_inodes, - SPOS(0, inum.inum, snapshot), - flags|BTREE_ITER_cached); + struct bkey_s_c k = bch2_bkey_get_iter(trans, iter, BTREE_ID_inodes, + SPOS(0, inum.inum, snapshot), + flags|BTREE_ITER_cached); ret = bkey_err(k); if (ret) return ret; @@ -357,20 +355,12 @@ int bch2_inode_peek_nowarn(struct btree_trans *trans, return 0; err: + if (warn) + bch_err_msg(trans->c, ret, "looking up inum %llu:%llu:", inum.subvol, inum.inum); bch2_trans_iter_exit(trans, iter); return ret; } -int bch2_inode_peek(struct btree_trans *trans, - struct btree_iter *iter, - struct bch_inode_unpacked *inode, - subvol_inum inum, unsigned flags) -{ - int ret = bch2_inode_peek_nowarn(trans, iter, inode, inum, flags); - bch_err_msg(trans->c, ret, "looking up inum %llu:%llu:", inum.subvol, inum.inum); - return ret; -} - int bch2_inode_write_flags(struct btree_trans *trans, struct btree_iter *iter, struct bch_inode_unpacked *inode, @@ -387,9 +377,7 @@ int bch2_inode_write_flags(struct btree_trans *trans, return bch2_trans_update(trans, iter, &inode_p->inode.k_i, flags); } -int __bch2_fsck_write_inode(struct btree_trans *trans, - struct bch_inode_unpacked *inode, - u32 snapshot) +int __bch2_fsck_write_inode(struct btree_trans *trans, struct bch_inode_unpacked *inode) { struct bkey_inode_buf *inode_p = bch2_trans_kmalloc(trans, sizeof(*inode_p)); @@ -398,19 +386,17 @@ int __bch2_fsck_write_inode(struct btree_trans *trans, return PTR_ERR(inode_p); bch2_inode_pack(inode_p, inode); - inode_p->inode.k.p.snapshot = snapshot; + inode_p->inode.k.p.snapshot = inode->bi_snapshot; return bch2_btree_insert_nonextent(trans, BTREE_ID_inodes, &inode_p->inode.k_i, BTREE_UPDATE_internal_snapshot_node); } -int bch2_fsck_write_inode(struct btree_trans *trans, - struct bch_inode_unpacked *inode, - u32 snapshot) +int bch2_fsck_write_inode(struct btree_trans *trans, struct bch_inode_unpacked *inode) { int ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, - __bch2_fsck_write_inode(trans, inode, snapshot)); + __bch2_fsck_write_inode(trans, inode)); bch_err_fn(trans->c, ret); return ret; } diff --git a/fs/bcachefs/inode.h b/fs/bcachefs/inode.h index 695abd707cb6..9c1f67705684 100644 --- a/fs/bcachefs/inode.h +++ b/fs/bcachefs/inode.h @@ -97,10 +97,26 @@ struct bkey_i *bch2_inode_to_v3(struct btree_trans *, struct bkey_i *); void bch2_inode_unpacked_to_text(struct printbuf *, struct bch_inode_unpacked *); -int bch2_inode_peek_nowarn(struct btree_trans *, struct btree_iter *, - struct bch_inode_unpacked *, subvol_inum, unsigned); -int bch2_inode_peek(struct btree_trans *, struct btree_iter *, - struct bch_inode_unpacked *, subvol_inum, unsigned); +int __bch2_inode_peek(struct btree_trans *, struct btree_iter *, + struct bch_inode_unpacked *, subvol_inum, unsigned, bool); + +static inline int bch2_inode_peek_nowarn(struct btree_trans *trans, + struct btree_iter *iter, + struct bch_inode_unpacked *inode, + subvol_inum inum, unsigned flags) +{ + return __bch2_inode_peek(trans, iter, inode, inum, flags, false); +} + +static inline int bch2_inode_peek(struct btree_trans *trans, + struct btree_iter *iter, + struct bch_inode_unpacked *inode, + subvol_inum inum, unsigned flags) +{ + return __bch2_inode_peek(trans, iter, inode, inum, flags, true); + int ret = bch2_inode_peek_nowarn(trans, iter, inode, inum, flags); + return ret; +} int bch2_inode_write_flags(struct btree_trans *, struct btree_iter *, struct bch_inode_unpacked *, enum btree_iter_update_trigger_flags); @@ -112,8 +128,8 @@ static inline int bch2_inode_write(struct btree_trans *trans, return bch2_inode_write_flags(trans, iter, inode, 0); } -int __bch2_fsck_write_inode(struct btree_trans *, struct bch_inode_unpacked *, u32); -int bch2_fsck_write_inode(struct btree_trans *, struct bch_inode_unpacked *, u32); +int __bch2_fsck_write_inode(struct btree_trans *, struct bch_inode_unpacked *); +int bch2_fsck_write_inode(struct btree_trans *, struct bch_inode_unpacked *); void bch2_inode_init_early(struct bch_fs *, struct bch_inode_unpacked *); diff --git a/fs/bcachefs/io_misc.c b/fs/bcachefs/io_misc.c index 177ed331c00b..307ed0a45184 100644 --- a/fs/bcachefs/io_misc.c +++ b/fs/bcachefs/io_misc.c @@ -224,13 +224,14 @@ void bch2_logged_op_truncate_to_text(struct printbuf *out, struct bch_fs *c, str static int truncate_set_isize(struct btree_trans *trans, subvol_inum inum, - u64 new_i_size) + u64 new_i_size, + bool warn) { struct btree_iter iter = { NULL }; struct bch_inode_unpacked inode_u; int ret; - ret = bch2_inode_peek(trans, &iter, &inode_u, inum, BTREE_ITER_intent) ?: + ret = __bch2_inode_peek(trans, &iter, &inode_u, inum, BTREE_ITER_intent, warn) ?: (inode_u.bi_size = new_i_size, 0) ?: bch2_inode_write(trans, &iter, &inode_u); @@ -247,10 +248,11 @@ static int __bch2_resume_logged_op_truncate(struct btree_trans *trans, struct bkey_i_logged_op_truncate *op = bkey_i_to_logged_op_truncate(op_k); subvol_inum inum = { le32_to_cpu(op->v.subvol), le64_to_cpu(op->v.inum) }; u64 new_i_size = le64_to_cpu(op->v.new_i_size); + bool warn_errors = i_sectors_delta != NULL; int ret; ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, - truncate_set_isize(trans, inum, new_i_size)); + truncate_set_isize(trans, inum, new_i_size, i_sectors_delta != NULL)); if (ret) goto err; @@ -263,8 +265,8 @@ static int __bch2_resume_logged_op_truncate(struct btree_trans *trans, if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) ret = 0; err: - bch2_logged_op_finish(trans, op_k); - bch_err_fn(c, ret); + if (warn_errors) + bch_err_fn(c, ret); return ret; } @@ -288,9 +290,14 @@ int bch2_truncate(struct bch_fs *c, subvol_inum inum, u64 new_i_size, u64 *i_sec * resume only proceeding in one of the snapshots */ down_read(&c->snapshot_create_lock); - int ret = bch2_trans_run(c, - bch2_logged_op_start(trans, &op.k_i) ?: - __bch2_resume_logged_op_truncate(trans, &op.k_i, i_sectors_delta)); + struct btree_trans *trans = bch2_trans_get(c); + int ret = bch2_logged_op_start(trans, &op.k_i); + if (ret) + goto out; + ret = __bch2_resume_logged_op_truncate(trans, &op.k_i, i_sectors_delta); + ret = bch2_logged_op_finish(trans, &op.k_i) ?: ret; +out: + bch2_trans_put(trans); up_read(&c->snapshot_create_lock); return ret; @@ -308,7 +315,8 @@ void bch2_logged_op_finsert_to_text(struct printbuf *out, struct bch_fs *c, stru prt_printf(out, " src_offset=%llu", le64_to_cpu(op.v->src_offset)); } -static int adjust_i_size(struct btree_trans *trans, subvol_inum inum, u64 offset, s64 len) +static int adjust_i_size(struct btree_trans *trans, subvol_inum inum, + u64 offset, s64 len, bool warn) { struct btree_iter iter; struct bch_inode_unpacked inode_u; @@ -317,7 +325,7 @@ static int adjust_i_size(struct btree_trans *trans, subvol_inum inum, u64 offset offset <<= 9; len <<= 9; - ret = bch2_inode_peek(trans, &iter, &inode_u, inum, BTREE_ITER_intent); + ret = __bch2_inode_peek(trans, &iter, &inode_u, inum, BTREE_ITER_intent, warn); if (ret) return ret; @@ -357,12 +365,22 @@ static int __bch2_resume_logged_op_finsert(struct btree_trans *trans, u64 len = abs(shift); u64 pos = le64_to_cpu(op->v.pos); bool insert = shift > 0; + u32 snapshot; + bool warn_errors = i_sectors_delta != NULL; int ret = 0; ret = bch2_inum_opts_get(trans, inum, &opts); if (ret) return ret; + /* + * check for missing subvolume before fpunch, as in resume we don't want + * it to be a fatal error + */ + ret = __bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot, warn_errors); + if (ret) + return ret; + bch2_trans_iter_init(trans, &iter, BTREE_ID_extents, POS(inum.inum, 0), BTREE_ITER_intent); @@ -373,7 +391,7 @@ case LOGGED_OP_FINSERT_start: if (insert) { ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, - adjust_i_size(trans, inum, src_offset, len) ?: + adjust_i_size(trans, inum, src_offset, len, warn_errors) ?: bch2_logged_op_update(trans, &op->k_i)); if (ret) goto err; @@ -396,11 +414,11 @@ case LOGGED_OP_FINSERT_shift_extents: struct bkey_i delete, *copy; struct bkey_s_c k; struct bpos src_pos = POS(inum.inum, src_offset); - u32 snapshot; bch2_trans_begin(trans); - ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot); + ret = __bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot, + warn_errors); if (ret) goto btree_err; @@ -463,12 +481,12 @@ btree_err: if (!insert) { ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, - adjust_i_size(trans, inum, src_offset, shift) ?: + adjust_i_size(trans, inum, src_offset, shift, warn_errors) ?: bch2_logged_op_update(trans, &op->k_i)); } else { /* We need an inode update to update bi_journal_seq for fsync: */ ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, - adjust_i_size(trans, inum, 0, 0) ?: + adjust_i_size(trans, inum, 0, 0, warn_errors) ?: bch2_logged_op_update(trans, &op->k_i)); } @@ -477,9 +495,9 @@ case LOGGED_OP_FINSERT_finish: break; } err: - bch_err_fn(c, ret); - bch2_logged_op_finish(trans, op_k); bch2_trans_iter_exit(trans, &iter); + if (warn_errors) + bch_err_fn(c, ret); return ret; } @@ -508,9 +526,14 @@ int bch2_fcollapse_finsert(struct bch_fs *c, subvol_inum inum, * resume only proceeding in one of the snapshots */ down_read(&c->snapshot_create_lock); - int ret = bch2_trans_run(c, - bch2_logged_op_start(trans, &op.k_i) ?: - __bch2_resume_logged_op_finsert(trans, &op.k_i, i_sectors_delta)); + struct btree_trans *trans = bch2_trans_get(c); + int ret = bch2_logged_op_start(trans, &op.k_i); + if (ret) + goto out; + ret = __bch2_resume_logged_op_finsert(trans, &op.k_i, i_sectors_delta); + ret = bch2_logged_op_finish(trans, &op.k_i) ?: ret; +out: + bch2_trans_put(trans); up_read(&c->snapshot_create_lock); return ret; diff --git a/fs/bcachefs/logged_ops.c b/fs/bcachefs/logged_ops.c index 6f4a4e1083c9..60e00702d1a4 100644 --- a/fs/bcachefs/logged_ops.c +++ b/fs/bcachefs/logged_ops.c @@ -34,8 +34,6 @@ static int resume_logged_op(struct btree_trans *trans, struct btree_iter *iter, struct bkey_s_c k) { struct bch_fs *c = trans->c; - const struct bch_logged_op_fn *fn = logged_op_fn(k.k->type); - struct bkey_buf sk; u32 restart_count = trans->restart_count; struct printbuf buf = PRINTBUF; int ret = 0; @@ -46,13 +44,15 @@ static int resume_logged_op(struct btree_trans *trans, struct btree_iter *iter, (bch2_bkey_val_to_text(&buf, c, k), buf.buf)); - if (!fn) - return 0; - + struct bkey_buf sk; bch2_bkey_buf_init(&sk); bch2_bkey_buf_reassemble(&sk, c, k); - fn->resume(trans, sk.k); + const struct bch_logged_op_fn *fn = logged_op_fn(sk.k->k.type); + if (fn) + fn->resume(trans, sk.k); + + ret = bch2_logged_op_finish(trans, sk.k); bch2_bkey_buf_exit(&sk, c); fsck_err: @@ -93,7 +93,7 @@ int bch2_logged_op_start(struct btree_trans *trans, struct bkey_i *k) __bch2_logged_op_start(trans, k)); } -void bch2_logged_op_finish(struct btree_trans *trans, struct bkey_i *k) +int bch2_logged_op_finish(struct btree_trans *trans, struct bkey_i *k) { int ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, bch2_btree_delete(trans, BTREE_ID_logged_ops, k->k.p, 0)); @@ -113,4 +113,6 @@ void bch2_logged_op_finish(struct btree_trans *trans, struct bkey_i *k) buf.buf, bch2_err_str(ret)); printbuf_exit(&buf); } + + return ret; } diff --git a/fs/bcachefs/logged_ops.h b/fs/bcachefs/logged_ops.h index 4d1e786a27a8..30ae9ef737dd 100644 --- a/fs/bcachefs/logged_ops.h +++ b/fs/bcachefs/logged_ops.h @@ -15,6 +15,6 @@ static inline int bch2_logged_op_update(struct btree_trans *trans, struct bkey_i int bch2_resume_logged_ops(struct bch_fs *); int bch2_logged_op_start(struct btree_trans *, struct bkey_i *); -void bch2_logged_op_finish(struct btree_trans *, struct bkey_i *); +int bch2_logged_op_finish(struct btree_trans *, struct bkey_i *); #endif /* _BCACHEFS_LOGGED_OPS_H */ diff --git a/fs/bcachefs/lru.c b/fs/bcachefs/lru.c index 96f2f4f8c397..10857eccdeaf 100644 --- a/fs/bcachefs/lru.c +++ b/fs/bcachefs/lru.c @@ -2,6 +2,7 @@ #include "bcachefs.h" #include "alloc_background.h" +#include "bkey_buf.h" #include "btree_iter.h" #include "btree_update.h" #include "btree_write_buffer.h" @@ -118,7 +119,7 @@ fsck_err: static int bch2_check_lru_key(struct btree_trans *trans, struct btree_iter *lru_iter, struct bkey_s_c lru_k, - struct bpos *last_flushed_pos) + struct bkey_buf *last_flushed) { struct bch_fs *c = trans->c; struct btree_iter iter; @@ -132,11 +133,13 @@ static int bch2_check_lru_key(struct btree_trans *trans, u64 idx; int ret; - if (fsck_err_on(!bch2_dev_bucket_exists(c, alloc_pos), + struct bch_dev *ca = bch2_dev_bucket_tryget_noerror(c, alloc_pos); + + if (fsck_err_on(!ca, trans, lru_entry_to_invalid_bucket, "lru key points to nonexistent device:bucket %llu:%llu", alloc_pos.inode, alloc_pos.offset)) - return bch2_btree_delete_at(trans, lru_iter, 0); + return bch2_btree_bit_mod_buffered(trans, BTREE_ID_lru, lru_iter->pos, false); k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_alloc, alloc_pos, 0); ret = bkey_err(k); @@ -150,18 +153,15 @@ static int bch2_check_lru_key(struct btree_trans *trans, idx = alloc_lru_idx_read(*a); break; case BCH_LRU_fragmentation: - idx = a->fragmentation_lru; + idx = alloc_lru_idx_fragmentation(*a, ca); break; } if (lru_k.k->type != KEY_TYPE_set || lru_pos_time(lru_k.k->p) != idx) { - if (!bpos_eq(*last_flushed_pos, lru_k.k->p)) { - *last_flushed_pos = lru_k.k->p; - ret = bch2_btree_write_buffer_flush_sync(trans) ?: - -BCH_ERR_transaction_restart_write_buffer_flush; - goto out; - } + ret = bch2_btree_write_buffer_maybe_flush(trans, lru_k, last_flushed); + if (ret) + goto err; if (fsck_err(trans, lru_entry_bad, "incorrect lru entry: lru %s time %llu\n" @@ -171,12 +171,12 @@ static int bch2_check_lru_key(struct btree_trans *trans, lru_pos_time(lru_k.k->p), (bch2_bkey_val_to_text(&buf1, c, lru_k), buf1.buf), (bch2_bkey_val_to_text(&buf2, c, k), buf2.buf))) - ret = bch2_btree_delete_at(trans, lru_iter, 0); + ret = bch2_btree_bit_mod_buffered(trans, BTREE_ID_lru, lru_iter->pos, false); } -out: err: fsck_err: bch2_trans_iter_exit(trans, &iter); + bch2_dev_put(ca); printbuf_exit(&buf2); printbuf_exit(&buf1); return ret; @@ -184,12 +184,18 @@ fsck_err: int bch2_check_lrus(struct bch_fs *c) { - struct bpos last_flushed_pos = POS_MIN; + struct bkey_buf last_flushed; + + bch2_bkey_buf_init(&last_flushed); + bkey_init(&last_flushed.k->k); + int ret = bch2_trans_run(c, for_each_btree_key_commit(trans, iter, BTREE_ID_lru, POS_MIN, BTREE_ITER_prefetch, k, NULL, NULL, BCH_TRANS_COMMIT_no_enospc|BCH_TRANS_COMMIT_lazy_rw, - bch2_check_lru_key(trans, &iter, k, &last_flushed_pos))); + bch2_check_lru_key(trans, &iter, k, &last_flushed))); + + bch2_bkey_buf_exit(&last_flushed, c); bch_err_fn(c, ret); return ret; diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c index 7d3920e03742..8c456d8b8b99 100644 --- a/fs/bcachefs/move.c +++ b/fs/bcachefs/move.c @@ -692,7 +692,7 @@ int bch2_evacuate_bucket(struct moving_context *ctxt, a = bch2_alloc_to_v4(k, &a_convert); dirty_sectors = bch2_bucket_sectors_dirty(*a); bucket_size = ca->mi.bucket_size; - fragmentation = a->fragmentation_lru; + fragmentation = alloc_lru_idx_fragmentation(*a, ca); ret = bch2_btree_write_buffer_tryflush(trans); bch_err_msg(c, ret, "flushing btree write buffer"); diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c index d86565bf07c8..d658be90f737 100644 --- a/fs/bcachefs/movinggc.c +++ b/fs/bcachefs/movinggc.c @@ -73,6 +73,7 @@ move_bucket_in_flight_add(struct buckets_in_flight *list, struct move_bucket b) static int bch2_bucket_is_movable(struct btree_trans *trans, struct move_bucket *b, u64 time) { + struct bch_fs *c = trans->c; struct btree_iter iter; struct bkey_s_c k; struct bch_alloc_v4 _a; @@ -90,14 +91,19 @@ static int bch2_bucket_is_movable(struct btree_trans *trans, if (ret) return ret; + struct bch_dev *ca = bch2_dev_tryget(c, k.k->p.inode); + if (!ca) + goto out; + a = bch2_alloc_to_v4(k, &_a); b->k.gen = a->gen; b->sectors = bch2_bucket_sectors_dirty(*a); + u64 lru_idx = alloc_lru_idx_fragmentation(*a, ca); - ret = data_type_movable(a->data_type) && - a->fragmentation_lru && - a->fragmentation_lru <= time; + ret = lru_idx && lru_idx <= time; + bch2_dev_put(ca); +out: bch2_trans_iter_exit(trans, &iter); return ret; } diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h index ed5dca5e1161..4135b1ea2fec 100644 --- a/fs/bcachefs/sb-errors_format.h +++ b/fs/bcachefs/sb-errors_format.h @@ -115,8 +115,8 @@ enum bch_fsck_flags { x(alloc_key_data_type_inconsistency, 101, 0) \ x(alloc_key_to_missing_dev_bucket, 102, 0) \ x(alloc_key_cached_inconsistency, 103, 0) \ - x(alloc_key_cached_but_read_time_zero, 104, 0) \ - x(alloc_key_to_missing_lru_entry, 105, 0) \ + x(alloc_key_cached_but_read_time_zero, 104, FSCK_AUTOFIX) \ + x(alloc_key_to_missing_lru_entry, 105, FSCK_AUTOFIX) \ x(alloc_key_data_type_wrong, 106, FSCK_AUTOFIX) \ x(alloc_key_gen_wrong, 107, FSCK_AUTOFIX) \ x(alloc_key_dirty_sectors_wrong, 108, FSCK_AUTOFIX) \ @@ -129,20 +129,20 @@ enum bch_fsck_flags { x(freespace_key_wrong, 115, 0) \ x(freespace_hole_missing, 116, 0) \ x(bucket_gens_val_size_bad, 117, 0) \ - x(bucket_gens_key_wrong, 118, 0) \ - x(bucket_gens_hole_wrong, 119, 0) \ - x(bucket_gens_to_invalid_dev, 120, 0) \ - x(bucket_gens_to_invalid_buckets, 121, 0) \ - x(bucket_gens_nonzero_for_invalid_buckets, 122, 0) \ + x(bucket_gens_key_wrong, 118, FSCK_AUTOFIX) \ + x(bucket_gens_hole_wrong, 119, FSCK_AUTOFIX) \ + x(bucket_gens_to_invalid_dev, 120, FSCK_AUTOFIX) \ + x(bucket_gens_to_invalid_buckets, 121, FSCK_AUTOFIX) \ + x(bucket_gens_nonzero_for_invalid_buckets, 122, FSCK_AUTOFIX) \ x(need_discard_freespace_key_to_invalid_dev_bucket, 123, 0) \ x(need_discard_freespace_key_bad, 124, 0) \ x(backpointer_bucket_offset_wrong, 125, 0) \ x(backpointer_to_missing_device, 126, 0) \ x(backpointer_to_missing_alloc, 127, 0) \ x(backpointer_to_missing_ptr, 128, 0) \ - x(lru_entry_at_time_0, 129, 0) \ - x(lru_entry_to_invalid_bucket, 130, 0) \ - x(lru_entry_bad, 131, 0) \ + x(lru_entry_at_time_0, 129, FSCK_AUTOFIX) \ + x(lru_entry_to_invalid_bucket, 130, FSCK_AUTOFIX) \ + x(lru_entry_bad, 131, FSCK_AUTOFIX) \ x(btree_ptr_val_too_big, 132, 0) \ x(btree_ptr_v2_val_too_big, 133, 0) \ x(btree_ptr_has_non_ptr, 134, 0) \ @@ -158,9 +158,9 @@ enum bch_fsck_flags { x(ptr_after_last_bucket, 144, 0) \ x(ptr_before_first_bucket, 145, 0) \ x(ptr_spans_multiple_buckets, 146, 0) \ - x(ptr_to_missing_backpointer, 147, 0) \ - x(ptr_to_missing_alloc_key, 148, 0) \ - x(ptr_to_missing_replicas_entry, 149, 0) \ + x(ptr_to_missing_backpointer, 147, FSCK_AUTOFIX) \ + x(ptr_to_missing_alloc_key, 148, FSCK_AUTOFIX) \ + x(ptr_to_missing_replicas_entry, 149, FSCK_AUTOFIX) \ x(ptr_to_missing_stripe, 150, 0) \ x(ptr_to_incorrect_stripe, 151, 0) \ x(ptr_gen_newer_than_bucket_gen, 152, 0) \ @@ -194,7 +194,7 @@ enum bch_fsck_flags { x(snapshot_skiplist_not_normalized, 180, 0) \ x(snapshot_skiplist_bad, 181, 0) \ x(snapshot_should_not_have_subvol, 182, 0) \ - x(snapshot_to_bad_snapshot_tree, 183, 0) \ + x(snapshot_to_bad_snapshot_tree, 183, FSCK_AUTOFIX) \ x(snapshot_bad_depth, 184, 0) \ x(snapshot_bad_skiplist, 185, 0) \ x(subvol_pos_bad, 186, 0) \ @@ -211,6 +211,7 @@ enum bch_fsck_flags { x(inode_unlinked_but_clean, 197, 0) \ x(inode_unlinked_but_nlink_nonzero, 198, 0) \ x(inode_unlinked_and_not_open, 281, 0) \ + x(inode_unlinked_but_has_dirent, 285, 0) \ x(inode_checksum_type_invalid, 199, 0) \ x(inode_compression_type_invalid, 200, 0) \ x(inode_subvol_root_but_not_dir, 201, 0) \ @@ -219,6 +220,8 @@ enum bch_fsck_flags { x(inode_i_sectors_wrong, 204, FSCK_AUTOFIX) \ x(inode_dir_wrong_nlink, 205, FSCK_AUTOFIX) \ x(inode_dir_multiple_links, 206, FSCK_AUTOFIX) \ + x(inode_dir_missing_backpointer, 284, FSCK_AUTOFIX) \ + x(inode_dir_unlinked_but_not_empty, 286, FSCK_AUTOFIX) \ x(inode_multiple_links_but_nlink_0, 207, FSCK_AUTOFIX) \ x(inode_wrong_backpointer, 208, FSCK_AUTOFIX) \ x(inode_wrong_nlink, 209, FSCK_AUTOFIX) \ @@ -295,7 +298,7 @@ enum bch_fsck_flags { x(accounting_key_replicas_devs_unsorted, 280, FSCK_AUTOFIX) \ x(accounting_key_version_0, 282, FSCK_AUTOFIX) \ x(logged_op_but_clean, 283, FSCK_AUTOFIX) \ - x(MAX, 284, 0) + x(MAX, 287, 0) enum bch_sb_error_id { #define x(t, n, ...) BCH_FSCK_ERR_##t = n, diff --git a/fs/bcachefs/siphash.c b/fs/bcachefs/siphash.c index dc1a27cc31cd..a1cc44e66c7e 100644 --- a/fs/bcachefs/siphash.c +++ b/fs/bcachefs/siphash.c @@ -45,7 +45,7 @@ */ #include <asm/byteorder.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include <linux/bitops.h> #include <linux/string.h> diff --git a/fs/bcachefs/subvolume.c b/fs/bcachefs/subvolume.c index 6845dde1b339..91d8187ee168 100644 --- a/fs/bcachefs/subvolume.c +++ b/fs/bcachefs/subvolume.c @@ -102,7 +102,8 @@ static int check_subvol(struct btree_trans *trans, inode.bi_inum, inode.bi_snapshot, inode.bi_subvol, subvol.k->p.offset)) { inode.bi_subvol = subvol.k->p.offset; - ret = __bch2_fsck_write_inode(trans, &inode, le32_to_cpu(subvol.v->snapshot)); + inode.bi_snapshot = le32_to_cpu(subvol.v->snapshot); + ret = __bch2_fsck_write_inode(trans, &inode); if (ret) goto err; } @@ -331,8 +332,8 @@ int bch2_snapshot_get_subvol(struct btree_trans *trans, u32 snapshot, bch2_subvolume_get(trans, le32_to_cpu(snap.subvol), true, 0, subvol); } -int bch2_subvolume_get_snapshot(struct btree_trans *trans, u32 subvolid, - u32 *snapid) +int __bch2_subvolume_get_snapshot(struct btree_trans *trans, u32 subvolid, + u32 *snapid, bool warn) { struct btree_iter iter; struct bkey_s_c_subvolume subvol; @@ -343,7 +344,8 @@ int bch2_subvolume_get_snapshot(struct btree_trans *trans, u32 subvolid, BTREE_ITER_cached|BTREE_ITER_with_updates, subvolume); ret = bkey_err(subvol); - bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), trans->c, + + bch2_fs_inconsistent_on(warn && bch2_err_matches(ret, ENOENT), trans->c, "missing subvolume %u", subvolid); if (likely(!ret)) @@ -352,6 +354,12 @@ int bch2_subvolume_get_snapshot(struct btree_trans *trans, u32 subvolid, return ret; } +int bch2_subvolume_get_snapshot(struct btree_trans *trans, u32 subvolid, + u32 *snapid) +{ + return __bch2_subvolume_get_snapshot(trans, subvolid, snapid, true); +} + static int bch2_subvolume_reparent(struct btree_trans *trans, struct btree_iter *iter, struct bkey_s_c k, diff --git a/fs/bcachefs/subvolume.h b/fs/bcachefs/subvolume.h index e62f876541fe..f897d106e142 100644 --- a/fs/bcachefs/subvolume.h +++ b/fs/bcachefs/subvolume.h @@ -26,6 +26,8 @@ int bch2_subvolume_trigger(struct btree_trans *, enum btree_id, unsigned, int bch2_subvol_has_children(struct btree_trans *, u32); int bch2_subvolume_get(struct btree_trans *, unsigned, bool, int, struct bch_subvolume *); +int __bch2_subvolume_get_snapshot(struct btree_trans *, u32, + u32 *, bool); int bch2_subvolume_get_snapshot(struct btree_trans *, u32, u32 *); int bch2_subvol_is_ro_trans(struct btree_trans *, u32); diff --git a/fs/bcachefs/util.c b/fs/bcachefs/util.c index 42f565c76181..e0a876cbaa6b 100644 --- a/fs/bcachefs/util.c +++ b/fs/bcachefs/util.c @@ -222,7 +222,7 @@ u64 bch2_read_flag_list(const char *opt, const char * const list[]) break; } - ret |= 1 << flag; + ret |= BIT_ULL(flag); } kfree(d); diff --git a/fs/bcachefs/varint.c b/fs/bcachefs/varint.c index a9ebcd82c602..6a78553d9b0c 100644 --- a/fs/bcachefs/varint.c +++ b/fs/bcachefs/varint.c @@ -3,7 +3,7 @@ #include <linux/bitops.h> #include <linux/math.h> #include <linux/string.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> #ifdef CONFIG_VALGRIND #include <valgrind/memcheck.h> diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c index cd6d5bbb4b9d..390808ce935d 100644 --- a/fs/binfmt_flat.c +++ b/fs/binfmt_flat.c @@ -39,7 +39,7 @@ #include <linux/vmalloc.h> #include <asm/byteorder.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include <asm/cacheflush.h> #include <asm/page.h> #include <asm/flat.h> diff --git a/fs/btrfs/accessors.c b/fs/btrfs/accessors.c index 79026917db19..e3716516ca38 100644 --- a/fs/btrfs/accessors.c +++ b/fs/btrfs/accessors.c @@ -3,7 +3,7 @@ * Copyright (C) 2007 Oracle. All rights reserved. */ -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include "messages.h" #include "extent_io.h" #include "fs.h" diff --git a/fs/btrfs/accessors.h b/fs/btrfs/accessors.h index b2eb9cde2c5d..7a7e0ef69973 100644 --- a/fs/btrfs/accessors.h +++ b/fs/btrfs/accessors.h @@ -3,7 +3,7 @@ #ifndef BTRFS_ACCESSORS_H #define BTRFS_ACCESSORS_H -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include <linux/stddef.h> #include <linux/types.h> #include <linux/align.h> diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index e2f478ecd7fd..f8e1d5b2c512 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -3179,10 +3179,14 @@ void btrfs_backref_release_cache(struct btrfs_backref_cache *cache) btrfs_backref_cleanup_node(cache, node); } - cache->last_trans = 0; - - for (i = 0; i < BTRFS_MAX_LEVEL; i++) - ASSERT(list_empty(&cache->pending[i])); + for (i = 0; i < BTRFS_MAX_LEVEL; i++) { + while (!list_empty(&cache->pending[i])) { + node = list_first_entry(&cache->pending[i], + struct btrfs_backref_node, + list); + btrfs_backref_cleanup_node(cache, node); + } + } ASSERT(list_empty(&cache->pending_edge)); ASSERT(list_empty(&cache->useless_node)); ASSERT(list_empty(&cache->changed)); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 25d768e67e37..4ad5db619b00 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -17,7 +17,7 @@ #include <linux/error-injection.h> #include <linux/crc32c.h> #include <linux/sched/mm.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include <crypto/hash.h> #include "ctree.h" #include "disk-io.h" @@ -4256,6 +4256,17 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info) btrfs_cleanup_defrag_inodes(fs_info); /* + * Wait for any fixup workers to complete. + * If we don't wait for them here and they are still running by the time + * we call kthread_stop() against the cleaner kthread further below, we + * get an use-after-free on the cleaner because the fixup worker adds an + * inode to the list of delayed iputs and then attempts to wakeup the + * cleaner kthread, which was already stopped and destroyed. We parked + * already the cleaner, but below we run all pending delayed iputs. + */ + btrfs_flush_workqueue(fs_info->fixup_workers); + + /* * After we parked the cleaner kthread, ordered extents may have * completed and created new delayed iputs. If one of the async reclaim * tasks is running and in the RUN_DELAYED_IPUTS flush state, then we diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index edac499fd83d..5618ca02934a 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -32,7 +32,7 @@ #include <linux/migrate.h> #include <linux/sched/mm.h> #include <linux/iomap.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include <linux/fsverity.h> #include "misc.h" #include "ctree.h" @@ -3111,6 +3111,11 @@ int btrfs_finish_one_ordered(struct btrfs_ordered_extent *ordered_extent) ret = btrfs_update_inode_fallback(trans, inode); if (ret) /* -ENOMEM or corruption */ btrfs_abort_transaction(trans, ret); + + ret = btrfs_insert_raid_extent(trans, ordered_extent); + if (ret) + btrfs_abort_transaction(trans, ret); + goto out; } diff --git a/fs/btrfs/messages.c b/fs/btrfs/messages.c index 77752eec125d..363fd28c0268 100644 --- a/fs/btrfs/messages.c +++ b/fs/btrfs/messages.c @@ -239,7 +239,8 @@ void __cold _btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, vaf.fmt = fmt; vaf.va = &args; - if (__ratelimit(ratelimit)) { + /* Do not ratelimit if CONFIG_BTRFS_DEBUG is enabled. */ + if (IS_ENABLED(CONFIG_BTRFS_DEBUG) || __ratelimit(ratelimit)) { if (fs_info) { char statestr[STATE_STRING_BUF_LEN]; diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index ea4ed85919ec..f3834f8d26b4 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -232,70 +232,6 @@ static struct btrfs_backref_node *walk_down_backref( return NULL; } -static void update_backref_node(struct btrfs_backref_cache *cache, - struct btrfs_backref_node *node, u64 bytenr) -{ - struct rb_node *rb_node; - rb_erase(&node->rb_node, &cache->rb_root); - node->bytenr = bytenr; - rb_node = rb_simple_insert(&cache->rb_root, node->bytenr, &node->rb_node); - if (rb_node) - btrfs_backref_panic(cache->fs_info, bytenr, -EEXIST); -} - -/* - * update backref cache after a transaction commit - */ -static int update_backref_cache(struct btrfs_trans_handle *trans, - struct btrfs_backref_cache *cache) -{ - struct btrfs_backref_node *node; - int level = 0; - - if (cache->last_trans == 0) { - cache->last_trans = trans->transid; - return 0; - } - - if (cache->last_trans == trans->transid) - return 0; - - /* - * detached nodes are used to avoid unnecessary backref - * lookup. transaction commit changes the extent tree. - * so the detached nodes are no longer useful. - */ - while (!list_empty(&cache->detached)) { - node = list_entry(cache->detached.next, - struct btrfs_backref_node, list); - btrfs_backref_cleanup_node(cache, node); - } - - while (!list_empty(&cache->changed)) { - node = list_entry(cache->changed.next, - struct btrfs_backref_node, list); - list_del_init(&node->list); - BUG_ON(node->pending); - update_backref_node(cache, node, node->new_bytenr); - } - - /* - * some nodes can be left in the pending list if there were - * errors during processing the pending nodes. - */ - for (level = 0; level < BTRFS_MAX_LEVEL; level++) { - list_for_each_entry(node, &cache->pending[level], list) { - BUG_ON(!node->pending); - if (node->bytenr == node->new_bytenr) - continue; - update_backref_node(cache, node, node->new_bytenr); - } - } - - cache->last_trans = 0; - return 1; -} - static bool reloc_root_is_dead(const struct btrfs_root *root) { /* @@ -551,9 +487,6 @@ static int clone_backref_node(struct btrfs_trans_handle *trans, struct btrfs_backref_edge *new_edge; struct rb_node *rb_node; - if (cache->last_trans > 0) - update_backref_cache(trans, cache); - rb_node = rb_simple_search(&cache->rb_root, src->commit_root->start); if (rb_node) { node = rb_entry(rb_node, struct btrfs_backref_node, rb_node); @@ -923,7 +856,7 @@ int btrfs_update_reloc_root(struct btrfs_trans_handle *trans, btrfs_grab_root(reloc_root); /* root->reloc_root will stay until current relocation finished */ - if (fs_info->reloc_ctl->merge_reloc_tree && + if (fs_info->reloc_ctl && fs_info->reloc_ctl->merge_reloc_tree && btrfs_root_refs(root_item) == 0) { set_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state); /* @@ -3698,11 +3631,9 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc) break; } restart: - if (update_backref_cache(trans, &rc->backref_cache)) { - btrfs_end_transaction(trans); - trans = NULL; - continue; - } + if (rc->backref_cache.last_trans != trans->transid) + btrfs_backref_release_cache(&rc->backref_cache); + rc->backref_cache.last_trans = trans->transid; ret = find_next_extent(rc, path, &key); if (ret < 0) diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 7f48ba6c1c77..27306d98ec43 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -346,8 +346,10 @@ struct name_cache_entry { u64 parent_gen; int ret; int need_later_update; + /* Name length without NUL terminator. */ int name_len; - char name[] __counted_by(name_len); + /* Not NUL terminated. */ + char name[] __counted_by(name_len) __nonstring; }; /* See the comment at lru_cache.h about struct btrfs_lru_cache_entry. */ @@ -2388,7 +2390,7 @@ out_cache: /* * Store the result of the lookup in the name cache. */ - nce = kmalloc(sizeof(*nce) + fs_path_len(dest) + 1, GFP_KERNEL); + nce = kmalloc(sizeof(*nce) + fs_path_len(dest), GFP_KERNEL); if (!nce) { ret = -ENOMEM; goto out; @@ -2400,7 +2402,7 @@ out_cache: nce->parent_gen = *parent_gen; nce->name_len = fs_path_len(dest); nce->ret = ret; - strcpy(nce->name, dest->start); + memcpy(nce->name, dest->start, nce->name_len); if (ino < sctx->send_progress) nce->need_later_update = 0; @@ -6187,8 +6189,29 @@ static int send_write_or_clone(struct send_ctx *sctx, if (ret < 0) return ret; - if (clone_root->offset + num_bytes == info.size) + if (clone_root->offset + num_bytes == info.size) { + /* + * The final size of our file matches the end offset, but it may + * be that its current size is larger, so we have to truncate it + * to any value between the start offset of the range and the + * final i_size, otherwise the clone operation is invalid + * because it's unaligned and it ends before the current EOF. + * We do this truncate to the final i_size when we finish + * processing the inode, but it's too late by then. And here we + * truncate to the start offset of the range because it's always + * sector size aligned while if it were the final i_size it + * would result in dirtying part of a page, filling part of a + * page with zeroes and then having the clone operation at the + * receiver trigger IO and wait for it due to the dirty page. + */ + if (sctx->parent_root != NULL) { + ret = send_truncate(sctx, sctx->cur_ino, + sctx->cur_inode_gen, offset); + if (ret < 0) + return ret; + } goto clone_data; + } write_data: ret = send_extent_data(sctx, path, offset, num_bytes); diff --git a/fs/btrfs/uuid-tree.c b/fs/btrfs/uuid-tree.c index c6399513c66f..aca2861f2187 100644 --- a/fs/btrfs/uuid-tree.c +++ b/fs/btrfs/uuid-tree.c @@ -5,7 +5,7 @@ #include <linux/kthread.h> #include <linux/uuid.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include "messages.h" #include "ctree.h" #include "transaction.h" diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c index f53977169db4..2b3f9935dbb4 100644 --- a/fs/cachefiles/namei.c +++ b/fs/cachefiles/namei.c @@ -595,14 +595,12 @@ static bool cachefiles_open_file(struct cachefiles_object *object, * write and readdir but not lookup or open). */ touch_atime(&file->f_path); - dput(dentry); return true; check_failed: fscache_cookie_lookup_negative(object->cookie); cachefiles_unmark_inode_in_use(object, file); fput(file); - dput(dentry); if (ret == -ESTALE) return cachefiles_create_file(object); return false; @@ -611,7 +609,6 @@ error_fput: fput(file); error: cachefiles_do_unmark_inode_in_use(object, d_inode(dentry)); - dput(dentry); return false; } @@ -654,7 +651,9 @@ bool cachefiles_look_up_object(struct cachefiles_object *object) goto new_file; } - if (!cachefiles_open_file(object, dentry)) + ret = cachefiles_open_file(object, dentry); + dput(dentry); + if (!ret) return false; _leave(" = t [%lu]", file_inode(object->file)->i_ino); diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 53fef258c2bc..c2a9e2cc03de 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -489,8 +489,11 @@ static int ceph_init_request(struct netfs_io_request *rreq, struct file *file) rreq->io_streams[0].sreq_max_len = fsc->mount_options->rsize; out: - if (ret < 0) + if (ret < 0) { + if (got) + ceph_put_cap_refs(ceph_inode(inode), got); kfree(priv); + } return ret; } @@ -2145,7 +2148,7 @@ static int __ceph_pool_perm_get(struct ceph_inode_info *ci, } pool_ns_len = pool_ns ? pool_ns->len : 0; - perm = kmalloc(sizeof(*perm) + pool_ns_len + 1, GFP_NOFS); + perm = kmalloc(struct_size(perm, pool_ns, pool_ns_len + 1), GFP_NOFS); if (!perm) { err = -ENOMEM; goto out_unlock; diff --git a/fs/ceph/export.c b/fs/ceph/export.c index a79f163ae4ed..44451749c544 100644 --- a/fs/ceph/export.c +++ b/fs/ceph/export.c @@ -3,7 +3,7 @@ #include <linux/exportfs.h> #include <linux/slab.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include "super.h" #include "mds_client.h" diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 2508aa8950b7..037eac35a9e0 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -5,7 +5,7 @@ #include <linux/ceph/ceph_debug.h> #include <linux/ceph/osd_client.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include <linux/backing-dev.h> #include <linux/completion.h> #include <linux/exportfs.h> diff --git a/fs/crypto/keyring.c b/fs/crypto/keyring.c index 6681a71625f0..206835e31efa 100644 --- a/fs/crypto/keyring.c +++ b/fs/crypto/keyring.c @@ -18,7 +18,7 @@ * information about these ioctls. */ -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include <crypto/skcipher.h> #include <linux/key-type.h> #include <linux/random.h> @@ -1305,11 +1305,15 @@ int dax_file_unshare(struct inode *inode, loff_t pos, loff_t len, struct iomap_iter iter = { .inode = inode, .pos = pos, - .len = len, .flags = IOMAP_WRITE | IOMAP_UNSHARE | IOMAP_DAX, }; + loff_t size = i_size_read(inode); int ret; + if (pos < 0 || pos >= size) + return 0; + + iter.len = min(len, size - pos); while ((ret = iomap_iter(&iter, ops)) > 0) iter.processed = dax_unshare_iter(&iter); return ret; diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c index d39a1a69fecc..827278525fd9 100644 --- a/fs/ecryptfs/crypto.c +++ b/fs/ecryptfs/crypto.c @@ -21,7 +21,7 @@ #include <linux/file.h> #include <linux/scatterlist.h> #include <linux/slab.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include <linux/kernel.h> #include <linux/xattr.h> #include "ecryptfs_kernel.h" diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 5ed1e4cf6c0b..cbdf82f0183f 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -21,7 +21,7 @@ #include <linux/posix_acl.h> #include <linux/posix_acl_xattr.h> #include <linux/fileattr.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include "ecryptfs_kernel.h" static int lock_parent(struct dentry *dentry, diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c index 287e5d407f08..ceda5555971a 100644 --- a/fs/ecryptfs/mmap.c +++ b/fs/ecryptfs/mmap.c @@ -19,7 +19,7 @@ #include <linux/scatterlist.h> #include <linux/slab.h> #include <linux/xattr.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include "ecryptfs_kernel.h" /* diff --git a/fs/erofs/zmap.c b/fs/erofs/zmap.c index e980e29873a5..1253a8456e59 100644 --- a/fs/erofs/zmap.c +++ b/fs/erofs/zmap.c @@ -4,7 +4,7 @@ * https://www.huawei.com/ */ #include "internal.h" -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include <trace/events/erofs.h> struct z_erofs_maprecorder { diff --git a/fs/exfat/cache.c b/fs/exfat/cache.c index 7cc200d89821..d5ce0ae660ba 100644 --- a/fs/exfat/cache.c +++ b/fs/exfat/cache.c @@ -11,7 +11,7 @@ */ #include <linux/slab.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include <linux/buffer_head.h> #include "exfat_raw.h" diff --git a/fs/exfat/fatent.c b/fs/exfat/fatent.c index 56b870d9cc0d..773c320d68f3 100644 --- a/fs/exfat/fatent.c +++ b/fs/exfat/fatent.c @@ -4,7 +4,7 @@ */ #include <linux/slab.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include <linux/buffer_head.h> #include <linux/blkdev.h> diff --git a/fs/exfat/nls.c b/fs/exfat/nls.c index 1ac011088ce7..d47896a89596 100644 --- a/fs/exfat/nls.c +++ b/fs/exfat/nls.c @@ -6,7 +6,7 @@ #include <linux/string.h> #include <linux/slab.h> #include <linux/buffer_head.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include "exfat_raw.h" #include "exfat_fs.h" diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c index eaa5f5b51f50..b33664f6ce2a 100644 --- a/fs/ext4/fast_commit.c +++ b/fs/ext4/fast_commit.c @@ -379,7 +379,7 @@ void ext4_fc_mark_ineligible(struct super_block *sb, int reason, handle_t *handl */ static int ext4_fc_track_template( handle_t *handle, struct inode *inode, - int (*__fc_track_fn)(struct inode *, void *, bool), + int (*__fc_track_fn)(handle_t *handle, struct inode *, void *, bool), void *args, int enqueue) { bool update = false; @@ -396,7 +396,7 @@ static int ext4_fc_track_template( ext4_fc_reset_inode(inode); ei->i_sync_tid = tid; } - ret = __fc_track_fn(inode, args, update); + ret = __fc_track_fn(handle, inode, args, update); mutex_unlock(&ei->i_fc_lock); if (!enqueue) @@ -420,7 +420,8 @@ struct __track_dentry_update_args { }; /* __track_fn for directory entry updates. Called with ei->i_fc_lock. */ -static int __track_dentry_update(struct inode *inode, void *arg, bool update) +static int __track_dentry_update(handle_t *handle, struct inode *inode, + void *arg, bool update) { struct ext4_fc_dentry_update *node; struct ext4_inode_info *ei = EXT4_I(inode); @@ -435,14 +436,14 @@ static int __track_dentry_update(struct inode *inode, void *arg, bool update) if (IS_ENCRYPTED(dir)) { ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_ENCRYPTED_FILENAME, - NULL); + handle); mutex_lock(&ei->i_fc_lock); return -EOPNOTSUPP; } node = kmem_cache_alloc(ext4_fc_dentry_cachep, GFP_NOFS); if (!node) { - ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_NOMEM, NULL); + ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_NOMEM, handle); mutex_lock(&ei->i_fc_lock); return -ENOMEM; } @@ -454,7 +455,7 @@ static int __track_dentry_update(struct inode *inode, void *arg, bool update) node->fcd_name.name = kmalloc(dentry->d_name.len, GFP_NOFS); if (!node->fcd_name.name) { kmem_cache_free(ext4_fc_dentry_cachep, node); - ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_NOMEM, NULL); + ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_NOMEM, handle); mutex_lock(&ei->i_fc_lock); return -ENOMEM; } @@ -576,7 +577,8 @@ void ext4_fc_track_create(handle_t *handle, struct dentry *dentry) } /* __track_fn for inode tracking */ -static int __track_inode(struct inode *inode, void *arg, bool update) +static int __track_inode(handle_t *handle, struct inode *inode, void *arg, + bool update) { if (update) return -EEXIST; @@ -614,7 +616,8 @@ struct __track_range_args { }; /* __track_fn for tracking data updates */ -static int __track_range(struct inode *inode, void *arg, bool update) +static int __track_range(handle_t *handle, struct inode *inode, void *arg, + bool update) { struct ext4_inode_info *ei = EXT4_I(inode); ext4_lblk_t oldstart; diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index e04eb08b9060..a2704f064361 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -230,8 +230,8 @@ struct ext4_new_flex_group_data { #define MAX_RESIZE_BG 16384 /* - * alloc_flex_gd() allocates a ext4_new_flex_group_data with size of - * @flexbg_size. + * alloc_flex_gd() allocates an ext4_new_flex_group_data that satisfies the + * resizing from @o_group to @n_group, its size is typically @flexbg_size. * * Returns NULL on failure otherwise address of the allocated structure. */ @@ -239,25 +239,27 @@ static struct ext4_new_flex_group_data *alloc_flex_gd(unsigned int flexbg_size, ext4_group_t o_group, ext4_group_t n_group) { ext4_group_t last_group; + unsigned int max_resize_bg; struct ext4_new_flex_group_data *flex_gd; flex_gd = kmalloc(sizeof(*flex_gd), GFP_NOFS); if (flex_gd == NULL) goto out3; - if (unlikely(flexbg_size > MAX_RESIZE_BG)) - flex_gd->resize_bg = MAX_RESIZE_BG; - else - flex_gd->resize_bg = flexbg_size; + max_resize_bg = umin(flexbg_size, MAX_RESIZE_BG); + flex_gd->resize_bg = max_resize_bg; /* Avoid allocating large 'groups' array if not needed */ last_group = o_group | (flex_gd->resize_bg - 1); if (n_group <= last_group) - flex_gd->resize_bg = 1 << fls(n_group - o_group + 1); + flex_gd->resize_bg = 1 << fls(n_group - o_group); else if (n_group - last_group < flex_gd->resize_bg) - flex_gd->resize_bg = 1 << max(fls(last_group - o_group + 1), + flex_gd->resize_bg = 1 << max(fls(last_group - o_group), fls(n_group - last_group)); + if (WARN_ON_ONCE(flex_gd->resize_bg > max_resize_bg)) + flex_gd->resize_bg = max_resize_bg; + flex_gd->groups = kmalloc_array(flex_gd->resize_bg, sizeof(struct ext4_new_group_data), GFP_NOFS); diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index e0e1956dcdd3..7647e9f6e190 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -2559,6 +2559,8 @@ retry: error = ext4_xattr_set_handle(handle, inode, name_index, name, value, value_len, flags); + ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_XATTR, + handle); error2 = ext4_journal_stop(handle); if (error == -ENOSPC && ext4_should_retry_alloc(sb, &retries)) @@ -2566,7 +2568,6 @@ retry: if (error == 0) error = error2; } - ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_XATTR, NULL); return error; } diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 1136539a57a8..47a5c806cf16 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -5,7 +5,7 @@ * Copyright (c) 2012 Samsung Electronics Co., Ltd. * http://www.samsung.com/ */ -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include <linux/fs.h> #include <linux/f2fs_fs.h> #include <linux/sched/signal.h> diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 9756f0f2b7f7..e4d81b8705d1 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -5,7 +5,7 @@ * Copyright (c) 2012 Samsung Electronics Co., Ltd. * http://www.samsung.com/ */ -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include <linux/fs.h> #include <linux/f2fs_fs.h> #include <linux/sched/mm.h> diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 75722bbd6b5f..3852bb66358c 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -19,7 +19,7 @@ #include <linux/uio.h> #include <linux/blkdev.h> #include <linux/backing-dev.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include <linux/random.h> #include <linux/iversion.h> #include "fat.h" diff --git a/fs/file.c b/fs/file.c index 5125607d040a..eb093e736972 100644 --- a/fs/file.c +++ b/fs/file.c @@ -272,59 +272,45 @@ static inline bool fd_is_open(unsigned int fd, const struct fdtable *fdt) return test_bit(fd, fdt->open_fds); } -static unsigned int count_open_files(struct fdtable *fdt) -{ - unsigned int size = fdt->max_fds; - unsigned int i; - - /* Find the last open fd */ - for (i = size / BITS_PER_LONG; i > 0; ) { - if (fdt->open_fds[--i]) - break; - } - i = (i + 1) * BITS_PER_LONG; - return i; -} - /* * Note that a sane fdtable size always has to be a multiple of * BITS_PER_LONG, since we have bitmaps that are sized by this. * - * 'max_fds' will normally already be properly aligned, but it - * turns out that in the close_range() -> __close_range() -> - * unshare_fd() -> dup_fd() -> sane_fdtable_size() we can end - * up having a 'max_fds' value that isn't already aligned. - * - * Rather than make close_range() have to worry about this, - * just make that BITS_PER_LONG alignment be part of a sane - * fdtable size. Becuase that's really what it is. + * punch_hole is optional - when close_range() is asked to unshare + * and close, we don't need to copy descriptors in that range, so + * a smaller cloned descriptor table might suffice if the last + * currently opened descriptor falls into that range. */ -static unsigned int sane_fdtable_size(struct fdtable *fdt, unsigned int max_fds) +static unsigned int sane_fdtable_size(struct fdtable *fdt, struct fd_range *punch_hole) { - unsigned int count; - - count = count_open_files(fdt); - if (max_fds < NR_OPEN_DEFAULT) - max_fds = NR_OPEN_DEFAULT; - return ALIGN(min(count, max_fds), BITS_PER_LONG); + unsigned int last = find_last_bit(fdt->open_fds, fdt->max_fds); + + if (last == fdt->max_fds) + return NR_OPEN_DEFAULT; + if (punch_hole && punch_hole->to >= last && punch_hole->from <= last) { + last = find_last_bit(fdt->open_fds, punch_hole->from); + if (last == punch_hole->from) + return NR_OPEN_DEFAULT; + } + return ALIGN(last + 1, BITS_PER_LONG); } /* - * Allocate a new files structure and copy contents from the - * passed in files structure. - * errorp will be valid only when the returned files_struct is NULL. + * Allocate a new descriptor table and copy contents from the passed in + * instance. Returns a pointer to cloned table on success, ERR_PTR() + * on failure. For 'punch_hole' see sane_fdtable_size(). */ -struct files_struct *dup_fd(struct files_struct *oldf, unsigned int max_fds, int *errorp) +struct files_struct *dup_fd(struct files_struct *oldf, struct fd_range *punch_hole) { struct files_struct *newf; struct file **old_fds, **new_fds; unsigned int open_files, i; struct fdtable *old_fdt, *new_fdt; + int error; - *errorp = -ENOMEM; newf = kmem_cache_alloc(files_cachep, GFP_KERNEL); if (!newf) - goto out; + return ERR_PTR(-ENOMEM); atomic_set(&newf->count, 1); @@ -341,7 +327,7 @@ struct files_struct *dup_fd(struct files_struct *oldf, unsigned int max_fds, int spin_lock(&oldf->file_lock); old_fdt = files_fdtable(oldf); - open_files = sane_fdtable_size(old_fdt, max_fds); + open_files = sane_fdtable_size(old_fdt, punch_hole); /* * Check whether we need to allocate a larger fd array and fd set. @@ -354,14 +340,14 @@ struct files_struct *dup_fd(struct files_struct *oldf, unsigned int max_fds, int new_fdt = alloc_fdtable(open_files - 1); if (!new_fdt) { - *errorp = -ENOMEM; + error = -ENOMEM; goto out_release; } /* beyond sysctl_nr_open; nothing to do */ if (unlikely(new_fdt->max_fds < open_files)) { __free_fdtable(new_fdt); - *errorp = -EMFILE; + error = -EMFILE; goto out_release; } @@ -372,7 +358,7 @@ struct files_struct *dup_fd(struct files_struct *oldf, unsigned int max_fds, int */ spin_lock(&oldf->file_lock); old_fdt = files_fdtable(oldf); - open_files = sane_fdtable_size(old_fdt, max_fds); + open_files = sane_fdtable_size(old_fdt, punch_hole); } copy_fd_bitmaps(new_fdt, old_fdt, open_files / BITS_PER_LONG); @@ -406,8 +392,7 @@ struct files_struct *dup_fd(struct files_struct *oldf, unsigned int max_fds, int out_release: kmem_cache_free(files_cachep, newf); -out: - return NULL; + return ERR_PTR(error); } static struct fdtable *close_files(struct files_struct * files) @@ -748,37 +733,25 @@ int __close_range(unsigned fd, unsigned max_fd, unsigned int flags) if (fd > max_fd) return -EINVAL; - if (flags & CLOSE_RANGE_UNSHARE) { - int ret; - unsigned int max_unshare_fds = NR_OPEN_MAX; + if ((flags & CLOSE_RANGE_UNSHARE) && atomic_read(&cur_fds->count) > 1) { + struct fd_range range = {fd, max_fd}, *punch_hole = ⦥ /* * If the caller requested all fds to be made cloexec we always * copy all of the file descriptors since they still want to * use them. */ - if (!(flags & CLOSE_RANGE_CLOEXEC)) { - /* - * If the requested range is greater than the current - * maximum, we're closing everything so only copy all - * file descriptors beneath the lowest file descriptor. - */ - rcu_read_lock(); - if (max_fd >= last_fd(files_fdtable(cur_fds))) - max_unshare_fds = fd; - rcu_read_unlock(); - } - - ret = unshare_fd(CLONE_FILES, max_unshare_fds, &fds); - if (ret) - return ret; + if (flags & CLOSE_RANGE_CLOEXEC) + punch_hole = NULL; + fds = dup_fd(cur_fds, punch_hole); + if (IS_ERR(fds)) + return PTR_ERR(fds); /* * We used to share our file descriptor table, and have now * created a private one, make sure we're using it below. */ - if (fds) - swap(cur_fds, fds); + swap(cur_fds, fds); } if (flags & CLOSE_RANGE_CLOEXEC) diff --git a/fs/hfsplus/wrapper.c b/fs/hfsplus/wrapper.c index ce9346099c72..9592ffcb44e5 100644 --- a/fs/hfsplus/wrapper.c +++ b/fs/hfsplus/wrapper.c @@ -12,7 +12,7 @@ #include <linux/fs.h> #include <linux/blkdev.h> #include <linux/cdrom.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include "hfsplus_fs.h" #include "hfsplus_raw.h" diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h index f5a2476c47bf..237c1c23e855 100644 --- a/fs/hpfs/hpfs_fn.h +++ b/fs/hpfs/hpfs_fn.h @@ -21,7 +21,7 @@ #include <linux/slab.h> #include <linux/sched/signal.h> #include <linux/blkdev.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include "hpfs.h" diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index 11ea747228ae..78ebd265f425 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -1321,7 +1321,7 @@ static loff_t iomap_unshare_iter(struct iomap_iter *iter) return length; /* - * Don't bother with holes or unwritten extents. + * Don't bother with delalloc reservations, holes or unwritten extents. * * Note that we use srcmap directly instead of iomap_iter_srcmap as * unsharing requires providing a separate source map, and the presence @@ -1330,6 +1330,7 @@ static loff_t iomap_unshare_iter(struct iomap_iter *iter) * fork for XFS. */ if (iter->srcmap.type == IOMAP_HOLE || + iter->srcmap.type == IOMAP_DELALLOC || iter->srcmap.type == IOMAP_UNWRITTEN) return length; @@ -1374,11 +1375,15 @@ iomap_file_unshare(struct inode *inode, loff_t pos, loff_t len, struct iomap_iter iter = { .inode = inode, .pos = pos, - .len = len, .flags = IOMAP_WRITE | IOMAP_UNSHARE, }; + loff_t size = i_size_read(inode); int ret; + if (pos < 0 || pos >= size) + return 0; + + iter.len = min(len, size - pos); while ((ret = iomap_iter(&iter, ops)) > 0) iter.processed = iomap_unshare_iter(&iter); return ret; diff --git a/fs/isofs/isofs.h b/fs/isofs/isofs.h index dcdc191ed183..2d55207c9a99 100644 --- a/fs/isofs/isofs.h +++ b/fs/isofs/isofs.h @@ -3,7 +3,7 @@ #include <linux/buffer_head.h> #include <linux/exportfs.h> #include <linux/iso_fs.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> enum isofs_file_format { isofs_file_normal = 0, diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c index 87a0f207df0b..b8fc732e1c67 100644 --- a/fs/lockd/mon.c +++ b/fs/lockd/mon.c @@ -18,7 +18,7 @@ #include <linux/sunrpc/svc.h> #include <linux/lockd/lockd.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include "netns.h" diff --git a/fs/netfs/misc.c b/fs/netfs/misc.c index 63280791de3b..78fe5796b2b2 100644 --- a/fs/netfs/misc.c +++ b/fs/netfs/misc.c @@ -102,7 +102,7 @@ void netfs_clear_buffer(struct netfs_io_request *rreq) while ((p = rreq->buffer)) { rreq->buffer = p->next; - for (int slot = 0; slot < folioq_nr_slots(p); slot++) { + for (int slot = 0; slot < folioq_count(p); slot++) { struct folio *folio = folioq_folio(p, slot); if (!folio) continue; diff --git a/fs/netfs/write_issue.c b/fs/netfs/write_issue.c index 0929d9fd4ce7..bf6d507578e5 100644 --- a/fs/netfs/write_issue.c +++ b/fs/netfs/write_issue.c @@ -317,6 +317,7 @@ static int netfs_write_folio(struct netfs_io_request *wreq, struct netfs_io_stream *stream; struct netfs_group *fgroup; /* TODO: Use this with ceph */ struct netfs_folio *finfo; + size_t iter_off = 0; size_t fsize = folio_size(folio), flen = fsize, foff = 0; loff_t fpos = folio_pos(folio), i_size; bool to_eof = false, streamw = false; @@ -472,7 +473,12 @@ static int netfs_write_folio(struct netfs_io_request *wreq, if (choose_s < 0) break; stream = &wreq->io_streams[choose_s]; - wreq->io_iter.iov_offset = stream->submit_off; + + /* Advance the iterator(s). */ + if (stream->submit_off > iter_off) { + iov_iter_advance(&wreq->io_iter, stream->submit_off - iter_off); + iter_off = stream->submit_off; + } atomic64_set(&wreq->issued_to, fpos + stream->submit_off); stream->submit_extendable_to = fsize - stream->submit_off; @@ -487,8 +493,8 @@ static int netfs_write_folio(struct netfs_io_request *wreq, debug = true; } - wreq->io_iter.iov_offset = 0; - iov_iter_advance(&wreq->io_iter, fsize); + if (fsize > iter_off) + iov_iter_advance(&wreq->io_iter, fsize - iter_off); atomic64_set(&wreq->issued_to, fpos + fsize); if (!debug) @@ -503,6 +509,30 @@ static int netfs_write_folio(struct netfs_io_request *wreq, } /* + * End the issuing of writes, letting the collector know we're done. + */ +static void netfs_end_issue_write(struct netfs_io_request *wreq) +{ + bool needs_poke = true; + + smp_wmb(); /* Write subreq lists before ALL_QUEUED. */ + set_bit(NETFS_RREQ_ALL_QUEUED, &wreq->flags); + + for (int s = 0; s < NR_IO_STREAMS; s++) { + struct netfs_io_stream *stream = &wreq->io_streams[s]; + + if (!stream->active) + continue; + if (!list_empty(&stream->subrequests)) + needs_poke = false; + netfs_issue_write(wreq, stream); + } + + if (needs_poke) + netfs_wake_write_collector(wreq, false); +} + +/* * Write some of the pending data back to the server */ int netfs_writepages(struct address_space *mapping, @@ -553,10 +583,7 @@ int netfs_writepages(struct address_space *mapping, break; } while ((folio = writeback_iter(mapping, wbc, folio, &error))); - for (int s = 0; s < NR_IO_STREAMS; s++) - netfs_issue_write(wreq, &wreq->io_streams[s]); - smp_wmb(); /* Write lists before ALL_QUEUED. */ - set_bit(NETFS_RREQ_ALL_QUEUED, &wreq->flags); + netfs_end_issue_write(wreq); mutex_unlock(&ictx->wb_lock); @@ -644,10 +671,7 @@ int netfs_end_writethrough(struct netfs_io_request *wreq, struct writeback_contr if (writethrough_cache) netfs_write_folio(wreq, wbc, writethrough_cache); - netfs_issue_write(wreq, &wreq->io_streams[0]); - netfs_issue_write(wreq, &wreq->io_streams[1]); - smp_wmb(); /* Write lists before ALL_QUEUED. */ - set_bit(NETFS_RREQ_ALL_QUEUED, &wreq->flags); + netfs_end_issue_write(wreq); mutex_unlock(&ictx->wb_lock); @@ -693,13 +717,7 @@ int netfs_unbuffered_write(struct netfs_io_request *wreq, bool may_wait, size_t break; } - netfs_issue_write(wreq, upload); - - smp_wmb(); /* Write lists before ALL_QUEUED. */ - set_bit(NETFS_RREQ_ALL_QUEUED, &wreq->flags); - if (list_empty(&upload->subrequests)) - netfs_wake_write_collector(wreq, false); - + netfs_end_issue_write(wreq); _leave(" = %d", error); return error; } diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index 19bb88c7eebd..ea1ca374cdab 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -792,7 +792,7 @@ nfsd_file_cache_init(void) } nfsd_file_fsnotify_group = fsnotify_alloc_group(&nfsd_file_fsnotify_ops, - FSNOTIFY_GROUP_NOFS); + 0); if (IS_ERR(nfsd_file_fsnotify_group)) { pr_err("nfsd: unable to create fsnotify group: %ld\n", PTR_ERR(nfsd_file_fsnotify_group)); diff --git a/fs/nls/nls_ucs2_utils.c b/fs/nls/nls_ucs2_utils.c index d4564b79d7bf..b81c298e4966 100644 --- a/fs/nls/nls_ucs2_utils.c +++ b/fs/nls/nls_ucs2_utils.c @@ -13,7 +13,7 @@ #include <linux/fs.h> #include <linux/module.h> #include <linux/slab.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include "nls_ucs2_utils.h" MODULE_DESCRIPTION("NLS UCS-2"); diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c index 46440fbb8662..d5dbef7f5c95 100644 --- a/fs/notify/dnotify/dnotify.c +++ b/fs/notify/dnotify/dnotify.c @@ -406,8 +406,7 @@ static int __init dnotify_init(void) SLAB_PANIC|SLAB_ACCOUNT); dnotify_mark_cache = KMEM_CACHE(dnotify_mark, SLAB_PANIC|SLAB_ACCOUNT); - dnotify_group = fsnotify_alloc_group(&dnotify_fsnotify_ops, - FSNOTIFY_GROUP_NOFS); + dnotify_group = fsnotify_alloc_group(&dnotify_fsnotify_ops, 0); if (IS_ERR(dnotify_group)) panic("unable to allocate fsnotify group for dnotify\n"); dnotify_sysctl_init(); diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 13454e5fd3fb..9644bc72e457 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -1480,7 +1480,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) /* fsnotify_alloc_group takes a ref. Dropped in fanotify_release */ group = fsnotify_alloc_group(&fanotify_fsnotify_ops, - FSNOTIFY_GROUP_USER | FSNOTIFY_GROUP_NOFS); + FSNOTIFY_GROUP_USER); if (IS_ERR(group)) { return PTR_ERR(group); } diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index 272c8a1dab3c..82ae8254c068 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -183,8 +183,10 @@ static bool fsnotify_event_needs_parent(struct inode *inode, __u32 mnt_mask, BUILD_BUG_ON(FS_EVENTS_POSS_ON_CHILD & ~FS_EVENTS_POSS_TO_PARENT); /* Did either inode/sb/mount subscribe for events with parent/name? */ - marks_mask |= fsnotify_parent_needed_mask(inode->i_fsnotify_mask); - marks_mask |= fsnotify_parent_needed_mask(inode->i_sb->s_fsnotify_mask); + marks_mask |= fsnotify_parent_needed_mask( + READ_ONCE(inode->i_fsnotify_mask)); + marks_mask |= fsnotify_parent_needed_mask( + READ_ONCE(inode->i_sb->s_fsnotify_mask)); marks_mask |= fsnotify_parent_needed_mask(mnt_mask); /* Did they subscribe for this event with parent/name info? */ @@ -195,8 +197,8 @@ static bool fsnotify_event_needs_parent(struct inode *inode, __u32 mnt_mask, static inline bool fsnotify_object_watched(struct inode *inode, __u32 mnt_mask, __u32 mask) { - __u32 marks_mask = inode->i_fsnotify_mask | mnt_mask | - inode->i_sb->s_fsnotify_mask; + __u32 marks_mask = READ_ONCE(inode->i_fsnotify_mask) | mnt_mask | + READ_ONCE(inode->i_sb->s_fsnotify_mask); return mask & marks_mask & ALL_FSNOTIFY_EVENTS; } @@ -213,7 +215,8 @@ int __fsnotify_parent(struct dentry *dentry, __u32 mask, const void *data, int data_type) { const struct path *path = fsnotify_data_path(data, data_type); - __u32 mnt_mask = path ? real_mount(path->mnt)->mnt_fsnotify_mask : 0; + __u32 mnt_mask = path ? + READ_ONCE(real_mount(path->mnt)->mnt_fsnotify_mask) : 0; struct inode *inode = d_inode(dentry); struct dentry *parent; bool parent_watched = dentry->d_flags & DCACHE_FSNOTIFY_PARENT_WATCHED; @@ -557,13 +560,13 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir, (!inode2 || !inode2->i_fsnotify_marks)) return 0; - marks_mask = sb->s_fsnotify_mask; + marks_mask = READ_ONCE(sb->s_fsnotify_mask); if (mnt) - marks_mask |= mnt->mnt_fsnotify_mask; + marks_mask |= READ_ONCE(mnt->mnt_fsnotify_mask); if (inode) - marks_mask |= inode->i_fsnotify_mask; + marks_mask |= READ_ONCE(inode->i_fsnotify_mask); if (inode2) - marks_mask |= inode2->i_fsnotify_mask; + marks_mask |= READ_ONCE(inode2->i_fsnotify_mask); /* diff --git a/fs/notify/group.c b/fs/notify/group.c index 1de6631a3925..18446b7b0d49 100644 --- a/fs/notify/group.c +++ b/fs/notify/group.c @@ -115,7 +115,6 @@ static struct fsnotify_group *__fsnotify_alloc_group( const struct fsnotify_ops *ops, int flags, gfp_t gfp) { - static struct lock_class_key nofs_marks_lock; struct fsnotify_group *group; group = kzalloc(sizeof(struct fsnotify_group), gfp); @@ -136,16 +135,6 @@ static struct fsnotify_group *__fsnotify_alloc_group( group->ops = ops; group->flags = flags; - /* - * For most backends, eviction of inode with a mark is not expected, - * because marks hold a refcount on the inode against eviction. - * - * Use a different lockdep class for groups that support evictable - * inode marks, because with evictable marks, mark_mutex is NOT - * fs-reclaim safe - the mutex is taken when evicting inodes. - */ - if (flags & FSNOTIFY_GROUP_NOFS) - lockdep_set_class(&group->mark_mutex, &nofs_marks_lock); return group; } diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index c7e451d5bd51..0794dcaf1e47 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -569,7 +569,7 @@ static int inotify_update_existing_watch(struct fsnotify_group *group, /* more bits in old than in new? */ int dropped = (old_mask & ~new_mask); /* more bits in this fsn_mark than the inode's mask? */ - int do_inode = (new_mask & ~inode->i_fsnotify_mask); + int do_inode = (new_mask & ~READ_ONCE(inode->i_fsnotify_mask)); /* update the inode with this new fsn_mark */ if (dropped || do_inode) diff --git a/fs/notify/mark.c b/fs/notify/mark.c index 5e170e713088..c45b222cf9c1 100644 --- a/fs/notify/mark.c +++ b/fs/notify/mark.c @@ -128,7 +128,7 @@ __u32 fsnotify_conn_mask(struct fsnotify_mark_connector *conn) if (WARN_ON(!fsnotify_valid_obj_type(conn->type))) return 0; - return *fsnotify_conn_mask_p(conn); + return READ_ONCE(*fsnotify_conn_mask_p(conn)); } static void fsnotify_get_sb_watched_objects(struct super_block *sb) @@ -245,7 +245,11 @@ static void *__fsnotify_recalc_mask(struct fsnotify_mark_connector *conn) !(mark->flags & FSNOTIFY_MARK_FLAG_NO_IREF)) want_iref = true; } - *fsnotify_conn_mask_p(conn) = new_mask; + /* + * We use WRITE_ONCE() to prevent silly compiler optimizations from + * confusing readers not holding conn->lock with partial updates. + */ + WRITE_ONCE(*fsnotify_conn_mask_p(conn), new_mask); return fsnotify_update_iref(conn, want_iref); } diff --git a/fs/ntfs3/lib/decompress_common.h b/fs/ntfs3/lib/decompress_common.h index dd7ced000d0e..f0cad9c4a289 100644 --- a/fs/ntfs3/lib/decompress_common.h +++ b/fs/ntfs3/lib/decompress_common.h @@ -12,7 +12,7 @@ #include <linux/compiler.h> #include <linux/types.h> #include <linux/slab.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> /* "Force inline" macro (not required, but helpful for performance) */ diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h index e2df7eeadc7a..3d4b883a7660 100644 --- a/fs/orangefs/orangefs-kernel.h +++ b/fs/orangefs/orangefs-kernel.h @@ -53,7 +53,7 @@ #include <linux/exportfs.h> #include <linux/hashtable.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include "orangefs-dev-proto.h" diff --git a/fs/pidfs.c b/fs/pidfs.c index 7ffdc88dfb52..80675b6bf884 100644 --- a/fs/pidfs.c +++ b/fs/pidfs.c @@ -120,6 +120,7 @@ static long pidfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg) struct nsproxy *nsp __free(put_nsproxy) = NULL; struct pid *pid = pidfd_pid(file); struct ns_common *ns_common = NULL; + struct pid_namespace *pid_ns; if (arg) return -EINVAL; @@ -202,7 +203,9 @@ static long pidfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case PIDFD_GET_PID_NAMESPACE: if (IS_ENABLED(CONFIG_PID_NS)) { rcu_read_lock(); - ns_common = to_ns_common( get_pid_ns(task_active_pid_ns(task))); + pid_ns = task_active_pid_ns(task); + if (pid_ns) + ns_common = to_ns_common(get_pid_ns(pid_ns)); rcu_read_unlock(); } break; diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index 72c53129c952..d39ee5f6c075 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -12,7 +12,7 @@ #include <linux/highmem.h> #include <linux/slab.h> #include <linux/uaccess.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include <linux/buffer_head.h> #include <linux/mpage.h> #include <linux/writeback.h> diff --git a/fs/reiserfs/reiserfs.h b/fs/reiserfs/reiserfs.h index f0e1f29f20ee..12fc20af8e17 100644 --- a/fs/reiserfs/reiserfs.h +++ b/fs/reiserfs/reiserfs.h @@ -11,7 +11,7 @@ #include <linux/sched.h> #include <linux/bug.h> #include <linux/workqueue.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include <linux/bitops.h> #include <linux/proc_fs.h> #include <linux/buffer_head.h> diff --git a/fs/smb/client/cifsacl.h b/fs/smb/client/cifsacl.h index 6529478b7f48..31b51a8fc256 100644 --- a/fs/smb/client/cifsacl.h +++ b/fs/smb/client/cifsacl.h @@ -55,7 +55,7 @@ struct smb3_sd { #define ACL_CONTROL_SI 0x0800 /* SACL Auto-Inherited */ #define ACL_CONTROL_DI 0x0400 /* DACL Auto-Inherited */ #define ACL_CONTROL_SC 0x0200 /* SACL computed through inheritance */ -#define ACL_CONTROL_DC 0x0100 /* DACL computed through inheritence */ +#define ACL_CONTROL_DC 0x0100 /* DACL computed through inheritance */ #define ACL_CONTROL_SS 0x0080 /* Create server ACL */ #define ACL_CONTROL_DT 0x0040 /* DACL provided by trusted source */ #define ACL_CONTROL_SD 0x0020 /* SACL defaulted */ diff --git a/fs/smb/client/cifsencrypt.c b/fs/smb/client/cifsencrypt.c index 2d851f596a72..7a43daacc815 100644 --- a/fs/smb/client/cifsencrypt.c +++ b/fs/smb/client/cifsencrypt.c @@ -239,7 +239,7 @@ int cifs_verify_signature(struct smb_rqst *rqst, cifs_dbg(FYI, "dummy signature received for smb command 0x%x\n", cifs_pdu->Command); - /* save off the origiginal signature so we can modify the smb and check + /* save off the original signature so we can modify the smb and check its signature against what the server sent */ memcpy(server_response_sig, cifs_pdu->Signature.SecuritySignature, 8); @@ -700,6 +700,7 @@ cifs_crypto_secmech_release(struct TCP_Server_Info *server) cifs_free_hash(&server->secmech.aes_cmac); cifs_free_hash(&server->secmech.hmacsha256); cifs_free_hash(&server->secmech.md5); + cifs_free_hash(&server->secmech.sha512); if (!SERVER_IS_CHAN(server)) { if (server->secmech.enc) { diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c index 2a2523c93944..000e1ef3beea 100644 --- a/fs/smb/client/cifsfs.c +++ b/fs/smb/client/cifsfs.c @@ -161,7 +161,7 @@ __u32 cifs_lock_secret; /* * Bumps refcount for cifs super block. - * Note that it should be only called if a referece to VFS super block is + * Note that it should be only called if a reference to VFS super block is * already held, e.g. in open-type syscalls context. Otherwise it can race with * atomic_dec_and_test in deactivate_locked_super. */ @@ -289,7 +289,7 @@ static void cifs_kill_sb(struct super_block *sb) struct cifs_sb_info *cifs_sb = CIFS_SB(sb); /* - * We ned to release all dentries for the cached directories + * We need to release all dentries for the cached directories * before we kill the sb. */ if (cifs_sb->root) { @@ -313,8 +313,17 @@ cifs_statfs(struct dentry *dentry, struct kstatfs *buf) struct TCP_Server_Info *server = tcon->ses->server; unsigned int xid; int rc = 0; + const char *full_path; + void *page; xid = get_xid(); + page = alloc_dentry_path(); + + full_path = build_path_from_dentry(dentry, page); + if (IS_ERR(full_path)) { + rc = PTR_ERR(full_path); + goto statfs_out; + } if (le32_to_cpu(tcon->fsAttrInfo.MaxPathNameComponentLength) > 0) buf->f_namelen = @@ -330,8 +339,10 @@ cifs_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_ffree = 0; /* unlimited */ if (server->ops->queryfs) - rc = server->ops->queryfs(xid, tcon, cifs_sb, buf); + rc = server->ops->queryfs(xid, tcon, full_path, cifs_sb, buf); +statfs_out: + free_dentry_path(page); free_xid(xid); return rc; } diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index 315aac5dec05..5041b1ffc244 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -180,6 +180,7 @@ struct session_key { struct cifs_secmech { struct shash_desc *md5; /* md5 hash function, for CIFS/SMB1 signatures */ struct shash_desc *hmacsha256; /* hmac-sha256 hash function, for SMB2 signatures */ + struct shash_desc *sha512; /* sha512 hash function, for SMB3.1.1 preauth hash */ struct shash_desc *aes_cmac; /* block-cipher based MAC function, for SMB3 signatures */ struct crypto_aead *enc; /* smb3 encryption AEAD TFM (AES-CCM and AES-GCM) */ @@ -480,7 +481,7 @@ struct smb_version_operations { __u16 net_fid, struct cifsInodeInfo *cifs_inode); /* query remote filesystem */ int (*queryfs)(const unsigned int, struct cifs_tcon *, - struct cifs_sb_info *, struct kstatfs *); + const char *, struct cifs_sb_info *, struct kstatfs *); /* send mandatory brlock to the server */ int (*mand_lock)(const unsigned int, struct cifsFileInfo *, __u64, __u64, __u32, int, int, bool); @@ -774,7 +775,7 @@ struct TCP_Server_Info { } compression; __u16 signing_algorithm; __le16 cipher_type; - /* save initital negprot hash */ + /* save initial negprot hash */ __u8 preauth_sha_hash[SMB2_PREAUTH_HASH_SIZE]; bool signing_negotiated; /* true if valid signing context rcvd from server */ bool posix_ext_supported; diff --git a/fs/smb/client/cifspdu.h b/fs/smb/client/cifspdu.h index c3b6263060b0..ee78bb6741d6 100644 --- a/fs/smb/client/cifspdu.h +++ b/fs/smb/client/cifspdu.h @@ -10,7 +10,7 @@ #define _CIFSPDU_H #include <net/sock.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include "../common/smbfsctl.h" #define CIFS_PROT 0 @@ -781,7 +781,7 @@ typedef struct smb_com_logoff_andx_rsp { __u16 ByteCount; } __attribute__((packed)) LOGOFF_ANDX_RSP; -typedef union smb_com_tree_disconnect { /* as an altetnative can use flag on +typedef union smb_com_tree_disconnect { /* as an alternative can use flag on tree_connect PDU to effect disconnect */ /* tdis is probably simplest SMB PDU */ struct { @@ -2406,7 +2406,7 @@ struct cifs_posix_ace { /* access control entry (ACE) */ __le64 cifs_uid; /* or gid */ } __attribute__((packed)); -struct cifs_posix_acl { /* access conrol list (ACL) */ +struct cifs_posix_acl { /* access control list (ACL) */ __le16 version; __le16 access_entry_count; /* access ACL - count of entries */ __le16 default_entry_count; /* default ACL - count of entries */ diff --git a/fs/smb/client/cifssmb.c b/fs/smb/client/cifssmb.c index 131f20b91c3e..c6f15dbe860a 100644 --- a/fs/smb/client/cifssmb.c +++ b/fs/smb/client/cifssmb.c @@ -1215,7 +1215,7 @@ openRetry: req->CreateDisposition = cpu_to_le32(disposition); req->CreateOptions = cpu_to_le32(create_options & CREATE_OPTIONS_MASK); - /* BB Expirement with various impersonation levels and verify */ + /* BB Experiment with various impersonation levels and verify */ req->ImpersonationLevel = cpu_to_le32(SECURITY_IMPERSONATION); req->SecurityFlags = SECURITY_CONTEXT_TRACKING|SECURITY_EFFECTIVE_ONLY; @@ -3018,7 +3018,7 @@ static void cifs_init_ace(struct cifs_posix_ace *cifs_ace, /** * posix_acl_to_cifs - convert ACLs from POSIX ACL to cifs format - * @parm_data: ACLs in cifs format to conver to + * @parm_data: ACLs in cifs format to convert to * @acl: ACLs in POSIX ACL format to convert from * @acl_type: the type of POSIX ACLs stored in @acl * @@ -3995,7 +3995,7 @@ findFirstRetry: name_len = cifsConvertToUTF16((__le16 *) pSMB->FileName, searchName, PATH_MAX, nls_codepage, remap); - /* We can not add the asterik earlier in case + /* We can not add the asterisk earlier in case it got remapped to 0xF03A as if it were part of the directory name instead of a wildcard */ name_len *= 2; diff --git a/fs/smb/client/compress/lz77.c b/fs/smb/client/compress/lz77.c index 553e253ada29..96e8a8057a77 100644 --- a/fs/smb/client/compress/lz77.c +++ b/fs/smb/client/compress/lz77.c @@ -9,7 +9,7 @@ #include <linux/slab.h> #include <linux/sizes.h> #include <linux/count_zeros.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include "lz77.h" diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c index 78b59c4ef3ce..a58a3333ecc3 100644 --- a/fs/smb/client/file.c +++ b/fs/smb/client/file.c @@ -2502,7 +2502,7 @@ refind_writable: } } } - /* couldn't find useable FH with same pid, try any available */ + /* couldn't find usable FH with same pid, try any available */ if (!any_available) { any_available = true; goto refind_writable; diff --git a/fs/smb/client/fs_context.h b/fs/smb/client/fs_context.h index 69f9d938b336..890d6d9d4a59 100644 --- a/fs/smb/client/fs_context.h +++ b/fs/smb/client/fs_context.h @@ -260,7 +260,7 @@ struct smb3_fs_context { unsigned int min_offload; unsigned int retrans; bool sockopt_tcp_nodelay:1; - /* attribute cache timemout for files and directories in jiffies */ + /* attribute cache timeout for files and directories in jiffies */ unsigned long acregmax; unsigned long acdirmax; /* timeout for deferred close of files in jiffies */ diff --git a/fs/smb/client/inode.c b/fs/smb/client/inode.c index 647f9bedd9fc..eff3f57235ee 100644 --- a/fs/smb/client/inode.c +++ b/fs/smb/client/inode.c @@ -629,10 +629,16 @@ cifs_sfu_type(struct cifs_fattr *fattr, const char *path, &symlink_len_utf16, &symlink_buf_utf16, &buf_type); + /* + * Check that read buffer has valid length and does not + * contain UTF-16 null codepoint (via UniStrnlen() call) + * because Linux cannot process symlink with null byte. + */ if ((rc == 0) && (symlink_len_utf16 > 0) && (symlink_len_utf16 < fattr->cf_eof-8 + 1) && - (symlink_len_utf16 % 2 == 0)) { + (symlink_len_utf16 % 2 == 0) && + (UniStrnlen((wchar_t *)symlink_buf_utf16, symlink_len_utf16/2) == symlink_len_utf16/2)) { fattr->cf_symlink_target = cifs_strndup_from_utf16(symlink_buf_utf16, symlink_len_utf16, diff --git a/fs/smb/client/misc.c b/fs/smb/client/misc.c index 054f10ebf65a..4373dd64b66d 100644 --- a/fs/smb/client/misc.c +++ b/fs/smb/client/misc.c @@ -254,7 +254,7 @@ free_rsp_buf(int resp_buftype, void *rsp) } /* NB: MID can not be set if treeCon not passed in, in that - case it is responsbility of caller to set the mid */ + case it is responsibility of caller to set the mid */ void header_assemble(struct smb_hdr *buffer, char smb_command /* command */ , const struct cifs_tcon *treeCon, int word_count diff --git a/fs/smb/client/netmisc.c b/fs/smb/client/netmisc.c index 1b52e6ac431c..2a8d71221e5e 100644 --- a/fs/smb/client/netmisc.c +++ b/fs/smb/client/netmisc.c @@ -1003,7 +1003,7 @@ struct timespec64 cnvrtDosUnixTm(__le16 le_date, __le16 le_time, int offset) year is 2**7, the last year is 1980+127, which means we need only consider 2 special case years, ie the years 2000 and 2100, and only adjust for the lack of leap year for the year 2100, as 2000 was a - leap year (divisable by 400) */ + leap year (divisible by 400) */ if (year >= 120) /* the year 2100 */ days = days - 1; /* do not count leap year for the year 2100 */ diff --git a/fs/smb/client/readdir.c b/fs/smb/client/readdir.c index ebe1cb30e18e..b3a8f9c6fcff 100644 --- a/fs/smb/client/readdir.c +++ b/fs/smb/client/readdir.c @@ -553,7 +553,7 @@ static void cifs_fill_dirent_std(struct cifs_dirent *de, const FIND_FILE_STANDARD_INFO *info) { de->name = &info->FileName[0]; - /* one byte length, no endianess conversion */ + /* one byte length, no endianness conversion */ de->namelen = info->FileNameLength; de->resume_key = info->ResumeKey; } @@ -815,7 +815,7 @@ static bool emit_cached_dirents(struct cached_dirents *cde, * However, this sequence of ->pos values may have holes * in it, for example dot-dirs returned from the server * are suppressed. - * Handle this bu forcing ctx->pos to be the same as the + * Handle this by forcing ctx->pos to be the same as the * ->pos of the current dirent we emit from the cache. * This means that when we emit these entries from the cache * we now emit them with the same ->pos value as in the diff --git a/fs/smb/client/reparse.c b/fs/smb/client/reparse.c index 3b48a093cfb1..c848b5e88d32 100644 --- a/fs/smb/client/reparse.c +++ b/fs/smb/client/reparse.c @@ -320,22 +320,51 @@ static int parse_reparse_posix(struct reparse_posix_data *buf, unsigned int len; u64 type; + len = le16_to_cpu(buf->ReparseDataLength); + if (len < sizeof(buf->InodeType)) { + cifs_dbg(VFS, "srv returned malformed nfs buffer\n"); + return -EIO; + } + + len -= sizeof(buf->InodeType); + switch ((type = le64_to_cpu(buf->InodeType))) { case NFS_SPECFILE_LNK: - len = le16_to_cpu(buf->ReparseDataLength); + if (len == 0 || (len % 2)) { + cifs_dbg(VFS, "srv returned malformed nfs symlink buffer\n"); + return -EIO; + } + /* + * Check that buffer does not contain UTF-16 null codepoint + * because Linux cannot process symlink with null byte. + */ + if (UniStrnlen((wchar_t *)buf->DataBuffer, len/2) != len/2) { + cifs_dbg(VFS, "srv returned null byte in nfs symlink target location\n"); + return -EIO; + } data->symlink_target = cifs_strndup_from_utf16(buf->DataBuffer, len, true, cifs_sb->local_nls); if (!data->symlink_target) return -ENOMEM; - convert_delimiter(data->symlink_target, '/'); cifs_dbg(FYI, "%s: target path: %s\n", __func__, data->symlink_target); break; case NFS_SPECFILE_CHR: case NFS_SPECFILE_BLK: + /* DataBuffer for block and char devices contains two 32-bit numbers */ + if (len != 8) { + cifs_dbg(VFS, "srv returned malformed nfs buffer for type: 0x%llx\n", type); + return -EIO; + } + break; case NFS_SPECFILE_FIFO: case NFS_SPECFILE_SOCK: + /* DataBuffer for fifos and sockets is empty */ + if (len != 0) { + cifs_dbg(VFS, "srv returned malformed nfs buffer for type: 0x%llx\n", type); + return -EIO; + } break; default: cifs_dbg(VFS, "%s: unhandled inode type: 0x%llx\n", @@ -482,12 +511,18 @@ bool cifs_reparse_point_to_fattr(struct cifs_sb_info *cifs_sb, u32 tag = data->reparse.tag; if (tag == IO_REPARSE_TAG_NFS && buf) { + if (le16_to_cpu(buf->ReparseDataLength) < sizeof(buf->InodeType)) + return false; switch (le64_to_cpu(buf->InodeType)) { case NFS_SPECFILE_CHR: + if (le16_to_cpu(buf->ReparseDataLength) != sizeof(buf->InodeType) + 8) + return false; fattr->cf_mode |= S_IFCHR; fattr->cf_rdev = reparse_mkdev(buf->DataBuffer); break; case NFS_SPECFILE_BLK: + if (le16_to_cpu(buf->ReparseDataLength) != sizeof(buf->InodeType) + 8) + return false; fattr->cf_mode |= S_IFBLK; fattr->cf_rdev = reparse_mkdev(buf->DataBuffer); break; diff --git a/fs/smb/client/sess.c b/fs/smb/client/sess.c index 03c0b484a4b5..3216f786908f 100644 --- a/fs/smb/client/sess.c +++ b/fs/smb/client/sess.c @@ -624,7 +624,7 @@ cifs_ses_add_channel(struct cifs_ses *ses, * to sign packets before we generate the channel signing key * (we sign with the session key) */ - rc = smb3_crypto_shash_allocate(chan->server); + rc = smb311_crypto_shash_allocate(chan->server); if (rc) { cifs_dbg(VFS, "%s: crypto alloc failed\n", __func__); mutex_unlock(&ses->session_mutex); diff --git a/fs/smb/client/smb1ops.c b/fs/smb/client/smb1ops.c index e03c91a49650..9a6ece66c4d3 100644 --- a/fs/smb/client/smb1ops.c +++ b/fs/smb/client/smb1ops.c @@ -909,7 +909,7 @@ cifs_oplock_response(struct cifs_tcon *tcon, __u64 persistent_fid, static int cifs_queryfs(const unsigned int xid, struct cifs_tcon *tcon, - struct cifs_sb_info *cifs_sb, struct kstatfs *buf) + const char *path, struct cifs_sb_info *cifs_sb, struct kstatfs *buf) { int rc = -EOPNOTSUPP; diff --git a/fs/smb/client/smb2inode.c b/fs/smb/client/smb2inode.c index b992117377e9..4e9e225520a6 100644 --- a/fs/smb/client/smb2inode.c +++ b/fs/smb/client/smb2inode.c @@ -1205,9 +1205,12 @@ struct inode *smb2_get_reparse_inode(struct cifs_open_info_data *data, struct cifs_sb_info *cifs_sb = CIFS_SB(sb); struct cifsFileInfo *cfile; struct inode *new = NULL; + int out_buftype[4] = {}; + struct kvec out_iov[4] = {}; struct kvec in_iov[2]; int cmds[2]; int rc; + int i; oparms = CIFS_OPARMS(cifs_sb, tcon, full_path, SYNCHRONIZE | DELETE | @@ -1228,7 +1231,7 @@ struct inode *smb2_get_reparse_inode(struct cifs_open_info_data *data, cmds[1] = SMB2_OP_POSIX_QUERY_INFO; cifs_get_writable_path(tcon, full_path, FIND_WR_ANY, &cfile); rc = smb2_compound_op(xid, tcon, cifs_sb, full_path, &oparms, - in_iov, cmds, 2, cfile, NULL, NULL, NULL); + in_iov, cmds, 2, cfile, out_iov, out_buftype, NULL); if (!rc) { rc = smb311_posix_get_inode_info(&new, full_path, data, sb, xid); @@ -1237,12 +1240,29 @@ struct inode *smb2_get_reparse_inode(struct cifs_open_info_data *data, cmds[1] = SMB2_OP_QUERY_INFO; cifs_get_writable_path(tcon, full_path, FIND_WR_ANY, &cfile); rc = smb2_compound_op(xid, tcon, cifs_sb, full_path, &oparms, - in_iov, cmds, 2, cfile, NULL, NULL, NULL); + in_iov, cmds, 2, cfile, out_iov, out_buftype, NULL); if (!rc) { rc = cifs_get_inode_info(&new, full_path, data, sb, xid, NULL); } } + + + /* + * If CREATE was successful but SMB2_OP_SET_REPARSE failed then + * remove the intermediate object created by CREATE. Otherwise + * empty object stay on the server when reparse call failed. + */ + if (rc && + out_iov[0].iov_base != NULL && out_buftype[0] != CIFS_NO_BUFFER && + ((struct smb2_hdr *)out_iov[0].iov_base)->Status == STATUS_SUCCESS && + (out_iov[1].iov_base == NULL || out_buftype[1] == CIFS_NO_BUFFER || + ((struct smb2_hdr *)out_iov[1].iov_base)->Status != STATUS_SUCCESS)) + smb2_unlink(xid, tcon, full_path, cifs_sb, NULL); + + for (i = 0; i < ARRAY_SIZE(out_buftype); i++) + free_rsp_buf(out_buftype[i], out_iov[i].iov_base); + return rc ? ERR_PTR(rc) : new; } diff --git a/fs/smb/client/smb2misc.c b/fs/smb/client/smb2misc.c index bdeb12ff53e3..f3c4b70b77b9 100644 --- a/fs/smb/client/smb2misc.c +++ b/fs/smb/client/smb2misc.c @@ -906,41 +906,41 @@ smb311_update_preauth_hash(struct cifs_ses *ses, struct TCP_Server_Info *server, || (hdr->Status != cpu_to_le32(NT_STATUS_MORE_PROCESSING_REQUIRED)))) return 0; + ok: - rc = cifs_alloc_hash("sha512", &sha512); - if (rc) { - cifs_dbg(VFS, "%s: Could not allocate SHA512 shash, rc=%d\n", __func__, rc); + rc = smb311_crypto_shash_allocate(server); + if (rc) return rc; - } + sha512 = server->secmech.sha512; rc = crypto_shash_init(sha512); if (rc) { - cifs_dbg(VFS, "%s: Could not init SHA512 shash, rc=%d\n", __func__, rc); - goto err_free; + cifs_dbg(VFS, "%s: Could not init sha512 shash\n", __func__); + return rc; } rc = crypto_shash_update(sha512, ses->preauth_sha_hash, SMB2_PREAUTH_HASH_SIZE); if (rc) { - cifs_dbg(VFS, "%s: Could not update SHA512 shash, rc=%d\n", __func__, rc); - goto err_free; + cifs_dbg(VFS, "%s: Could not update sha512 shash\n", __func__); + return rc; } for (i = 0; i < nvec; i++) { rc = crypto_shash_update(sha512, iov[i].iov_base, iov[i].iov_len); if (rc) { - cifs_dbg(VFS, "%s: Could not update SHA512 shash, rc=%d\n", __func__, rc); - goto err_free; + cifs_dbg(VFS, "%s: Could not update sha512 shash\n", + __func__); + return rc; } } rc = crypto_shash_final(sha512, ses->preauth_sha_hash); if (rc) { - cifs_dbg(VFS, "%s: Could not finalize SHA12 shash, rc=%d\n", __func__, rc); - goto err_free; + cifs_dbg(VFS, "%s: Could not finalize sha512 shash\n", + __func__); + return rc; } -err_free: - cifs_free_hash(&sha512); return 0; } diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index 177173072bfa..6b385fce3f2a 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -2177,7 +2177,7 @@ smb3_enum_snapshots(const unsigned int xid, struct cifs_tcon *tcon, NULL, 0 /* no input data */, max_response_size, (char **)&retbuf, &ret_data_len); - cifs_dbg(FYI, "enum snaphots ioctl returned %d and ret buflen is %d\n", + cifs_dbg(FYI, "enum snapshots ioctl returned %d and ret buflen is %d\n", rc, ret_data_len); if (rc) return rc; @@ -2838,7 +2838,7 @@ out_free_path: static int smb2_queryfs(const unsigned int xid, struct cifs_tcon *tcon, - struct cifs_sb_info *cifs_sb, struct kstatfs *buf) + const char *path, struct cifs_sb_info *cifs_sb, struct kstatfs *buf) { struct smb2_query_info_rsp *rsp; struct smb2_fs_full_size_info *info = NULL; @@ -2847,7 +2847,7 @@ smb2_queryfs(const unsigned int xid, struct cifs_tcon *tcon, int rc; - rc = smb2_query_info_compound(xid, tcon, "", + rc = smb2_query_info_compound(xid, tcon, path, FILE_READ_ATTRIBUTES, FS_FULL_SIZE_INFORMATION, SMB2_O_INFO_FILESYSTEM, @@ -2875,28 +2875,33 @@ qfs_exit: static int smb311_queryfs(const unsigned int xid, struct cifs_tcon *tcon, - struct cifs_sb_info *cifs_sb, struct kstatfs *buf) + const char *path, struct cifs_sb_info *cifs_sb, struct kstatfs *buf) { int rc; - __le16 srch_path = 0; /* Null - open root of share */ + __le16 *utf16_path = NULL; u8 oplock = SMB2_OPLOCK_LEVEL_NONE; struct cifs_open_parms oparms; struct cifs_fid fid; if (!tcon->posix_extensions) - return smb2_queryfs(xid, tcon, cifs_sb, buf); + return smb2_queryfs(xid, tcon, path, cifs_sb, buf); oparms = (struct cifs_open_parms) { .tcon = tcon, - .path = "", + .path = path, .desired_access = FILE_READ_ATTRIBUTES, .disposition = FILE_OPEN, .create_options = cifs_create_options(cifs_sb, 0), .fid = &fid, }; - rc = SMB2_open(xid, &oparms, &srch_path, &oplock, NULL, NULL, + utf16_path = cifs_convert_path_to_utf16(path, cifs_sb); + if (utf16_path == NULL) + return -ENOMEM; + + rc = SMB2_open(xid, &oparms, utf16_path, &oplock, NULL, NULL, NULL, NULL); + kfree(utf16_path); if (rc) return rc; @@ -3583,7 +3588,7 @@ static long smb3_simple_falloc(struct file *file, struct cifs_tcon *tcon, /* * At this point, we are trying to fallocate an internal * regions of a sparse file. Since smb2 does not have a - * fallocate command we have two otions on how to emulate this. + * fallocate command we have two options on how to emulate this. * We can either turn the entire file to become non-sparse * which we only do if the fallocate is for virtually * the whole file, or we can overwrite the region with zeroes diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index 02828b9c3cb3..b2f16a7b696d 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -2986,7 +2986,7 @@ replay_again: SMB2_close(xid, tcon, rsp->PersistentFileId, rsp->VolatileFileId); - /* Eventually save off posix specific response info and timestaps */ + /* Eventually save off posix specific response info and timestamps */ err_free_rsp_buf: free_rsp_buf(resp_buftype, rsp); @@ -4581,7 +4581,7 @@ smb2_readv_callback(struct mid_q_entry *mid) } #ifdef CONFIG_CIFS_SMB_DIRECT /* - * If this rdata has a memmory registered, the MR can be freed + * If this rdata has a memory registered, the MR can be freed * MR needs to be freed as soon as I/O finishes to prevent deadlock * because they have limited number and are used for future I/Os */ diff --git a/fs/smb/client/smb2proto.h b/fs/smb/client/smb2proto.h index 56a896ff7cd9..c7e1b149877a 100644 --- a/fs/smb/client/smb2proto.h +++ b/fs/smb/client/smb2proto.h @@ -291,7 +291,7 @@ extern int smb2_validate_and_copy_iov(unsigned int offset, extern void smb2_copy_fs_info_to_kstatfs( struct smb2_fs_full_size_info *pfs_inf, struct kstatfs *kst); -extern int smb3_crypto_shash_allocate(struct TCP_Server_Info *server); +extern int smb311_crypto_shash_allocate(struct TCP_Server_Info *server); extern int smb311_update_preauth_hash(struct cifs_ses *ses, struct TCP_Server_Info *server, struct kvec *iov, int nvec); diff --git a/fs/smb/client/smb2transport.c b/fs/smb/client/smb2transport.c index f7e04c40d22e..b486b14bb330 100644 --- a/fs/smb/client/smb2transport.c +++ b/fs/smb/client/smb2transport.c @@ -26,7 +26,8 @@ #include "../common/smb2status.h" #include "smb2glob.h" -int smb3_crypto_shash_allocate(struct TCP_Server_Info *server) +static int +smb3_crypto_shash_allocate(struct TCP_Server_Info *server) { struct cifs_secmech *p = &server->secmech; int rc; @@ -45,6 +46,33 @@ err: return rc; } +int +smb311_crypto_shash_allocate(struct TCP_Server_Info *server) +{ + struct cifs_secmech *p = &server->secmech; + int rc = 0; + + rc = cifs_alloc_hash("hmac(sha256)", &p->hmacsha256); + if (rc) + return rc; + + rc = cifs_alloc_hash("cmac(aes)", &p->aes_cmac); + if (rc) + goto err; + + rc = cifs_alloc_hash("sha512", &p->sha512); + if (rc) + goto err; + + return 0; + +err: + cifs_free_hash(&p->aes_cmac); + cifs_free_hash(&p->hmacsha256); + return rc; +} + + static int smb2_get_sign_key(__u64 ses_id, struct TCP_Server_Info *server, u8 *key) { @@ -668,7 +696,7 @@ smb2_verify_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) shdr->Command); /* - * Save off the origiginal signature so we can modify the smb and check + * Save off the original signature so we can modify the smb and check * our calculated signature against what the server sent. */ memcpy(server_response_sig, shdr->Signature, SMB2_SIGNATURE_SIZE); diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index 0c64b37e2660..b0b7254661e9 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -219,7 +219,7 @@ static int smbd_conn_upcall( case RDMA_CM_EVENT_DEVICE_REMOVAL: case RDMA_CM_EVENT_DISCONNECTED: - /* This happenes when we fail the negotiation */ + /* This happens when we fail the negotiation */ if (info->transport_status == SMBD_NEGOTIATE_FAILED) { info->transport_status = SMBD_DISCONNECTED; wake_up(&info->conn_wait); @@ -1344,7 +1344,7 @@ void smbd_destroy(struct TCP_Server_Info *server) * are not locked by srv_mutex. It is possible some processes are * blocked on transport srv_mutex while holding memory registration. * Release the transport srv_mutex to allow them to hit the failure - * path when sending data, and then release memory registartions. + * path when sending data, and then release memory registrations. */ log_rdma_event(INFO, "freeing mr list\n"); wake_up_interruptible_all(&info->wait_mr); diff --git a/fs/smb/client/smbdirect.h b/fs/smb/client/smbdirect.h index 83f239f376f0..c08e3665150d 100644 --- a/fs/smb/client/smbdirect.h +++ b/fs/smb/client/smbdirect.h @@ -111,7 +111,7 @@ struct smbd_connection { /* Used by transport to wait until all MRs are returned */ wait_queue_head_t wait_for_mr_cleanup; - /* Activity accoutning */ + /* Activity accounting */ atomic_t send_pending; wait_queue_head_t wait_send_pending; wait_queue_head_t wait_post_send; diff --git a/fs/smb/common/smbfsctl.h b/fs/smb/common/smbfsctl.h index a94d658b88e8..4b379e84c46b 100644 --- a/fs/smb/common/smbfsctl.h +++ b/fs/smb/common/smbfsctl.h @@ -140,20 +140,21 @@ /* Used by the DFS filter See MS-DFSC */ #define IO_REPARSE_TAG_DFSR 0x80000012 #define IO_REPARSE_TAG_FILTER_MANAGER 0x8000000B -/* See section MS-FSCC 2.1.2.4 */ +/* Native SMB symlinks since Windows Vista, see MS-FSCC 2.1.2.4 */ #define IO_REPARSE_TAG_SYMLINK 0xA000000C #define IO_REPARSE_TAG_DEDUP 0x80000013 #define IO_REPARSE_APPXSTREAM 0xC0000014 -/* NFS symlinks, Win 8/SMB3 and later */ +/* NFS special files used by Windows NFS server since Windows Server 2012, see MS-FSCC 2.1.2.6 */ #define IO_REPARSE_TAG_NFS 0x80000014 /* * AzureFileSync - see * https://docs.microsoft.com/en-us/azure/storage/files/storage-sync-cloud-tiering */ #define IO_REPARSE_TAG_AZ_FILE_SYNC 0x8000001e +/* Native Win32 AF_UNIX sockets since Windows 10 April 2018 Update, used also by WSL */ +#define IO_REPARSE_TAG_AF_UNIX 0x80000023 /* WSL reparse tags */ #define IO_REPARSE_TAG_LX_SYMLINK 0xA000001D -#define IO_REPARSE_TAG_AF_UNIX 0x80000023 #define IO_REPARSE_TAG_LX_FIFO 0x80000024 #define IO_REPARSE_TAG_LX_CHR 0x80000025 #define IO_REPARSE_TAG_LX_BLK 0x80000026 diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 7460089c186f..797b0f24097b 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -4883,7 +4883,7 @@ static void get_file_alternate_info(struct ksmbd_work *work, spin_unlock(&dentry->d_lock); file_info->FileNameLength = cpu_to_le32(conv_len); rsp->OutputBufferLength = - cpu_to_le32(sizeof(struct smb2_file_alt_name_info) + conv_len); + cpu_to_le32(struct_size(file_info, FileName, conv_len)); } static int get_file_stream_info(struct ksmbd_work *work, @@ -7562,7 +7562,6 @@ static int fsctl_copychunk(struct ksmbd_work *work, ci_rsp->TotalBytesWritten = cpu_to_le32(ksmbd_server_side_copy_max_total_size()); - chunks = (struct srv_copychunk *)&ci_req->Chunks[0]; chunk_count = le32_to_cpu(ci_req->ChunkCount); if (chunk_count == 0) goto out; @@ -7570,12 +7569,12 @@ static int fsctl_copychunk(struct ksmbd_work *work, /* verify the SRV_COPYCHUNK_COPY packet */ if (chunk_count > ksmbd_server_side_copy_max_chunk_count() || - input_count < offsetof(struct copychunk_ioctl_req, Chunks) + - chunk_count * sizeof(struct srv_copychunk)) { + input_count < struct_size(ci_req, Chunks, chunk_count)) { rsp->hdr.Status = STATUS_INVALID_PARAMETER; return -EINVAL; } + chunks = &ci_req->Chunks[0]; for (i = 0; i < chunk_count; i++) { if (le32_to_cpu(chunks[i].Length) == 0 || le32_to_cpu(chunks[i].Length) > ksmbd_server_side_copy_max_chunk_size()) diff --git a/fs/smb/server/smb2pdu.h b/fs/smb/server/smb2pdu.h index 73aff20e22d0..649dacf7e8c4 100644 --- a/fs/smb/server/smb2pdu.h +++ b/fs/smb/server/smb2pdu.h @@ -190,13 +190,6 @@ struct resume_key_ioctl_rsp { __u8 Context[4]; /* ignored, Windows sets to 4 bytes of zero */ } __packed; -struct copychunk_ioctl_req { - __le64 ResumeKey[3]; - __le32 ChunkCount; - __le32 Reserved; - __u8 Chunks[]; /* array of srv_copychunk */ -} __packed; - struct srv_copychunk { __le64 SourceOffset; __le64 TargetOffset; @@ -204,6 +197,13 @@ struct srv_copychunk { __le32 Reserved; } __packed; +struct copychunk_ioctl_req { + __le64 ResumeKey[3]; + __le32 ChunkCount; + __le32 Reserved; + struct srv_copychunk Chunks[] __counted_by_le(ChunkCount); +} __packed; + struct copychunk_ioctl_rsp { __le32 ChunksWritten; __le32 ChunkBytesWritten; diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c index 44c87e300c16..17c76713c6d0 100644 --- a/fs/smb/server/transport_rdma.c +++ b/fs/smb/server/transport_rdma.c @@ -1405,8 +1405,8 @@ static int smb_direct_rdma_xmit(struct smb_direct_transport *t, /* build rdma_rw_ctx for each descriptor */ desc_buf = buf; for (i = 0; i < desc_num; i++) { - msg = kzalloc(offsetof(struct smb_direct_rdma_rw_msg, sg_list) + - sizeof(struct scatterlist) * SG_CHUNK_SIZE, GFP_KERNEL); + msg = kzalloc(struct_size(msg, sg_list, SG_CHUNK_SIZE), + GFP_KERNEL); if (!msg) { ret = -ENOMEM; goto out; diff --git a/fs/smb/server/unicode.c b/fs/smb/server/unicode.c index 43ed29ee44ea..217106ff7b82 100644 --- a/fs/smb/server/unicode.c +++ b/fs/smb/server/unicode.c @@ -8,7 +8,7 @@ */ #include <linux/fs.h> #include <linux/slab.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include "glob.h" #include "unicode.h" #include "smb_common.h" diff --git a/fs/udf/balloc.c b/fs/udf/balloc.c index d8fc11765d61..807c493ed0cd 100644 --- a/fs/udf/balloc.c +++ b/fs/udf/balloc.c @@ -370,6 +370,7 @@ static void udf_table_free_blocks(struct super_block *sb, struct extent_position oepos, epos; int8_t etype; struct udf_inode_info *iinfo; + int ret = 0; mutex_lock(&sbi->s_alloc_mutex); iinfo = UDF_I(table); @@ -383,8 +384,12 @@ static void udf_table_free_blocks(struct super_block *sb, epos.block = oepos.block = iinfo->i_location; epos.bh = oepos.bh = NULL; - while (count && - (etype = udf_next_aext(table, &epos, &eloc, &elen, 1)) != -1) { + while (count) { + ret = udf_next_aext(table, &epos, &eloc, &elen, &etype, 1); + if (ret < 0) + goto error_return; + if (ret == 0) + break; if (((eloc.logicalBlockNum + (elen >> sb->s_blocksize_bits)) == start)) { if ((0x3FFFFFFF - elen) < @@ -459,11 +464,8 @@ static void udf_table_free_blocks(struct super_block *sb, adsize = sizeof(struct short_ad); else if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG) adsize = sizeof(struct long_ad); - else { - brelse(oepos.bh); - brelse(epos.bh); + else goto error_return; - } if (epos.offset + (2 * adsize) > sb->s_blocksize) { /* Steal a block from the extent being free'd */ @@ -479,10 +481,10 @@ static void udf_table_free_blocks(struct super_block *sb, __udf_add_aext(table, &epos, &eloc, elen, 1); } +error_return: brelse(epos.bh); brelse(oepos.bh); -error_return: mutex_unlock(&sbi->s_alloc_mutex); return; } @@ -498,6 +500,7 @@ static int udf_table_prealloc_blocks(struct super_block *sb, struct extent_position epos; int8_t etype = -1; struct udf_inode_info *iinfo; + int ret = 0; if (first_block >= sbi->s_partmaps[partition].s_partition_len) return 0; @@ -516,11 +519,14 @@ static int udf_table_prealloc_blocks(struct super_block *sb, epos.bh = NULL; eloc.logicalBlockNum = 0xFFFFFFFF; - while (first_block != eloc.logicalBlockNum && - (etype = udf_next_aext(table, &epos, &eloc, &elen, 1)) != -1) { + while (first_block != eloc.logicalBlockNum) { + ret = udf_next_aext(table, &epos, &eloc, &elen, &etype, 1); + if (ret < 0) + goto err_out; + if (ret == 0) + break; udf_debug("eloc=%u, elen=%u, first_block=%u\n", eloc.logicalBlockNum, elen, first_block); - ; /* empty loop body */ } if (first_block == eloc.logicalBlockNum) { @@ -539,6 +545,7 @@ static int udf_table_prealloc_blocks(struct super_block *sb, alloc_count = 0; } +err_out: brelse(epos.bh); if (alloc_count) @@ -560,6 +567,7 @@ static udf_pblk_t udf_table_new_block(struct super_block *sb, struct extent_position epos, goal_epos; int8_t etype; struct udf_inode_info *iinfo = UDF_I(table); + int ret = 0; *err = -ENOSPC; @@ -583,8 +591,10 @@ static udf_pblk_t udf_table_new_block(struct super_block *sb, epos.block = iinfo->i_location; epos.bh = goal_epos.bh = NULL; - while (spread && - (etype = udf_next_aext(table, &epos, &eloc, &elen, 1)) != -1) { + while (spread) { + ret = udf_next_aext(table, &epos, &eloc, &elen, &etype, 1); + if (ret <= 0) + break; if (goal >= eloc.logicalBlockNum) { if (goal < eloc.logicalBlockNum + (elen >> sb->s_blocksize_bits)) @@ -612,9 +622,11 @@ static udf_pblk_t udf_table_new_block(struct super_block *sb, brelse(epos.bh); - if (spread == 0xFFFFFFFF) { + if (ret < 0 || spread == 0xFFFFFFFF) { brelse(goal_epos.bh); mutex_unlock(&sbi->s_alloc_mutex); + if (ret < 0) + *err = ret; return 0; } diff --git a/fs/udf/directory.c b/fs/udf/directory.c index 93153665eb37..632453aa3893 100644 --- a/fs/udf/directory.c +++ b/fs/udf/directory.c @@ -166,13 +166,19 @@ static struct buffer_head *udf_fiiter_bread_blk(struct udf_fileident_iter *iter) */ static int udf_fiiter_advance_blk(struct udf_fileident_iter *iter) { + int8_t etype = -1; + int err = 0; + iter->loffset++; if (iter->loffset < DIV_ROUND_UP(iter->elen, 1<<iter->dir->i_blkbits)) return 0; iter->loffset = 0; - if (udf_next_aext(iter->dir, &iter->epos, &iter->eloc, &iter->elen, 1) - != (EXT_RECORDED_ALLOCATED >> 30)) { + err = udf_next_aext(iter->dir, &iter->epos, &iter->eloc, + &iter->elen, &etype, 1); + if (err < 0) + return err; + else if (err == 0 || etype != (EXT_RECORDED_ALLOCATED >> 30)) { if (iter->pos == iter->dir->i_size) { iter->elen = 0; return 0; @@ -240,6 +246,7 @@ int udf_fiiter_init(struct udf_fileident_iter *iter, struct inode *dir, { struct udf_inode_info *iinfo = UDF_I(dir); int err = 0; + int8_t etype; iter->dir = dir; iter->bh[0] = iter->bh[1] = NULL; @@ -259,9 +266,9 @@ int udf_fiiter_init(struct udf_fileident_iter *iter, struct inode *dir, goto out; } - if (inode_bmap(dir, iter->pos >> dir->i_blkbits, &iter->epos, - &iter->eloc, &iter->elen, &iter->loffset) != - (EXT_RECORDED_ALLOCATED >> 30)) { + err = inode_bmap(dir, iter->pos >> dir->i_blkbits, &iter->epos, + &iter->eloc, &iter->elen, &iter->loffset, &etype); + if (err <= 0 || etype != (EXT_RECORDED_ALLOCATED >> 30)) { if (pos == dir->i_size) return 0; udf_err(dir->i_sb, @@ -457,6 +464,7 @@ int udf_fiiter_append_blk(struct udf_fileident_iter *iter) sector_t block; uint32_t old_elen = iter->elen; int err; + int8_t etype; if (WARN_ON_ONCE(iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB)) return -EINVAL; @@ -471,8 +479,9 @@ int udf_fiiter_append_blk(struct udf_fileident_iter *iter) udf_fiiter_update_elen(iter, old_elen); return err; } - if (inode_bmap(iter->dir, block, &iter->epos, &iter->eloc, &iter->elen, - &iter->loffset) != (EXT_RECORDED_ALLOCATED >> 30)) { + err = inode_bmap(iter->dir, block, &iter->epos, &iter->eloc, &iter->elen, + &iter->loffset, &etype); + if (err <= 0 || etype != (EXT_RECORDED_ALLOCATED >> 30)) { udf_err(iter->dir->i_sb, "block %llu not allocated in directory (ino %lu)\n", (unsigned long long)block, iter->dir->i_ino); diff --git a/fs/udf/inode.c b/fs/udf/inode.c index eaee57b91c6c..70c907fe8af9 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -404,7 +404,7 @@ struct udf_map_rq { static int udf_map_block(struct inode *inode, struct udf_map_rq *map) { - int err; + int ret; struct udf_inode_info *iinfo = UDF_I(inode); if (WARN_ON_ONCE(iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB)) @@ -416,18 +416,24 @@ static int udf_map_block(struct inode *inode, struct udf_map_rq *map) uint32_t elen; sector_t offset; struct extent_position epos = {}; + int8_t etype; down_read(&iinfo->i_data_sem); - if (inode_bmap(inode, map->lblk, &epos, &eloc, &elen, &offset) - == (EXT_RECORDED_ALLOCATED >> 30)) { + ret = inode_bmap(inode, map->lblk, &epos, &eloc, &elen, &offset, + &etype); + if (ret < 0) + goto out_read; + if (ret > 0 && etype == (EXT_RECORDED_ALLOCATED >> 30)) { map->pblk = udf_get_lb_pblock(inode->i_sb, &eloc, offset); map->oflags |= UDF_BLK_MAPPED; + ret = 0; } +out_read: up_read(&iinfo->i_data_sem); brelse(epos.bh); - return 0; + return ret; } down_write(&iinfo->i_data_sem); @@ -438,9 +444,9 @@ static int udf_map_block(struct inode *inode, struct udf_map_rq *map) if (((loff_t)map->lblk) << inode->i_blkbits >= iinfo->i_lenExtents) udf_discard_prealloc(inode); udf_clear_extent_cache(inode); - err = inode_getblk(inode, map); + ret = inode_getblk(inode, map); up_write(&iinfo->i_data_sem); - return err; + return ret; } static int __udf_get_block(struct inode *inode, sector_t block, @@ -543,6 +549,7 @@ static int udf_do_extend_file(struct inode *inode, } else { struct kernel_lb_addr tmploc; uint32_t tmplen; + int8_t tmptype; udf_write_aext(inode, last_pos, &last_ext->extLocation, last_ext->extLength, 1); @@ -552,8 +559,12 @@ static int udf_do_extend_file(struct inode *inode, * more extents, we may need to enter possible following * empty indirect extent. */ - if (new_block_bytes) - udf_next_aext(inode, last_pos, &tmploc, &tmplen, 0); + if (new_block_bytes) { + err = udf_next_aext(inode, last_pos, &tmploc, &tmplen, + &tmptype, 0); + if (err < 0) + goto out_err; + } } iinfo->i_lenExtents += add; @@ -657,8 +668,10 @@ static int udf_extend_file(struct inode *inode, loff_t newsize) */ udf_discard_prealloc(inode); - etype = inode_bmap(inode, first_block, &epos, &eloc, &elen, &offset); - within_last_ext = (etype != -1); + err = inode_bmap(inode, first_block, &epos, &eloc, &elen, &offset, &etype); + if (err < 0) + goto out; + within_last_ext = (err == 1); /* We don't expect extents past EOF... */ WARN_ON_ONCE(within_last_ext && elen > ((loff_t)offset + 1) << inode->i_blkbits); @@ -672,8 +685,10 @@ static int udf_extend_file(struct inode *inode, loff_t newsize) extent.extLength = EXT_NOT_RECORDED_NOT_ALLOCATED; } else { epos.offset -= adsize; - etype = udf_next_aext(inode, &epos, &extent.extLocation, - &extent.extLength, 0); + err = udf_next_aext(inode, &epos, &extent.extLocation, + &extent.extLength, &etype, 0); + if (err <= 0) + goto out; extent.extLength |= etype << 30; } @@ -710,11 +725,11 @@ static int inode_getblk(struct inode *inode, struct udf_map_rq *map) loff_t lbcount = 0, b_off = 0; udf_pblk_t newblocknum; sector_t offset = 0; - int8_t etype; + int8_t etype, tmpetype; struct udf_inode_info *iinfo = UDF_I(inode); udf_pblk_t goal = 0, pgoal = iinfo->i_location.logicalBlockNum; int lastblock = 0; - bool isBeyondEOF; + bool isBeyondEOF = false; int ret = 0; prev_epos.offset = udf_file_entry_alloc_offset(inode); @@ -746,9 +761,13 @@ static int inode_getblk(struct inode *inode, struct udf_map_rq *map) prev_epos.offset = cur_epos.offset; cur_epos.offset = next_epos.offset; - etype = udf_next_aext(inode, &next_epos, &eloc, &elen, 1); - if (etype == -1) + ret = udf_next_aext(inode, &next_epos, &eloc, &elen, &etype, 1); + if (ret < 0) { + goto out_free; + } else if (ret == 0) { + isBeyondEOF = true; break; + } c = !c; @@ -769,13 +788,17 @@ static int inode_getblk(struct inode *inode, struct udf_map_rq *map) * Move prev_epos and cur_epos into indirect extent if we are at * the pointer to it */ - udf_next_aext(inode, &prev_epos, &tmpeloc, &tmpelen, 0); - udf_next_aext(inode, &cur_epos, &tmpeloc, &tmpelen, 0); + ret = udf_next_aext(inode, &prev_epos, &tmpeloc, &tmpelen, &tmpetype, 0); + if (ret < 0) + goto out_free; + ret = udf_next_aext(inode, &cur_epos, &tmpeloc, &tmpelen, &tmpetype, 0); + if (ret < 0) + goto out_free; /* if the extent is allocated and recorded, return the block if the extent is not a multiple of the blocksize, round up */ - if (etype == (EXT_RECORDED_ALLOCATED >> 30)) { + if (!isBeyondEOF && etype == (EXT_RECORDED_ALLOCATED >> 30)) { if (elen & (inode->i_sb->s_blocksize - 1)) { elen = EXT_RECORDED_ALLOCATED | ((elen + inode->i_sb->s_blocksize - 1) & @@ -791,10 +814,9 @@ static int inode_getblk(struct inode *inode, struct udf_map_rq *map) } /* Are we beyond EOF and preallocated extent? */ - if (etype == -1) { + if (isBeyondEOF) { loff_t hole_len; - isBeyondEOF = true; if (count) { if (c) laarr[0] = laarr[1]; @@ -830,7 +852,6 @@ static int inode_getblk(struct inode *inode, struct udf_map_rq *map) endnum = c + 1; lastblock = 1; } else { - isBeyondEOF = false; endnum = startnum = ((count > 2) ? 2 : count); /* if the current extent is in position 0, @@ -844,15 +865,17 @@ static int inode_getblk(struct inode *inode, struct udf_map_rq *map) /* if the current block is located in an extent, read the next extent */ - etype = udf_next_aext(inode, &next_epos, &eloc, &elen, 0); - if (etype != -1) { + ret = udf_next_aext(inode, &next_epos, &eloc, &elen, &etype, 0); + if (ret > 0) { laarr[c + 1].extLength = (etype << 30) | elen; laarr[c + 1].extLocation = eloc; count++; startnum++; endnum++; - } else + } else if (ret == 0) lastblock = 1; + else + goto out_free; } /* if the current extent is not recorded but allocated, get the @@ -1170,6 +1193,7 @@ static int udf_update_extents(struct inode *inode, struct kernel_long_ad *laarr, int start = 0, i; struct kernel_lb_addr tmploc; uint32_t tmplen; + int8_t tmpetype; int err; if (startnum > endnum) { @@ -1187,14 +1211,19 @@ static int udf_update_extents(struct inode *inode, struct kernel_long_ad *laarr, */ if (err < 0) return err; - udf_next_aext(inode, epos, &laarr[i].extLocation, - &laarr[i].extLength, 1); + err = udf_next_aext(inode, epos, &laarr[i].extLocation, + &laarr[i].extLength, &tmpetype, 1); + if (err < 0) + return err; start++; } } for (i = start; i < endnum; i++) { - udf_next_aext(inode, epos, &tmploc, &tmplen, 0); + err = udf_next_aext(inode, epos, &tmploc, &tmplen, &tmpetype, 0); + if (err < 0) + return err; + udf_write_aext(inode, epos, &laarr[i].extLocation, laarr[i].extLength, 1); } @@ -1953,6 +1982,7 @@ int udf_setup_indirect_aext(struct inode *inode, udf_pblk_t block, struct extent_position nepos; struct kernel_lb_addr neloc; int ver, adsize; + int err = 0; if (UDF_I(inode)->i_alloc_type == ICBTAG_FLAG_AD_SHORT) adsize = sizeof(struct short_ad); @@ -1997,10 +2027,12 @@ int udf_setup_indirect_aext(struct inode *inode, udf_pblk_t block, if (epos->offset + adsize > sb->s_blocksize) { struct kernel_lb_addr cp_loc; uint32_t cp_len; - int cp_type; + int8_t cp_type; epos->offset -= adsize; - cp_type = udf_current_aext(inode, epos, &cp_loc, &cp_len, 0); + err = udf_current_aext(inode, epos, &cp_loc, &cp_len, &cp_type, 0); + if (err <= 0) + goto err_out; cp_len |= ((uint32_t)cp_type) << 30; __udf_add_aext(inode, &nepos, &cp_loc, cp_len, 1); @@ -2015,6 +2047,9 @@ int udf_setup_indirect_aext(struct inode *inode, udf_pblk_t block, *epos = nepos; return 0; +err_out: + brelse(bh); + return err; } /* @@ -2160,21 +2195,30 @@ void udf_write_aext(struct inode *inode, struct extent_position *epos, */ #define UDF_MAX_INDIR_EXTS 16 -int8_t udf_next_aext(struct inode *inode, struct extent_position *epos, - struct kernel_lb_addr *eloc, uint32_t *elen, int inc) +/* + * Returns 1 on success, -errno on error, 0 on hit EOF. + */ +int udf_next_aext(struct inode *inode, struct extent_position *epos, + struct kernel_lb_addr *eloc, uint32_t *elen, int8_t *etype, + int inc) { - int8_t etype; unsigned int indirections = 0; + int ret = 0; + udf_pblk_t block; - while ((etype = udf_current_aext(inode, epos, eloc, elen, inc)) == - (EXT_NEXT_EXTENT_ALLOCDESCS >> 30)) { - udf_pblk_t block; + while (1) { + ret = udf_current_aext(inode, epos, eloc, elen, + etype, inc); + if (ret <= 0) + return ret; + if (*etype != (EXT_NEXT_EXTENT_ALLOCDESCS >> 30)) + return ret; if (++indirections > UDF_MAX_INDIR_EXTS) { udf_err(inode->i_sb, "too many indirect extents in inode %lu\n", inode->i_ino); - return -1; + return -EFSCORRUPTED; } epos->block = *eloc; @@ -2184,18 +2228,19 @@ int8_t udf_next_aext(struct inode *inode, struct extent_position *epos, epos->bh = sb_bread(inode->i_sb, block); if (!epos->bh) { udf_debug("reading block %u failed!\n", block); - return -1; + return -EIO; } } - - return etype; } -int8_t udf_current_aext(struct inode *inode, struct extent_position *epos, - struct kernel_lb_addr *eloc, uint32_t *elen, int inc) +/* + * Returns 1 on success, -errno on error, 0 on hit EOF. + */ +int udf_current_aext(struct inode *inode, struct extent_position *epos, + struct kernel_lb_addr *eloc, uint32_t *elen, int8_t *etype, + int inc) { int alen; - int8_t etype; uint8_t *ptr; struct short_ad *sad; struct long_ad *lad; @@ -2210,20 +2255,23 @@ int8_t udf_current_aext(struct inode *inode, struct extent_position *epos, alen = udf_file_entry_alloc_offset(inode) + iinfo->i_lenAlloc; } else { + struct allocExtDesc *header = + (struct allocExtDesc *)epos->bh->b_data; + if (!epos->offset) epos->offset = sizeof(struct allocExtDesc); ptr = epos->bh->b_data + epos->offset; - alen = sizeof(struct allocExtDesc) + - le32_to_cpu(((struct allocExtDesc *)epos->bh->b_data)-> - lengthAllocDescs); + if (check_add_overflow(sizeof(struct allocExtDesc), + le32_to_cpu(header->lengthAllocDescs), &alen)) + return -1; } switch (iinfo->i_alloc_type) { case ICBTAG_FLAG_AD_SHORT: sad = udf_get_fileshortad(ptr, alen, &epos->offset, inc); if (!sad) - return -1; - etype = le32_to_cpu(sad->extLength) >> 30; + return 0; + *etype = le32_to_cpu(sad->extLength) >> 30; eloc->logicalBlockNum = le32_to_cpu(sad->extPosition); eloc->partitionReferenceNum = iinfo->i_location.partitionReferenceNum; @@ -2232,17 +2280,17 @@ int8_t udf_current_aext(struct inode *inode, struct extent_position *epos, case ICBTAG_FLAG_AD_LONG: lad = udf_get_filelongad(ptr, alen, &epos->offset, inc); if (!lad) - return -1; - etype = le32_to_cpu(lad->extLength) >> 30; + return 0; + *etype = le32_to_cpu(lad->extLength) >> 30; *eloc = lelb_to_cpu(lad->extLocation); *elen = le32_to_cpu(lad->extLength) & UDF_EXTENT_LENGTH_MASK; break; default: udf_debug("alloc_type = %u unsupported\n", iinfo->i_alloc_type); - return -1; + return -EINVAL; } - return etype; + return 1; } static int udf_insert_aext(struct inode *inode, struct extent_position epos, @@ -2251,20 +2299,24 @@ static int udf_insert_aext(struct inode *inode, struct extent_position epos, struct kernel_lb_addr oeloc; uint32_t oelen; int8_t etype; - int err; + int ret; if (epos.bh) get_bh(epos.bh); - while ((etype = udf_next_aext(inode, &epos, &oeloc, &oelen, 0)) != -1) { + while (1) { + ret = udf_next_aext(inode, &epos, &oeloc, &oelen, &etype, 0); + if (ret <= 0) + break; udf_write_aext(inode, &epos, &neloc, nelen, 1); neloc = oeloc; nelen = (etype << 30) | oelen; } - err = udf_add_aext(inode, &epos, &neloc, nelen, 1); + if (ret == 0) + ret = udf_add_aext(inode, &epos, &neloc, nelen, 1); brelse(epos.bh); - return err; + return ret; } int8_t udf_delete_aext(struct inode *inode, struct extent_position epos) @@ -2276,6 +2328,7 @@ int8_t udf_delete_aext(struct inode *inode, struct extent_position epos) struct udf_inode_info *iinfo; struct kernel_lb_addr eloc; uint32_t elen; + int ret; if (epos.bh) { get_bh(epos.bh); @@ -2291,10 +2344,18 @@ int8_t udf_delete_aext(struct inode *inode, struct extent_position epos) adsize = 0; oepos = epos; - if (udf_next_aext(inode, &epos, &eloc, &elen, 1) == -1) + if (udf_next_aext(inode, &epos, &eloc, &elen, &etype, 1) <= 0) return -1; - while ((etype = udf_next_aext(inode, &epos, &eloc, &elen, 1)) != -1) { + while (1) { + ret = udf_next_aext(inode, &epos, &eloc, &elen, &etype, 1); + if (ret < 0) { + brelse(epos.bh); + brelse(oepos.bh); + return -1; + } + if (ret == 0) + break; udf_write_aext(inode, &oepos, &eloc, (etype << 30) | elen, 1); if (oepos.bh != epos.bh) { oepos.block = epos.block; @@ -2351,14 +2412,17 @@ int8_t udf_delete_aext(struct inode *inode, struct extent_position epos) return (elen >> 30); } -int8_t inode_bmap(struct inode *inode, sector_t block, - struct extent_position *pos, struct kernel_lb_addr *eloc, - uint32_t *elen, sector_t *offset) +/* + * Returns 1 on success, -errno on error, 0 on hit EOF. + */ +int inode_bmap(struct inode *inode, sector_t block, struct extent_position *pos, + struct kernel_lb_addr *eloc, uint32_t *elen, sector_t *offset, + int8_t *etype) { unsigned char blocksize_bits = inode->i_sb->s_blocksize_bits; loff_t lbcount = 0, bcount = (loff_t) block << blocksize_bits; - int8_t etype; struct udf_inode_info *iinfo; + int err = 0; iinfo = UDF_I(inode); if (!udf_read_extent_cache(inode, bcount, &lbcount, pos)) { @@ -2368,11 +2432,13 @@ int8_t inode_bmap(struct inode *inode, sector_t block, } *elen = 0; do { - etype = udf_next_aext(inode, pos, eloc, elen, 1); - if (etype == -1) { - *offset = (bcount - lbcount) >> blocksize_bits; - iinfo->i_lenExtents = lbcount; - return -1; + err = udf_next_aext(inode, pos, eloc, elen, etype, 1); + if (err <= 0) { + if (err == 0) { + *offset = (bcount - lbcount) >> blocksize_bits; + iinfo->i_lenExtents = lbcount; + } + return err; } lbcount += *elen; } while (lbcount <= bcount); @@ -2380,5 +2446,5 @@ int8_t inode_bmap(struct inode *inode, sector_t block, udf_update_extent_cache(inode, lbcount - *elen, pos); *offset = (bcount + *elen - lbcount) >> blocksize_bits; - return etype; + return 1; } diff --git a/fs/udf/partition.c b/fs/udf/partition.c index af877991edc1..2b85c9501bed 100644 --- a/fs/udf/partition.c +++ b/fs/udf/partition.c @@ -282,9 +282,11 @@ static uint32_t udf_try_read_meta(struct inode *inode, uint32_t block, sector_t ext_offset; struct extent_position epos = {}; uint32_t phyblock; + int8_t etype; + int err = 0; - if (inode_bmap(inode, block, &epos, &eloc, &elen, &ext_offset) != - (EXT_RECORDED_ALLOCATED >> 30)) + err = inode_bmap(inode, block, &epos, &eloc, &elen, &ext_offset, &etype); + if (err <= 0 || etype != (EXT_RECORDED_ALLOCATED >> 30)) phyblock = 0xFFFFFFFF; else { map = &UDF_SB(sb)->s_partmaps[partition]; diff --git a/fs/udf/super.c b/fs/udf/super.c index 3460ecc826d1..1c8a736b3309 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -2482,13 +2482,14 @@ static unsigned int udf_count_free_table(struct super_block *sb, uint32_t elen; struct kernel_lb_addr eloc; struct extent_position epos; + int8_t etype; mutex_lock(&UDF_SB(sb)->s_alloc_mutex); epos.block = UDF_I(table)->i_location; epos.offset = sizeof(struct unallocSpaceEntry); epos.bh = NULL; - while (udf_next_aext(table, &epos, &eloc, &elen, 1) != -1) + while (udf_next_aext(table, &epos, &eloc, &elen, &etype, 1) > 0) accum += (elen >> table->i_sb->s_blocksize_bits); brelse(epos.bh); diff --git a/fs/udf/truncate.c b/fs/udf/truncate.c index a686c10fd709..4f33a4a48886 100644 --- a/fs/udf/truncate.c +++ b/fs/udf/truncate.c @@ -69,6 +69,7 @@ void udf_truncate_tail_extent(struct inode *inode) int8_t etype = -1, netype; int adsize; struct udf_inode_info *iinfo = UDF_I(inode); + int ret; if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB || inode->i_size == iinfo->i_lenExtents) @@ -85,7 +86,10 @@ void udf_truncate_tail_extent(struct inode *inode) BUG(); /* Find the last extent in the file */ - while ((netype = udf_next_aext(inode, &epos, &eloc, &elen, 1)) != -1) { + while (1) { + ret = udf_next_aext(inode, &epos, &eloc, &elen, &netype, 1); + if (ret <= 0) + break; etype = netype; lbcount += elen; if (lbcount > inode->i_size) { @@ -101,7 +105,8 @@ void udf_truncate_tail_extent(struct inode *inode) epos.offset -= adsize; extent_trunc(inode, &epos, &eloc, etype, elen, nelen); epos.offset += adsize; - if (udf_next_aext(inode, &epos, &eloc, &elen, 1) != -1) + if (udf_next_aext(inode, &epos, &eloc, &elen, + &netype, 1) > 0) udf_err(inode->i_sb, "Extent after EOF in inode %u\n", (unsigned)inode->i_ino); @@ -110,7 +115,8 @@ void udf_truncate_tail_extent(struct inode *inode) } /* This inode entry is in-memory only and thus we don't have to mark * the inode dirty */ - iinfo->i_lenExtents = inode->i_size; + if (ret == 0) + iinfo->i_lenExtents = inode->i_size; brelse(epos.bh); } @@ -124,6 +130,8 @@ void udf_discard_prealloc(struct inode *inode) int8_t etype = -1; struct udf_inode_info *iinfo = UDF_I(inode); int bsize = i_blocksize(inode); + int8_t tmpetype = -1; + int ret; if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB || ALIGN(inode->i_size, bsize) == ALIGN(iinfo->i_lenExtents, bsize)) @@ -132,15 +140,23 @@ void udf_discard_prealloc(struct inode *inode) epos.block = iinfo->i_location; /* Find the last extent in the file */ - while (udf_next_aext(inode, &epos, &eloc, &elen, 0) != -1) { + while (1) { + ret = udf_next_aext(inode, &epos, &eloc, &elen, &tmpetype, 0); + if (ret < 0) + goto out; + if (ret == 0) + break; brelse(prev_epos.bh); prev_epos = epos; if (prev_epos.bh) get_bh(prev_epos.bh); - etype = udf_next_aext(inode, &epos, &eloc, &elen, 1); + ret = udf_next_aext(inode, &epos, &eloc, &elen, &etype, 1); + if (ret < 0) + goto out; lbcount += elen; } + if (etype == (EXT_NOT_RECORDED_ALLOCATED >> 30)) { lbcount -= elen; udf_delete_aext(inode, prev_epos); @@ -150,6 +166,7 @@ void udf_discard_prealloc(struct inode *inode) /* This inode entry is in-memory only and thus we don't have to mark * the inode dirty */ iinfo->i_lenExtents = lbcount; +out: brelse(epos.bh); brelse(prev_epos.bh); } @@ -188,6 +205,7 @@ int udf_truncate_extents(struct inode *inode) loff_t byte_offset; int adsize; struct udf_inode_info *iinfo = UDF_I(inode); + int ret = 0; if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT) adsize = sizeof(struct short_ad); @@ -196,10 +214,12 @@ int udf_truncate_extents(struct inode *inode) else BUG(); - etype = inode_bmap(inode, first_block, &epos, &eloc, &elen, &offset); + ret = inode_bmap(inode, first_block, &epos, &eloc, &elen, &offset, &etype); + if (ret < 0) + return ret; byte_offset = (offset << sb->s_blocksize_bits) + (inode->i_size & (sb->s_blocksize - 1)); - if (etype == -1) { + if (ret == 0) { /* We should extend the file? */ WARN_ON(byte_offset); return 0; @@ -217,8 +237,8 @@ int udf_truncate_extents(struct inode *inode) else lenalloc -= sizeof(struct allocExtDesc); - while ((etype = udf_current_aext(inode, &epos, &eloc, - &elen, 0)) != -1) { + while ((ret = udf_current_aext(inode, &epos, &eloc, + &elen, &etype, 0)) > 0) { if (etype == (EXT_NEXT_EXTENT_ALLOCDESCS >> 30)) { udf_write_aext(inode, &epos, &neloc, nelen, 0); if (indirect_ext_len) { @@ -253,6 +273,11 @@ int udf_truncate_extents(struct inode *inode) } } + if (ret < 0) { + brelse(epos.bh); + return ret; + } + if (indirect_ext_len) { BUG_ON(!epos.bh); udf_free_blocks(sb, NULL, &epos.block, 0, indirect_ext_len); diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h index 88692512a466..d159f20d61e8 100644 --- a/fs/udf/udfdecl.h +++ b/fs/udf/udfdecl.h @@ -157,8 +157,9 @@ extern struct buffer_head *udf_bread(struct inode *inode, udf_pblk_t block, extern int udf_setsize(struct inode *, loff_t); extern void udf_evict_inode(struct inode *); extern int udf_write_inode(struct inode *, struct writeback_control *wbc); -extern int8_t inode_bmap(struct inode *, sector_t, struct extent_position *, - struct kernel_lb_addr *, uint32_t *, sector_t *); +extern int inode_bmap(struct inode *inode, sector_t block, + struct extent_position *pos, struct kernel_lb_addr *eloc, + uint32_t *elen, sector_t *offset, int8_t *etype); int udf_get_block(struct inode *, sector_t, struct buffer_head *, int); extern int udf_setup_indirect_aext(struct inode *inode, udf_pblk_t block, struct extent_position *epos); @@ -169,10 +170,12 @@ extern int udf_add_aext(struct inode *, struct extent_position *, extern void udf_write_aext(struct inode *, struct extent_position *, struct kernel_lb_addr *, uint32_t, int); extern int8_t udf_delete_aext(struct inode *, struct extent_position); -extern int8_t udf_next_aext(struct inode *, struct extent_position *, - struct kernel_lb_addr *, uint32_t *, int); -extern int8_t udf_current_aext(struct inode *, struct extent_position *, - struct kernel_lb_addr *, uint32_t *, int); +extern int udf_next_aext(struct inode *inode, struct extent_position *epos, + struct kernel_lb_addr *eloc, uint32_t *elen, + int8_t *etype, int inc); +extern int udf_current_aext(struct inode *inode, struct extent_position *epos, + struct kernel_lb_addr *eloc, uint32_t *elen, + int8_t *etype, int inc); extern void udf_update_extra_perms(struct inode *inode, umode_t mode); /* misc.c */ diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c index 24bd12186647..c8390976ab6a 100644 --- a/fs/ufs/namei.c +++ b/fs/ufs/namei.c @@ -307,7 +307,7 @@ static int ufs_rename(struct mnt_idmap *idmap, struct inode *old_dir, if (old_dir != new_dir) ufs_set_link(old_inode, dir_de, dir_folio, new_dir, 0); else - folio_release_kmap(dir_folio, new_dir); + folio_release_kmap(dir_folio, dir_de); inode_dec_link_count(old_dir); } return 0; diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h index 54a098fe7285..9a2221b4aa21 100644 --- a/fs/xfs/xfs_linux.h +++ b/fs/xfs/xfs_linux.h @@ -69,7 +69,7 @@ typedef __u32 xfs_nlink_t; #include <asm/param.h> #include <linux/uaccess.h> #include <asm/byteorder.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include "xfs_fs.h" #include "xfs_stats.h" diff --git a/fs/zonefs/sysfs.c b/fs/zonefs/sysfs.c index 8ccb65c2b419..ff9a688f1f9c 100644 --- a/fs/zonefs/sysfs.c +++ b/fs/zonefs/sysfs.c @@ -92,6 +92,7 @@ int zonefs_sysfs_register(struct super_block *sb) struct zonefs_sb_info *sbi = ZONEFS_SB(sb); int ret; + super_set_sysfs_name_id(sb); init_completion(&sbi->s_kobj_unregister); ret = kobject_init_and_add(&sbi->s_kobj, &zonefs_sb_ktype, zonefs_sysfs_root, "%s", sb->s_id); |