author     Jakub Kicinski <kuba@kernel.org>   2023-07-27 15:21:46 -0700
committer  Jakub Kicinski <kuba@kernel.org>   2023-07-27 15:22:46 -0700
commit     014acf26685cfcfae37c9e98b83c622c0b01cf19 (patch)
tree       85ce0217698aef25875fdc62f2332bbbbc1bff2c /fs
parent     9d0cd5d25f7d45bce01bbb3193b54ac24b3a60f3 (diff)
parent     57012c57536f8814dec92e74197ee96c3498d24e (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
Cross-merge networking fixes after downstream PR.
No conflicts or adjacent changes.
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'fs')
-rw-r--r--  fs/btrfs/block-group.c           51
-rw-r--r--  fs/btrfs/block-group.h            4
-rw-r--r--  fs/btrfs/block-rsv.c              5
-rw-r--r--  fs/btrfs/disk-io.c                7
-rw-r--r--  fs/btrfs/free-space-tree.c       24
-rw-r--r--  fs/btrfs/transaction.c           10
-rw-r--r--  fs/btrfs/zoned.c                  3
-rw-r--r--  fs/ext4/mballoc.c               172
-rw-r--r--  fs/ext4/xattr.c                  14
-rw-r--r--  fs/file.c                         6
-rw-r--r--  fs/jbd2/checkpoint.c            277
-rw-r--r--  fs/jbd2/commit.c                  3
-rw-r--r--  fs/jbd2/transaction.c            40
-rw-r--r--  fs/nfsd/nfs4state.c               2
-rw-r--r--  fs/overlayfs/super.c              2
-rw-r--r--  fs/smb/client/cifsfs.h            4
-rw-r--r--  fs/smb/client/ioctl.c            17
-rw-r--r--  fs/smb/server/ksmbd_netlink.h     3
-rw-r--r--  fs/smb/server/server.c            7
-rw-r--r--  fs/smb/server/smb2pdu.c          47
-rw-r--r--  fs/smb/server/smb_common.c       19
-rw-r--r--  fs/smb/server/smb_common.h        2
-rw-r--r--  fs/smb/server/vfs.c              65
-rw-r--r--  fs/smb/server/vfs.h               4
-rw-r--r--  fs/splice.c                       2
25 files changed, 440 insertions, 350 deletions
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index 23726152d62d..030ab44fce18 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -499,12 +499,16 @@ static void fragment_free_space(struct btrfs_block_group *block_group)
  * used yet since their free space will be released as soon as the transaction
  * commits.
  */
-u64 add_new_free_space(struct btrfs_block_group *block_group, u64 start, u64 end)
+int add_new_free_space(struct btrfs_block_group *block_group, u64 start, u64 end,
+		       u64 *total_added_ret)
 {
 	struct btrfs_fs_info *info = block_group->fs_info;
-	u64 extent_start, extent_end, size, total_added = 0;
+	u64 extent_start, extent_end, size;
 	int ret;
 
+	if (total_added_ret)
+		*total_added_ret = 0;
+
 	while (start < end) {
 		ret = find_first_extent_bit(&info->excluded_extents, start,
 					    &extent_start, &extent_end,
@@ -517,10 +521,12 @@ u64 add_new_free_space(struct btrfs_block_group *block_group, u64 start, u64 end
 			start = extent_end + 1;
 		} else if (extent_start > start && extent_start < end) {
 			size = extent_start - start;
-			total_added += size;
 			ret = btrfs_add_free_space_async_trimmed(block_group,
 								 start, size);
-			BUG_ON(ret); /* -ENOMEM or logic error */
+			if (ret)
+				return ret;
+			if (total_added_ret)
+				*total_added_ret += size;
 			start = extent_end + 1;
 		} else {
 			break;
@@ -529,13 +535,15 @@ u64 add_new_free_space(struct btrfs_block_group *block_group, u64 start, u64 end
 
 	if (start < end) {
 		size = end - start;
-		total_added += size;
 		ret = btrfs_add_free_space_async_trimmed(block_group, start,
 							 size);
-		BUG_ON(ret); /* -ENOMEM or logic error */
+		if (ret)
+			return ret;
+		if (total_added_ret)
+			*total_added_ret += size;
 	}
 
-	return total_added;
+	return 0;
 }
 
 /*
@@ -779,8 +787,13 @@ next:
 
 		if (key.type == BTRFS_EXTENT_ITEM_KEY ||
 		    key.type == BTRFS_METADATA_ITEM_KEY) {
-			total_found += add_new_free_space(block_group, last,
-							  key.objectid);
+			u64 space_added;
+
+			ret = add_new_free_space(block_group, last, key.objectid,
+						 &space_added);
+			if (ret)
+				goto out;
+			total_found += space_added;
 			if (key.type == BTRFS_METADATA_ITEM_KEY)
 				last = key.objectid +
 					fs_info->nodesize;
@@ -795,11 +808,10 @@ next:
 		}
 		path->slots[0]++;
 	}
-	ret = 0;
-
-	total_found += add_new_free_space(block_group, last,
-				block_group->start + block_group->length);
 
+	ret = add_new_free_space(block_group, last,
+				 block_group->start + block_group->length,
+				 NULL);
 out:
 	btrfs_free_path(path);
 	return ret;
@@ -2294,9 +2306,11 @@ static int read_one_block_group(struct btrfs_fs_info *info,
 		btrfs_free_excluded_extents(cache);
 	} else if (cache->used == 0) {
 		cache->cached = BTRFS_CACHE_FINISHED;
-		add_new_free_space(cache, cache->start,
-				   cache->start + cache->length);
+		ret = add_new_free_space(cache, cache->start,
+					 cache->start + cache->length, NULL);
 		btrfs_free_excluded_extents(cache);
+		if (ret)
+			goto error;
 	}
 
 	ret = btrfs_add_block_group_cache(info, cache);
@@ -2740,9 +2754,12 @@ struct btrfs_block_group *btrfs_make_block_group(struct btrfs_trans_handle *tran
 		return ERR_PTR(ret);
 	}
 
-	add_new_free_space(cache, chunk_offset, chunk_offset + size);
-
+	ret = add_new_free_space(cache, chunk_offset, chunk_offset + size, NULL);
 	btrfs_free_excluded_extents(cache);
+	if (ret) {
+		btrfs_put_block_group(cache);
+		return ERR_PTR(ret);
+	}
 
 	/*
 	 * Ensure the corresponding space_info object is created and
diff --git a/fs/btrfs/block-group.h b/fs/btrfs/block-group.h
index 381c54a56417..aba5dff66c19 100644
--- a/fs/btrfs/block-group.h
+++ b/fs/btrfs/block-group.h
@@ -289,8 +289,8 @@ int btrfs_cache_block_group(struct btrfs_block_group *cache, bool wait);
 void btrfs_put_caching_control(struct btrfs_caching_control *ctl);
 struct btrfs_caching_control *btrfs_get_caching_control(
 		struct btrfs_block_group *cache);
-u64 add_new_free_space(struct btrfs_block_group *block_group,
-		       u64 start, u64 end);
+int add_new_free_space(struct btrfs_block_group *block_group,
+		       u64 start, u64 end, u64 *total_added_ret);
 struct btrfs_trans_handle *btrfs_start_trans_remove_block_group(
 				struct btrfs_fs_info *fs_info,
 				const u64 chunk_offset);
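The two files above change the add_new_free_space() contract: instead of returning the number of bytes added (and crashing on failure with BUG_ON), it now returns an errno and reports the byte count through an optional out parameter. A minimal sketch of the resulting caller pattern, modelled on the converted call sites in this patch (the helper name is illustrative, not from the patch):

static int cache_range(struct btrfs_block_group *bg, u64 start, u64 end,
		       u64 *total_found)
{
	u64 space_added;
	int ret;

	/* Non-zero means -ENOMEM or a logic error; propagate it
	 * instead of crashing as the old BUG_ON(ret) did. */
	ret = add_new_free_space(bg, start, end, &space_added);
	if (ret)
		return ret;
	if (total_found)
		*total_found += space_added;	/* pass NULL when unneeded */
	return 0;
}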
diff --git a/fs/btrfs/block-rsv.c b/fs/btrfs/block-rsv.c
index 6279d200cf83..77684c5e0c8b 100644
--- a/fs/btrfs/block-rsv.c
+++ b/fs/btrfs/block-rsv.c
@@ -349,6 +349,11 @@ void btrfs_update_global_block_rsv(struct btrfs_fs_info *fs_info)
 	}
 	read_unlock(&fs_info->global_root_lock);
 
+	if (btrfs_fs_compat_ro(fs_info, BLOCK_GROUP_TREE)) {
+		num_bytes += btrfs_root_used(&fs_info->block_group_root->root_item);
+		min_items++;
+	}
+
 	/*
 	 * But we also want to reserve enough space so we can do the fallback
 	 * global reserve for an unlink, which is an additional
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 7513388b0567..9b9914e5f03d 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3438,11 +3438,16 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
 	 * For devices supporting discard turn on discard=async automatically,
 	 * unless it's already set or disabled. This could be turned off by
 	 * nodiscard for the same mount.
+	 *
+	 * The zoned mode piggy backs on the discard functionality for
+	 * resetting a zone. There is no reason to delay the zone reset as it is
+	 * fast enough. So, do not enable async discard for zoned mode.
 	 */
 	if (!(btrfs_test_opt(fs_info, DISCARD_SYNC) ||
 	      btrfs_test_opt(fs_info, DISCARD_ASYNC) ||
 	      btrfs_test_opt(fs_info, NODISCARD)) &&
-	    fs_info->fs_devices->discardable) {
+	    fs_info->fs_devices->discardable &&
+	    !btrfs_is_zoned(fs_info)) {
 		btrfs_set_and_info(fs_info, DISCARD_ASYNC,
 				   "auto enabling async discard");
 	}
diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c
index 045ddce32eca..f169378e2ca6 100644
--- a/fs/btrfs/free-space-tree.c
+++ b/fs/btrfs/free-space-tree.c
@@ -1515,9 +1515,13 @@ static int load_free_space_bitmaps(struct btrfs_caching_control *caching_ctl,
 		if (prev_bit == 0 && bit == 1) {
 			extent_start = offset;
 		} else if (prev_bit == 1 && bit == 0) {
-			total_found += add_new_free_space(block_group,
-							  extent_start,
-							  offset);
+			u64 space_added;
+
+			ret = add_new_free_space(block_group, extent_start,
+						 offset, &space_added);
+			if (ret)
+				goto out;
+			total_found += space_added;
 			if (total_found > CACHING_CTL_WAKE_UP) {
 				total_found = 0;
 				wake_up(&caching_ctl->wait);
@@ -1529,8 +1533,9 @@ static int load_free_space_bitmaps(struct btrfs_caching_control *caching_ctl,
 		}
 	}
 	if (prev_bit == 1) {
-		total_found += add_new_free_space(block_group, extent_start,
-						  end);
+		ret = add_new_free_space(block_group, extent_start, end, NULL);
+		if (ret)
+			goto out;
 		extent_count++;
 	}
 
@@ -1569,6 +1574,8 @@ static int load_free_space_extents(struct btrfs_caching_control *caching_ctl,
 	end = block_group->start + block_group->length;
 
 	while (1) {
+		u64 space_added;
+
 		ret = btrfs_next_item(root, path);
 		if (ret < 0)
 			goto out;
@@ -1583,8 +1590,11 @@ static int load_free_space_extents(struct btrfs_caching_control *caching_ctl,
 		ASSERT(key.type == BTRFS_FREE_SPACE_EXTENT_KEY);
 		ASSERT(key.objectid < end && key.objectid + key.offset <= end);
 
-		total_found += add_new_free_space(block_group, key.objectid,
-						  key.objectid + key.offset);
+		ret = add_new_free_space(block_group, key.objectid,
+					 key.objectid + key.offset, &space_added);
+		if (ret)
+			goto out;
+		total_found += space_added;
 		if (total_found > CACHING_CTL_WAKE_UP) {
 			total_found = 0;
 			wake_up(&caching_ctl->wait);
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index cf306351b148..91b6c2fdc420 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -826,8 +826,13 @@ btrfs_attach_transaction_barrier(struct btrfs_root *root)
 
 	trans = start_transaction(root, 0, TRANS_ATTACH,
 				  BTRFS_RESERVE_NO_FLUSH, true);
-	if (trans == ERR_PTR(-ENOENT))
-		btrfs_wait_for_commit(root->fs_info, 0);
+	if (trans == ERR_PTR(-ENOENT)) {
+		int ret;
+
+		ret = btrfs_wait_for_commit(root->fs_info, 0);
+		if (ret)
+			return ERR_PTR(ret);
+	}
 
 	return trans;
 }
@@ -931,6 +936,7 @@ int btrfs_wait_for_commit(struct btrfs_fs_info *fs_info, u64 transid)
 	}
 
 	wait_for_commit(cur_trans, TRANS_STATE_COMPLETED);
+	ret = cur_trans->aborted;
 	btrfs_put_transaction(cur_trans);
 out:
 	return ret;
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index 85b8b332add9..72b90bc19a19 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -805,6 +805,9 @@ int btrfs_check_mountopts_zoned(struct btrfs_fs_info *info)
 		return -EINVAL;
 	}
 
+	btrfs_clear_and_info(info, DISCARD_ASYNC,
+			"zoned: async discard ignored and disabled for zoned mode");
+
 	return 0;
 }
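Read together, the disk-io.c hunk earlier in this diff and the zoned.c hunk above disable async discard on zoned filesystems: open_ctree() no longer auto-enables it, and btrfs_check_mountopts_zoned() clears it with a message if the user asked for it, since resetting a zone is fast and gains nothing from batching. A condensed sketch of the resulting mount-time policy (the helper name is illustrative, not from the patch):

/* Illustrative predicate only: async discard is auto-enabled when no
 * discard-related mount option was given, the devices support discard,
 * and the filesystem is not zoned. */
static bool auto_enable_async_discard(struct btrfs_fs_info *fs_info)
{
	return !(btrfs_test_opt(fs_info, DISCARD_SYNC) ||
		 btrfs_test_opt(fs_info, DISCARD_ASYNC) ||
		 btrfs_test_opt(fs_info, NODISCARD)) &&
	       fs_info->fs_devices->discardable &&
	       !btrfs_is_zoned(fs_info);
}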
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index a2475b8c9fb5..21b903fe546e 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -1006,14 +1006,11 @@ static void ext4_mb_choose_next_group_best_avail(struct ext4_allocation_context
 	 * fls() instead since we need to know the actual length while modifying
 	 * goal length.
 	 */
-	order = fls(ac->ac_g_ex.fe_len);
+	order = fls(ac->ac_g_ex.fe_len) - 1;
 	min_order = order - sbi->s_mb_best_avail_max_trim_order;
 	if (min_order < 0)
 		min_order = 0;
 
-	if (1 << min_order < ac->ac_o_ex.fe_len)
-		min_order = fls(ac->ac_o_ex.fe_len) + 1;
-
 	if (sbi->s_stripe > 0) {
 		/*
 		 * We are assuming that stripe size is always a multiple of
@@ -1021,9 +1018,16 @@ static void ext4_mb_choose_next_group_best_avail(struct ext4_allocation_context
 		 */
 		num_stripe_clusters = EXT4_NUM_B2C(sbi, sbi->s_stripe);
 		if (1 << min_order < num_stripe_clusters)
-			min_order = fls(num_stripe_clusters);
+			/*
+			 * We consider 1 order less because later we round
+			 * up the goal len to num_stripe_clusters
+			 */
+			min_order = fls(num_stripe_clusters) - 1;
 	}
 
+	if (1 << min_order < ac->ac_o_ex.fe_len)
+		min_order = fls(ac->ac_o_ex.fe_len);
+
 	for (i = order; i >= min_order; i--) {
 		int frag_order;
 		/*
@@ -4761,8 +4765,8 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
 	int order, i;
 	struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
 	struct ext4_locality_group *lg;
-	struct ext4_prealloc_space *tmp_pa, *cpa = NULL;
-	ext4_lblk_t tmp_pa_start, tmp_pa_end;
+	struct ext4_prealloc_space *tmp_pa = NULL, *cpa = NULL;
+	loff_t tmp_pa_end;
 	struct rb_node *iter;
 	ext4_fsblk_t goal_block;
 
@@ -4770,47 +4774,151 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
 	if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
 		return false;
 
-	/* first, try per-file preallocation */
+	/*
+	 * first, try per-file preallocation by searching the inode pa rbtree.
+	 *
+	 * Here, we can't do a direct traversal of the tree because
+	 * ext4_mb_discard_group_preallocation() can paralelly mark the pa
+	 * deleted and that can cause direct traversal to skip some entries.
+	 */
 	read_lock(&ei->i_prealloc_lock);
+
+	if (RB_EMPTY_ROOT(&ei->i_prealloc_node)) {
+		goto try_group_pa;
+	}
+
+	/*
+	 * Step 1: Find a pa with logical start immediately adjacent to the
+	 * original logical start. This could be on the left or right.
+	 *
+	 * (tmp_pa->pa_lstart never changes so we can skip locking for it).
+	 */
 	for (iter = ei->i_prealloc_node.rb_node; iter;
 	     iter = ext4_mb_pa_rb_next_iter(ac->ac_o_ex.fe_logical,
-					    tmp_pa_start, iter)) {
+					    tmp_pa->pa_lstart, iter)) {
 		tmp_pa = rb_entry(iter, struct ext4_prealloc_space,
 				  pa_node.inode_node);
+	}
 
-		/* all fields in this condition don't change,
-		 * so we can skip locking for them */
-		tmp_pa_start = tmp_pa->pa_lstart;
-		tmp_pa_end = tmp_pa->pa_lstart + EXT4_C2B(sbi, tmp_pa->pa_len);
-
-		/* original request start doesn't lie in this PA */
-		if (ac->ac_o_ex.fe_logical < tmp_pa_start ||
-		    ac->ac_o_ex.fe_logical >= tmp_pa_end)
-			continue;
+	/*
+	 * Step 2: The adjacent pa might be to the right of logical start, find
+	 * the left adjacent pa. After this step we'd have a valid tmp_pa whose
+	 * logical start is towards the left of original request's logical start
+	 */
+	if (tmp_pa->pa_lstart > ac->ac_o_ex.fe_logical) {
+		struct rb_node *tmp;
+
+		tmp = rb_prev(&tmp_pa->pa_node.inode_node);
 
-		/* non-extent files can't have physical blocks past 2^32 */
-		if (!(ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)) &&
-		    (tmp_pa->pa_pstart + EXT4_C2B(sbi, tmp_pa->pa_len) >
-		     EXT4_MAX_BLOCK_FILE_PHYS)) {
+		if (tmp) {
+			tmp_pa = rb_entry(tmp, struct ext4_prealloc_space,
+					  pa_node.inode_node);
+		} else {
 			/*
-			 * Since PAs don't overlap, we won't find any
-			 * other PA to satisfy this.
+			 * If there is no adjacent pa to the left then finding
+			 * an overlapping pa is not possible hence stop searching
+			 * inode pa tree
 			 */
-			break;
+			goto try_group_pa;
 		}
+	}
+
+	BUG_ON(!(tmp_pa && tmp_pa->pa_lstart <= ac->ac_o_ex.fe_logical));
 
-		/* found preallocated blocks, use them */
+	/*
+	 * Step 3: If the left adjacent pa is deleted, keep moving left to find
+	 * the first non deleted adjacent pa. After this step we should have a
+	 * valid tmp_pa which is guaranteed to be non deleted.
+	 */
+	for (iter = &tmp_pa->pa_node.inode_node;; iter = rb_prev(iter)) {
+		if (!iter) {
+			/*
+			 * no non deleted left adjacent pa, so stop searching
+			 * inode pa tree
+			 */
+			goto try_group_pa;
+		}
+		tmp_pa = rb_entry(iter, struct ext4_prealloc_space,
+				  pa_node.inode_node);
 		spin_lock(&tmp_pa->pa_lock);
-		if (tmp_pa->pa_deleted == 0 && tmp_pa->pa_free &&
-		    likely(ext4_mb_pa_goal_check(ac, tmp_pa))) {
-			atomic_inc(&tmp_pa->pa_count);
-			ext4_mb_use_inode_pa(ac, tmp_pa);
+		if (tmp_pa->pa_deleted == 0) {
+			/*
+			 * We will keep holding the pa_lock from
+			 * this point on because we don't want group discard
+			 * to delete this pa underneath us. Since group
+			 * discard is anyways an ENOSPC operation it
+			 * should be okay for it to wait a few more cycles.
+			 */
+			break;
+		} else {
 			spin_unlock(&tmp_pa->pa_lock);
-			read_unlock(&ei->i_prealloc_lock);
-			return true;
 		}
+	}
+
+	BUG_ON(!(tmp_pa && tmp_pa->pa_lstart <= ac->ac_o_ex.fe_logical));
+	BUG_ON(tmp_pa->pa_deleted == 1);
+
+	/*
+	 * Step 4: We now have the non deleted left adjacent pa. Only this
+	 * pa can possibly satisfy the request hence check if it overlaps
+	 * original logical start and stop searching if it doesn't.
+	 */
+	tmp_pa_end = (loff_t)tmp_pa->pa_lstart + EXT4_C2B(sbi, tmp_pa->pa_len);
+
+	if (ac->ac_o_ex.fe_logical >= tmp_pa_end) {
 		spin_unlock(&tmp_pa->pa_lock);
+		goto try_group_pa;
+	}
+
+	/* non-extent files can't have physical blocks past 2^32 */
+	if (!(ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)) &&
+	    (tmp_pa->pa_pstart + EXT4_C2B(sbi, tmp_pa->pa_len) >
+	     EXT4_MAX_BLOCK_FILE_PHYS)) {
+		/*
+		 * Since PAs don't overlap, we won't find any other PA to
+		 * satisfy this.
+		 */
+		spin_unlock(&tmp_pa->pa_lock);
+		goto try_group_pa;
+	}
+
+	if (tmp_pa->pa_free && likely(ext4_mb_pa_goal_check(ac, tmp_pa))) {
+		atomic_inc(&tmp_pa->pa_count);
+		ext4_mb_use_inode_pa(ac, tmp_pa);
+		spin_unlock(&tmp_pa->pa_lock);
+		read_unlock(&ei->i_prealloc_lock);
+		return true;
+	} else {
+		/*
+		 * We found a valid overlapping pa but couldn't use it because
+		 * it had no free blocks. This should ideally never happen
+		 * because:
+		 *
+		 * 1. When a new inode pa is added to rbtree it must have
+		 *    pa_free > 0 since otherwise we won't actually need
+		 *    preallocation.
+		 *
+		 * 2. An inode pa that is in the rbtree can only have it's
+		 *    pa_free become zero when another thread calls:
+		 *      ext4_mb_new_blocks
+		 *       ext4_mb_use_preallocated
+		 *        ext4_mb_use_inode_pa
+		 *
+		 * 3. Further, after the above calls make pa_free == 0, we will
+		 *    immediately remove it from the rbtree in:
+		 *      ext4_mb_new_blocks
+		 *       ext4_mb_release_context
+		 *        ext4_mb_put_pa
+		 *
+		 * 4. Since the pa_free becoming 0 and pa_free getting removed
+		 *    from tree both happen in ext4_mb_new_blocks, which is always
+		 *    called with i_data_sem held for data allocations, we can be
+		 *    sure that another process will never see a pa in rbtree with
+		 *    pa_free == 0.
+		 */
+		WARN_ON_ONCE(tmp_pa->pa_free == 0);
 	}
+	spin_unlock(&tmp_pa->pa_lock);
+try_group_pa:
 	read_unlock(&ei->i_prealloc_lock);
 
 	/* can we use group allocation? */
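The order fix at the top of this mballoc.c diff leans on fls() returning the 1-based position of the highest set bit, so fls(len) - 1 is floor(log2(len)), while the old fls(len) effectively rounded the order up. A standalone sketch of the corrected clamping, with the allocation-context fields replaced by plain parameters (simplified, for illustration only):

#include <linux/bitops.h>	/* fls() */

static int best_avail_min_order(int goal_len, int orig_len,
				int max_trim_order, int num_stripe_clusters)
{
	int order = fls(goal_len) - 1;		/* floor(log2(goal_len)) */
	int min_order = order - max_trim_order;

	if (min_order < 0)
		min_order = 0;
	/* one order less, since the goal is later rounded up to stripes */
	if (num_stripe_clusters > 0 && (1 << min_order) < num_stripe_clusters)
		min_order = fls(num_stripe_clusters) - 1;
	/* never trim the goal below the originally requested length */
	if ((1 << min_order) < orig_len)
		min_order = fls(orig_len);
	return min_order;
}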
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 321e3a888c20..05151d61b00b 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -1782,6 +1782,20 @@ static int ext4_xattr_set_entry(struct ext4_xattr_info *i,
 		memmove(here, (void *)here + size,
 			(void *)last - (void *)here + sizeof(__u32));
 		memset(last, 0, size);
+
+		/*
+		 * Update i_inline_off - moved ibody region might contain
+		 * system.data attribute. Handling a failure here won't
+		 * cause other complications for setting an xattr.
+		 */
+		if (!is_block && ext4_has_inline_data(inode)) {
+			ret = ext4_find_inline_data_nolock(inode);
+			if (ret) {
+				ext4_warning_inode(inode,
+					"unable to update i_inline_off");
+				goto out;
+			}
+		}
 	} else if (s->not_found) {
 		/* Insert new name. */
 		size_t size = EXT4_XATTR_LEN(name_len);
diff --git a/fs/file.c b/fs/file.c
index 7893ea161d77..35c62b54c9d6 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -1042,10 +1042,8 @@ unsigned long __fdget_pos(unsigned int fd)
 	struct file *file = (struct file *)(v & ~3);
 
 	if (file && (file->f_mode & FMODE_ATOMIC_POS)) {
-		if (file_count(file) > 1) {
-			v |= FDPUT_POS_UNLOCK;
-			mutex_lock(&file->f_pos_lock);
-		}
+		v |= FDPUT_POS_UNLOCK;
+		mutex_lock(&file->f_pos_lock);
 	}
 	return v;
 }
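The fs/file.c hunk removes the file_count(file) > 1 fast path, so a file opened with FMODE_ATOMIC_POS now always takes f_pos_lock; a reference count of one is no longer treated as proof that the position cannot be accessed concurrently (files can be reachable by other means than the fd table, which is my reading of the motivation, not stated in this diff). A userspace illustration of the guarantee the lock provides (a plain C test program, assumed for illustration; not part of the patch):

/* Two threads read()ing the same open file description: because the
 * kernel serializes f_pos updates under f_pos_lock, each byte is
 * consumed exactly once and the final offset equals the file size. */
#include <fcntl.h>
#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

static void *reader(void *arg)
{
	char buf[64];
	int fd = *(int *)arg;

	while (read(fd, buf, sizeof(buf)) > 0)
		;	/* f_pos advances atomically per read() */
	return NULL;
}

int main(void)
{
	pthread_t t1, t2;
	int fd = open("/etc/services", O_RDONLY);

	if (fd < 0)
		return 1;
	pthread_create(&t1, NULL, reader, &fd);
	pthread_create(&t2, NULL, reader, &fd);
	pthread_join(t1, NULL);
	pthread_join(t2, NULL);
	printf("final offset: %lld\n", (long long)lseek(fd, 0, SEEK_CUR));
	close(fd);
	return 0;
}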
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index 51bd38da21cd..9ec91017a7f3 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -27,7 +27,7 @@
  *
  * Called with j_list_lock held.
  */
-static inline void __buffer_unlink_first(struct journal_head *jh)
+static inline void __buffer_unlink(struct journal_head *jh)
 {
 	transaction_t *transaction = jh->b_cp_transaction;
 
@@ -41,45 +41,6 @@ static inline void __buffer_unlink_first(struct journal_head *jh)
 }
 
 /*
- * Unlink a buffer from a transaction checkpoint(io) list.
- *
- * Called with j_list_lock held.
- */
-static inline void __buffer_unlink(struct journal_head *jh)
-{
-	transaction_t *transaction = jh->b_cp_transaction;
-
-	__buffer_unlink_first(jh);
-	if (transaction->t_checkpoint_io_list == jh) {
-		transaction->t_checkpoint_io_list = jh->b_cpnext;
-		if (transaction->t_checkpoint_io_list == jh)
-			transaction->t_checkpoint_io_list = NULL;
-	}
-}
-
-/*
- * Move a buffer from the checkpoint list to the checkpoint io list
- *
- * Called with j_list_lock held
- */
-static inline void __buffer_relink_io(struct journal_head *jh)
-{
-	transaction_t *transaction = jh->b_cp_transaction;
-
-	__buffer_unlink_first(jh);
-
-	if (!transaction->t_checkpoint_io_list) {
-		jh->b_cpnext = jh->b_cpprev = jh;
-	} else {
-		jh->b_cpnext = transaction->t_checkpoint_io_list;
-		jh->b_cpprev = transaction->t_checkpoint_io_list->b_cpprev;
-		jh->b_cpprev->b_cpnext = jh;
-		jh->b_cpnext->b_cpprev = jh;
-	}
-	transaction->t_checkpoint_io_list = jh;
-}
-
-/*
  * Check a checkpoint buffer could be release or not.
  *
  * Requires j_list_lock
@@ -183,6 +144,7 @@ __flush_batch(journal_t *journal, int *batch_count)
 		struct buffer_head *bh = journal->j_chkpt_bhs[i];
 		BUFFER_TRACE(bh, "brelse");
 		__brelse(bh);
+		journal->j_chkpt_bhs[i] = NULL;
 	}
 	*batch_count = 0;
 }
@@ -242,15 +204,6 @@ restart:
 		jh = transaction->t_checkpoint_list;
 		bh = jh2bh(jh);
 
-		if (buffer_locked(bh)) {
-			get_bh(bh);
-			spin_unlock(&journal->j_list_lock);
-			wait_on_buffer(bh);
-			/* the journal_head may have gone by now */
-			BUFFER_TRACE(bh, "brelse");
-			__brelse(bh);
-			goto retry;
-		}
 		if (jh->b_transaction != NULL) {
 			transaction_t *t = jh->b_transaction;
 			tid_t tid = t->t_tid;
@@ -285,30 +238,50 @@ restart:
 			spin_lock(&journal->j_list_lock);
 			goto restart;
 		}
-		if (!buffer_dirty(bh)) {
+		if (!trylock_buffer(bh)) {
+			/*
+			 * The buffer is locked, it may be writing back, or
+			 * flushing out in the last couple of cycles, or
+			 * re-adding into a new transaction, need to check
+			 * it again until it's unlocked.
+			 */
+			get_bh(bh);
+			spin_unlock(&journal->j_list_lock);
+			wait_on_buffer(bh);
+			/* the journal_head may have gone by now */
+			BUFFER_TRACE(bh, "brelse");
+			__brelse(bh);
+			goto retry;
+		} else if (!buffer_dirty(bh)) {
+			unlock_buffer(bh);
 			BUFFER_TRACE(bh, "remove from checkpoint");
-			if (__jbd2_journal_remove_checkpoint(jh))
-				/* The transaction was released; we're done */
+			/*
+			 * If the transaction was released or the checkpoint
+			 * list was empty, we're done.
+			 */
+			if (__jbd2_journal_remove_checkpoint(jh) ||
+			    !transaction->t_checkpoint_list)
 				goto out;
-			continue;
+		} else {
+			unlock_buffer(bh);
+			/*
+			 * We are about to write the buffer, it could be
+			 * raced by some other transaction shrink or buffer
+			 * re-log logic once we release the j_list_lock,
+			 * leave it on the checkpoint list and check status
+			 * again to make sure it's clean.
+			 */
+			BUFFER_TRACE(bh, "queue");
+			get_bh(bh);
+			J_ASSERT_BH(bh, !buffer_jwrite(bh));
+			journal->j_chkpt_bhs[batch_count++] = bh;
+			transaction->t_chp_stats.cs_written++;
+			transaction->t_checkpoint_list = jh->b_cpnext;
 		}
-		/*
-		 * Important: we are about to write the buffer, and
-		 * possibly block, while still holding the journal
-		 * lock.  We cannot afford to let the transaction
-		 * logic start messing around with this buffer before
-		 * we write it to disk, as that would break
-		 * recoverability.
-		 */
-		BUFFER_TRACE(bh, "queue");
-		get_bh(bh);
-		J_ASSERT_BH(bh, !buffer_jwrite(bh));
-		journal->j_chkpt_bhs[batch_count++] = bh;
-		__buffer_relink_io(jh);
-		transaction->t_chp_stats.cs_written++;
+
 		if ((batch_count == JBD2_NR_BATCH) ||
-		    need_resched() ||
-		    spin_needbreak(&journal->j_list_lock))
+		    need_resched() || spin_needbreak(&journal->j_list_lock) ||
+		    jh2bh(transaction->t_checkpoint_list) == journal->j_chkpt_bhs[0])
 			goto unlock_and_flush;
 	}
 
@@ -322,38 +295,6 @@ restart:
 		goto restart;
 	}
 
-	/*
-	 * Now we issued all of the transaction's buffers, let's deal
-	 * with the buffers that are out for I/O.
-	 */
-restart2:
-	/* Did somebody clean up the transaction in the meanwhile? */
-	if (journal->j_checkpoint_transactions != transaction ||
-	    transaction->t_tid != this_tid)
-		goto out;
-
-	while (transaction->t_checkpoint_io_list) {
-		jh = transaction->t_checkpoint_io_list;
-		bh = jh2bh(jh);
-		if (buffer_locked(bh)) {
-			get_bh(bh);
-			spin_unlock(&journal->j_list_lock);
-			wait_on_buffer(bh);
-			/* the journal_head may have gone by now */
-			BUFFER_TRACE(bh, "brelse");
-			__brelse(bh);
-			spin_lock(&journal->j_list_lock);
-			goto restart2;
-		}
-
-		/*
-		 * Now in whatever state the buffer currently is, we
-		 * know that it has been written out and so we can
-		 * drop it from the list
-		 */
-		if (__jbd2_journal_remove_checkpoint(jh))
-			break;
-	}
 out:
 	spin_unlock(&journal->j_list_lock);
 	result = jbd2_cleanup_journal_tail(journal);
@@ -409,49 +350,9 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
 /* Checkpoint list management */
 
 /*
- * journal_clean_one_cp_list
- *
- * Find all the written-back checkpoint buffers in the given list and
- * release them. If 'destroy' is set, clean all buffers unconditionally.
- *
- * Called with j_list_lock held.
- * Returns 1 if we freed the transaction, 0 otherwise.
- */
-static int journal_clean_one_cp_list(struct journal_head *jh, bool destroy)
-{
-	struct journal_head *last_jh;
-	struct journal_head *next_jh = jh;
-
-	if (!jh)
-		return 0;
-
-	last_jh = jh->b_cpprev;
-	do {
-		jh = next_jh;
-		next_jh = jh->b_cpnext;
-
-		if (!destroy && __cp_buffer_busy(jh))
-			return 0;
-
-		if (__jbd2_journal_remove_checkpoint(jh))
-			return 1;
-		/*
-		 * This function only frees up some memory
-		 * if possible so we dont have an obligation
-		 * to finish processing. Bail out if preemption
-		 * requested:
-		 */
-		if (need_resched())
-			return 0;
-	} while (jh != last_jh);
-
-	return 0;
-}
-
-/*
  * journal_shrink_one_cp_list
  *
- * Find 'nr_to_scan' written-back checkpoint buffers in the given list
+ * Find all the written-back checkpoint buffers in the given list
  * and try to release them. If the whole transaction is released, set
  * the 'released' parameter. Return the number of released checkpointed
  * buffers.
 *
 * Called with j_list_lock held.
 */
static unsigned long journal_shrink_one_cp_list(struct journal_head *jh,
-						unsigned long *nr_to_scan,
-						bool *released)
+						bool destroy, bool *released)
 {
 	struct journal_head *last_jh;
 	struct journal_head *next_jh = jh;
 	unsigned long nr_freed = 0;
 	int ret;
 
-	if (!jh || *nr_to_scan == 0)
+	*released = false;
+	if (!jh)
 		return 0;
 
 	last_jh = jh->b_cpprev;
@@ -475,12 +376,15 @@ static unsigned long journal_shrink_one_cp_list(struct journal_head *jh,
 		jh = next_jh;
 		next_jh = jh->b_cpnext;
 
-		(*nr_to_scan)--;
-		if (__cp_buffer_busy(jh))
-			continue;
+		if (destroy) {
+			ret = __jbd2_journal_remove_checkpoint(jh);
+		} else {
+			ret = jbd2_journal_try_remove_checkpoint(jh);
+			if (ret < 0)
+				continue;
+		}
 
 		nr_freed++;
-		ret = __jbd2_journal_remove_checkpoint(jh);
 		if (ret) {
 			*released = true;
 			break;
@@ -488,7 +392,7 @@ static unsigned long journal_shrink_one_cp_list(struct journal_head *jh,
 
 		if (need_resched())
 			break;
-	} while (jh != last_jh && *nr_to_scan);
+	} while (jh != last_jh);
 
 	return nr_freed;
 }
@@ -506,11 +410,11 @@ unsigned long jbd2_journal_shrink_checkpoint_list(journal_t *journal,
 						  unsigned long *nr_to_scan)
 {
 	transaction_t *transaction, *last_transaction, *next_transaction;
-	bool released;
+	bool __maybe_unused released;
 	tid_t first_tid = 0, last_tid = 0, next_tid = 0;
 	tid_t tid = 0;
 	unsigned long nr_freed = 0;
-	unsigned long nr_scanned = *nr_to_scan;
+	unsigned long freed;
 
again:
 	spin_lock(&journal->j_list_lock);
@@ -539,19 +443,11 @@ again:
 		transaction = next_transaction;
 		next_transaction = transaction->t_cpnext;
 		tid = transaction->t_tid;
-		released = false;
-
-		nr_freed += journal_shrink_one_cp_list(transaction->t_checkpoint_list,
-						       nr_to_scan, &released);
-		if (*nr_to_scan == 0)
-			break;
-		if (need_resched() || spin_needbreak(&journal->j_list_lock))
-			break;
-		if (released)
-			continue;
 
-		nr_freed += journal_shrink_one_cp_list(transaction->t_checkpoint_io_list,
-						       nr_to_scan, &released);
+		freed = journal_shrink_one_cp_list(transaction->t_checkpoint_list,
+						   false, &released);
+		nr_freed += freed;
+		(*nr_to_scan) -= min(*nr_to_scan, freed);
 		if (*nr_to_scan == 0)
 			break;
 		if (need_resched() || spin_needbreak(&journal->j_list_lock))
@@ -572,9 +468,8 @@ again:
 	if (*nr_to_scan && next_tid)
 		goto again;
out:
-	nr_scanned -= *nr_to_scan;
 	trace_jbd2_shrink_checkpoint_list(journal, first_tid, tid, last_tid,
-					  nr_freed, nr_scanned, next_tid);
+					  nr_freed, next_tid);
 
 	return nr_freed;
 }
@@ -590,7 +485,7 @@ out:
 void __jbd2_journal_clean_checkpoint_list(journal_t *journal, bool destroy)
 {
 	transaction_t *transaction, *last_transaction, *next_transaction;
-	int ret;
+	bool released;
 
 	transaction = journal->j_checkpoint_transactions;
 	if (!transaction)
@@ -601,8 +496,8 @@ void __jbd2_journal_clean_checkpoint_list(journal_t *journal, bool destroy)
 	do {
 		transaction = next_transaction;
 		next_transaction = transaction->t_cpnext;
-		ret = journal_clean_one_cp_list(transaction->t_checkpoint_list,
-						destroy);
+		journal_shrink_one_cp_list(transaction->t_checkpoint_list,
+					   destroy, &released);
 		/*
 		 * This function only frees up some memory if possible so we
 		 * dont have an obligation to finish processing. Bail out if
@@ -610,23 +505,12 @@ void __jbd2_journal_clean_checkpoint_list(journal_t *journal, bool destroy)
 		 */
 		if (need_resched())
 			return;
-		if (ret)
-			continue;
-		/*
-		 * It is essential that we are as careful as in the case of
-		 * t_checkpoint_list with removing the buffer from the list as
-		 * we can possibly see not yet submitted buffers on io_list
-		 */
-		ret = journal_clean_one_cp_list(transaction->
-				t_checkpoint_io_list, destroy);
-		if (need_resched())
-			return;
 		/*
 		 * Stop scanning if we couldn't free the transaction. This
 		 * avoids pointless scanning of transactions which still
 		 * weren't checkpointed.
 		 */
-		if (!ret)
+		if (!released)
 			return;
 	} while (transaction != last_transaction);
 }
@@ -705,7 +589,7 @@ int __jbd2_journal_remove_checkpoint(struct journal_head *jh)
 	jbd2_journal_put_journal_head(jh);
 
 	/* Is this transaction empty? */
-	if (transaction->t_checkpoint_list || transaction->t_checkpoint_io_list)
+	if (transaction->t_checkpoint_list)
 		return 0;
 
 	/*
@@ -737,6 +621,34 @@ int __jbd2_journal_remove_checkpoint(struct journal_head *jh)
 }
 
 /*
+ * Check the checkpoint buffer and try to remove it from the checkpoint
+ * list if it's clean. Returns -EBUSY if it is not clean, returns 1 if
+ * it frees the transaction, 0 otherwise.
+ *
+ * This function is called with j_list_lock held.
+ */
+int jbd2_journal_try_remove_checkpoint(struct journal_head *jh)
+{
+	struct buffer_head *bh = jh2bh(jh);
+
+	if (!trylock_buffer(bh))
+		return -EBUSY;
+	if (buffer_dirty(bh)) {
+		unlock_buffer(bh);
+		return -EBUSY;
+	}
+	unlock_buffer(bh);
+
+	/*
+	 * Buffer is clean and the IO has finished (we held the buffer
+	 * lock) so the checkpoint is done. We can safely remove the
+	 * buffer from this transaction.
+	 */
+	JBUFFER_TRACE(jh, "remove from checkpoint list");
+	return __jbd2_journal_remove_checkpoint(jh);
+}
+
+/*
  * journal_insert_checkpoint: put a committed buffer onto a checkpoint
  * list so that we know when it is safe to clean the transaction out of
  * the log.
@@ -797,7 +709,6 @@ void __jbd2_journal_drop_transaction(journal_t *journal, transaction_t *transact
 	J_ASSERT(transaction->t_forget == NULL);
 	J_ASSERT(transaction->t_shadow_list == NULL);
 	J_ASSERT(transaction->t_checkpoint_list == NULL);
-	J_ASSERT(transaction->t_checkpoint_io_list == NULL);
 	J_ASSERT(atomic_read(&transaction->t_updates) == 0);
 	J_ASSERT(journal->j_committing_transaction != transaction);
 	J_ASSERT(journal->j_running_transaction != transaction);
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index b33155dd7001..1073259902a6 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -1141,8 +1141,7 @@ restart_loop:
 	spin_lock(&journal->j_list_lock);
 	commit_transaction->t_state = T_FINISHED;
 	/* Check if the transaction can be dropped now that we are finished */
-	if (commit_transaction->t_checkpoint_list == NULL &&
-	    commit_transaction->t_checkpoint_io_list == NULL) {
+	if (commit_transaction->t_checkpoint_list == NULL) {
 		__jbd2_journal_drop_transaction(journal, commit_transaction);
 		jbd2_journal_free_transaction(commit_transaction);
 	}
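The jbd2 rework above replaces the open-coded "is it clean, then unlink it" logic with the new jbd2_journal_try_remove_checkpoint(), whose three-way result is documented in its comment: -EBUSY when the buffer is locked or still dirty, 1 when removing the buffer also freed the whole transaction, 0 when only the buffer was dropped. A sketch of the caller pattern the converted sites follow (hypothetical scan step; per the comment above, j_list_lock must be held):

/* Hypothetical scan step: skip busy buffers, count the clean ones we
 * dropped, and let the caller stop once the transaction is gone. */
static unsigned long scan_one(struct journal_head *jh, bool *txn_freed)
{
	int ret = jbd2_journal_try_remove_checkpoint(jh);

	if (ret < 0)		/* -EBUSY: locked or still dirty */
		return 0;
	if (ret > 0)		/* checkpoint list emptied, txn dropped */
		*txn_freed = true;
	return 1;		/* one checkpointed buffer released */
}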
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 18611241f451..4d1fda1f7143 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -1784,8 +1784,7 @@ int jbd2_journal_forget(handle_t *handle, struct buffer_head *bh)
 		 * Otherwise, if the buffer has been written to disk,
 		 * it is safe to remove the checkpoint and drop it.
 		 */
-		if (!buffer_dirty(bh)) {
-			__jbd2_journal_remove_checkpoint(jh);
+		if (jbd2_journal_try_remove_checkpoint(jh) >= 0) {
 			spin_unlock(&journal->j_list_lock);
 			goto drop;
 		}
@@ -2100,35 +2099,6 @@ void jbd2_journal_unfile_buffer(journal_t *journal, struct journal_head *jh)
 	__brelse(bh);
 }
 
-/*
- * Called from jbd2_journal_try_to_free_buffers().
- *
- * Called under jh->b_state_lock
- */
-static void
-__journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh)
-{
-	struct journal_head *jh;
-
-	jh = bh2jh(bh);
-
-	if (buffer_locked(bh) || buffer_dirty(bh))
-		goto out;
-
-	if (jh->b_next_transaction != NULL || jh->b_transaction != NULL)
-		goto out;
-
-	spin_lock(&journal->j_list_lock);
-	if (jh->b_cp_transaction != NULL) {
-		/* written-back checkpointed metadata buffer */
-		JBUFFER_TRACE(jh, "remove from checkpoint list");
-		__jbd2_journal_remove_checkpoint(jh);
-	}
-	spin_unlock(&journal->j_list_lock);
-out:
-	return;
-}
-
 /**
  * jbd2_journal_try_to_free_buffers() - try to free page buffers.
  * @journal: journal for operation
@@ -2186,7 +2156,13 @@ bool jbd2_journal_try_to_free_buffers(journal_t *journal, struct folio *folio)
 			continue;
 
 		spin_lock(&jh->b_state_lock);
-		__journal_try_to_free_buffer(journal, bh);
+		if (!jh->b_transaction && !jh->b_next_transaction) {
+			spin_lock(&journal->j_list_lock);
+			/* Remove written-back checkpointed metadata buffer */
+			if (jh->b_cp_transaction != NULL)
+				jbd2_journal_try_remove_checkpoint(jh);
+			spin_unlock(&journal->j_list_lock);
+		}
 		spin_unlock(&jh->b_state_lock);
 		jbd2_journal_put_journal_head(jh);
 		if (buffer_jbd(bh))
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 6e61fa3acaf1..3aefbad4cc09 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -6341,8 +6341,6 @@ static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid)
 	if (ZERO_STATEID(stateid) || ONE_STATEID(stateid) ||
 		CLOSE_STATEID(stateid))
 		return status;
-	if (!same_clid(&stateid->si_opaque.so_clid, &cl->cl_clientid))
-		return status;
 	spin_lock(&cl->cl_lock);
 	s = find_stateid_locked(cl, stateid);
 	if (!s)
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index 5b069f1a1e44..cc8977498c48 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -1460,7 +1460,7 @@ int ovl_fill_super(struct super_block *sb, struct fs_context *fc)
 				ovl_trusted_xattr_handlers;
 	sb->s_fs_info = ofs;
 	sb->s_flags |= SB_POSIXACL;
-	sb->s_iflags |= SB_I_SKIP_SYNC;
+	sb->s_iflags |= SB_I_SKIP_SYNC | SB_I_IMA_UNVERIFIABLE_SIGNATURE;
 
 	err = -ENOMEM;
 	root_dentry = ovl_get_root(sb, ctx->upper.dentry, oe);
diff --git a/fs/smb/client/cifsfs.h b/fs/smb/client/cifsfs.h
index d7274eefc666..15c8cc4b6680 100644
--- a/fs/smb/client/cifsfs.h
+++ b/fs/smb/client/cifsfs.h
@@ -159,6 +159,6 @@ extern const struct export_operations cifs_export_ops;
 #endif /* CONFIG_CIFS_NFSD_EXPORT */
 
 /* when changing internal version - update following two lines at same time */
-#define SMB3_PRODUCT_BUILD 43
-#define CIFS_VERSION   "2.43"
+#define SMB3_PRODUCT_BUILD 44
+#define CIFS_VERSION   "2.44"
 #endif				/* _CIFSFS_H */
diff --git a/fs/smb/client/ioctl.c b/fs/smb/client/ioctl.c
index fff092bbc7a3..e1904b86ed96 100644
--- a/fs/smb/client/ioctl.c
+++ b/fs/smb/client/ioctl.c
@@ -433,16 +433,21 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
 		 * Dump encryption keys. This is an old ioctl that only
 		 * handles AES-128-{CCM,GCM}.
 		 */
-		if (pSMBFile == NULL)
-			break;
 		if (!capable(CAP_SYS_ADMIN)) {
 			rc = -EACCES;
 			break;
 		}
 
-		tcon = tlink_tcon(pSMBFile->tlink);
+		cifs_sb = CIFS_SB(inode->i_sb);
+		tlink = cifs_sb_tlink(cifs_sb);
+		if (IS_ERR(tlink)) {
+			rc = PTR_ERR(tlink);
+			break;
+		}
+		tcon = tlink_tcon(tlink);
 		if (!smb3_encryption_required(tcon)) {
 			rc = -EOPNOTSUPP;
+			cifs_put_tlink(tlink);
 			break;
 		}
 		pkey_inf.cipher_type =
@@ -459,6 +464,7 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
 			rc = -EFAULT;
 		else
 			rc = 0;
+		cifs_put_tlink(tlink);
 		break;
 	case CIFS_DUMP_FULL_KEY:
 		/*
@@ -470,8 +476,11 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
 			rc = -EACCES;
 			break;
 		}
-		tcon = tlink_tcon(pSMBFile->tlink);
+		cifs_sb = CIFS_SB(inode->i_sb);
+		tlink = cifs_sb_tlink(cifs_sb);
+		tcon = tlink_tcon(tlink);
 		rc = cifs_dump_full_key(tcon, (void __user *)arg);
+		cifs_put_tlink(tlink);
 		break;
 	case CIFS_IOC_NOTIFY:
 		if (!S_ISDIR(inode->i_mode)) {
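The two DUMP_KEY branches now derive the tcon from the superblock instead of requiring an open file (pSMBFile), which means the tlink reference obtained from cifs_sb_tlink() must be dropped on every exit path. A sketch of the acquire/use/release pattern this hunk introduces (error handling condensed; mirrors the first branch above):

	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
	struct tcon_link *tlink;
	struct cifs_tcon *tcon;

	tlink = cifs_sb_tlink(cifs_sb);		/* takes a reference */
	if (IS_ERR(tlink))
		return PTR_ERR(tlink);
	tcon = tlink_tcon(tlink);		/* borrowed from tlink */
	/* ... use tcon ... */
	cifs_put_tlink(tlink);			/* drop on every path */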
diff --git a/fs/smb/server/ksmbd_netlink.h b/fs/smb/server/ksmbd_netlink.h
index fb8b2d566efb..b7521e41402e 100644
--- a/fs/smb/server/ksmbd_netlink.h
+++ b/fs/smb/server/ksmbd_netlink.h
@@ -352,7 +352,8 @@ enum KSMBD_TREE_CONN_STATUS {
 #define KSMBD_SHARE_FLAG_STREAMS		BIT(11)
 #define KSMBD_SHARE_FLAG_FOLLOW_SYMLINKS	BIT(12)
 #define KSMBD_SHARE_FLAG_ACL_XATTR		BIT(13)
-#define KSMBD_SHARE_FLAG_UPDATE		BIT(14)
+#define KSMBD_SHARE_FLAG_UPDATE			BIT(14)
+#define KSMBD_SHARE_FLAG_CROSSMNT		BIT(15)
 
 /*
  * Tree connect request flags.
diff --git a/fs/smb/server/server.c b/fs/smb/server/server.c
index ced7a9e916f0..9df121bdf349 100644
--- a/fs/smb/server/server.c
+++ b/fs/smb/server/server.c
@@ -286,6 +286,7 @@ static void handle_ksmbd_work(struct work_struct *wk)
 static int queue_ksmbd_work(struct ksmbd_conn *conn)
 {
 	struct ksmbd_work *work;
+	int err;
 
 	work = ksmbd_alloc_work_struct();
 	if (!work) {
@@ -297,7 +298,11 @@ static int queue_ksmbd_work(struct ksmbd_conn *conn)
 	work->request_buf = conn->request_buf;
 	conn->request_buf = NULL;
 
-	ksmbd_init_smb_server(work);
+	err = ksmbd_init_smb_server(work);
+	if (err) {
+		ksmbd_free_work_struct(work);
+		return 0;
+	}
 
 	ksmbd_conn_enqueue_request(work);
 	atomic_inc(&conn->r_count);
diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c
index cf8822103f50..9849d7489345 100644
--- a/fs/smb/server/smb2pdu.c
+++ b/fs/smb/server/smb2pdu.c
@@ -87,9 +87,9 @@ struct channel *lookup_chann_list(struct ksmbd_session *sess, struct ksmbd_conn
  */
 int smb2_get_ksmbd_tcon(struct ksmbd_work *work)
 {
-	struct smb2_hdr *req_hdr = smb2_get_msg(work->request_buf);
+	struct smb2_hdr *req_hdr = ksmbd_req_buf_next(work);
 	unsigned int cmd = le16_to_cpu(req_hdr->Command);
-	int tree_id;
+	unsigned int tree_id;
 
 	if (cmd == SMB2_TREE_CONNECT_HE ||
 	    cmd == SMB2_CANCEL_HE ||
@@ -114,7 +114,7 @@ int smb2_get_ksmbd_tcon(struct ksmbd_work *work)
 			pr_err("The first operation in the compound does not have tcon\n");
 			return -EINVAL;
 		}
-		if (work->tcon->id != tree_id) {
+		if (tree_id != UINT_MAX && work->tcon->id != tree_id) {
 			pr_err("tree id(%u) is different with id(%u) in first operation\n",
 					tree_id, work->tcon->id);
 			return -EINVAL;
@@ -559,9 +559,9 @@ int smb2_allocate_rsp_buf(struct ksmbd_work *work)
  */
 int smb2_check_user_session(struct ksmbd_work *work)
 {
-	struct smb2_hdr *req_hdr = smb2_get_msg(work->request_buf);
+	struct smb2_hdr *req_hdr = ksmbd_req_buf_next(work);
 	struct ksmbd_conn *conn = work->conn;
-	unsigned int cmd = conn->ops->get_cmd_val(work);
+	unsigned int cmd = le16_to_cpu(req_hdr->Command);
 	unsigned long long sess_id;
 
 	/*
@@ -587,7 +587,7 @@ int smb2_check_user_session(struct ksmbd_work *work)
 			pr_err("The first operation in the compound does not have sess\n");
 			return -EINVAL;
 		}
-		if (work->sess->id != sess_id) {
+		if (sess_id != ULLONG_MAX && work->sess->id != sess_id) {
 			pr_err("session id(%llu) is different with the first operation(%lld)\n",
 					sess_id, work->sess->id);
 			return -EINVAL;
@@ -2467,8 +2467,9 @@ static void smb2_update_xattrs(struct ksmbd_tree_connect *tcon,
 	}
 }
 
-static int smb2_creat(struct ksmbd_work *work, struct path *path, char *name,
-		      int open_flags, umode_t posix_mode, bool is_dir)
+static int smb2_creat(struct ksmbd_work *work, struct path *parent_path,
+		      struct path *path, char *name, int open_flags,
+		      umode_t posix_mode, bool is_dir)
 {
 	struct ksmbd_tree_connect *tcon = work->tcon;
 	struct ksmbd_share_config *share = tcon->share_conf;
@@ -2495,7 +2496,7 @@ static int smb2_creat(struct ksmbd_work *work, struct path *path, char *name,
 		return rc;
 	}
 
-	rc = ksmbd_vfs_kern_path_locked(work, name, 0, path, 0);
+	rc = ksmbd_vfs_kern_path_locked(work, name, 0, parent_path, path, 0);
 	if (rc) {
 		pr_err("cannot get linux path (%s), err = %d\n",
 		       name, rc);
@@ -2565,7 +2566,7 @@ int smb2_open(struct ksmbd_work *work)
 	struct ksmbd_tree_connect *tcon = work->tcon;
 	struct smb2_create_req *req;
 	struct smb2_create_rsp *rsp;
-	struct path path;
+	struct path path, parent_path;
 	struct ksmbd_share_config *share = tcon->share_conf;
 	struct ksmbd_file *fp = NULL;
 	struct file *filp = NULL;
@@ -2786,7 +2787,8 @@ int smb2_open(struct ksmbd_work *work)
 			goto err_out1;
 		}
 
-		rc = ksmbd_vfs_kern_path_locked(work, name, LOOKUP_NO_SYMLINKS, &path, 1);
+		rc = ksmbd_vfs_kern_path_locked(work, name, LOOKUP_NO_SYMLINKS,
+						&parent_path, &path, 1);
 		if (!rc) {
 			file_present = true;
 
@@ -2906,7 +2908,8 @@ int smb2_open(struct ksmbd_work *work)
 
 	/*create file if not present */
 	if (!file_present) {
-		rc = smb2_creat(work, &path, name, open_flags, posix_mode,
+		rc = smb2_creat(work, &parent_path, &path, name, open_flags,
+				posix_mode,
 				req->CreateOptions & FILE_DIRECTORY_FILE_LE);
 		if (rc) {
 			if (rc == -ENOENT) {
@@ -3321,8 +3324,9 @@ int smb2_open(struct ksmbd_work *work)
 
 err_out:
 	if (file_present || created) {
-		inode_unlock(d_inode(path.dentry->d_parent));
-		dput(path.dentry);
+		inode_unlock(d_inode(parent_path.dentry));
+		path_put(&path);
+		path_put(&parent_path);
 	}
 	ksmbd_revert_fsids(work);
 err_out1:
@@ -5545,7 +5549,7 @@ static int smb2_create_link(struct ksmbd_work *work,
 			    struct nls_table *local_nls)
 {
 	char *link_name = NULL, *target_name = NULL, *pathname = NULL;
-	struct path path;
+	struct path path, parent_path;
 	bool file_present = false;
 	int rc;
 
@@ -5575,7 +5579,7 @@ static int smb2_create_link(struct ksmbd_work *work,
 	ksmbd_debug(SMB, "target name is %s\n", target_name);
 	rc = ksmbd_vfs_kern_path_locked(work, link_name, LOOKUP_NO_SYMLINKS,
-					&path, 0);
+					&parent_path, &path, 0);
 	if (rc) {
 		if (rc != -ENOENT)
 			goto out;
@@ -5605,8 +5609,9 @@ static int smb2_create_link(struct ksmbd_work *work,
 		rc = -EINVAL;
 out:
 	if (file_present) {
-		inode_unlock(d_inode(path.dentry->d_parent));
+		inode_unlock(d_inode(parent_path.dentry));
 		path_put(&path);
+		path_put(&parent_path);
 	}
 	if (!IS_ERR(link_name))
 		kfree(link_name);
@@ -6209,6 +6214,11 @@ int smb2_read(struct ksmbd_work *work)
 	unsigned int max_read_size = conn->vals->max_read_size;
 
 	WORK_BUFFERS(work, req, rsp);
+	if (work->next_smb2_rcv_hdr_off) {
+		work->send_no_response = 1;
+		err = -EOPNOTSUPP;
+		goto out;
+	}
 
 	if (test_share_config_flag(work->tcon->share_conf,
 				   KSMBD_SHARE_FLAG_PIPE)) {
@@ -8609,7 +8619,8 @@ int smb3_decrypt_req(struct ksmbd_work *work)
 	struct smb2_transform_hdr *tr_hdr = smb2_get_msg(buf);
 	int rc = 0;
 
-	if (buf_data_size < sizeof(struct smb2_hdr)) {
+	if (pdu_length < sizeof(struct smb2_transform_hdr) ||
+	    buf_data_size < sizeof(struct smb2_hdr)) {
 		pr_err("Transform message is too small (%u)\n",
 		       pdu_length);
 		return -ECONNABORTED;
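The compound-handling hunks in smb2pdu.c key off the all-ones identifiers (0xFFFFFFFF tree id, 0xFFFFFFFFFFFFFFFF session id) that related compounded SMB2 requests use to mean "same tree/session as the previous command", so only explicit ids are compared against the current work. A condensed sketch of the validation rule (illustrative helpers, not in the patch):

#include <linux/limits.h>	/* UINT_MAX, ULLONG_MAX */

/* Illustrative guards only: in a compound, an all-ones id means
 * "inherit from the previous command" and must not be rejected. */
static bool compound_tree_id_ok(unsigned int tree_id, unsigned int cur)
{
	return tree_id == UINT_MAX || tree_id == cur;
}

static bool compound_sess_id_ok(unsigned long long sess_id,
				unsigned long long cur)
{
	return sess_id == ULLONG_MAX || sess_id == cur;
}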
diff --git a/fs/smb/server/smb_common.c b/fs/smb/server/smb_common.c
index ef20f63e55e6..c2b75d898852 100644
--- a/fs/smb/server/smb_common.c
+++ b/fs/smb/server/smb_common.c
@@ -388,26 +388,29 @@ static struct smb_version_cmds smb1_server_cmds[1] = {
 	[SMB_COM_NEGOTIATE_EX]	= { .proc = smb1_negotiate, },
 };
 
-static void init_smb1_server(struct ksmbd_conn *conn)
+static int init_smb1_server(struct ksmbd_conn *conn)
 {
 	conn->ops = &smb1_server_ops;
 	conn->cmds = smb1_server_cmds;
 	conn->max_cmds = ARRAY_SIZE(smb1_server_cmds);
+	return 0;
 }
 
-void ksmbd_init_smb_server(struct ksmbd_work *work)
+int ksmbd_init_smb_server(struct ksmbd_work *work)
 {
 	struct ksmbd_conn *conn = work->conn;
 	__le32 proto;
 
-	if (conn->need_neg == false)
-		return;
-
 	proto = *(__le32 *)((struct smb_hdr *)work->request_buf)->Protocol;
+	if (conn->need_neg == false) {
+		if (proto == SMB1_PROTO_NUMBER)
+			return -EINVAL;
+		return 0;
+	}
+
 	if (proto == SMB1_PROTO_NUMBER)
-		init_smb1_server(conn);
-	else
-		init_smb3_11_server(conn);
+		return init_smb1_server(conn);
+	return init_smb3_11_server(conn);
 }
 
 int ksmbd_populate_dot_dotdot_entries(struct ksmbd_work *work, int info_level,
diff --git a/fs/smb/server/smb_common.h b/fs/smb/server/smb_common.h
index aeca0f46068f..f1092519c0c2 100644
--- a/fs/smb/server/smb_common.h
+++ b/fs/smb/server/smb_common.h
@@ -427,7 +427,7 @@ bool ksmbd_smb_request(struct ksmbd_conn *conn);
 
 int ksmbd_lookup_dialect_by_id(__le16 *cli_dialects, __le16 dialects_count);
 
-void ksmbd_init_smb_server(struct ksmbd_work *work);
+int ksmbd_init_smb_server(struct ksmbd_work *work);
 
 struct ksmbd_kstat;
 int ksmbd_populate_dot_dotdot_entries(struct ksmbd_work *work,
diff --git a/fs/smb/server/vfs.c b/fs/smb/server/vfs.c
index e35914457350..3d5d652153a5 100644
--- a/fs/smb/server/vfs.c
+++ b/fs/smb/server/vfs.c
@@ -63,13 +63,13 @@ int ksmbd_vfs_lock_parent(struct dentry *parent, struct dentry *child)
 
 static int ksmbd_vfs_path_lookup_locked(struct ksmbd_share_config *share_conf,
 					char *pathname, unsigned int flags,
+					struct path *parent_path,
 					struct path *path)
 {
 	struct qstr last;
 	struct filename *filename;
 	struct path *root_share_path = &share_conf->vfs_path;
 	int err, type;
-	struct path parent_path;
 	struct dentry *d;
 
 	if (pathname[0] == '\0') {
@@ -84,7 +84,7 @@ static int ksmbd_vfs_path_lookup_locked(struct ksmbd_share_config *share_conf,
 		return PTR_ERR(filename);
 
 	err = vfs_path_parent_lookup(filename, flags,
-				     &parent_path, &last, &type,
+				     parent_path, &last, &type,
 				     root_share_path);
 	if (err) {
 		putname(filename);
@@ -92,13 +92,13 @@ static int ksmbd_vfs_path_lookup_locked(struct ksmbd_share_config *share_conf,
 	}
 
 	if (unlikely(type != LAST_NORM)) {
-		path_put(&parent_path);
+		path_put(parent_path);
 		putname(filename);
 		return -ENOENT;
 	}
 
-	inode_lock_nested(parent_path.dentry->d_inode, I_MUTEX_PARENT);
-	d = lookup_one_qstr_excl(&last, parent_path.dentry, 0);
+	inode_lock_nested(parent_path->dentry->d_inode, I_MUTEX_PARENT);
+	d = lookup_one_qstr_excl(&last, parent_path->dentry, 0);
 	if (IS_ERR(d))
 		goto err_out;
 
@@ -108,15 +108,22 @@ static int ksmbd_vfs_path_lookup_locked(struct ksmbd_share_config *share_conf,
 	}
 
 	path->dentry = d;
-	path->mnt = share_conf->vfs_path.mnt;
-	path_put(&parent_path);
-	putname(filename);
+	path->mnt = mntget(parent_path->mnt);
+	if (test_share_config_flag(share_conf, KSMBD_SHARE_FLAG_CROSSMNT)) {
+		err = follow_down(path, 0);
+		if (err < 0) {
+			path_put(path);
+			goto err_out;
+		}
+	}
 
+	putname(filename);
 	return 0;
 
 err_out:
-	inode_unlock(parent_path.dentry->d_inode);
-	path_put(&parent_path);
+	inode_unlock(d_inode(parent_path->dentry));
+	path_put(parent_path);
 	putname(filename);
 	return -ENOENT;
 }
@@ -412,7 +419,8 @@ static int ksmbd_vfs_stream_write(struct ksmbd_file *fp, char *buf, loff_t *pos,
 {
 	char *stream_buf = NULL, *wbuf;
 	struct mnt_idmap *idmap = file_mnt_idmap(fp->filp);
-	size_t size, v_len;
+	size_t size;
+	ssize_t v_len;
 	int err = 0;
 
 	ksmbd_debug(VFS, "write stream data pos : %llu, count : %zd\n",
@@ -429,9 +437,9 @@ static int ksmbd_vfs_stream_write(struct ksmbd_file *fp, char *buf, loff_t *pos,
 				       fp->stream.name,
 				       fp->stream.size,
 				       &stream_buf);
-	if ((int)v_len < 0) {
+	if (v_len < 0) {
 		pr_err("not found stream in xattr : %zd\n", v_len);
-		err = (int)v_len;
+		err = v_len;
 		goto out;
 	}
@@ -1194,14 +1202,14 @@ static int ksmbd_vfs_lookup_in_dir(const struct path *dir, char *name,
  * Return:	0 on success, otherwise error
  */
 int ksmbd_vfs_kern_path_locked(struct ksmbd_work *work, char *name,
-			       unsigned int flags, struct path *path,
-			       bool caseless)
+			       unsigned int flags, struct path *parent_path,
+			       struct path *path, bool caseless)
 {
 	struct ksmbd_share_config *share_conf = work->tcon->share_conf;
 	int err;
-	struct path parent_path;
 
-	err = ksmbd_vfs_path_lookup_locked(share_conf, name, flags, path);
+	err = ksmbd_vfs_path_lookup_locked(share_conf, name, flags, parent_path,
+					   path);
 	if (!err)
 		return 0;
 
@@ -1216,10 +1224,10 @@ int ksmbd_vfs_kern_path_locked(struct ksmbd_work *work, char *name,
 		path_len = strlen(filepath);
 		remain_len = path_len;
 
-		parent_path = share_conf->vfs_path;
-		path_get(&parent_path);
+		*parent_path = share_conf->vfs_path;
+		path_get(parent_path);
 
-		while (d_can_lookup(parent_path.dentry)) {
+		while (d_can_lookup(parent_path->dentry)) {
 			char *filename = filepath + path_len - remain_len;
 			char *next = strchrnul(filename, '/');
 			size_t filename_len = next - filename;
@@ -1228,7 +1236,7 @@ int ksmbd_vfs_kern_path_locked(struct ksmbd_work *work, char *name,
 			if (filename_len == 0)
 				break;
 
-			err = ksmbd_vfs_lookup_in_dir(&parent_path, filename,
+			err = ksmbd_vfs_lookup_in_dir(parent_path, filename,
 						      filename_len,
 						      work->conn->um);
 			if (err)
@@ -1245,8 +1253,8 @@ int ksmbd_vfs_kern_path_locked(struct ksmbd_work *work, char *name,
 				goto out2;
 			else if (is_last)
 				goto out1;
-			path_put(&parent_path);
-			parent_path = *path;
+			path_put(parent_path);
+			*parent_path = *path;
 
 			next[0] = '/';
 			remain_len -= filename_len + 1;
@@ -1254,16 +1262,17 @@ int ksmbd_vfs_kern_path_locked(struct ksmbd_work *work, char *name,
 
 		err = -EINVAL;
out2:
-		path_put(&parent_path);
+		path_put(parent_path);
out1:
 		kfree(filepath);
 	}
 
 	if (!err) {
-		err = ksmbd_vfs_lock_parent(parent_path.dentry, path->dentry);
-		if (err)
-			dput(path->dentry);
-		path_put(&parent_path);
+		err = ksmbd_vfs_lock_parent(parent_path->dentry, path->dentry);
+		if (err) {
+			path_put(path);
+			path_put(parent_path);
+		}
 	}
 	return err;
 }
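After this refactor, ksmbd_vfs_kern_path_locked() hands the caller two references: on success the parent's inode is left locked and both parent_path and path hold references the caller must drop. A sketch of the cleanup sequence now expected of callers (it mirrors the smb2pdu.c call sites earlier in this diff; the local names are illustrative):

	struct path path, parent_path;
	int rc;

	rc = ksmbd_vfs_kern_path_locked(work, name, LOOKUP_NO_SYMLINKS,
					&parent_path, &path, 0);
	if (!rc) {
		/* ... operate on path.dentry with the parent held ... */
		inode_unlock(d_inode(parent_path.dentry));
		path_put(&path);
		path_put(&parent_path);
	}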
diff --git a/fs/smb/server/vfs.h b/fs/smb/server/vfs.h
index 80039312c255..72f9fb4b48d1 100644
--- a/fs/smb/server/vfs.h
+++ b/fs/smb/server/vfs.h
@@ -115,8 +115,8 @@ int ksmbd_vfs_xattr_stream_name(char *stream_name, char **xattr_stream_name,
 int ksmbd_vfs_remove_xattr(struct mnt_idmap *idmap, const struct path *path,
 			   char *attr_name);
 int ksmbd_vfs_kern_path_locked(struct ksmbd_work *work, char *name,
-			       unsigned int flags, struct path *path,
-			       bool caseless);
+			       unsigned int flags, struct path *parent_path,
+			       struct path *path, bool caseless);
 struct dentry *ksmbd_vfs_kern_path_create(struct ksmbd_work *work,
 					   const char *name,
 					   unsigned int flags,
diff --git a/fs/splice.c b/fs/splice.c
index 004eb1c4ce31..3e2a31e1ce6a 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -876,6 +876,8 @@ ssize_t splice_to_socket(struct pipe_inode_info *pipe, struct file *out,
 			msg.msg_flags |= MSG_MORE;
 		if (remain && pipe_occupancy(pipe->head, tail) > 0)
 			msg.msg_flags |= MSG_MORE;
+		if (out->f_flags & O_NONBLOCK)
+			msg.msg_flags |= MSG_DONTWAIT;
 
 		iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, bvec, bc,
 			      len - remain);
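The splice_to_socket() fix maps the file-level O_NONBLOCK status flag onto the per-call MSG_DONTWAIT socket flag, so a non-blocking splice() or sendfile() to a socket no longer sleeps inside the protocol's sendmsg and instead fails with -EAGAIN. A condensed sketch of the flag mapping (the helper name is illustrative; only the O_NONBLOCK and MSG_MORE handling comes from the hunk above):

/* Illustrative mapping only: derive per-call sendmsg flags from the
 * output file's status flags and pipe occupancy. */
static int splice_send_flags(const struct file *out, bool more_data)
{
	int flags = 0;

	if (more_data)
		flags |= MSG_MORE;	/* more pipe data still queued */
	if (out->f_flags & O_NONBLOCK)
		flags |= MSG_DONTWAIT;	/* return -EAGAIN, do not sleep */
	return flags;
}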