From 38f7e14519d39cf524ddc02d4caee9b337dad703 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 30 Jul 2024 12:44:31 +0100 Subject: workqueue: Fix UBSAN 'subtraction overflow' error in shift_and_mask() UBSAN reports the following 'subtraction overflow' error when booting in a virtual machine on Android: | Internal error: UBSAN: integer subtraction overflow: 00000000f2005515 [#1] PREEMPT SMP | Modules linked in: | CPU: 0 PID: 1 Comm: swapper/0 Not tainted 6.10.0-00006-g3cbe9e5abd46-dirty #4 | Hardware name: linux,dummy-virt (DT) | pstate: 600000c5 (nZCv daIF -PAN -UAO -TCO -DIT -SSBS BTYPE=--) | pc : cancel_delayed_work+0x34/0x44 | lr : cancel_delayed_work+0x2c/0x44 | sp : ffff80008002ba60 | x29: ffff80008002ba60 x28: 0000000000000000 x27: 0000000000000000 | x26: 0000000000000000 x25: 0000000000000000 x24: 0000000000000000 | x23: 0000000000000000 x22: 0000000000000000 x21: ffff1f65014cd3c0 | x20: ffffc0e84c9d0da0 x19: ffffc0e84cab3558 x18: ffff800080009058 | x17: 00000000247ee1f8 x16: 00000000247ee1f8 x15: 00000000bdcb279d | x14: 0000000000000001 x13: 0000000000000075 x12: 00000a0000000000 | x11: ffff1f6501499018 x10: 00984901651fffff x9 : ffff5e7cc35af000 | x8 : 0000000000000001 x7 : 3d4d455453595342 x6 : 000000004e514553 | x5 : ffff1f6501499265 x4 : ffff1f650ff60b10 x3 : 0000000000000620 | x2 : ffff80008002ba78 x1 : 0000000000000000 x0 : 0000000000000000 | Call trace: | cancel_delayed_work+0x34/0x44 | deferred_probe_extend_timeout+0x20/0x70 | driver_register+0xa8/0x110 | __platform_driver_register+0x28/0x3c | syscon_init+0x24/0x38 | do_one_initcall+0xe4/0x338 | do_initcall_level+0xac/0x178 | do_initcalls+0x5c/0xa0 | do_basic_setup+0x20/0x30 | kernel_init_freeable+0x8c/0xf8 | kernel_init+0x28/0x1b4 | ret_from_fork+0x10/0x20 | Code: f9000fbf 97fffa2f 39400268 37100048 (d42aa2a0) | ---[ end trace 0000000000000000 ]--- | Kernel panic - not syncing: UBSAN: integer subtraction overflow: Fatal exception This is due to shift_and_mask() using a signed immediate to construct the mask and being called with a shift of 31 (WORK_OFFQ_POOL_SHIFT) so that it ends up decrementing from INT_MIN. Use an unsigned constant '1U' to generate the mask in shift_and_mask(). Cc: Tejun Heo Cc: Lai Jiangshan Fixes: 1211f3b21c2a ("workqueue: Preserve OFFQ bits in cancel[_sync] paths") Signed-off-by: Will Deacon Signed-off-by: Tejun Heo --- kernel/workqueue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/workqueue.c') diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 1745ca788ede..b35f8ce80bc7 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -897,7 +897,7 @@ static struct worker_pool *get_work_pool(struct work_struct *work) static unsigned long shift_and_mask(unsigned long v, u32 shift, u32 bits) { - return (v >> shift) & ((1 << bits) - 1); + return (v >> shift) & ((1U << bits) - 1); } static void work_offqd_unpack(struct work_offq_data *offqd, unsigned long data) -- cgit v1.2.3-70-g09d2 From 98cc1730c89467fc26e2dc2ceb2a014f332daa97 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Thu, 25 Jul 2024 09:04:37 +0800 Subject: workqueue: Remove incorrect "WARN_ON_ONCE(!list_empty(&worker->entry));" from dying worker The commit 68f83057b913 ("workqueue: Reap workers via kthread_stop() and remove detach_completion") changes the procedure of destroying workers; the dying workers are kept in the cull_list in wake_dying_workers() with the pool lock held and removed from the cull_list by the newly added reap_dying_workers() without the pool lock. This can cause a warning if the dying worker is wokenup earlier than reaped as reported by Marc: 2024/07/23 18:01:21 [M83LP63]: [ 157.267727] ------------[ cut here ]------------ 2024/07/23 18:01:21 [M83LP63]: [ 157.267735] WARNING: CPU: 21 PID: 725 at kernel/workqueue.c:3340 worker_thread+0x54e/0x558 2024/07/23 18:01:21 [M83LP63]: [ 157.267746] Modules linked in: binfmt_misc nft_fib_inet nft_fib_ipv4 nft_fib_ipv6 nft_fib nft_reject_inet nf_reject_ipv4 nf_reject_ipv6 nft_reject nft_ct nft_chain_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 ip_set nf_tables sunrpc dm_service_time s390_trng vfio_ccw mdev vfio_iommu_type1 vfio sch_fq_codel 2024/07/23 18:01:21 [M83LP63]: loop dm_multipath configfs nfnetlink lcs ctcm fsm zfcp scsi_transport_fc ghash_s390 prng chacha_s390 libchacha aes_s390 des_s390 libdes sha3_512_s390 sha3_256_s390 sha512_s390 sha256_s390 sha1_s390 sha_common scm_block eadm_sch scsi_dh_rdac scsi_dh_emc scsi_dh_alua pkey zcrypt rng_core autofs4 2024/07/23 18:01:21 [M83LP63]: [ 157.267792] CPU: 21 PID: 725 Comm: kworker/dying Not tainted 6.10.0-rc2-00239-g68f83057b913 #95 2024/07/23 18:01:21 [M83LP63]: [ 157.267796] Hardware name: IBM 3906 M04 704 (LPAR) 2024/07/23 18:01:21 [M83LP63]: [ 157.267802] R:0 T:1 IO:1 EX:1 Key:0 M:1 W:0 P:0 AS:3 CC:1 PM:0 RI:0 EA:3 2024/07/23 18:01:21 [M83LP63]: [ 157.267797] Krnl PSW : 0704d00180000000 000003d600fcd9fa (worker_thread+0x552/0x558) 2024/07/23 18:01:21 [M83LP63]: [ 157.267806] Krnl GPRS: 6479696e6700776f 000002c901b62780 000003d602493ec8 000002c914954600 2024/07/23 18:01:21 [M83LP63]: [ 157.267809] 0000000000000000 0000000000000008 000002c901a85400 000002c90719e840 2024/07/23 18:01:21 [M83LP63]: [ 157.267811] 000002c90719e880 000002c901a85420 000002c91127adf0 000002c901a85400 2024/07/23 18:01:21 [M83LP63]: [ 157.267813] 000002c914954600 0000000000000000 000003d600fcd772 000003560452bd98 2024/07/23 18:01:21 [M83LP63]: [ 157.267822] Krnl Code: 000003d600fcd9ec: c0e500674262 brasl %r14,000003d601cb5eb0 2024/07/23 18:01:21 [M83LP63]: [ 157.267822] 000003d600fcd9f2: a7f4ffc8 brc 15,000003d600fcd982 2024/07/23 18:01:21 [M83LP63]: [ 157.267822] #000003d600fcd9f6: af000000 mc 0,0 2024/07/23 18:01:21 [M83LP63]: [ 157.267822] >000003d600fcd9fa: a7f4fec2 brc 15,000003d600fcd77e 2024/07/23 18:01:21 [M83LP63]: [ 157.267822] 000003d600fcd9fe: 0707 bcr 0,%r7 2024/07/23 18:01:21 [M83LP63]: [ 157.267822] 000003d600fcda00: c00400682e10 brcl 0,000003d601cd3620 2024/07/23 18:01:21 [M83LP63]: [ 157.267822] 000003d600fcda06: eb7ff0500024 stmg %r7,%r15,80(%r15) 2024/07/23 18:01:21 [M83LP63]: [ 157.267822] 000003d600fcda0c: b90400ef lgr %r14,%r15 2024/07/23 18:01:21 [M83LP63]: [ 157.267853] Call Trace: 2024/07/23 18:01:21 [M83LP63]: [ 157.267855] [<000003d600fcd9fa>] worker_thread+0x552/0x558 2024/07/23 18:01:21 [M83LP63]: [ 157.267859] ([<000003d600fcd772>] worker_thread+0x2ca/0x558) 2024/07/23 18:01:21 [M83LP63]: [ 157.267862] [<000003d600fd6c80>] kthread+0x120/0x128 2024/07/23 18:01:21 [M83LP63]: [ 157.267865] [<000003d600f5305c>] __ret_from_fork+0x3c/0x58 2024/07/23 18:01:21 [M83LP63]: [ 157.267868] [<000003d601cc746a>] ret_from_fork+0xa/0x30 2024/07/23 18:01:21 [M83LP63]: [ 157.267873] Last Breaking-Event-Address: 2024/07/23 18:01:21 [M83LP63]: [ 157.267874] [<000003d600fcd778>] worker_thread+0x2d0/0x558 Since the procedure of destroying workers is changed, the WARN_ON_ONCE() becomes incorrect and should be removed. Cc: Marc Hartmayer Link: https://lore.kernel.org/lkml/87le1sjd2e.fsf@linux.ibm.com/ Reported-by: Marc Hartmayer Fixes: 68f83057b913 ("workqueue: Reap workers via kthread_stop() and remove detach_completion") Cc: stable@vger.kernel.org # v6.11+ Signed-off-by: Lai Jiangshan Signed-off-by: Tejun Heo --- kernel/workqueue.c | 1 - 1 file changed, 1 deletion(-) (limited to 'kernel/workqueue.c') diff --git a/kernel/workqueue.c b/kernel/workqueue.c index b35f8ce80bc7..d56bd2277e58 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -3351,7 +3351,6 @@ woke_up: set_pf_worker(false); ida_free(&pool->worker_ida, worker->id); - WARN_ON_ONCE(!list_empty(&worker->entry)); return 0; } -- cgit v1.2.3-70-g09d2 From 8bc35475ef1a23b0e224f3242eb11c76cab0ea88 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 5 Aug 2024 09:37:25 -1000 Subject: workqueue: Fix spruious data race in __flush_work() When flushing a work item for cancellation, __flush_work() knows that it exclusively owns the work item through its PENDING bit. 134874e2eee9 ("workqueue: Allow cancel_work_sync() and disable_work() from atomic contexts on BH work items") added a read of @work->data to determine whether to use busy wait for BH work items that are being canceled. While the read is safe when @from_cancel, @work->data was read before testing @from_cancel to simplify code structure: data = *work_data_bits(work); if (from_cancel && !WARN_ON_ONCE(data & WORK_STRUCT_PWQ) && (data & WORK_OFFQ_BH)) { While the read data was never used if !@from_cancel, this could trigger KCSAN data race detection spuriously: ================================================================== BUG: KCSAN: data-race in __flush_work / __flush_work write to 0xffff8881223aa3e8 of 8 bytes by task 3998 on cpu 0: instrument_write include/linux/instrumented.h:41 [inline] ___set_bit include/asm-generic/bitops/instrumented-non-atomic.h:28 [inline] insert_wq_barrier kernel/workqueue.c:3790 [inline] start_flush_work kernel/workqueue.c:4142 [inline] __flush_work+0x30b/0x570 kernel/workqueue.c:4178 flush_work kernel/workqueue.c:4229 [inline] ... read to 0xffff8881223aa3e8 of 8 bytes by task 50 on cpu 1: __flush_work+0x42a/0x570 kernel/workqueue.c:4188 flush_work kernel/workqueue.c:4229 [inline] flush_delayed_work+0x66/0x70 kernel/workqueue.c:4251 ... value changed: 0x0000000000400000 -> 0xffff88810006c00d Reorganize the code so that @from_cancel is tested before @work->data is accessed. The only problem is triggering KCSAN detection spuriously. This shouldn't need READ_ONCE() or other access qualifiers. No functional changes. Signed-off-by: Tejun Heo Reported-by: syzbot+b3e4f2f51ed645fd5df2@syzkaller.appspotmail.com Fixes: 134874e2eee9 ("workqueue: Allow cancel_work_sync() and disable_work() from atomic contexts on BH work items") Link: http://lkml.kernel.org/r/000000000000ae429e061eea2157@google.com Cc: Jens Axboe --- kernel/workqueue.c | 45 +++++++++++++++++++++++++-------------------- 1 file changed, 25 insertions(+), 20 deletions(-) (limited to 'kernel/workqueue.c') diff --git a/kernel/workqueue.c b/kernel/workqueue.c index d56bd2277e58..ef174d8c1f63 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -4166,7 +4166,6 @@ already_gone: static bool __flush_work(struct work_struct *work, bool from_cancel) { struct wq_barrier barr; - unsigned long data; if (WARN_ON(!wq_online)) return false; @@ -4184,29 +4183,35 @@ static bool __flush_work(struct work_struct *work, bool from_cancel) * was queued on a BH workqueue, we also know that it was running in the * BH context and thus can be busy-waited. */ - data = *work_data_bits(work); - if (from_cancel && - !WARN_ON_ONCE(data & WORK_STRUCT_PWQ) && (data & WORK_OFFQ_BH)) { - /* - * On RT, prevent a live lock when %current preempted soft - * interrupt processing or prevents ksoftirqd from running by - * keeping flipping BH. If the BH work item runs on a different - * CPU then this has no effect other than doing the BH - * disable/enable dance for nothing. This is copied from - * kernel/softirq.c::tasklet_unlock_spin_wait(). - */ - while (!try_wait_for_completion(&barr.done)) { - if (IS_ENABLED(CONFIG_PREEMPT_RT)) { - local_bh_disable(); - local_bh_enable(); - } else { - cpu_relax(); + if (from_cancel) { + unsigned long data = *work_data_bits(work); + + if (!WARN_ON_ONCE(data & WORK_STRUCT_PWQ) && + (data & WORK_OFFQ_BH)) { + /* + * On RT, prevent a live lock when %current preempted + * soft interrupt processing or prevents ksoftirqd from + * running by keeping flipping BH. If the BH work item + * runs on a different CPU then this has no effect other + * than doing the BH disable/enable dance for nothing. + * This is copied from + * kernel/softirq.c::tasklet_unlock_spin_wait(). + */ + while (!try_wait_for_completion(&barr.done)) { + if (IS_ENABLED(CONFIG_PREEMPT_RT)) { + local_bh_disable(); + local_bh_enable(); + } else { + cpu_relax(); + } } + goto out_destroy; } - } else { - wait_for_completion(&barr.done); } + wait_for_completion(&barr.done); + +out_destroy: destroy_work_on_stack(&barr.work); return true; } -- cgit v1.2.3-70-g09d2 From c4c8f369b6a6d21ce27286de1501137771e01dc3 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Mon, 5 Aug 2024 09:30:29 +0200 Subject: workqueue: Correct declaration of cpu_pwq in struct workqueue_struct cpu_pwq is used in various percpu functions that expect variable in __percpu address space. Correct the declaration of cpu_pwq to struct pool_workqueue __rcu * __percpu *cpu_pwq to declare the variable as __percpu pointer. The patch also fixes following sparse errors: workqueue.c:380:37: warning: duplicate [noderef] workqueue.c:380:37: error: multiple address spaces given: __rcu & __percpu workqueue.c:2271:15: error: incompatible types in comparison expression (different address spaces): workqueue.c:2271:15: struct pool_workqueue [noderef] __rcu * workqueue.c:2271:15: struct pool_workqueue [noderef] __percpu * and uncovers a couple of exisiting "incorrect type in assignment" warnings (from __rcu address space), which this patch does not address. Found by GCC's named address space checks. There were no changes in the resulting object files. Signed-off-by: Uros Bizjak Cc: Tejun Heo Cc: Lai Jiangshan Signed-off-by: Tejun Heo --- kernel/workqueue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/workqueue.c') diff --git a/kernel/workqueue.c b/kernel/workqueue.c index ef174d8c1f63..e7b005ff3750 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -377,7 +377,7 @@ struct workqueue_struct { /* hot fields used during command issue, aligned to cacheline */ unsigned int flags ____cacheline_aligned; /* WQ: WQ_* flags */ - struct pool_workqueue __percpu __rcu **cpu_pwq; /* I: per-cpu pwqs */ + struct pool_workqueue __rcu * __percpu *cpu_pwq; /* I: per-cpu pwqs */ struct wq_node_nr_active *node_nr_active[]; /* I: per-node nr_active */ }; -- cgit v1.2.3-70-g09d2