author     Linus Torvalds <torvalds@linux-foundation.org>   2022-12-13 10:43:59 -0800
committer  Linus Torvalds <torvalds@linux-foundation.org>   2022-12-13 10:43:59 -0800
commit     ce8a79d5601aab94c02ed4539c48e8605422ac94 (patch)
tree       7830a97a475d57284640c8e2d3516521722708b6 /lib/sbitmap.c
parent     96f7e448b9f4546ffd0356ffceb2b9586777f316 (diff)
parent     f596da3efaf4130ff61cd029558845808df9bf99 (diff)
Merge tag 'for-6.2/block-2022-12-08' of git://git.kernel.dk/linux
Pull block updates from Jens Axboe:
- NVMe pull requests via Christoph:
    - Support some passthrough commands without CAP_SYS_ADMIN (Kanchan Joshi)
    - Refactor PCIe probing and reset (Christoph Hellwig)
    - Various fabrics authentication fixes and improvements (Sagi Grimberg)
    - Avoid fallback to sequential scan due to transient issues (Uday Shankar)
    - Implement support for the DEAC bit in Write Zeroes (Christoph Hellwig)
    - Allow overriding the IEEE OUI and firmware revision in configfs for nvmet (Aleksandr Miloserdov)
    - Force reconnect when the number of queues changes in nvmet (Daniel Wagner)
    - Minor fixes and improvements (Uros Bizjak, Joel Granados, Sagi Grimberg, Christoph Hellwig, Christophe JAILLET)
    - Fix and cleanup nvme-fc req allocation (Chaitanya Kulkarni)
    - Use the common tagset helpers in the nvme-pci driver (Christoph Hellwig)
    - Cleanup the nvme-pci removal path (Christoph Hellwig)
    - Use kstrtobool() instead of strtobool() (Christophe JAILLET)
    - Allow unprivileged passthrough of Identify Controller (Joel Granados)
    - Support io stats on the mpath device (Sagi Grimberg)
    - Minor nvmet cleanup (Sagi Grimberg)
- MD pull requests via Song:
    - Code cleanups (Christoph)
    - Various fixes
- Floppy pull request from Denis:
    - Fix a memory leak in the init error path (Yuan)
- Series fixing some batch wakeup issues with sbitmap (Gabriel); the lib/sbitmap.c side of this series is shown in the diff at the bottom of this page, with a short sketch of the new scheme after it
- Removal of the pktcdvd driver that was deprecated more than 5 years
ago, and subsequent removal of the devnode callback in struct
block_device_operations as no users are now left (Greg)
- Fix for partition read on an exclusively opened bdev (Jan)
- Series of elevator API cleanups (Jinlong, Christoph)
- Series of fixes and cleanups for blk-iocost (Kemeng)
- Series of fixes and cleanups for blk-throttle (Kemeng)
- Series adding concurrent support for sync queues in BFQ (Yu)
- Series bringing drbd a bit closer to the out-of-tree maintained
version (Christian, Joel, Lars, Philipp)
- Misc drbd fixes (Wang)
- blk-wbt fixes and tweaks for enable/disable (Yu)
- Fixes for mq-deadline for zoned devices (Damien)
- Add support for read-only and offline zones for null_blk
(Shin'ichiro)
- Series fixing the delayed holder tracking, as used by DM (Yu,
Christoph)
- Series enabling bio alloc caching for IRQ based IO (Pavel)
- Series enabling userspace peer-to-peer DMA (Logan)
- BFQ waker fixes (Khazhismel)
- Series fixing elevator refcount issues (Christoph, Jinlong)
- Series cleaning up references around queue destruction (Christoph)
- Series doing quiesce by tagset, enabling cleanups in drivers
(Christoph, Chao)
- Series untangling the queue kobject and queue references (Christoph)
- Misc fixes and cleanups (Bart, David, Dawei, Jinlong, Kemeng, Ye,
Yang, Waiman, Shin'ichiro, Randy, Pankaj, Christoph)
* tag 'for-6.2/block-2022-12-08' of git://git.kernel.dk/linux: (247 commits)
blktrace: Fix output non-blktrace event when blk_classic option enabled
block: sed-opal: Don't include <linux/kernel.h>
sed-opal: allow using IOC_OPAL_SAVE for locking too
blk-cgroup: Fix typo in comment
block: remove bio_set_op_attrs
nvmet: don't open-code NVME_NS_ATTR_RO enumeration
nvme-pci: use the tagset alloc/free helpers
nvme: add the Apple shared tag workaround to nvme_alloc_io_tag_set
nvme: only set reserved_tags in nvme_alloc_io_tag_set for fabrics controllers
nvme: consolidate setting the tagset flags
nvme: pass nr_maps explicitly to nvme_alloc_io_tag_set
block: remove bio_copy_data_iter
nvme-pci: split out a nvme_pci_ctrl_is_dead helper
nvme-pci: return early on ctrl state mismatch in nvme_reset_work
nvme-pci: rename nvme_disable_io_queues
nvme-pci: cleanup nvme_suspend_queue
nvme-pci: remove nvme_pci_disable
nvme-pci: remove nvme_disable_admin_queue
nvme: merge nvme_shutdown_ctrl into nvme_disable_ctrl
nvme: use nvme_wait_ready in nvme_shutdown_ctrl
...
Diffstat (limited to 'lib/sbitmap.c')
-rw-r--r--   lib/sbitmap.c   144
1 file changed, 38 insertions(+), 106 deletions(-)
diff --git a/lib/sbitmap.c b/lib/sbitmap.c
index 58de526ff051..1fcede228fa2 100644
--- a/lib/sbitmap.c
+++ b/lib/sbitmap.c
@@ -434,6 +434,8 @@ int sbitmap_queue_init_node(struct sbitmap_queue *sbq, unsigned int depth,
 	sbq->wake_batch = sbq_calc_wake_batch(sbq, depth);
 	atomic_set(&sbq->wake_index, 0);
 	atomic_set(&sbq->ws_active, 0);
+	atomic_set(&sbq->completion_cnt, 0);
+	atomic_set(&sbq->wakeup_cnt, 0);
 
 	sbq->ws = kzalloc_node(SBQ_WAIT_QUEUES * sizeof(*sbq->ws), flags, node);
 	if (!sbq->ws) {
@@ -441,40 +443,21 @@ int sbitmap_queue_init_node(struct sbitmap_queue *sbq, unsigned int depth,
 		return -ENOMEM;
 	}
 
-	for (i = 0; i < SBQ_WAIT_QUEUES; i++) {
+	for (i = 0; i < SBQ_WAIT_QUEUES; i++)
 		init_waitqueue_head(&sbq->ws[i].wait);
-		atomic_set(&sbq->ws[i].wait_cnt, sbq->wake_batch);
-	}
 
 	return 0;
 }
 EXPORT_SYMBOL_GPL(sbitmap_queue_init_node);
 
-static inline void __sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq,
-						     unsigned int wake_batch)
-{
-	int i;
-
-	if (sbq->wake_batch != wake_batch) {
-		WRITE_ONCE(sbq->wake_batch, wake_batch);
-		/*
-		 * Pairs with the memory barrier in sbitmap_queue_wake_up()
-		 * to ensure that the batch size is updated before the wait
-		 * counts.
-		 */
-		smp_mb();
-		for (i = 0; i < SBQ_WAIT_QUEUES; i++)
-			atomic_set(&sbq->ws[i].wait_cnt, 1);
-	}
-}
-
 static void sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq,
 					    unsigned int depth)
 {
 	unsigned int wake_batch;
 
 	wake_batch = sbq_calc_wake_batch(sbq, depth);
-	__sbitmap_queue_update_wake_batch(sbq, wake_batch);
+	if (sbq->wake_batch != wake_batch)
+		WRITE_ONCE(sbq->wake_batch, wake_batch);
 }
 
 void sbitmap_queue_recalculate_wake_batch(struct sbitmap_queue *sbq,
@@ -488,7 +471,8 @@ void sbitmap_queue_recalculate_wake_batch(struct sbitmap_queue *sbq,
 
 	wake_batch = clamp_val(depth / SBQ_WAIT_QUEUES,
 			min_batch, SBQ_WAKE_BATCH);
-	__sbitmap_queue_update_wake_batch(sbq, wake_batch);
+
+	WRITE_ONCE(sbq->wake_batch, wake_batch);
 }
 EXPORT_SYMBOL_GPL(sbitmap_queue_recalculate_wake_batch);
 
@@ -576,106 +560,56 @@ void sbitmap_queue_min_shallow_depth(struct sbitmap_queue *sbq,
 }
 EXPORT_SYMBOL_GPL(sbitmap_queue_min_shallow_depth);
 
-static struct sbq_wait_state *sbq_wake_ptr(struct sbitmap_queue *sbq)
+static void __sbitmap_queue_wake_up(struct sbitmap_queue *sbq, int nr)
 {
 	int i, wake_index;
 
 	if (!atomic_read(&sbq->ws_active))
-		return NULL;
+		return;
 
 	wake_index = atomic_read(&sbq->wake_index);
 	for (i = 0; i < SBQ_WAIT_QUEUES; i++) {
 		struct sbq_wait_state *ws = &sbq->ws[wake_index];
 
-		if (waitqueue_active(&ws->wait) && atomic_read(&ws->wait_cnt)) {
-			if (wake_index != atomic_read(&sbq->wake_index))
-				atomic_set(&sbq->wake_index, wake_index);
-			return ws;
-		}
-
+		/*
+		 * Advance the index before checking the current queue.
+		 * It improves fairness, by ensuring the queue doesn't
+		 * need to be fully emptied before trying to wake up
+		 * from the next one.
+		 */
 		wake_index = sbq_index_inc(wake_index);
+
+		/*
+		 * It is sufficient to wake up at least one waiter to
+		 * guarantee forward progress.
+		 */
+		if (waitqueue_active(&ws->wait) &&
+		    wake_up_nr(&ws->wait, nr))
+			break;
 	}
 
-	return NULL;
+	if (wake_index != atomic_read(&sbq->wake_index))
+		atomic_set(&sbq->wake_index, wake_index);
 }
 
-static bool __sbq_wake_up(struct sbitmap_queue *sbq, int *nr)
+void sbitmap_queue_wake_up(struct sbitmap_queue *sbq, int nr)
 {
-	struct sbq_wait_state *ws;
-	unsigned int wake_batch;
-	int wait_cnt, cur, sub;
-	bool ret;
+	unsigned int wake_batch = READ_ONCE(sbq->wake_batch);
+	unsigned int wakeups;
 
-	if (*nr <= 0)
-		return false;
+	if (!atomic_read(&sbq->ws_active))
+		return;
 
-	ws = sbq_wake_ptr(sbq);
-	if (!ws)
-		return false;
+	atomic_add(nr, &sbq->completion_cnt);
+	wakeups = atomic_read(&sbq->wakeup_cnt);
 
-	cur = atomic_read(&ws->wait_cnt);
 	do {
-		/*
-		 * For concurrent callers of this, callers should call this
-		 * function again to wakeup a new batch on a different 'ws'.
-		 */
-		if (cur == 0)
-			return true;
-		sub = min(*nr, cur);
-		wait_cnt = cur - sub;
-	} while (!atomic_try_cmpxchg(&ws->wait_cnt, &cur, wait_cnt));
-
-	/*
-	 * If we decremented queue without waiters, retry to avoid lost
-	 * wakeups.
-	 */
-	if (wait_cnt > 0)
-		return !waitqueue_active(&ws->wait);
-
-	*nr -= sub;
-
-	/*
-	 * When wait_cnt == 0, we have to be particularly careful as we are
-	 * responsible to reset wait_cnt regardless whether we've actually
-	 * woken up anybody. But in case we didn't wakeup anybody, we still
-	 * need to retry.
-	 */
-	ret = !waitqueue_active(&ws->wait);
-	wake_batch = READ_ONCE(sbq->wake_batch);
-
-	/*
-	 * Wake up first in case that concurrent callers decrease wait_cnt
-	 * while waitqueue is empty.
-	 */
-	wake_up_nr(&ws->wait, wake_batch);
+		if (atomic_read(&sbq->completion_cnt) - wakeups < wake_batch)
+			return;
+	} while (!atomic_try_cmpxchg(&sbq->wakeup_cnt,
+				     &wakeups, wakeups + wake_batch));
 
-	/*
-	 * Pairs with the memory barrier in sbitmap_queue_resize() to
-	 * ensure that we see the batch size update before the wait
-	 * count is reset.
-	 *
-	 * Also pairs with the implicit barrier between decrementing wait_cnt
-	 * and checking for waitqueue_active() to make sure waitqueue_active()
-	 * sees result of the wakeup if atomic_dec_return() has seen the result
-	 * of atomic_set().
-	 */
-	smp_mb__before_atomic();
-
-	/*
-	 * Increase wake_index before updating wait_cnt, otherwise concurrent
-	 * callers can see valid wait_cnt in old waitqueue, which can cause
-	 * invalid wakeup on the old waitqueue.
-	 */
-	sbq_index_atomic_inc(&sbq->wake_index);
-	atomic_set(&ws->wait_cnt, wake_batch);
-
-	return ret || *nr;
-}
-
-void sbitmap_queue_wake_up(struct sbitmap_queue *sbq, int nr)
-{
-	while (__sbq_wake_up(sbq, &nr))
-		;
+	__sbitmap_queue_wake_up(sbq, wake_batch);
 }
 EXPORT_SYMBOL_GPL(sbitmap_queue_wake_up);
 
@@ -792,9 +726,7 @@ void sbitmap_queue_show(struct sbitmap_queue *sbq, struct seq_file *m)
 	seq_puts(m, "ws={\n");
 	for (i = 0; i < SBQ_WAIT_QUEUES; i++) {
 		struct sbq_wait_state *ws = &sbq->ws[i];
-
-		seq_printf(m, "\t{.wait_cnt=%d, .wait=%s},\n",
-			   atomic_read(&ws->wait_cnt),
+		seq_printf(m, "\t{.wait=%s},\n",
 			   waitqueue_active(&ws->wait) ? "active" : "inactive");
 	}
 	seq_puts(m, "}\n");
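
To make the change above easier to follow: the old code tracked a per-waitqueue wait_cnt that had to be decremented, reset, and fenced with memory barriers, while the new code keeps just two per-bitmap counters, completion_cnt and wakeup_cnt, and issues one batch of wakeups whenever completions pull a full wake_batch ahead of the wakeups already accounted for. Below is a minimal userspace sketch of that accounting using C11 atomics. Everything here (WAKE_BATCH, queue_wake_up(), the woken counter) is a stand-in invented for the sketch: the kernel derives wake_batch from the queue depth and wakes tasks on real waitqueues via __sbitmap_queue_wake_up().

	/* Userspace analogue of the new sbitmap wakeup accounting (sketch only). */
	#include <stdatomic.h>
	#include <stdio.h>

	#define WAKE_BATCH 8U   /* kernel: sbq->wake_batch, derived from depth */

	static atomic_uint completion_cnt; /* completions seen so far */
	static atomic_uint wakeup_cnt;     /* completions already paid for by wakeups */
	static atomic_uint woken;          /* stand-in for waking tasks */

	/* Mirrors the structure of sbitmap_queue_wake_up(): charge nr completions,
	 * then issue one batch of wakeups per WAKE_BATCH completions observed. */
	static void queue_wake_up(unsigned int nr)
	{
		unsigned int wakeups;

		atomic_fetch_add(&completion_cnt, nr);
		wakeups = atomic_load(&wakeup_cnt);

		do {
			/* Less than a full batch ahead of the wakeups: nothing to do yet. */
			if (atomic_load(&completion_cnt) - wakeups < WAKE_BATCH)
				return;
			/* Try to claim this batch; on failure 'wakeups' is reloaded. */
		} while (!atomic_compare_exchange_weak(&wakeup_cnt, &wakeups,
						       wakeups + WAKE_BATCH));

		/* Kernel: __sbitmap_queue_wake_up(sbq, wake_batch). */
		atomic_fetch_add(&woken, WAKE_BATCH);
	}

	int main(void)
	{
		/* 21 completions arriving in threes: expect two full batches (16 wakeups). */
		for (int i = 0; i < 7; i++)
			queue_wake_up(3);
		printf("completions=%u, wakeups issued=%u\n",
		       atomic_load(&completion_cnt), atomic_load(&woken));
		return 0;
	}

The compare-and-swap loop is what keeps concurrent completers correct: exactly one caller advances wakeup_cnt for a given batch and becomes responsible for waking waiters, while racers simply reload wakeups and re-check the distance. Batches are therefore neither duplicated nor lost, and the per-waitqueue counters and barriers of the old scheme disappear.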