diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2017-02-20 13:23:30 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-02-20 13:23:30 -0800 |
commit | 42e1b14b6e1455ece2ccbe474c25388d0230a590 (patch) | |
tree | 7f62d95f795a2ac5c183248dce39e75340ccfb76 | |
parent | 828cad8ea05d194d8a9452e0793261c2024c23a2 (diff) | |
parent | 95cb64c1fe61e70685a95f6260c8e9cd219fe08c (diff) |
Merge branch 'locking-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull locking updates from Ingo Molnar:
"The main changes in this cycle were:
- Implement wraparound-safe refcount_t and kref_t types based on
generic atomic primitives (Peter Zijlstra)
- Improve and fix the ww_mutex code (Nicolai Hähnle)
- Add self-tests to the ww_mutex code (Chris Wilson)
- Optimize percpu-rwsems with the 'rcuwait' mechanism (Davidlohr
Bueso)
- Micro-optimize the current-task logic all around the core kernel
(Davidlohr Bueso)
- Tidy up after recent optimizations: remove stale code and APIs,
clean up the code (Waiman Long)
- ... plus misc fixes, updates and cleanups"
* 'locking-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (50 commits)
fork: Fix task_struct alignment
locking/spinlock/debug: Remove spinlock lockup detection code
lockdep: Fix incorrect condition to print bug msgs for MAX_LOCKDEP_CHAIN_HLOCKS
lkdtm: Convert to refcount_t testing
kref: Implement 'struct kref' using refcount_t
refcount_t: Introduce a special purpose refcount type
sched/wake_q: Clarify queue reinit comment
sched/wait, rcuwait: Fix typo in comment
locking/mutex: Fix lockdep_assert_held() fail
locking/rtmutex: Flip unlikely() branch to likely() in __rt_mutex_slowlock()
locking/rwsem: Reinit wake_q after use
locking/rwsem: Remove unnecessary atomic_long_t casts
jump_labels: Move header guard #endif down where it belongs
locking/atomic, kref: Implement kref_put_lock()
locking/ww_mutex: Turn off __must_check for now
locking/atomic, kref: Avoid more abuse
locking/atomic, kref: Use kref_get_unless_zero() more
locking/atomic, kref: Kill kref_sub()
locking/atomic, kref: Add kref_read()
locking/atomic, kref: Add KREF_INIT()
...
116 files changed, 1864 insertions, 761 deletions
diff --git a/Documentation/locking/ww-mutex-design.txt b/Documentation/locking/ww-mutex-design.txt index 8a112dc304c3..34c3a1b50b9a 100644 --- a/Documentation/locking/ww-mutex-design.txt +++ b/Documentation/locking/ww-mutex-design.txt @@ -309,11 +309,15 @@ Design: normal mutex locks, which are far more common. As such there is only a small increase in code size if wait/wound mutexes are not used. + We maintain the following invariants for the wait list: + (1) Waiters with an acquire context are sorted by stamp order; waiters + without an acquire context are interspersed in FIFO order. + (2) Among waiters with contexts, only the first one can have other locks + acquired already (ctx->acquired > 0). Note that this waiter may come + after other waiters without contexts in the list. + In general, not much contention is expected. The locks are typically used to - serialize access to resources for devices. The only way to make wakeups - smarter would be at the cost of adding a field to struct mutex_waiter. This - would add overhead to all cases where normal mutexes are used, and - ww_mutexes are generally less performance sensitive. + serialize access to resources for devices. 
Lockdep: Special care has been taken to warn for as many cases of api abuse diff --git a/arch/um/drivers/random.c b/arch/um/drivers/random.c index 05523f14d7b2..57f03050c850 100644 --- a/arch/um/drivers/random.c +++ b/arch/um/drivers/random.c @@ -76,7 +76,7 @@ static ssize_t rng_dev_read (struct file *filp, char __user *buf, size_t size, add_sigio_fd(random_fd); add_wait_queue(&host_read_wait, &wait); - set_task_state(current, TASK_INTERRUPTIBLE); + set_current_state(TASK_INTERRUPTIBLE); schedule(); remove_wait_queue(&host_read_wait, &wait); diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h index 921bea7a2708..6d391909e864 100644 --- a/arch/x86/include/asm/spinlock.h +++ b/arch/x86/include/asm/spinlock.h @@ -23,9 +23,6 @@ /* How long a lock should spin before we consider blocking */ #define SPIN_THRESHOLD (1 << 15) -extern struct static_key paravirt_ticketlocks_enabled; -static __always_inline bool static_key_false(struct static_key *key); - #include <asm/qspinlock.h> /* diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c index fc25f698d792..c37bd0f39c70 100644 --- a/arch/x86/kernel/jump_label.c +++ b/arch/x86/kernel/jump_label.c @@ -32,8 +32,7 @@ static void bug_at(unsigned char *ip, int line) * Something went wrong. Crash the box, as something could be * corrupting the kernel. 
*/ - pr_warning("Unexpected op at %pS [%p] (%02x %02x %02x %02x %02x) %s:%d\n", - ip, ip, ip[0], ip[1], ip[2], ip[3], ip[4], __FILE__, line); + pr_crit("jump_label: Fatal kernel bug, unexpected op at %pS [%p] (%5ph) %d\n", ip, ip, ip, line); BUG(); } diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 36bc66416021..099fcba4981d 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -620,18 +620,4 @@ void __init kvm_spinlock_init(void) } } -static __init int kvm_spinlock_init_jump(void) -{ - if (!kvm_para_available()) - return 0; - if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT)) - return 0; - - static_key_slow_inc(&paravirt_ticketlocks_enabled); - printk(KERN_INFO "KVM setup paravirtual spinlock\n"); - - return 0; -} -early_initcall(kvm_spinlock_init_jump); - #endif /* CONFIG_PARAVIRT_SPINLOCKS */ diff --git a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravirt-spinlocks.c index 6d4bf812af45..6259327f3454 100644 --- a/arch/x86/kernel/paravirt-spinlocks.c +++ b/arch/x86/kernel/paravirt-spinlocks.c @@ -42,6 +42,3 @@ struct pv_lock_ops pv_lock_ops = { #endif /* SMP */ }; EXPORT_SYMBOL(pv_lock_ops); - -struct static_key paravirt_ticketlocks_enabled = STATIC_KEY_INIT_FALSE; -EXPORT_SYMBOL(paravirt_ticketlocks_enabled); diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c index e8a9ea7d7a21..25a7c4302ce7 100644 --- a/arch/x86/xen/spinlock.c +++ b/arch/x86/xen/spinlock.c @@ -141,25 +141,6 @@ void __init xen_init_spinlocks(void) pv_lock_ops.vcpu_is_preempted = PV_CALLEE_SAVE(xen_vcpu_stolen); } -/* - * While the jump_label init code needs to happend _after_ the jump labels are - * enabled and before SMP is started. Hence we use pre-SMP initcall level - * init. We cannot do it in xen_init_spinlocks as that is done before - * jump labels are activated.
- */ -static __init int xen_init_spinlocks_jump(void) -{ - if (!xen_pvspin) - return 0; - - if (!xen_domain()) - return 0; - - static_key_slow_inc(&paravirt_ticketlocks_enabled); - return 0; -} -early_initcall(xen_init_spinlocks_jump); - static __init int xen_parse_nopvspin(char *arg) { xen_pvspin = false; diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index ab62b81c2ca7..dece26f119d4 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -1070,7 +1070,7 @@ static int bm_rw(struct drbd_device *device, const unsigned int flags, unsigned .done = 0, .flags = flags, .error = 0, - .kref = { ATOMIC_INIT(2) }, + .kref = KREF_INIT(2), }; if (!get_ldev_if_state(device, D_ATTACHING)) { /* put is in drbd_bm_aio_ctx_destroy() */ diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 83482721bc01..c3ff60c30dde 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2948,7 +2948,6 @@ void drbd_delete_device(struct drbd_device *device) struct drbd_resource *resource = device->resource; struct drbd_connection *connection; struct drbd_peer_device *peer_device; - int refs = 3; /* move to free_peer_device() */ for_each_peer_device(peer_device, device) @@ -2956,13 +2955,15 @@ void drbd_delete_device(struct drbd_device *device) drbd_debugfs_device_cleanup(device); for_each_connection(connection, resource) { idr_remove(&connection->peer_devices, device->vnr); - refs++; + kref_put(&device->kref, drbd_destroy_device); } idr_remove(&resource->devices, device->vnr); + kref_put(&device->kref, drbd_destroy_device); idr_remove(&drbd_devices, device_to_minor(device)); + kref_put(&device->kref, drbd_destroy_device); del_gendisk(device->vdisk); synchronize_rcu(); - kref_sub(&device->kref, refs, drbd_destroy_device); + kref_put(&device->kref, drbd_destroy_device); } static int __init drbd_init(void) diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index
de279fe4e4fd..b489ac2e9c44 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -421,7 +421,6 @@ static void mod_rq_state(struct drbd_request *req, struct bio_and_error *m, struct drbd_peer_device *peer_device = first_peer_device(device); unsigned s = req->rq_state; int c_put = 0; - int k_put = 0; if (drbd_suspended(device) && !((s | clear) & RQ_COMPLETION_SUSP)) set |= RQ_COMPLETION_SUSP; @@ -437,6 +436,8 @@ static void mod_rq_state(struct drbd_request *req, struct bio_and_error *m, /* intent: get references */ + kref_get(&req->kref); + if (!(s & RQ_LOCAL_PENDING) && (set & RQ_LOCAL_PENDING)) atomic_inc(&req->completion_ref); @@ -473,15 +474,12 @@ static void mod_rq_state(struct drbd_request *req, struct bio_and_error *m, if (!(s & RQ_LOCAL_ABORTED) && (set & RQ_LOCAL_ABORTED)) { D_ASSERT(device, req->rq_state & RQ_LOCAL_PENDING); - /* local completion may still come in later, - * we need to keep the req object around. */ - kref_get(&req->kref); ++c_put; } if ((s & RQ_LOCAL_PENDING) && (clear & RQ_LOCAL_PENDING)) { if (req->rq_state & RQ_LOCAL_ABORTED) - ++k_put; + kref_put(&req->kref, drbd_req_destroy); else ++c_put; list_del_init(&req->req_pending_local); @@ -503,7 +501,7 @@ static void mod_rq_state(struct drbd_request *req, struct bio_and_error *m, if (s & RQ_NET_SENT) atomic_sub(req->i.size >> 9, &device->ap_in_flight); if (s & RQ_EXP_BARR_ACK) - ++k_put; + kref_put(&req->kref, drbd_req_destroy); req->net_done_jif = jiffies; /* in ahead/behind mode, or just in case, @@ -516,25 +514,16 @@ static void mod_rq_state(struct drbd_request *req, struct bio_and_error *m, /* potentially complete and destroy */ - if (k_put || c_put) { - /* Completion does it's own kref_put. If we are going to - * kref_sub below, we need req to be still around then. 
*/ - int at_least = k_put + !!c_put; - int refcount = atomic_read(&req->kref.refcount); - if (refcount < at_least) - drbd_err(device, - "mod_rq_state: Logic BUG: %x -> %x: refcount = %d, should be >= %d\n", - s, req->rq_state, refcount, at_least); - } - /* If we made progress, retry conflicting peer requests, if any. */ if (req->i.waiting) wake_up(&device->misc_wait); - if (c_put) - k_put += drbd_req_put_completion_ref(req, m, c_put); - if (k_put) - kref_sub(&req->kref, k_put, drbd_req_destroy); + if (c_put) { + if (drbd_req_put_completion_ref(req, m, c_put)) + kref_put(&req->kref, drbd_req_destroy); + } else { + kref_put(&req->kref, drbd_req_destroy); + } } static void drbd_report_io_error(struct drbd_device *device, struct drbd_request *req) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 36d2b9f4e836..436baa66f701 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -1535,7 +1535,7 @@ static bool obj_request_overlaps_parent(struct rbd_obj_request *obj_request) static void rbd_obj_request_get(struct rbd_obj_request *obj_request) { dout("%s: obj %p (was %d)\n", __func__, obj_request, - atomic_read(&obj_request->kref.refcount)); + kref_read(&obj_request->kref)); kref_get(&obj_request->kref); } @@ -1544,14 +1544,14 @@ static void rbd_obj_request_put(struct rbd_obj_request *obj_request) { rbd_assert(obj_request != NULL); dout("%s: obj %p (was %d)\n", __func__, obj_request, - atomic_read(&obj_request->kref.refcount)); + kref_read(&obj_request->kref)); kref_put(&obj_request->kref, rbd_obj_request_destroy); } static void rbd_img_request_get(struct rbd_img_request *img_request) { dout("%s: img %p (was %d)\n", __func__, img_request, - atomic_read(&img_request->kref.refcount)); + kref_read(&img_request->kref)); kref_get(&img_request->kref); } @@ -1562,7 +1562,7 @@ static void rbd_img_request_put(struct rbd_img_request *img_request) { rbd_assert(img_request != NULL); dout("%s: img %p (was %d)\n", __func__, img_request, - 
atomic_read(&img_request->kref.refcount)); + kref_read(&img_request->kref)); if (img_request_child_test(img_request)) kref_put(&img_request->kref, rbd_parent_request_destroy); else diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 10332c24f961..264c5eac12b0 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -770,7 +770,7 @@ static void virtblk_remove(struct virtio_device *vdev) /* Stop all the virtqueues. */ vdev->config->reset(vdev); - refc = atomic_read(&disk_to_dev(vblk->disk)->kobj.kref.refcount); + refc = kref_read(&disk_to_dev(vblk->disk)->kobj.kref); put_disk(vblk->disk); vdev->config->del_vqs(vdev); kfree(vblk->vqs); diff --git a/drivers/gpu/drm/drm_gem_cma_helper.c b/drivers/gpu/drm/drm_gem_cma_helper.c index 1d6c335584ec..33cd51632721 100644 --- a/drivers/gpu/drm/drm_gem_cma_helper.c +++ b/drivers/gpu/drm/drm_gem_cma_helper.c @@ -376,7 +376,7 @@ void drm_gem_cma_describe(struct drm_gem_cma_object *cma_obj, off = drm_vma_node_start(&obj->vma_node); seq_printf(m, "%2d (%2d) %08llx %pad %p %zu", - obj->name, obj->refcount.refcount.counter, + obj->name, kref_read(&obj->refcount), off, &cma_obj->paddr, cma_obj->vaddr, obj->size); seq_printf(m, "\n"); diff --git a/drivers/gpu/drm/drm_info.c b/drivers/gpu/drm/drm_info.c index ffb2ab389d1d..6b68e9088436 100644 --- a/drivers/gpu/drm/drm_info.c +++ b/drivers/gpu/drm/drm_info.c @@ -118,7 +118,7 @@ static int drm_gem_one_name_info(int id, void *ptr, void *data) seq_printf(m, "%6d %8zd %7d %8d\n", obj->name, obj->size, obj->handle_count, - atomic_read(&obj->refcount.refcount)); + kref_read(&obj->refcount)); return 0; } diff --git a/drivers/gpu/drm/drm_mode_object.c b/drivers/gpu/drm/drm_mode_object.c index 9f17085b1fdd..c6885a4911c0 100644 --- a/drivers/gpu/drm/drm_mode_object.c +++ b/drivers/gpu/drm/drm_mode_object.c @@ -159,7 +159,7 @@ EXPORT_SYMBOL(drm_mode_object_find); void drm_mode_object_unreference(struct drm_mode_object *obj) { if (obj->free_cb) { - 
DRM_DEBUG("OBJ ID: %d (%d)\n", obj->id, atomic_read(&obj->refcount.refcount)); + DRM_DEBUG("OBJ ID: %d (%d)\n", obj->id, kref_read(&obj->refcount)); kref_put(&obj->refcount, obj->free_cb); } } @@ -176,7 +176,7 @@ EXPORT_SYMBOL(drm_mode_object_unreference); void drm_mode_object_reference(struct drm_mode_object *obj) { if (obj->free_cb) { - DRM_DEBUG("OBJ ID: %d (%d)\n", obj->id, atomic_read(&obj->refcount.refcount)); + DRM_DEBUG("OBJ ID: %d (%d)\n", obj->id, kref_read(&obj->refcount)); kref_get(&obj->refcount); } } diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.c b/drivers/gpu/drm/etnaviv/etnaviv_gem.c index 114dddbd297b..aa6e35ddc87f 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gem.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.c @@ -486,7 +486,7 @@ static void etnaviv_gem_describe(struct drm_gem_object *obj, struct seq_file *m) seq_printf(m, "%08x: %c %2d (%2d) %08lx %p %zd\n", etnaviv_obj->flags, is_active(etnaviv_obj) ? 'A' : 'I', - obj->name, obj->refcount.refcount.counter, + obj->name, kref_read(&obj->refcount), off, etnaviv_obj->vaddr, obj->size); rcu_read_lock(); diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h index 6a368de9d81e..ecfefb9d42e4 100644 --- a/drivers/gpu/drm/i915/i915_gem_object.h +++ b/drivers/gpu/drm/i915/i915_gem_object.h @@ -256,7 +256,7 @@ extern void drm_gem_object_unreference_unlocked(struct drm_gem_object *); static inline bool i915_gem_object_is_dead(const struct drm_i915_gem_object *obj) { - return atomic_read(&obj->base.refcount.refcount) == 0; + return kref_read(&obj->base.refcount) == 0; } static inline bool diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index 8098677a3916..1974ccb781de 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -642,7 +642,7 @@ void msm_gem_describe(struct drm_gem_object *obj, struct seq_file *m) seq_printf(m, "%08x: %c %2d (%2d) %08llx %p\t", msm_obj->flags, is_active(msm_obj) ? 
'A' : 'I', - obj->name, obj->refcount.refcount.counter, + obj->name, kref_read(&obj->refcount), off, msm_obj->vaddr); for (id = 0; id < priv->num_aspaces; id++) diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c index a6126c93f215..88ee60d1b907 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.c +++ b/drivers/gpu/drm/nouveau/nouveau_fence.c @@ -527,7 +527,7 @@ static bool nouveau_fence_no_signaling(struct dma_fence *f) * caller should have a reference on the fence, * else fence could get freed here */ - WARN_ON(atomic_read(&fence->base.refcount.refcount) <= 1); + WARN_ON(kref_read(&fence->base.refcount) <= 1); /* * This needs uevents to work correctly, but dma_fence_add_callback relies on diff --git a/drivers/gpu/drm/omapdrm/omap_gem.c b/drivers/gpu/drm/omapdrm/omap_gem.c index 4a90c690f09e..74a9968df421 100644 --- a/drivers/gpu/drm/omapdrm/omap_gem.c +++ b/drivers/gpu/drm/omapdrm/omap_gem.c @@ -1033,7 +1033,7 @@ void omap_gem_describe(struct drm_gem_object *obj, struct seq_file *m) off = drm_vma_node_start(&obj->vma_node); seq_printf(m, "%08x: %2d (%2d) %08llx %pad (%2d) %p %4d", - omap_obj->flags, obj->name, obj->refcount.refcount.counter, + omap_obj->flags, obj->name, kref_read(&obj->refcount), off, &omap_obj->paddr, omap_obj->paddr_cnt, omap_obj->vaddr, omap_obj->roll); diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index d5063618efa7..ffc6cb55c78c 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -140,8 +140,8 @@ static void ttm_bo_release_list(struct kref *list_kref) struct ttm_bo_device *bdev = bo->bdev; size_t acc_size = bo->acc_size; - BUG_ON(atomic_read(&bo->list_kref.refcount)); - BUG_ON(atomic_read(&bo->kref.refcount)); + BUG_ON(kref_read(&bo->list_kref)); + BUG_ON(kref_read(&bo->kref)); BUG_ON(atomic_read(&bo->cpu_writers)); BUG_ON(bo->mem.mm_node != NULL); BUG_ON(!list_empty(&bo->lru)); @@ -181,61 +181,46 @@ void ttm_bo_add_to_lru(struct 
ttm_buffer_object *bo) } EXPORT_SYMBOL(ttm_bo_add_to_lru); -int ttm_bo_del_from_lru(struct ttm_buffer_object *bo) +static void ttm_bo_ref_bug(struct kref *list_kref) +{ + BUG(); +} + +void ttm_bo_del_from_lru(struct ttm_buffer_object *bo) { struct ttm_bo_device *bdev = bo->bdev; - int put_count = 0; if (bdev->driver->lru_removal) bdev->driver->lru_removal(bo); if (!list_empty(&bo->swap)) { list_del_init(&bo->swap); - ++put_count; + kref_put(&bo->list_kref, ttm_bo_ref_bug); } if (!list_empty(&bo->lru)) { list_del_init(&bo->lru); - ++put_count; + kref_put(&bo->list_kref, ttm_bo_ref_bug); } - - return put_count; -} - -static void ttm_bo_ref_bug(struct kref *list_kref) -{ - BUG(); -} - -void ttm_bo_list_ref_sub(struct ttm_buffer_object *bo, int count, - bool never_free) -{ - kref_sub(&bo->list_kref, count, - (never_free) ? ttm_bo_ref_bug : ttm_bo_release_list); } void ttm_bo_del_sub_from_lru(struct ttm_buffer_object *bo) { - int put_count; - spin_lock(&bo->glob->lru_lock); - put_count = ttm_bo_del_from_lru(bo); + ttm_bo_del_from_lru(bo); spin_unlock(&bo->glob->lru_lock); - ttm_bo_list_ref_sub(bo, put_count, true); } EXPORT_SYMBOL(ttm_bo_del_sub_from_lru); void ttm_bo_move_to_lru_tail(struct ttm_buffer_object *bo) { struct ttm_bo_device *bdev = bo->bdev; - int put_count = 0; lockdep_assert_held(&bo->resv->lock.base); if (bdev->driver->lru_removal) bdev->driver->lru_removal(bo); - put_count = ttm_bo_del_from_lru(bo); - ttm_bo_list_ref_sub(bo, put_count, true); + ttm_bo_del_from_lru(bo); ttm_bo_add_to_lru(bo); } EXPORT_SYMBOL(ttm_bo_move_to_lru_tail); @@ -447,7 +432,6 @@ static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo) { struct ttm_bo_device *bdev = bo->bdev; struct ttm_bo_global *glob = bo->glob; - int put_count; int ret; spin_lock(&glob->lru_lock); @@ -455,13 +439,10 @@ static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo) if (!ret) { if (!ttm_bo_wait(bo, false, true)) { - put_count = ttm_bo_del_from_lru(bo); - + 
ttm_bo_del_from_lru(bo); spin_unlock(&glob->lru_lock); ttm_bo_cleanup_memtype_use(bo); - ttm_bo_list_ref_sub(bo, put_count, true); - return; } else ttm_bo_flush_all_fences(bo); @@ -504,7 +485,6 @@ static int ttm_bo_cleanup_refs_and_unlock(struct ttm_buffer_object *bo, bool no_wait_gpu) { struct ttm_bo_global *glob = bo->glob; - int put_count; int ret; ret = ttm_bo_wait(bo, false, true); @@ -554,15 +534,13 @@ static int ttm_bo_cleanup_refs_and_unlock(struct ttm_buffer_object *bo, return ret; } - put_count = ttm_bo_del_from_lru(bo); + ttm_bo_del_from_lru(bo); list_del_init(&bo->ddestroy); - ++put_count; + kref_put(&bo->list_kref, ttm_bo_ref_bug); spin_unlock(&glob->lru_lock); ttm_bo_cleanup_memtype_use(bo); - ttm_bo_list_ref_sub(bo, put_count, true); - return 0; } @@ -740,7 +718,7 @@ static int ttm_mem_evict_first(struct ttm_bo_device *bdev, struct ttm_bo_global *glob = bdev->glob; struct ttm_mem_type_manager *man = &bdev->man[mem_type]; struct ttm_buffer_object *bo; - int ret = -EBUSY, put_count; + int ret = -EBUSY; spin_lock(&glob->lru_lock); list_for_each_entry(bo, &man->lru, lru) { @@ -771,13 +749,11 @@ static int ttm_mem_evict_first(struct ttm_bo_device *bdev, return ret; } - put_count = ttm_bo_del_from_lru(bo); + ttm_bo_del_from_lru(bo); spin_unlock(&glob->lru_lock); BUG_ON(ret != 0); - ttm_bo_list_ref_sub(bo, put_count, true); - ret = ttm_bo_evict(bo, interruptible, no_wait_gpu); ttm_bo_unreserve(bo); @@ -1669,7 +1645,6 @@ static int ttm_bo_swapout(struct ttm_mem_shrink *shrink) container_of(shrink, struct ttm_bo_global, shrink); struct ttm_buffer_object *bo; int ret = -EBUSY; - int put_count; uint32_t swap_placement = (TTM_PL_FLAG_CACHED | TTM_PL_FLAG_SYSTEM); spin_lock(&glob->lru_lock); @@ -1692,11 +1667,9 @@ static int ttm_bo_swapout(struct ttm_mem_shrink *shrink) return ret; } - put_count = ttm_bo_del_from_lru(bo); + ttm_bo_del_from_lru(bo); spin_unlock(&glob->lru_lock); - ttm_bo_list_ref_sub(bo, put_count, true); - /** * Move to system cached */ diff 
--git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/ttm/ttm_execbuf_util.c index d35bc491e8de..5e1bcabffef5 100644 --- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c +++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c @@ -48,9 +48,7 @@ static void ttm_eu_del_from_lru_locked(struct list_head *list) list_for_each_entry(entry, list, head) { struct ttm_buffer_object *bo = entry->bo; - unsigned put_count = ttm_bo_del_from_lru(bo); - - ttm_bo_list_ref_sub(bo, put_count, true); + ttm_bo_del_from_lru(bo); } } diff --git a/drivers/gpu/drm/ttm/ttm_object.c b/drivers/gpu/drm/ttm/ttm_object.c index 4f5fa8d65fe9..fdb451e3ec01 100644 --- a/drivers/gpu/drm/ttm/ttm_object.c +++ b/drivers/gpu/drm/ttm/ttm_object.c @@ -304,7 +304,7 @@ bool ttm_ref_object_exists(struct ttm_object_file *tfile, * Verify that the ref->obj pointer was actually valid! */ rmb(); - if (unlikely(atomic_read(&ref->kref.refcount) == 0)) + if (unlikely(kref_read(&ref->kref) == 0)) goto out_false; rcu_read_unlock(); diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.h b/drivers/infiniband/hw/cxgb3/iwch_cm.h index b9efadfffb4f..e66e75921797 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_cm.h +++ b/drivers/infiniband/hw/cxgb3/iwch_cm.h @@ -55,14 +55,14 @@ #define put_ep(ep) { \ PDBG("put_ep (via %s:%u) ep %p refcnt %d\n", __func__, __LINE__, \ - ep, atomic_read(&((ep)->kref.refcount))); \ - WARN_ON(atomic_read(&((ep)->kref.refcount)) < 1); \ + ep, kref_read(&((ep)->kref))); \ + WARN_ON(kref_read(&((ep)->kref)) < 1); \ kref_put(&((ep)->kref), __free_ep); \ } #define get_ep(ep) { \ PDBG("get_ep (via %s:%u) ep %p, refcnt %d\n", __func__, __LINE__, \ - ep, atomic_read(&((ep)->kref.refcount))); \ + ep, kref_read(&((ep)->kref))); \ kref_get(&((ep)->kref)); \ } diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c index d939980a708f..a9194db7f9b8 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_qp.c +++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c @@ -961,7 +961,7 @@ int iwch_modify_qp(struct 
iwch_dev *rhp, struct iwch_qp *qhp, case IWCH_QP_STATE_RTS: switch (attrs->next_state) { case IWCH_QP_STATE_CLOSING: - BUG_ON(atomic_read(&qhp->ep->com.kref.refcount) < 2); + BUG_ON(kref_read(&qhp->ep->com.kref) < 2); qhp->attr.state = IWCH_QP_STATE_CLOSING; if (!internal) { abort=0; diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index 8cd4d054a87e..d19662f635b1 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -672,14 +672,14 @@ enum c4iw_mmid_state { #define c4iw_put_ep(ep) { \ PDBG("put_ep (via %s:%u) ep %p refcnt %d\n", __func__, __LINE__, \ - ep, atomic_read(&((ep)->kref.refcount))); \ - WARN_ON(atomic_read(&((ep)->kref.refcount)) < 1); \ + ep, kref_read(&((ep)->kref))); \ + WARN_ON(kref_read(&((ep)->kref)) < 1); \ kref_put(&((ep)->kref), _c4iw_free_ep); \ } #define c4iw_get_ep(ep) { \ PDBG("get_ep (via %s:%u) ep %p, refcnt %d\n", __func__, __LINE__, \ - ep, atomic_read(&((ep)->kref.refcount))); \ + ep, kref_read(&((ep)->kref))); \ kref_get(&((ep)->kref)); \ } void _c4iw_free_ep(struct kref *kref); diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index 04c1c382dedb..d4fd2f5c8326 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -1580,7 +1580,7 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp, case C4IW_QP_STATE_RTS: switch (attrs->next_state) { case C4IW_QP_STATE_CLOSING: - BUG_ON(atomic_read(&qhp->ep->com.kref.refcount) < 2); + BUG_ON(kref_read(&qhp->ep->com.kref) < 2); t4_set_wq_in_error(&qhp->wq); set_state(qhp, C4IW_QP_STATE_CLOSING); ep = qhp->ep; diff --git a/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c b/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c index 80ef3f8998c8..04443242e258 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c @@ -80,7 +80,7 @@ usnic_ib_show_config(struct device *device, struct device_attribute 
*attr, left = PAGE_SIZE; mutex_lock(&us_ibdev->usdev_lock); - if (atomic_read(&us_ibdev->vf_cnt.refcount) > 0) { + if (kref_read(&us_ibdev->vf_cnt) > 0) { char *busname; /* @@ -99,7 +99,7 @@ usnic_ib_show_config(struct device *device, struct device_attribute *attr, PCI_FUNC(us_ibdev->pdev->devfn), netdev_name(us_ibdev->netdev), us_ibdev->ufdev->mac, - atomic_read(&us_ibdev->vf_cnt.refcount)); + kref_read(&us_ibdev->vf_cnt)); UPDATE_PTR_LEFT(n, ptr, left); for (res_type = USNIC_VNIC_RES_TYPE_EOL; @@ -147,7 +147,7 @@ usnic_ib_show_max_vf(struct device *device, struct device_attribute *attr, us_ibdev = container_of(device, struct usnic_ib_dev, ib_dev.dev); return scnprintf(buf, PAGE_SIZE, "%u\n", - atomic_read(&us_ibdev->vf_cnt.refcount)); + kref_read(&us_ibdev->vf_cnt)); } static ssize_t diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c index 74819a7951e2..69df8e353123 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c @@ -291,11 +291,11 @@ int usnic_ib_query_device(struct ib_device *ibdev, qp_per_vf = max(us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_WQ], us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_RQ]); props->max_qp = qp_per_vf * - atomic_read(&us_ibdev->vf_cnt.refcount); + kref_read(&us_ibdev->vf_cnt); props->device_cap_flags = IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_BLOCK_MULTICAST_LOOPBACK; props->max_cq = us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_CQ] * - atomic_read(&us_ibdev->vf_cnt.refcount); + kref_read(&us_ibdev->vf_cnt); props->max_pd = USNIC_UIOM_MAX_PD_CNT; props->max_mr = USNIC_UIOM_MAX_MR_CNT; props->local_ca_ack_delay = 0; diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index 84d2f0e4c754..d36d427a9efb 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -794,7 +794,7 @@ static void __wait_for_free_buffer(struct dm_bufio_client *c) DECLARE_WAITQUEUE(wait, current); 
add_wait_queue(&c->free_buffer_wait, &wait); - set_task_state(current, TASK_UNINTERRUPTIBLE); + set_current_state(TASK_UNINTERRUPTIBLE); dm_bufio_unlock(c); io_schedule(); diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 8a9f742d8ed7..1cb2ca9dfae3 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -1210,14 +1210,14 @@ continue_locked: spin_unlock_irq(&cc->write_thread_wait.lock); if (unlikely(kthread_should_stop())) { - set_task_state(current, TASK_RUNNING); + set_current_state(TASK_RUNNING); remove_wait_queue(&cc->write_thread_wait, &wait); break; } schedule(); - set_task_state(current, TASK_RUNNING); + set_current_state(TASK_RUNNING); spin_lock_irq(&cc->write_thread_wait.lock); __remove_wait_queue(&cc->write_thread_wait, &wait); goto continue_locked; diff --git a/drivers/md/persistent-data/dm-block-manager.c b/drivers/md/persistent-data/dm-block-manager.c index a6dde7cab458..758d90cc2733 100644 --- a/drivers/md/persistent-data/dm-block-manager.c +++ b/drivers/md/persistent-data/dm-block-manager.c @@ -120,7 +120,7 @@ static int __check_holder(struct block_lock *lock) static void __wait(struct waiter *w) { for (;;) { - set_task_state(current, TASK_UNINTERRUPTIBLE); + set_current_state(TASK_UNINTERRUPTIBLE); if (!w->task) break; @@ -128,7 +128,7 @@ static void __wait(struct waiter *w) schedule(); } - set_task_state(current, TASK_RUNNING); + set_current_state(TASK_RUNNING); } static void __wake_waiter(struct waiter *w) diff --git a/drivers/misc/genwqe/card_dev.c b/drivers/misc/genwqe/card_dev.c index 7f1b282d7d96..cb290b8ca0c8 100644 --- a/drivers/misc/genwqe/card_dev.c +++ b/drivers/misc/genwqe/card_dev.c @@ -1396,7 +1396,7 @@ int genwqe_device_remove(struct genwqe_dev *cd) * application which will decrease this reference from * 1/unused to 0/illegal and not from 2/used 1/empty. 
*/ - rc = atomic_read(&cd->cdev_genwqe.kobj.kref.refcount); + rc = kref_read(&cd->cdev_genwqe.kobj.kref); if (rc != 1) { dev_err(&pci_dev->dev, "[%s] err: cdev_genwqe...refcount=%d\n", __func__, rc); diff --git a/drivers/misc/lkdtm.h b/drivers/misc/lkdtm.h index cfa1039c62e7..67d27be60405 100644 --- a/drivers/misc/lkdtm.h +++ b/drivers/misc/lkdtm.h @@ -19,8 +19,12 @@ void lkdtm_SOFTLOCKUP(void); void lkdtm_HARDLOCKUP(void); void lkdtm_SPINLOCKUP(void); void lkdtm_HUNG_TASK(void); -void lkdtm_ATOMIC_UNDERFLOW(void); -void lkdtm_ATOMIC_OVERFLOW(void); +void lkdtm_REFCOUNT_SATURATE_INC(void); +void lkdtm_REFCOUNT_SATURATE_ADD(void); +void lkdtm_REFCOUNT_ZERO_DEC(void); +void lkdtm_REFCOUNT_ZERO_INC(void); +void lkdtm_REFCOUNT_ZERO_SUB(void); +void lkdtm_REFCOUNT_ZERO_ADD(void); void lkdtm_CORRUPT_LIST_ADD(void); void lkdtm_CORRUPT_LIST_DEL(void); diff --git a/drivers/misc/lkdtm_bugs.c b/drivers/misc/lkdtm_bugs.c index 91edd0b55e5c..cba0837aee2e 100644 --- a/drivers/misc/lkdtm_bugs.c +++ b/drivers/misc/lkdtm_bugs.c @@ -6,6 +6,7 @@ */ #include "lkdtm.h" #include <linux/list.h> +#include <linux/refcount.h> #include <linux/sched.h> struct lkdtm_list { @@ -129,28 +130,86 @@ void lkdtm_HUNG_TASK(void) schedule(); } -void lkdtm_ATOMIC_UNDERFLOW(void) +void lkdtm_REFCOUNT_SATURATE_INC(void) { - atomic_t under = ATOMIC_INIT(INT_MIN); + refcount_t over = REFCOUNT_INIT(UINT_MAX - 1); - pr_info("attempting good atomic increment\n"); - atomic_inc(&under); - atomic_dec(&under); + pr_info("attempting good refcount decrement\n"); + refcount_dec(&over); + refcount_inc(&over); - pr_info("attempting bad atomic underflow\n"); - atomic_dec(&under); + pr_info("attempting bad refcount inc overflow\n"); + refcount_inc(&over); + refcount_inc(&over); + if (refcount_read(&over) == UINT_MAX) + pr_err("Correctly stayed saturated, but no BUG?!\n"); + else + pr_err("Fail: refcount wrapped\n"); +} + +void lkdtm_REFCOUNT_SATURATE_ADD(void) +{ + refcount_t over = REFCOUNT_INIT(UINT_MAX - 1); + + 
pr_info("attempting good refcount decrement\n"); + refcount_dec(&over); + refcount_inc(&over); + + pr_info("attempting bad refcount add overflow\n"); + refcount_add(2, &over); + if (refcount_read(&over) == UINT_MAX) + pr_err("Correctly stayed saturated, but no BUG?!\n"); + else + pr_err("Fail: refcount wrapped\n"); +} + +void lkdtm_REFCOUNT_ZERO_DEC(void) +{ + refcount_t zero = REFCOUNT_INIT(1); + + pr_info("attempting bad refcount decrement to zero\n"); + refcount_dec(&zero); + if (refcount_read(&zero) == 0) + pr_err("Stayed at zero, but no BUG?!\n"); + else + pr_err("Fail: refcount went crazy\n"); } -void lkdtm_ATOMIC_OVERFLOW(void) +void lkdtm_REFCOUNT_ZERO_SUB(void) { - atomic_t over = ATOMIC_INIT(INT_MAX); + refcount_t zero = REFCOUNT_INIT(1); + + pr_info("attempting bad refcount subtract past zero\n"); + if (!refcount_sub_and_test(2, &zero)) + pr_info("wrap attempt was noticed\n"); + if (refcount_read(&zero) == 1) + pr_err("Correctly stayed above 0, but no BUG?!\n"); + else + pr_err("Fail: refcount wrapped\n"); +} - pr_info("attempting good atomic decrement\n"); - atomic_dec(&over); - atomic_inc(&over); +void lkdtm_REFCOUNT_ZERO_INC(void) +{ + refcount_t zero = REFCOUNT_INIT(0); - pr_info("attempting bad atomic overflow\n"); - atomic_inc(&over); + pr_info("attempting bad refcount increment from zero\n"); + refcount_inc(&zero); + if (refcount_read(&zero) == 0) + pr_err("Stayed at zero, but no BUG?!\n"); + else + pr_err("Fail: refcount went past zero\n"); +} + +void lkdtm_REFCOUNT_ZERO_ADD(void) +{ + refcount_t zero = REFCOUNT_INIT(0); + + pr_info("attempting bad refcount addition from zero\n"); + refcount_add(2, &zero); + if (refcount_read(&zero) == 0) + pr_err("Stayed at zero, but no BUG?!\n"); + else + pr_err("Fail: refcount went past zero\n"); } void lkdtm_CORRUPT_LIST_ADD(void) diff --git a/drivers/misc/lkdtm_core.c b/drivers/misc/lkdtm_core.c index 7eeb71a75549..16e4cf110930 100644 --- a/drivers/misc/lkdtm_core.c +++ b/drivers/misc/lkdtm_core.c @@ -220,8 
+220,12 @@ struct crashtype crashtypes[] = { CRASHTYPE(WRITE_RO), CRASHTYPE(WRITE_RO_AFTER_INIT), CRASHTYPE(WRITE_KERN), - CRASHTYPE(ATOMIC_UNDERFLOW), - CRASHTYPE(ATOMIC_OVERFLOW), + CRASHTYPE(REFCOUNT_SATURATE_INC), + CRASHTYPE(REFCOUNT_SATURATE_ADD), + CRASHTYPE(REFCOUNT_ZERO_DEC), + CRASHTYPE(REFCOUNT_ZERO_INC), + CRASHTYPE(REFCOUNT_ZERO_SUB), + CRASHTYPE(REFCOUNT_ZERO_ADD), CRASHTYPE(USERCOPY_HEAP_SIZE_TO), CRASHTYPE(USERCOPY_HEAP_SIZE_FROM), CRASHTYPE(USERCOPY_HEAP_FLAG_TO), diff --git a/drivers/misc/mei/debugfs.c b/drivers/misc/mei/debugfs.c index c6217a4993ad..a617aa5a3ad8 100644 --- a/drivers/misc/mei/debugfs.c +++ b/drivers/misc/mei/debugfs.c @@ -67,7 +67,7 @@ static ssize_t mei_dbgfs_read_meclients(struct file *fp, char __user *ubuf, me_cl->props.max_number_of_connections, me_cl->props.max_msg_length, me_cl->props.single_recv_buf, - atomic_read(&me_cl->refcnt.refcount)); + kref_read(&me_cl->refcnt)); mei_me_cl_put(me_cl); } diff --git a/drivers/pci/hotplug/pnv_php.c b/drivers/pci/hotplug/pnv_php.c index 56efaf72d08e..d2961ef39a3a 100644 --- a/drivers/pci/hotplug/pnv_php.c +++ b/drivers/pci/hotplug/pnv_php.c @@ -155,7 +155,7 @@ static void pnv_php_detach_device_nodes(struct device_node *parent) pnv_php_detach_device_nodes(dn); of_node_put(dn); - refcount = atomic_read(&dn->kobj.kref.refcount); + refcount = kref_read(&dn->kobj.kref); if (refcount != 1) pr_warn("Invalid refcount %d on <%s>\n", refcount, of_node_full_name(dn)); diff --git a/drivers/pci/slot.c b/drivers/pci/slot.c index 429d34c348b9..e42909524dee 100644 --- a/drivers/pci/slot.c +++ b/drivers/pci/slot.c @@ -345,7 +345,7 @@ EXPORT_SYMBOL_GPL(pci_create_slot); void pci_destroy_slot(struct pci_slot *slot) { dev_dbg(&slot->bus->dev, "dev %02x, dec refcount to %d\n", - slot->number, atomic_read(&slot->kobj.kref.refcount) - 1); + slot->number, kref_read(&slot->kobj.kref) - 1); mutex_lock(&pci_slot_mutex); kobject_put(&slot->kobj); diff --git a/drivers/scsi/bnx2fc/bnx2fc_io.c 
b/drivers/scsi/bnx2fc/bnx2fc_io.c index f501095f91ac..898461b146cc 100644 --- a/drivers/scsi/bnx2fc/bnx2fc_io.c +++ b/drivers/scsi/bnx2fc/bnx2fc_io.c @@ -74,7 +74,7 @@ static void bnx2fc_cmd_timeout(struct work_struct *work) &io_req->req_flags)) { /* Handle internally generated ABTS timeout */ BNX2FC_IO_DBG(io_req, "ABTS timed out refcnt = %d\n", - io_req->refcount.refcount.counter); + kref_read(&io_req->refcount)); if (!(test_and_set_bit(BNX2FC_FLAG_ABTS_DONE, &io_req->req_flags))) { /* @@ -1141,7 +1141,7 @@ int bnx2fc_eh_abort(struct scsi_cmnd *sc_cmd) return SUCCESS; } BNX2FC_IO_DBG(io_req, "eh_abort - refcnt = %d\n", - io_req->refcount.refcount.counter); + kref_read(&io_req->refcount)); /* Hold IO request across abort processing */ kref_get(&io_req->refcount); @@ -1299,7 +1299,7 @@ void bnx2fc_process_cleanup_compl(struct bnx2fc_cmd *io_req, { BNX2FC_IO_DBG(io_req, "Entered process_cleanup_compl " "refcnt = %d, cmd_type = %d\n", - io_req->refcount.refcount.counter, io_req->cmd_type); + kref_read(&io_req->refcount), io_req->cmd_type); bnx2fc_scsi_done(io_req, DID_ERROR); kref_put(&io_req->refcount, bnx2fc_cmd_release); if (io_req->wait_for_comp) @@ -1318,7 +1318,7 @@ void bnx2fc_process_abts_compl(struct bnx2fc_cmd *io_req, BNX2FC_IO_DBG(io_req, "Entered process_abts_compl xid = 0x%x" "refcnt = %d, cmd_type = %d\n", io_req->xid, - io_req->refcount.refcount.counter, io_req->cmd_type); + kref_read(&io_req->refcount), io_req->cmd_type); if (test_and_set_bit(BNX2FC_FLAG_ABTS_DONE, &io_req->req_flags)) { diff --git a/drivers/scsi/cxgbi/libcxgbi.h b/drivers/scsi/cxgbi/libcxgbi.h index 95ba99044c3e..18e0ea83d361 100644 --- a/drivers/scsi/cxgbi/libcxgbi.h +++ b/drivers/scsi/cxgbi/libcxgbi.h @@ -301,7 +301,7 @@ static inline void __cxgbi_sock_put(const char *fn, struct cxgbi_sock *csk) { log_debug(1 << CXGBI_DBG_SOCK, "%s, put csk 0x%p, ref %u-1.\n", - fn, csk, atomic_read(&csk->refcnt.refcount)); + fn, csk, kref_read(&csk->refcnt)); kref_put(&csk->refcnt, 
cxgbi_sock_free); } #define cxgbi_sock_put(csk) __cxgbi_sock_put(__func__, csk) @@ -310,7 +310,7 @@ static inline void __cxgbi_sock_get(const char *fn, struct cxgbi_sock *csk) { log_debug(1 << CXGBI_DBG_SOCK, "%s, get csk 0x%p, ref %u+1.\n", - fn, csk, atomic_read(&csk->refcnt.refcount)); + fn, csk, kref_read(&csk->refcnt)); kref_get(&csk->refcnt); } #define cxgbi_sock_get(csk) __cxgbi_sock_get(__func__, csk) diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c index a63542bac153..caa7a7b0ec53 100644 --- a/drivers/scsi/lpfc/lpfc_debugfs.c +++ b/drivers/scsi/lpfc/lpfc_debugfs.c @@ -607,7 +607,7 @@ lpfc_debugfs_nodelist_data(struct lpfc_vport *vport, char *buf, int size) len += snprintf(buf+len, size-len, "usgmap:%x ", ndlp->nlp_usg_map); len += snprintf(buf+len, size-len, "refcnt:%x", - atomic_read(&ndlp->kref.refcount)); + kref_read(&ndlp->kref)); len += snprintf(buf+len, size-len, "\n"); } spin_unlock_irq(shost->host_lock); diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c index 7b6bd8ed0d0b..63bef4566548 100644 --- a/drivers/scsi/lpfc/lpfc_els.c +++ b/drivers/scsi/lpfc/lpfc_els.c @@ -3690,7 +3690,7 @@ lpfc_mbx_cmpl_dflt_rpi(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) lpfc_printf_vlog(ndlp->vport, KERN_INFO, LOG_NODE, "0006 rpi%x DID:%x flg:%x %d map:%x %p\n", ndlp->nlp_rpi, ndlp->nlp_DID, ndlp->nlp_flag, - atomic_read(&ndlp->kref.refcount), + kref_read(&ndlp->kref), ndlp->nlp_usg_map, ndlp); if (NLP_CHK_NODE_ACT(ndlp)) { lpfc_nlp_put(ndlp); diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c index ed223937798a..82047070cdc9 100644 --- a/drivers/scsi/lpfc/lpfc_hbadisc.c +++ b/drivers/scsi/lpfc/lpfc_hbadisc.c @@ -3440,7 +3440,7 @@ lpfc_mbx_cmpl_reg_login(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) lpfc_printf_vlog(vport, KERN_INFO, LOG_SLI, "0002 rpi:%x DID:%x flg:%x %d map:%x %p\n", ndlp->nlp_rpi, ndlp->nlp_DID, ndlp->nlp_flag, - atomic_read(&ndlp->kref.refcount), + kref_read(&ndlp->kref), 
ndlp->nlp_usg_map, ndlp); if (ndlp->nlp_flag & NLP_REG_LOGIN_SEND) ndlp->nlp_flag &= ~NLP_REG_LOGIN_SEND; @@ -3861,7 +3861,7 @@ out: lpfc_printf_vlog(vport, KERN_INFO, LOG_SLI, "0003 rpi:%x DID:%x flg:%x %d map%x %p\n", ndlp->nlp_rpi, ndlp->nlp_DID, ndlp->nlp_flag, - atomic_read(&ndlp->kref.refcount), + kref_read(&ndlp->kref), ndlp->nlp_usg_map, ndlp); if (vport->port_state < LPFC_VPORT_READY) { @@ -4238,7 +4238,7 @@ lpfc_enable_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, "0277 lpfc_enable_node: ndlp:x%p " "usgmap:x%x refcnt:%d\n", (void *)ndlp, ndlp->nlp_usg_map, - atomic_read(&ndlp->kref.refcount)); + kref_read(&ndlp->kref)); return NULL; } /* The ndlp should not already be in active mode */ @@ -4248,7 +4248,7 @@ lpfc_enable_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, "0278 lpfc_enable_node: ndlp:x%p " "usgmap:x%x refcnt:%d\n", (void *)ndlp, ndlp->nlp_usg_map, - atomic_read(&ndlp->kref.refcount)); + kref_read(&ndlp->kref)); return NULL; } @@ -4272,7 +4272,7 @@ lpfc_enable_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, "0008 rpi:%x DID:%x flg:%x refcnt:%d " "map:%x %p\n", ndlp->nlp_rpi, ndlp->nlp_DID, ndlp->nlp_flag, - atomic_read(&ndlp->kref.refcount), + kref_read(&ndlp->kref), ndlp->nlp_usg_map, ndlp); } @@ -4546,7 +4546,7 @@ lpfc_unreg_rpi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) (bf_get(lpfc_sli_intf_if_type, &phba->sli4_hba.sli_intf) == LPFC_SLI_INTF_IF_TYPE_2) && - (atomic_read(&ndlp->kref.refcount) > 0)) { + (kref_read(&ndlp->kref) > 0)) { mbox->context1 = lpfc_nlp_get(ndlp); mbox->mbox_cmpl = lpfc_sli4_unreg_rpi_cmpl_clr; @@ -4695,14 +4695,14 @@ lpfc_cleanup_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) "0280 lpfc_cleanup_node: ndlp:x%p " "usgmap:x%x refcnt:%d\n", (void *)ndlp, ndlp->nlp_usg_map, - atomic_read(&ndlp->kref.refcount)); + kref_read(&ndlp->kref)); lpfc_dequeue_node(vport, ndlp); } else { lpfc_printf_vlog(vport, KERN_WARNING, LOG_NODE, "0281 lpfc_cleanup_node: ndlp:x%p " 
"usgmap:x%x refcnt:%d\n", (void *)ndlp, ndlp->nlp_usg_map, - atomic_read(&ndlp->kref.refcount)); + kref_read(&ndlp->kref)); lpfc_disable_node(vport, ndlp); } @@ -4791,7 +4791,7 @@ lpfc_nlp_remove(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) lpfc_printf_vlog(vport, KERN_INFO, LOG_NODE, "0005 rpi:%x DID:%x flg:%x %d map:%x %p\n", ndlp->nlp_rpi, ndlp->nlp_DID, ndlp->nlp_flag, - atomic_read(&ndlp->kref.refcount), + kref_read(&ndlp->kref), ndlp->nlp_usg_map, ndlp); if ((mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL)) != NULL) { @@ -5557,7 +5557,7 @@ lpfc_mbx_cmpl_fdmi_reg_login(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) lpfc_printf_vlog(vport, KERN_INFO, LOG_SLI, "0004 rpi:%x DID:%x flg:%x %d map:%x %p\n", ndlp->nlp_rpi, ndlp->nlp_DID, ndlp->nlp_flag, - atomic_read(&ndlp->kref.refcount), + kref_read(&ndlp->kref), ndlp->nlp_usg_map, ndlp); /* * Start issuing Fabric-Device Management Interface (FDMI) command to @@ -5728,7 +5728,7 @@ lpfc_nlp_init(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, "0007 rpi:%x DID:%x flg:%x refcnt:%d " "map:%x %p\n", ndlp->nlp_rpi, ndlp->nlp_DID, ndlp->nlp_flag, - atomic_read(&ndlp->kref.refcount), + kref_read(&ndlp->kref), ndlp->nlp_usg_map, ndlp); ndlp->active_rrqs_xri_bitmap = @@ -5767,7 +5767,7 @@ lpfc_nlp_release(struct kref *kref) "0279 lpfc_nlp_release: ndlp:x%p did %x " "usgmap:x%x refcnt:%d rpi:%x\n", (void *)ndlp, ndlp->nlp_DID, ndlp->nlp_usg_map, - atomic_read(&ndlp->kref.refcount), ndlp->nlp_rpi); + kref_read(&ndlp->kref), ndlp->nlp_rpi); /* remove ndlp from action. */ lpfc_nlp_remove(ndlp->vport, ndlp); @@ -5804,7 +5804,7 @@ lpfc_nlp_get(struct lpfc_nodelist *ndlp) lpfc_debugfs_disc_trc(ndlp->vport, LPFC_DISC_TRC_NODE, "node get: did:x%x flg:x%x refcnt:x%x", ndlp->nlp_DID, ndlp->nlp_flag, - atomic_read(&ndlp->kref.refcount)); + kref_read(&ndlp->kref)); /* The check of ndlp usage to prevent incrementing the * ndlp reference count that is in the process of being * released. 
@@ -5817,7 +5817,7 @@ lpfc_nlp_get(struct lpfc_nodelist *ndlp) "0276 lpfc_nlp_get: ndlp:x%p " "usgmap:x%x refcnt:%d\n", (void *)ndlp, ndlp->nlp_usg_map, - atomic_read(&ndlp->kref.refcount)); + kref_read(&ndlp->kref)); return NULL; } else kref_get(&ndlp->kref); @@ -5844,7 +5844,7 @@ lpfc_nlp_put(struct lpfc_nodelist *ndlp) lpfc_debugfs_disc_trc(ndlp->vport, LPFC_DISC_TRC_NODE, "node put: did:x%x flg:x%x refcnt:x%x", ndlp->nlp_DID, ndlp->nlp_flag, - atomic_read(&ndlp->kref.refcount)); + kref_read(&ndlp->kref)); phba = ndlp->phba; spin_lock_irqsave(&phba->ndlp_lock, flags); /* Check the ndlp memory free acknowledge flag to avoid the @@ -5857,7 +5857,7 @@ lpfc_nlp_put(struct lpfc_nodelist *ndlp) "0274 lpfc_nlp_put: ndlp:x%p " "usgmap:x%x refcnt:%d\n", (void *)ndlp, ndlp->nlp_usg_map, - atomic_read(&ndlp->kref.refcount)); + kref_read(&ndlp->kref)); return 1; } /* Check the ndlp inactivate log flag to avoid the possible @@ -5870,7 +5870,7 @@ lpfc_nlp_put(struct lpfc_nodelist *ndlp) "0275 lpfc_nlp_put: ndlp:x%p " "usgmap:x%x refcnt:%d\n", (void *)ndlp, ndlp->nlp_usg_map, - atomic_read(&ndlp->kref.refcount)); + kref_read(&ndlp->kref)); return 1; } /* For last put, mark the ndlp usage flags to make sure no @@ -5878,7 +5878,7 @@ lpfc_nlp_put(struct lpfc_nodelist *ndlp) * in between the process when the final kref_put has been * invoked on this ndlp. */ - if (atomic_read(&ndlp->kref.refcount) == 1) { + if (kref_read(&ndlp->kref) == 1) { /* Indicate ndlp is put to inactive state. */ NLP_SET_IACT_REQ(ndlp); /* Acknowledge ndlp memory free has been seen. 
*/ @@ -5906,8 +5906,8 @@ lpfc_nlp_not_used(struct lpfc_nodelist *ndlp) lpfc_debugfs_disc_trc(ndlp->vport, LPFC_DISC_TRC_NODE, "node not used: did:x%x flg:x%x refcnt:x%x", ndlp->nlp_DID, ndlp->nlp_flag, - atomic_read(&ndlp->kref.refcount)); - if (atomic_read(&ndlp->kref.refcount) == 1) + kref_read(&ndlp->kref)); + if (kref_read(&ndlp->kref) == 1) if (lpfc_nlp_put(ndlp)) return 1; return 0; diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index 4776fd85514f..64717c171b15 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -2660,8 +2660,7 @@ lpfc_cleanup(struct lpfc_vport *vport) "usgmap:x%x refcnt:%d\n", ndlp->nlp_DID, (void *)ndlp, ndlp->nlp_usg_map, - atomic_read( - &ndlp->kref.refcount)); + kref_read(&ndlp->kref)); } break; } diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c index d925910be761..3084983c1287 100644 --- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c +++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c @@ -371,7 +371,7 @@ static int tcm_qla2xxx_write_pending(struct se_cmd *se_cmd) */ pr_debug("write_pending aborted cmd[%p] refcount %d " "transport_state %x, t_state %x, se_cmd_flags %x\n", - cmd,cmd->se_cmd.cmd_kref.refcount.counter, + cmd, kref_read(&cmd->se_cmd.cmd_kref), cmd->se_cmd.transport_state, cmd->se_cmd.t_state, cmd->se_cmd.se_cmd_flags); @@ -584,7 +584,7 @@ static int tcm_qla2xxx_queue_data_in(struct se_cmd *se_cmd) */ pr_debug("queue_data_in aborted cmd[%p] refcount %d " "transport_state %x, t_state %x, se_cmd_flags %x\n", - cmd,cmd->se_cmd.cmd_kref.refcount.counter, + cmd, kref_read(&cmd->se_cmd.cmd_kref), cmd->se_cmd.transport_state, cmd->se_cmd.t_state, cmd->se_cmd.se_cmd_flags); diff --git a/drivers/staging/android/ion/ion.c b/drivers/staging/android/ion/ion.c index b653451843c8..937c2d5d7ec3 100644 --- a/drivers/staging/android/ion/ion.c +++ b/drivers/staging/android/ion/ion.c @@ -1300,7 +1300,7 @@ static int ion_debug_heap_show(struct seq_file *s, void *unused) 
seq_printf(s, "%16s %16u %16zu %d %d\n", buffer->task_comm, buffer->pid, buffer->size, buffer->kmap_cnt, - atomic_read(&buffer->ref.refcount)); + kref_read(&buffer->ref)); total_orphaned_size += buffer->size; } } diff --git a/drivers/staging/comedi/comedi_buf.c b/drivers/staging/comedi/comedi_buf.c index c7d7682b1412..1e1df89b5018 100644 --- a/drivers/staging/comedi/comedi_buf.c +++ b/drivers/staging/comedi/comedi_buf.c @@ -188,7 +188,7 @@ bool comedi_buf_is_mmapped(struct comedi_subdevice *s) { struct comedi_buf_map *bm = s->async->buf_map; - return bm && (atomic_read(&bm->refcount.refcount) > 1); + return bm && (kref_read(&bm->refcount) > 1); } int comedi_buf_alloc(struct comedi_device *dev, struct comedi_subdevice *s, diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-debug.c b/drivers/staging/lustre/lnet/libcfs/linux/linux-debug.c index 39a72e3f0c18..7035356e56b3 100644 --- a/drivers/staging/lustre/lnet/libcfs/linux/linux-debug.c +++ b/drivers/staging/lustre/lnet/libcfs/linux/linux-debug.c @@ -107,7 +107,7 @@ void __noreturn lbug_with_loc(struct libcfs_debug_msg_data *msgdata) libcfs_debug_dumplog(); if (libcfs_panic_on_lbug) panic("LBUG"); - set_task_state(current, TASK_UNINTERRUPTIBLE); + set_current_state(TASK_UNINTERRUPTIBLE); while (1) schedule(); } diff --git a/drivers/target/target_core_pr.c b/drivers/target/target_core_pr.c index d761025144f9..e18051185846 100644 --- a/drivers/target/target_core_pr.c +++ b/drivers/target/target_core_pr.c @@ -788,7 +788,7 @@ static struct t10_pr_registration *__core_scsi3_alloc_registration( * __core_scsi3_add_registration() */ dest_lun = rcu_dereference_check(deve_tmp->se_lun, - atomic_read(&deve_tmp->pr_kref.refcount) != 0); + kref_read(&deve_tmp->pr_kref) != 0); pr_reg_atp = __core_scsi3_do_alloc_registration(dev, nacl_tmp, dest_lun, deve_tmp, @@ -1463,7 +1463,7 @@ static int core_scsi3_lunacl_depend_item(struct se_dev_entry *se_deve) * For nacl->dynamic_node_acl=1 */ lun_acl = 
rcu_dereference_check(se_deve->se_lun_acl, - atomic_read(&se_deve->pr_kref.refcount) != 0); + kref_read(&se_deve->pr_kref) != 0); if (!lun_acl) return 0; @@ -1478,7 +1478,7 @@ static void core_scsi3_lunacl_undepend_item(struct se_dev_entry *se_deve) * For nacl->dynamic_node_acl=1 */ lun_acl = rcu_dereference_check(se_deve->se_lun_acl, - atomic_read(&se_deve->pr_kref.refcount) != 0); + kref_read(&se_deve->pr_kref) != 0); if (!lun_acl) { kref_put(&se_deve->pr_kref, target_pr_kref_release); return; @@ -1759,7 +1759,7 @@ core_scsi3_decode_spec_i_port( * 2nd loop which will never fail. */ dest_lun = rcu_dereference_check(dest_se_deve->se_lun, - atomic_read(&dest_se_deve->pr_kref.refcount) != 0); + kref_read(&dest_se_deve->pr_kref) != 0); dest_pr_reg = __core_scsi3_alloc_registration(cmd->se_dev, dest_node_acl, dest_lun, dest_se_deve, @@ -3466,7 +3466,7 @@ after_iport_check: iport_ptr); if (!dest_pr_reg) { struct se_lun *dest_lun = rcu_dereference_check(dest_se_deve->se_lun, - atomic_read(&dest_se_deve->pr_kref.refcount) != 0); + kref_read(&dest_se_deve->pr_kref) != 0); spin_unlock(&dev->dev_reservation_lock); if (core_scsi3_alloc_registration(cmd->se_dev, dest_node_acl, diff --git a/drivers/target/tcm_fc/tfc_sess.c b/drivers/target/tcm_fc/tfc_sess.c index fd5c3de79470..c91979c1463d 100644 --- a/drivers/target/tcm_fc/tfc_sess.c +++ b/drivers/target/tcm_fc/tfc_sess.c @@ -454,7 +454,7 @@ static void ft_sess_free(struct kref *kref) void ft_sess_put(struct ft_sess *sess) { - int sess_held = atomic_read(&sess->kref.refcount); + int sess_held = kref_read(&sess->kref); BUG_ON(!sess_held); kref_put(&sess->kref, ft_sess_free); diff --git a/drivers/tty/tty_ldsem.c b/drivers/tty/tty_ldsem.c index 1bf8ed13f827..9229de43e19d 100644 --- a/drivers/tty/tty_ldsem.c +++ b/drivers/tty/tty_ldsem.c @@ -200,7 +200,6 @@ static struct ld_semaphore __sched * down_read_failed(struct ld_semaphore *sem, long count, long timeout) { struct ldsem_waiter waiter; - struct task_struct *tsk = current; 
long adjust = -LDSEM_ACTIVE_BIAS + LDSEM_WAIT_BIAS; /* set up my own style of waitqueue */ @@ -221,8 +220,8 @@ down_read_failed(struct ld_semaphore *sem, long count, long timeout) list_add_tail(&waiter.list, &sem->read_wait); sem->wait_readers++; - waiter.task = tsk; - get_task_struct(tsk); + waiter.task = current; + get_task_struct(current); /* if there are no active locks, wake the new lock owner(s) */ if ((count & LDSEM_ACTIVE_MASK) == 0) @@ -232,7 +231,7 @@ down_read_failed(struct ld_semaphore *sem, long count, long timeout) /* wait to be given the lock */ for (;;) { - set_task_state(tsk, TASK_UNINTERRUPTIBLE); + set_current_state(TASK_UNINTERRUPTIBLE); if (!waiter.task) break; @@ -241,7 +240,7 @@ down_read_failed(struct ld_semaphore *sem, long count, long timeout) timeout = schedule_timeout(timeout); } - __set_task_state(tsk, TASK_RUNNING); + __set_current_state(TASK_RUNNING); if (!timeout) { /* lock timed out but check if this task was just @@ -268,7 +267,6 @@ static struct ld_semaphore __sched * down_write_failed(struct ld_semaphore *sem, long count, long timeout) { struct ldsem_waiter waiter; - struct task_struct *tsk = current; long adjust = -LDSEM_ACTIVE_BIAS; int locked = 0; @@ -289,16 +287,16 @@ down_write_failed(struct ld_semaphore *sem, long count, long timeout) list_add_tail(&waiter.list, &sem->write_wait); - waiter.task = tsk; + waiter.task = current; - set_task_state(tsk, TASK_UNINTERRUPTIBLE); + set_current_state(TASK_UNINTERRUPTIBLE); for (;;) { if (!timeout) break; raw_spin_unlock_irq(&sem->wait_lock); timeout = schedule_timeout(timeout); raw_spin_lock_irq(&sem->wait_lock); - set_task_state(tsk, TASK_UNINTERRUPTIBLE); + set_current_state(TASK_UNINTERRUPTIBLE); locked = writer_trylock(sem); if (locked) break; @@ -309,7 +307,7 @@ down_write_failed(struct ld_semaphore *sem, long count, long timeout) list_del(&waiter.list); raw_spin_unlock_irq(&sem->wait_lock); - __set_task_state(tsk, TASK_RUNNING); + __set_current_state(TASK_RUNNING); /* lock wait 
may have timed out */ if (!locked) diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index fd80c1b9c823..e6a17455adac 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -3698,7 +3698,7 @@ static void ffs_closed(struct ffs_data *ffs) goto done; if (opts->no_configfs || !opts->func_inst.group.cg_item.ci_parent - || !atomic_read(&opts->func_inst.group.cg_item.ci_kref.refcount)) + || !kref_read(&opts->func_inst.group.cg_item.ci_kref)) goto done; ci = opts->func_inst.group.cg_item.ci_parent->ci_parent; diff --git a/drivers/usb/mon/mon_main.c b/drivers/usb/mon/mon_main.c index 33ff49c4cea4..46847340b819 100644 --- a/drivers/usb/mon/mon_main.c +++ b/drivers/usb/mon/mon_main.c @@ -409,7 +409,7 @@ static void __exit mon_exit(void) printk(KERN_ERR TAG ": Outstanding opens (%d) on usb%d, leaking...\n", mbus->nreaders, mbus->u_bus->busnum); - atomic_set(&mbus->ref.refcount, 2); /* Force leak */ + kref_get(&mbus->ref); /* Force leak */ } mon_dissolve(mbus, mbus->u_bus); diff --git a/fs/exofs/sys.c b/fs/exofs/sys.c index 5e6a2c0a1f0b..1f7d5e46cdda 100644 --- a/fs/exofs/sys.c +++ b/fs/exofs/sys.c @@ -122,7 +122,7 @@ void exofs_sysfs_dbg_print(void) list_for_each_entry_safe(k_name, k_tmp, &exofs_kset->list, entry) { printk(KERN_INFO "%s: name %s ref %d\n", __func__, kobject_name(k_name), - (int)atomic_read(&k_name->kref.refcount)); + (int)kref_read(&k_name->kref)); } #endif } diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 91307940c8ac..052f8d3c41cb 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -256,7 +256,7 @@ struct fuse_io_priv { #define FUSE_IO_PRIV_SYNC(f) \ { \ - .refcnt = { ATOMIC_INIT(1) }, \ + .refcnt = KREF_INIT(1), \ .async = 0, \ .file = f, \ } diff --git a/fs/ocfs2/cluster/netdebug.c b/fs/ocfs2/cluster/netdebug.c index 27d1242c8383..564c504d6efd 100644 --- a/fs/ocfs2/cluster/netdebug.c +++ b/fs/ocfs2/cluster/netdebug.c @@ -349,7 +349,7 @@ static void 
sc_show_sock_container(struct seq_file *seq, " func key: 0x%08x\n" " func type: %u\n", sc, - atomic_read(&sc->sc_kref.refcount), + kref_read(&sc->sc_kref), &saddr, inet ? ntohs(sport) : 0, &daddr, inet ? ntohs(dport) : 0, sc->sc_node->nd_name, diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index d4b5c81f0445..ec000575e863 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c @@ -97,7 +97,7 @@ typeof(sc) __sc = (sc); \ mlog(ML_SOCKET, "[sc %p refs %d sock %p node %u page %p " \ "pg_off %zu] " fmt, __sc, \ - atomic_read(&__sc->sc_kref.refcount), __sc->sc_sock, \ + kref_read(&__sc->sc_kref), __sc->sc_sock, \ __sc->sc_node->nd_num, __sc->sc_page, __sc->sc_page_off , \ ##args); \ } while (0) diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c index e7b760deefae..9b984cae4c4e 100644 --- a/fs/ocfs2/dlm/dlmdebug.c +++ b/fs/ocfs2/dlm/dlmdebug.c @@ -81,7 +81,7 @@ static void __dlm_print_lock(struct dlm_lock *lock) lock->ml.type, lock->ml.convert_type, lock->ml.node, dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)), dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)), - atomic_read(&lock->lock_refs.refcount), + kref_read(&lock->lock_refs), (list_empty(&lock->ast_list) ? 'y' : 'n'), (lock->ast_pending ? 'y' : 'n'), (list_empty(&lock->bast_list) ? 'y' : 'n'), @@ -106,7 +106,7 @@ void __dlm_print_one_lock_resource(struct dlm_lock_resource *res) printk("lockres: %s, owner=%u, state=%u\n", buf, res->owner, res->state); printk(" last used: %lu, refcnt: %u, on purge list: %s\n", - res->last_used, atomic_read(&res->refs.refcount), + res->last_used, kref_read(&res->refs), list_empty(&res->purge) ? 
"no" : "yes"); printk(" on dirty list: %s, on reco list: %s, " "migrating pending: %s\n", @@ -298,7 +298,7 @@ static int dump_mle(struct dlm_master_list_entry *mle, char *buf, int len) mle_type, mle->master, mle->new_master, !list_empty(&mle->hb_events), !!mle->inuse, - atomic_read(&mle->mle_refs.refcount)); + kref_read(&mle->mle_refs)); out += snprintf(buf + out, len - out, "Maybe="); out += stringify_nodemap(mle->maybe_map, O2NM_MAX_NODES, @@ -494,7 +494,7 @@ static int dump_lock(struct dlm_lock *lock, int list_type, char *buf, int len) lock->ast_pending, lock->bast_pending, lock->convert_pending, lock->lock_pending, lock->cancel_pending, lock->unlock_pending, - atomic_read(&lock->lock_refs.refcount)); + kref_read(&lock->lock_refs)); spin_unlock(&lock->spinlock); return out; @@ -521,7 +521,7 @@ static int dump_lockres(struct dlm_lock_resource *res, char *buf, int len) !list_empty(&res->recovering), res->inflight_locks, res->migration_pending, atomic_read(&res->asts_reserved), - atomic_read(&res->refs.refcount)); + kref_read(&res->refs)); /* refmap */ out += snprintf(buf + out, len - out, "RMAP:"); @@ -777,7 +777,7 @@ static int debug_state_print(struct dlm_ctxt *dlm, char *buf, int len) /* Purge Count: xxx Refs: xxx */ out += snprintf(buf + out, len - out, "Purge Count: %d Refs: %d\n", dlm->purge_count, - atomic_read(&dlm->dlm_refs.refcount)); + kref_read(&dlm->dlm_refs)); /* Dead Node: xxx */ out += snprintf(buf + out, len - out, diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c index 733e4e79c8e2..32fd261ae13d 100644 --- a/fs/ocfs2/dlm/dlmdomain.c +++ b/fs/ocfs2/dlm/dlmdomain.c @@ -2072,7 +2072,7 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain, INIT_LIST_HEAD(&dlm->dlm_eviction_callbacks); mlog(0, "context init: refcount %u\n", - atomic_read(&dlm->dlm_refs.refcount)); + kref_read(&dlm->dlm_refs)); leave: if (ret < 0 && dlm) { diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index a464c8088170..7025d8c27999 100644 --- 
a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -233,7 +233,7 @@ static void __dlm_put_mle(struct dlm_master_list_entry *mle) assert_spin_locked(&dlm->spinlock); assert_spin_locked(&dlm->master_lock); - if (!atomic_read(&mle->mle_refs.refcount)) { + if (!kref_read(&mle->mle_refs)) { /* this may or may not crash, but who cares. * it's a BUG. */ mlog(ML_ERROR, "bad mle: %p\n", mle); @@ -1124,9 +1124,9 @@ recheck: unsigned long timeo = msecs_to_jiffies(DLM_MASTERY_TIMEOUT_MS); /* - if (atomic_read(&mle->mle_refs.refcount) < 2) + if (kref_read(&mle->mle_refs) < 2) mlog(ML_ERROR, "mle (%p) refs=%d, name=%.*s\n", mle, - atomic_read(&mle->mle_refs.refcount), + kref_read(&mle->mle_refs), res->lockname.len, res->lockname.name); */ atomic_set(&mle->woken, 0); @@ -1979,7 +1979,7 @@ ok: * on this mle. */ spin_lock(&dlm->master_lock); - rr = atomic_read(&mle->mle_refs.refcount); + rr = kref_read(&mle->mle_refs); if (mle->inuse > 0) { if (extra_ref && rr < 3) err = 1; diff --git a/fs/ocfs2/dlm/dlmunlock.c b/fs/ocfs2/dlm/dlmunlock.c index 1082b2c3014b..63d701cd1e2e 100644 --- a/fs/ocfs2/dlm/dlmunlock.c +++ b/fs/ocfs2/dlm/dlmunlock.c @@ -251,7 +251,7 @@ leave: mlog(0, "lock %u:%llu should be gone now! 
refs=%d\n", dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)), dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)), - atomic_read(&lock->lock_refs.refcount)-1); + kref_read(&lock->lock_refs)-1); dlm_lock_put(lock); } if (actions & DLM_UNLOCK_CALL_AST) diff --git a/include/asm-generic/rwsem.h b/include/asm-generic/rwsem.h index 5be122e3d326..6c6a2141f271 100644 --- a/include/asm-generic/rwsem.h +++ b/include/asm-generic/rwsem.h @@ -33,7 +33,7 @@ */ static inline void __down_read(struct rw_semaphore *sem) { - if (unlikely(atomic_long_inc_return_acquire((atomic_long_t *)&sem->count) <= 0)) + if (unlikely(atomic_long_inc_return_acquire(&sem->count) <= 0)) rwsem_down_read_failed(sem); } @@ -58,7 +58,7 @@ static inline void __down_write(struct rw_semaphore *sem) long tmp; tmp = atomic_long_add_return_acquire(RWSEM_ACTIVE_WRITE_BIAS, - (atomic_long_t *)&sem->count); + &sem->count); if (unlikely(tmp != RWSEM_ACTIVE_WRITE_BIAS)) rwsem_down_write_failed(sem); } @@ -68,7 +68,7 @@ static inline int __down_write_killable(struct rw_semaphore *sem) long tmp; tmp = atomic_long_add_return_acquire(RWSEM_ACTIVE_WRITE_BIAS, - (atomic_long_t *)&sem->count); + &sem->count); if (unlikely(tmp != RWSEM_ACTIVE_WRITE_BIAS)) if (IS_ERR(rwsem_down_write_failed_killable(sem))) return -EINTR; @@ -91,7 +91,7 @@ static inline void __up_read(struct rw_semaphore *sem) { long tmp; - tmp = atomic_long_dec_return_release((atomic_long_t *)&sem->count); + tmp = atomic_long_dec_return_release(&sem->count); if (unlikely(tmp < -1 && (tmp & RWSEM_ACTIVE_MASK) == 0)) rwsem_wake(sem); } @@ -102,7 +102,7 @@ static inline void __up_read(struct rw_semaphore *sem) static inline void __up_write(struct rw_semaphore *sem) { if (unlikely(atomic_long_sub_return_release(RWSEM_ACTIVE_WRITE_BIAS, - (atomic_long_t *)&sem->count) < 0)) + &sem->count) < 0)) rwsem_wake(sem); } @@ -120,8 +120,7 @@ static inline void __downgrade_write(struct rw_semaphore *sem) * read-locked region is ok to be re-ordered into the * write 
side. As such, rely on RELEASE semantics. */ - tmp = atomic_long_add_return_release(-RWSEM_WAITING_BIAS, - (atomic_long_t *)&sem->count); + tmp = atomic_long_add_return_release(-RWSEM_WAITING_BIAS, &sem->count); if (tmp < 0) rwsem_downgrade_wake(sem); } diff --git a/include/drm/drm_framebuffer.h b/include/drm/drm_framebuffer.h index 1ddfa2928802..a232e7f0c869 100644 --- a/include/drm/drm_framebuffer.h +++ b/include/drm/drm_framebuffer.h @@ -247,7 +247,7 @@ static inline void drm_framebuffer_unreference(struct drm_framebuffer *fb) */ static inline uint32_t drm_framebuffer_read_refcount(struct drm_framebuffer *fb) { - return atomic_read(&fb->base.refcount.refcount); + return kref_read(&fb->base.refcount); } /** diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h index 652e45be97c8..9a465314572c 100644 --- a/include/drm/ttm/ttm_bo_api.h +++ b/include/drm/ttm/ttm_bo_api.h @@ -332,19 +332,6 @@ extern int ttm_bo_validate(struct ttm_buffer_object *bo, */ extern void ttm_bo_unref(struct ttm_buffer_object **bo); - -/** - * ttm_bo_list_ref_sub - * - * @bo: The buffer object. - * @count: The number of references with which to decrease @bo::list_kref; - * @never_free: The refcount should not reach zero with this operation. - * - * Release @count lru list references to this buffer object. - */ -extern void ttm_bo_list_ref_sub(struct ttm_buffer_object *bo, int count, - bool never_free); - /** * ttm_bo_add_to_lru * @@ -367,7 +354,7 @@ extern void ttm_bo_add_to_lru(struct ttm_buffer_object *bo); * and is usually called just immediately after the bo has been reserved to * avoid recursive reservation from lru lists. 
*/ -extern int ttm_bo_del_from_lru(struct ttm_buffer_object *bo); +extern void ttm_bo_del_from_lru(struct ttm_buffer_object *bo); /** * ttm_bo_move_to_lru_tail diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h index cdbdb40eb5bd..feecf33a1212 100644 --- a/include/drm/ttm/ttm_bo_driver.h +++ b/include/drm/ttm/ttm_bo_driver.h @@ -878,7 +878,7 @@ static inline int ttm_bo_reserve(struct ttm_buffer_object *bo, { int ret; - WARN_ON(!atomic_read(&bo->kref.refcount)); + WARN_ON(!kref_read(&bo->kref)); ret = __ttm_bo_reserve(bo, interruptible, no_wait, ticket); if (likely(ret == 0)) @@ -903,7 +903,7 @@ static inline int ttm_bo_reserve_slowpath(struct ttm_buffer_object *bo, { int ret = 0; - WARN_ON(!atomic_read(&bo->kref.refcount)); + WARN_ON(!kref_read(&bo->kref)); if (interruptible) ret = ww_mutex_lock_slow_interruptible(&bo->resv->lock, diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index a0547c571800..b63d6b7b0db0 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -402,6 +402,6 @@ extern bool ____wrong_branch_error(void); #define static_branch_enable(x) static_key_enable(&(x)->key) #define static_branch_disable(x) static_key_disable(&(x)->key) -#endif /* _LINUX_JUMP_LABEL_H */ - #endif /* __ASSEMBLY__ */ + +#endif /* _LINUX_JUMP_LABEL_H */ diff --git a/include/linux/kref.h b/include/linux/kref.h index e15828fd71f1..f4156f88f557 100644 --- a/include/linux/kref.h +++ b/include/linux/kref.h @@ -15,22 +15,27 @@ #ifndef _KREF_H_ #define _KREF_H_ -#include <linux/bug.h> -#include <linux/atomic.h> -#include <linux/kernel.h> -#include <linux/mutex.h> +#include <linux/spinlock.h> +#include <linux/refcount.h> struct kref { - atomic_t refcount; + refcount_t refcount; }; +#define KREF_INIT(n) { .refcount = REFCOUNT_INIT(n), } + /** * kref_init - initialize object. * @kref: object in question. 
*/ static inline void kref_init(struct kref *kref) { - atomic_set(&kref->refcount, 1); + refcount_set(&kref->refcount, 1); +} + +static inline unsigned int kref_read(const struct kref *kref) +{ + return refcount_read(&kref->refcount); } /** @@ -39,17 +44,12 @@ static inline void kref_init(struct kref *kref) */ static inline void kref_get(struct kref *kref) { - /* If refcount was 0 before incrementing then we have a race - * condition when this kref is freeing by some other thread right now. - * In this case one should use kref_get_unless_zero() - */ - WARN_ON_ONCE(atomic_inc_return(&kref->refcount) < 2); + refcount_inc(&kref->refcount); } /** - * kref_sub - subtract a number of refcounts for object. + * kref_put - decrement refcount for object. * @kref: object. - * @count: Number of recounts to subtract. * @release: pointer to the function that will clean up the object when the * last reference to the object is released. * This pointer is required, and it is not acceptable to pass kfree @@ -58,57 +58,43 @@ static inline void kref_get(struct kref *kref) * maintainer, and anyone else who happens to notice it. You have * been warned. * - * Subtract @count from the refcount, and if 0, call release(). + * Decrement the refcount, and if 0, call release(). * Return 1 if the object was removed, otherwise return 0. Beware, if this * function returns 0, you still can not count on the kref from remaining in * memory. Only use the return value if you want to see if the kref is now * gone, not present. */ -static inline int kref_sub(struct kref *kref, unsigned int count, - void (*release)(struct kref *kref)) +static inline int kref_put(struct kref *kref, void (*release)(struct kref *kref)) { WARN_ON(release == NULL); - if (atomic_sub_and_test((int) count, &kref->refcount)) { + if (refcount_dec_and_test(&kref->refcount)) { release(kref); return 1; } return 0; } -/** - * kref_put - decrement refcount for object. - * @kref: object. 
- * @release: pointer to the function that will clean up the object when the - * last reference to the object is released. - * This pointer is required, and it is not acceptable to pass kfree - * in as this function. If the caller does pass kfree to this - * function, you will be publicly mocked mercilessly by the kref - * maintainer, and anyone else who happens to notice it. You have - * been warned. - * - * Decrement the refcount, and if 0, call release(). - * Return 1 if the object was removed, otherwise return 0. Beware, if this - * function returns 0, you still can not count on the kref from remaining in - * memory. Only use the return value if you want to see if the kref is now - * gone, not present. - */ -static inline int kref_put(struct kref *kref, void (*release)(struct kref *kref)) -{ - return kref_sub(kref, 1, release); -} - static inline int kref_put_mutex(struct kref *kref, void (*release)(struct kref *kref), struct mutex *lock) { WARN_ON(release == NULL); - if (unlikely(!atomic_add_unless(&kref->refcount, -1, 1))) { - mutex_lock(lock); - if (unlikely(!atomic_dec_and_test(&kref->refcount))) { - mutex_unlock(lock); - return 0; - } + + if (refcount_dec_and_mutex_lock(&kref->refcount, lock)) { + release(kref); + return 1; + } + return 0; +} + +static inline int kref_put_lock(struct kref *kref, + void (*release)(struct kref *kref), + spinlock_t *lock) +{ + WARN_ON(release == NULL); + + if (refcount_dec_and_lock(&kref->refcount, lock)) { release(kref); return 1; } @@ -133,6 +119,6 @@ static inline int kref_put_mutex(struct kref *kref, */ static inline int __must_check kref_get_unless_zero(struct kref *kref) { - return atomic_add_unless(&kref->refcount, 1, 0); + return refcount_inc_not_zero(&kref->refcount); } #endif /* _KREF_H_ */ diff --git a/include/linux/mutex.h b/include/linux/mutex.h index 7fffbfcd5430..1127fe31645d 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -20,6 +20,8 @@ #include <linux/osq_lock.h> #include 
<linux/debug_locks.h> +struct ww_acquire_ctx; + /* * Simple, straightforward mutexes with strict semantics: * @@ -65,7 +67,7 @@ struct mutex { static inline struct task_struct *__mutex_owner(struct mutex *lock) { - return (struct task_struct *)(atomic_long_read(&lock->owner) & ~0x03); + return (struct task_struct *)(atomic_long_read(&lock->owner) & ~0x07); } /* @@ -75,6 +77,7 @@ static inline struct task_struct *__mutex_owner(struct mutex *lock) struct mutex_waiter { struct list_head list; struct task_struct *task; + struct ww_acquire_ctx *ww_ctx; #ifdef CONFIG_DEBUG_MUTEXES void *magic; #endif diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h index 5b2e6159b744..93664f022ecf 100644 --- a/include/linux/percpu-rwsem.h +++ b/include/linux/percpu-rwsem.h @@ -4,15 +4,15 @@ #include <linux/atomic.h> #include <linux/rwsem.h> #include <linux/percpu.h> -#include <linux/wait.h> +#include <linux/rcuwait.h> #include <linux/rcu_sync.h> #include <linux/lockdep.h> struct percpu_rw_semaphore { struct rcu_sync rss; unsigned int __percpu *read_count; - struct rw_semaphore rw_sem; - wait_queue_head_t writer; + struct rw_semaphore rw_sem; /* slowpath */ + struct rcuwait writer; /* blocked writer */ int readers_block; }; @@ -22,7 +22,7 @@ static struct percpu_rw_semaphore name = { \ .rss = __RCU_SYNC_INITIALIZER(name.rss, RCU_SCHED_SYNC), \ .read_count = &__percpu_rwsem_rc_##name, \ .rw_sem = __RWSEM_INITIALIZER(name.rw_sem), \ - .writer = __WAIT_QUEUE_HEAD_INITIALIZER(name.writer), \ + .writer = __RCUWAIT_INITIALIZER(name.writer), \ } extern int __percpu_down_read(struct percpu_rw_semaphore *, int); diff --git a/include/linux/poison.h b/include/linux/poison.h index 51334edec506..a39540326417 100644 --- a/include/linux/poison.h +++ b/include/linux/poison.h @@ -80,6 +80,7 @@ /********** kernel/mutexes **********/ #define MUTEX_DEBUG_INIT 0x11 #define MUTEX_DEBUG_FREE 0x22 +#define MUTEX_POISON_WW_CTX ((void *) 0x500 + POISON_POINTER_DELTA) /********** 
lib/flex_array.c **********/ #define FLEX_ARRAY_FREE 0x6c /* for use-after-free poisoning */ diff --git a/include/linux/rcuwait.h b/include/linux/rcuwait.h new file mode 100644 index 000000000000..a4ede51b3e7c --- /dev/null +++ b/include/linux/rcuwait.h @@ -0,0 +1,63 @@ +#ifndef _LINUX_RCUWAIT_H_ +#define _LINUX_RCUWAIT_H_ + +#include <linux/rcupdate.h> + +/* + * rcuwait provides a way of blocking and waking up a single + * task in an rcu-safe manner; where it is forbidden to use + * after exit_notify(). task_struct is not properly rcu protected, + * unless dealing with rcu-aware lists, ie: find_task_by_*(). + * + * Alternatively we have task_rcu_dereference(), but the return + * semantics have different implications which would break the + * wakeup side. The only time @task is non-nil is when a user is + * blocked (or checking if it needs to) on a condition, and reset + * as soon as we know that the condition has succeeded and are + * awoken. + */ +struct rcuwait { + struct task_struct *task; +}; + +#define __RCUWAIT_INITIALIZER(name) \ + { .task = NULL, } + +static inline void rcuwait_init(struct rcuwait *w) +{ + w->task = NULL; +} + +extern void rcuwait_wake_up(struct rcuwait *w); + +/* + * The caller is responsible for locking around rcuwait_wait_event(), + * such that writes to @task are properly serialized. + */ +#define rcuwait_wait_event(w, condition) \ +({ \ + /* \ + * Complain if we are called after do_exit()/exit_notify(), \ + * as we cannot rely on the rcu critical region for the \ + * wakeup side. \ + */ \ + WARN_ON(current->exit_state); \ + \ + rcu_assign_pointer((w)->task, current); \ + for (;;) { \ + /* \ + * Implicit barrier (A) pairs with (B) in \ + * rcuwait_wake_up(). 
\ + */ \ + set_current_state(TASK_UNINTERRUPTIBLE); \ + if (condition) \ + break; \ + \ + schedule(); \ + } \ + \ + WRITE_ONCE((w)->task, NULL); \ + __set_current_state(TASK_RUNNING); \ +}) + +#endif /* _LINUX_RCUWAIT_H_ */ diff --git a/include/linux/refcount.h b/include/linux/refcount.h new file mode 100644 index 000000000000..600aadf9cca4 --- /dev/null +++ b/include/linux/refcount.h @@ -0,0 +1,294 @@ +#ifndef _LINUX_REFCOUNT_H +#define _LINUX_REFCOUNT_H + +/* + * Variant of atomic_t specialized for reference counts. + * + * The interface matches the atomic_t interface (to aid in porting) but only + * provides the few functions one should use for reference counting. + * + * It differs in that the counter saturates at UINT_MAX and will not move once + * there. This avoids wrapping the counter and causing 'spurious' + * use-after-free issues. + * + * Memory ordering rules are slightly relaxed wrt regular atomic_t functions + * and provide only what is strictly required for refcounts. + * + * The increments are fully relaxed; these will not provide ordering. The + * rationale is that whatever is used to obtain the object we're increasing the + * reference count on will provide the ordering. For locked data structures, + * its the lock acquire, for RCU/lockless data structures its the dependent + * load. + * + * Do note that inc_not_zero() provides a control dependency which will order + * future stores against the inc, this ensures we'll never modify the object + * if we did not in fact acquire a reference. + * + * The decrements will provide release order, such that all the prior loads and + * stores will be issued before, it also provides a control dependency, which + * will order us against the subsequent free(). + * + * The control dependency is against the load of the cmpxchg (ll/sc) that + * succeeded. This means the stores aren't fully ordered, but this is fine + * because the 1->0 transition indicates no concurrency. 
+ * + * Note that the allocator is responsible for ordering things between free() + * and alloc(). + * + */ + +#include <linux/atomic.h> +#include <linux/bug.h> +#include <linux/mutex.h> +#include <linux/spinlock.h> + +#ifdef CONFIG_DEBUG_REFCOUNT +#define REFCOUNT_WARN(cond, str) WARN_ON(cond) +#define __refcount_check __must_check +#else +#define REFCOUNT_WARN(cond, str) (void)(cond) +#define __refcount_check +#endif + +typedef struct refcount_struct { + atomic_t refs; +} refcount_t; + +#define REFCOUNT_INIT(n) { .refs = ATOMIC_INIT(n), } + +static inline void refcount_set(refcount_t *r, unsigned int n) +{ + atomic_set(&r->refs, n); +} + +static inline unsigned int refcount_read(const refcount_t *r) +{ + return atomic_read(&r->refs); +} + +static inline __refcount_check +bool refcount_add_not_zero(unsigned int i, refcount_t *r) +{ + unsigned int old, new, val = atomic_read(&r->refs); + + for (;;) { + if (!val) + return false; + + if (unlikely(val == UINT_MAX)) + return true; + + new = val + i; + if (new < val) + new = UINT_MAX; + old = atomic_cmpxchg_relaxed(&r->refs, val, new); + if (old == val) + break; + + val = old; + } + + REFCOUNT_WARN(new == UINT_MAX, "refcount_t: saturated; leaking memory.\n"); + + return true; +} + +static inline void refcount_add(unsigned int i, refcount_t *r) +{ + REFCOUNT_WARN(!refcount_add_not_zero(i, r), "refcount_t: addition on 0; use-after-free.\n"); +} + +/* + * Similar to atomic_inc_not_zero(), will saturate at UINT_MAX and WARN. + * + * Provides no memory ordering, it is assumed the caller has guaranteed the + * object memory to be stable (RCU, etc.). It does provide a control dependency + * and thereby orders future stores. See the comment on top. 
+ */ +static inline __refcount_check +bool refcount_inc_not_zero(refcount_t *r) +{ + unsigned int old, new, val = atomic_read(&r->refs); + + for (;;) { + new = val + 1; + + if (!val) + return false; + + if (unlikely(!new)) + return true; + + old = atomic_cmpxchg_relaxed(&r->refs, val, new); + if (old == val) + break; + + val = old; + } + + REFCOUNT_WARN(new == UINT_MAX, "refcount_t: saturated; leaking memory.\n"); + + return true; +} + +/* + * Similar to atomic_inc(), will saturate at UINT_MAX and WARN. + * + * Provides no memory ordering, it is assumed the caller already has a + * reference on the object, will WARN when this is not so. + */ +static inline void refcount_inc(refcount_t *r) +{ + REFCOUNT_WARN(!refcount_inc_not_zero(r), "refcount_t: increment on 0; use-after-free.\n"); +} + +/* + * Similar to atomic_dec_and_test(), it will WARN on underflow and fail to + * decrement when saturated at UINT_MAX. + * + * Provides release memory ordering, such that prior loads and stores are done + * before, and provides a control dependency such that free() must come after. + * See the comment on top. + */ +static inline __refcount_check +bool refcount_sub_and_test(unsigned int i, refcount_t *r) +{ + unsigned int old, new, val = atomic_read(&r->refs); + + for (;;) { + if (unlikely(val == UINT_MAX)) + return false; + + new = val - i; + if (new > val) { + REFCOUNT_WARN(new > val, "refcount_t: underflow; use-after-free.\n"); + return false; + } + + old = atomic_cmpxchg_release(&r->refs, val, new); + if (old == val) + break; + + val = old; + } + + return !new; +} + +static inline __refcount_check +bool refcount_dec_and_test(refcount_t *r) +{ + return refcount_sub_and_test(1, r); +} + +/* + * Similar to atomic_dec(), it will WARN on underflow and fail to decrement + * when saturated at UINT_MAX. + * + * Provides release memory ordering, such that prior loads and stores are done + * before. 
+ */ +static inline +void refcount_dec(refcount_t *r) +{ + REFCOUNT_WARN(refcount_dec_and_test(r), "refcount_t: decrement hit 0; leaking memory.\n"); +} + +/* + * No atomic_t counterpart, it attempts a 1 -> 0 transition and returns the + * success thereof. + * + * Like all decrement operations, it provides release memory order and provides + * a control dependency. + * + * It can be used like a try-delete operator; this explicit case is provided + * and not cmpxchg in generic, because that would allow implementing unsafe + * operations. + */ +static inline __refcount_check +bool refcount_dec_if_one(refcount_t *r) +{ + return atomic_cmpxchg_release(&r->refs, 1, 0) == 1; +} + +/* + * No atomic_t counterpart, it decrements unless the value is 1, in which case + * it will return false. + * + * Was often done like: atomic_add_unless(&var, -1, 1) + */ +static inline __refcount_check +bool refcount_dec_not_one(refcount_t *r) +{ + unsigned int old, new, val = atomic_read(&r->refs); + + for (;;) { + if (unlikely(val == UINT_MAX)) + return true; + + if (val == 1) + return false; + + new = val - 1; + if (new > val) { + REFCOUNT_WARN(new > val, "refcount_t: underflow; use-after-free.\n"); + return true; + } + + old = atomic_cmpxchg_release(&r->refs, val, new); + if (old == val) + break; + + val = old; + } + + return true; +} + +/* + * Similar to atomic_dec_and_mutex_lock(), it will WARN on underflow and fail + * to decrement when saturated at UINT_MAX. + * + * Provides release memory ordering, such that prior loads and stores are done + * before, and provides a control dependency such that free() must come after. + * See the comment on top. 
+ */ +static inline __refcount_check +bool refcount_dec_and_mutex_lock(refcount_t *r, struct mutex *lock) +{ + if (refcount_dec_not_one(r)) + return false; + + mutex_lock(lock); + if (!refcount_dec_and_test(r)) { + mutex_unlock(lock); + return false; + } + + return true; +} + +/* + * Similar to atomic_dec_and_lock(), it will WARN on underflow and fail to + * decrement when saturated at UINT_MAX. + * + * Provides release memory ordering, such that prior loads and stores are done + * before, and provides a control dependency such that free() must come after. + * See the comment on top. + */ +static inline __refcount_check +bool refcount_dec_and_lock(refcount_t *r, spinlock_t *lock) +{ + if (refcount_dec_not_one(r)) + return false; + + spin_lock(lock); + if (!refcount_dec_and_test(r)) { + spin_unlock(lock); + return false; + } + + return true; +} + +#endif /* _LINUX_REFCOUNT_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index c89b7fdec41e..c8e519d0b4a3 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -226,7 +226,7 @@ extern void proc_sched_set_task(struct task_struct *p); extern char ___assert_task_state[1 - 2*!!( sizeof(TASK_STATE_TO_CHAR_STR)-1 != ilog2(TASK_STATE_MAX)+1)]; -/* Convenience macros for the sake of set_task_state */ +/* Convenience macros for the sake of set_current_state */ #define TASK_KILLABLE (TASK_WAKEKILL | TASK_UNINTERRUPTIBLE) #define TASK_STOPPED (TASK_WAKEKILL | __TASK_STOPPED) #define TASK_TRACED (TASK_WAKEKILL | __TASK_TRACED) @@ -253,17 +253,6 @@ extern char ___assert_task_state[1 - 2*!!( #ifdef CONFIG_DEBUG_ATOMIC_SLEEP -#define __set_task_state(tsk, state_value) \ - do { \ - (tsk)->task_state_change = _THIS_IP_; \ - (tsk)->state = (state_value); \ - } while (0) -#define set_task_state(tsk, state_value) \ - do { \ - (tsk)->task_state_change = _THIS_IP_; \ - smp_store_mb((tsk)->state, (state_value)); \ - } while (0) - #define __set_current_state(state_value) \ do { \ current->task_state_change = 
_THIS_IP_; \ @@ -276,20 +265,6 @@ extern char ___assert_task_state[1 - 2*!!( } while (0) #else - -/* - * @tsk had better be current, or you get to keep the pieces. - * - * The only reason is that computing current can be more expensive than - * using a pointer that's already available. - * - * Therefore, see set_current_state(). - */ -#define __set_task_state(tsk, state_value) \ - do { (tsk)->state = (state_value); } while (0) -#define set_task_state(tsk, state_value) \ - smp_store_mb((tsk)->state, (state_value)) - /* * set_current_state() includes a barrier so that the write of current->state * is correctly serialised wrt the caller's subsequent test of whether to @@ -1018,8 +993,8 @@ enum cpu_idle_type { * * The DEFINE_WAKE_Q macro declares and initializes the list head. * wake_up_q() does NOT reinitialize the list; it's expected to be - * called near the end of a function, where the fact that the queue is - * not used again will be easy to see by inspection. + * called near the end of a function. Otherwise, the list can be + * re-initialized for later re-use by wake_q_init(). * * Note that this can cause spurious wakeups. 
schedule() callers * must ensure the call is done inside a loop, confirming that the @@ -1039,6 +1014,12 @@ struct wake_q_head { #define DEFINE_WAKE_Q(name) \ struct wake_q_head name = { WAKE_Q_TAIL, &name.first } +static inline void wake_q_init(struct wake_q_head *head) +{ + head->first = WAKE_Q_TAIL; + head->lastp = &head->first; +} + extern void wake_q_add(struct wake_q_head *head, struct task_struct *task); extern void wake_up_q(struct wake_q_head *head); diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index 47dd0cebd204..59248dcc6ef3 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -180,8 +180,6 @@ static inline void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock) #ifdef CONFIG_DEBUG_LOCK_ALLOC # define raw_spin_lock_nested(lock, subclass) \ _raw_spin_lock_nested(lock, subclass) -# define raw_spin_lock_bh_nested(lock, subclass) \ - _raw_spin_lock_bh_nested(lock, subclass) # define raw_spin_lock_nest_lock(lock, nest_lock) \ do { \ @@ -197,7 +195,6 @@ static inline void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock) # define raw_spin_lock_nested(lock, subclass) \ _raw_spin_lock(((void)(subclass), (lock))) # define raw_spin_lock_nest_lock(lock, nest_lock) _raw_spin_lock(lock) -# define raw_spin_lock_bh_nested(lock, subclass) _raw_spin_lock_bh(lock) #endif #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) @@ -317,11 +314,6 @@ do { \ raw_spin_lock_nested(spinlock_check(lock), subclass); \ } while (0) -#define spin_lock_bh_nested(lock, subclass) \ -do { \ - raw_spin_lock_bh_nested(spinlock_check(lock), subclass);\ -} while (0) - #define spin_lock_nest_lock(lock, nest_lock) \ do { \ raw_spin_lock_nest_lock(spinlock_check(lock), nest_lock); \ diff --git a/include/linux/spinlock_api_smp.h b/include/linux/spinlock_api_smp.h index 5344268e6e62..42dfab89e740 100644 --- a/include/linux/spinlock_api_smp.h +++ b/include/linux/spinlock_api_smp.h @@ -22,8 +22,6 @@ int in_lock_functions(unsigned long addr); 
void __lockfunc _raw_spin_lock(raw_spinlock_t *lock) __acquires(lock); void __lockfunc _raw_spin_lock_nested(raw_spinlock_t *lock, int subclass) __acquires(lock); -void __lockfunc _raw_spin_lock_bh_nested(raw_spinlock_t *lock, int subclass) - __acquires(lock); void __lockfunc _raw_spin_lock_nest_lock(raw_spinlock_t *lock, struct lockdep_map *map) __acquires(lock); diff --git a/include/linux/spinlock_api_up.h b/include/linux/spinlock_api_up.h index d3afef9d8dbe..d0d188861ad6 100644 --- a/include/linux/spinlock_api_up.h +++ b/include/linux/spinlock_api_up.h @@ -57,7 +57,6 @@ #define _raw_spin_lock(lock) __LOCK(lock) #define _raw_spin_lock_nested(lock, subclass) __LOCK(lock) -#define _raw_spin_lock_bh_nested(lock, subclass) __LOCK(lock) #define _raw_read_lock(lock) __LOCK(lock) #define _raw_write_lock(lock) __LOCK(lock) #define _raw_spin_lock_bh(lock) __LOCK_BH(lock) diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index 62a60eeacb0a..8a511c0985aa 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -198,7 +198,7 @@ static inline struct cache_head *cache_get(struct cache_head *h) static inline void cache_put(struct cache_head *h, struct cache_detail *cd) { - if (atomic_read(&h->ref.refcount) <= 2 && + if (kref_read(&h->ref) <= 2 && h->expiry_time < cd->nextcheck) cd->nextcheck = h->expiry_time; kref_put(&h->ref, cd->cache_put); diff --git a/include/linux/ww_mutex.h b/include/linux/ww_mutex.h index 7b0066814fa0..5dd9a7682227 100644 --- a/include/linux/ww_mutex.h +++ b/include/linux/ww_mutex.h @@ -51,10 +51,10 @@ struct ww_mutex { }; #ifdef CONFIG_DEBUG_LOCK_ALLOC -# define __WW_CLASS_MUTEX_INITIALIZER(lockname, ww_class) \ - , .ww_class = &ww_class +# define __WW_CLASS_MUTEX_INITIALIZER(lockname, class) \ + , .ww_class = class #else -# define __WW_CLASS_MUTEX_INITIALIZER(lockname, ww_class) +# define __WW_CLASS_MUTEX_INITIALIZER(lockname, class) #endif #define __WW_CLASS_INITIALIZER(ww_class) \ @@ -63,7 +63,7 @@ struct 
ww_mutex { , .mutex_name = #ww_class "_mutex" } #define __WW_MUTEX_INITIALIZER(lockname, class) \ - { .base = { \__MUTEX_INITIALIZER(lockname) } \ + { .base = __MUTEX_INITIALIZER(lockname.base) \ __WW_CLASS_MUTEX_INITIALIZER(lockname, class) } #define DEFINE_WW_CLASS(classname) \ @@ -186,11 +186,6 @@ static inline void ww_acquire_fini(struct ww_acquire_ctx *ctx) #endif } -extern int __must_check __ww_mutex_lock(struct ww_mutex *lock, - struct ww_acquire_ctx *ctx); -extern int __must_check __ww_mutex_lock_interruptible(struct ww_mutex *lock, - struct ww_acquire_ctx *ctx); - /** * ww_mutex_lock - acquire the w/w mutex * @lock: the mutex to be acquired @@ -220,14 +215,7 @@ extern int __must_check __ww_mutex_lock_interruptible(struct ww_mutex *lock, * * A mutex acquired with this function must be released with ww_mutex_unlock. */ -static inline int ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) -{ - if (ctx) - return __ww_mutex_lock(lock, ctx); - - mutex_lock(&lock->base); - return 0; -} +extern int /* __must_check */ ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ctx); /** * ww_mutex_lock_interruptible - acquire the w/w mutex, interruptible @@ -259,14 +247,8 @@ static inline int ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ct * * A mutex acquired with this function must be released with ww_mutex_unlock. 
*/ -static inline int __must_check ww_mutex_lock_interruptible(struct ww_mutex *lock, - struct ww_acquire_ctx *ctx) -{ - if (ctx) - return __ww_mutex_lock_interruptible(lock, ctx); - else - return mutex_lock_interruptible(&lock->base); -} +extern int __must_check ww_mutex_lock_interruptible(struct ww_mutex *lock, + struct ww_acquire_ctx *ctx); /** * ww_mutex_lock_slow - slowpath acquiring of the w/w mutex diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 554671c81f4a..90708f68cc02 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -987,7 +987,7 @@ static inline void hci_conn_drop(struct hci_conn *conn) static inline void hci_dev_put(struct hci_dev *d) { BT_DBG("%s orig refcnt %d", d->name, - atomic_read(&d->dev.kobj.kref.refcount)); + kref_read(&d->dev.kobj.kref)); put_device(&d->dev); } @@ -995,7 +995,7 @@ static inline void hci_dev_put(struct hci_dev *d) static inline struct hci_dev *hci_dev_hold(struct hci_dev *d) { BT_DBG("%s orig refcnt %d", d->name, - atomic_read(&d->dev.kobj.kref.refcount)); + kref_read(&d->dev.kobj.kref)); get_device(&d->dev); return d; diff --git a/init/version.c b/init/version.c index fe41a63efed6..5606341e9efd 100644 --- a/init/version.c +++ b/init/version.c @@ -23,9 +23,7 @@ int version_string(LINUX_VERSION_CODE); #endif struct uts_namespace init_uts_ns = { - .kref = { - .refcount = ATOMIC_INIT(2), - }, + .kref = KREF_INIT(2), .name = { .sysname = UTS_SYSNAME, .nodename = UTS_NODENAME, diff --git a/kernel/exit.c b/kernel/exit.c index 8e5e21338b3a..b67c57faa705 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -55,6 +55,7 @@ #include <linux/shm.h> #include <linux/kcov.h> #include <linux/random.h> +#include <linux/rcuwait.h> #include <linux/uaccess.h> #include <asm/unistd.h> @@ -282,6 +283,35 @@ retry: return task; } +void rcuwait_wake_up(struct rcuwait *w) +{ + struct task_struct *task; + + rcu_read_lock(); + + /* + * Order condition vs @task, such that everything 
prior to the load + * of @task is visible. This is the condition as to why the user called + * rcuwait_wake_up() in the first place. Pairs with set_current_state() + * barrier (A) in rcuwait_wait_event(). + * + * WAIT WAKE + * [S] tsk = current [S] cond = true + * MB (A) MB (B) + * [L] cond [L] tsk + */ + smp_rmb(); /* (B) */ + + /* + * Avoid using task_rcu_dereference() magic as long as we are careful, + * see comment in rcuwait_wait_event() regarding ->exit_state. + */ + task = rcu_dereference(w->task); + if (task) + wake_up_process(task); + rcu_read_unlock(); +} + struct task_struct *try_get_task_struct(struct task_struct **ptask) { struct task_struct *task; @@ -468,12 +498,12 @@ assign_new_owner: * Turn us into a lazy TLB process if we * aren't already.. */ -static void exit_mm(struct task_struct *tsk) +static void exit_mm(void) { - struct mm_struct *mm = tsk->mm; + struct mm_struct *mm = current->mm; struct core_state *core_state; - mm_release(tsk, mm); + mm_release(current, mm); if (!mm) return; sync_mm_rss(mm); @@ -491,7 +521,7 @@ static void exit_mm(struct task_struct *tsk) up_read(&mm->mmap_sem); - self.task = tsk; + self.task = current; self.next = xchg(&core_state->dumper.next, &self); /* * Implies mb(), the result of xchg() must be visible @@ -501,22 +531,22 @@ static void exit_mm(struct task_struct *tsk) complete(&core_state->startup); for (;;) { - set_task_state(tsk, TASK_UNINTERRUPTIBLE); + set_current_state(TASK_UNINTERRUPTIBLE); if (!self.task) /* see coredump_finish() */ break; freezable_schedule(); } - __set_task_state(tsk, TASK_RUNNING); + __set_current_state(TASK_RUNNING); down_read(&mm->mmap_sem); } atomic_inc(&mm->mm_count); - BUG_ON(mm != tsk->active_mm); + BUG_ON(mm != current->active_mm); /* more a memory barrier than a real lock */ - task_lock(tsk); - tsk->mm = NULL; + task_lock(current); + current->mm = NULL; up_read(&mm->mmap_sem); enter_lazy_tlb(mm, current); - task_unlock(tsk); + task_unlock(current); mm_update_next_owner(mm);
mmput(mm); if (test_thread_flag(TIF_MEMDIE)) @@ -823,7 +853,7 @@ void __noreturn do_exit(long code) tsk->exit_code = code; taskstats_exit(tsk, group_dead); - exit_mm(tsk); + exit_mm(); if (group_dead) acct_process(); diff --git a/kernel/fork.c b/kernel/fork.c index f6995cdfe714..ff82e24573b6 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -432,11 +432,13 @@ void __init fork_init(void) int i; #ifndef CONFIG_ARCH_TASK_STRUCT_ALLOCATOR #ifndef ARCH_MIN_TASKALIGN -#define ARCH_MIN_TASKALIGN L1_CACHE_BYTES +#define ARCH_MIN_TASKALIGN 0 #endif + int align = max_t(int, L1_CACHE_BYTES, ARCH_MIN_TASKALIGN); + /* create a slab on which task_structs can be allocated */ task_struct_cachep = kmem_cache_create("task_struct", - arch_task_struct_size, ARCH_MIN_TASKALIGN, + arch_task_struct_size, align, SLAB_PANIC|SLAB_NOTRACK|SLAB_ACCOUNT, NULL); #endif diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile index 6f88e352cd4f..760158d9d98d 100644 --- a/kernel/locking/Makefile +++ b/kernel/locking/Makefile @@ -28,3 +28,4 @@ obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem-xadd.o obj-$(CONFIG_QUEUED_RWLOCKS) += qrwlock.o obj-$(CONFIG_LOCK_TORTURE_TEST) += locktorture.o +obj-$(CONFIG_WW_MUTEX_SELFTEST) += test-ww_mutex.o diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index d9a698e8458f..9812e5dd409e 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -2203,7 +2203,7 @@ cache_hit: * Important for check_no_collision(). 
*/ if (unlikely(nr_chain_hlocks > MAX_LOCKDEP_CHAIN_HLOCKS)) { - if (debug_locks_off_graph_unlock()) + if (!debug_locks_off_graph_unlock()) return 0; print_lockdep_off("BUG: MAX_LOCKDEP_CHAIN_HLOCKS too low!"); diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c index d3de04b12f8c..28350dc8ecbb 100644 --- a/kernel/locking/locktorture.c +++ b/kernel/locking/locktorture.c @@ -372,6 +372,78 @@ static struct lock_torture_ops mutex_lock_ops = { .name = "mutex_lock" }; +#include <linux/ww_mutex.h> +static DEFINE_WW_CLASS(torture_ww_class); +static DEFINE_WW_MUTEX(torture_ww_mutex_0, &torture_ww_class); +static DEFINE_WW_MUTEX(torture_ww_mutex_1, &torture_ww_class); +static DEFINE_WW_MUTEX(torture_ww_mutex_2, &torture_ww_class); + +static int torture_ww_mutex_lock(void) +__acquires(torture_ww_mutex_0) +__acquires(torture_ww_mutex_1) +__acquires(torture_ww_mutex_2) +{ + LIST_HEAD(list); + struct reorder_lock { + struct list_head link; + struct ww_mutex *lock; + } locks[3], *ll, *ln; + struct ww_acquire_ctx ctx; + + locks[0].lock = &torture_ww_mutex_0; + list_add(&locks[0].link, &list); + + locks[1].lock = &torture_ww_mutex_1; + list_add(&locks[1].link, &list); + + locks[2].lock = &torture_ww_mutex_2; + list_add(&locks[2].link, &list); + + ww_acquire_init(&ctx, &torture_ww_class); + + list_for_each_entry(ll, &list, link) { + int err; + + err = ww_mutex_lock(ll->lock, &ctx); + if (!err) + continue; + + ln = ll; + list_for_each_entry_continue_reverse(ln, &list, link) + ww_mutex_unlock(ln->lock); + + if (err != -EDEADLK) + return err; + + ww_mutex_lock_slow(ll->lock, &ctx); + list_move(&ll->link, &list); + } + + ww_acquire_fini(&ctx); + return 0; +} + +static void torture_ww_mutex_unlock(void) +__releases(torture_ww_mutex_0) +__releases(torture_ww_mutex_1) +__releases(torture_ww_mutex_2) +{ + ww_mutex_unlock(&torture_ww_mutex_0); + ww_mutex_unlock(&torture_ww_mutex_1); + ww_mutex_unlock(&torture_ww_mutex_2); +} + +static struct lock_torture_ops 
ww_mutex_lock_ops = { + .writelock = torture_ww_mutex_lock, + .write_delay = torture_mutex_delay, + .task_boost = torture_boost_dummy, + .writeunlock = torture_ww_mutex_unlock, + .readlock = NULL, + .read_delay = NULL, + .readunlock = NULL, + .name = "ww_mutex_lock" +}; + #ifdef CONFIG_RT_MUTEXES static DEFINE_RT_MUTEX(torture_rtmutex); @@ -797,6 +869,7 @@ static int __init lock_torture_init(void) &spin_lock_ops, &spin_lock_irq_ops, &rw_lock_ops, &rw_lock_irq_ops, &mutex_lock_ops, + &ww_mutex_lock_ops, #ifdef CONFIG_RT_MUTEXES &rtmutex_lock_ops, #endif diff --git a/kernel/locking/mutex-debug.h b/kernel/locking/mutex-debug.h index a459faa48987..4174417d5309 100644 --- a/kernel/locking/mutex-debug.h +++ b/kernel/locking/mutex-debug.h @@ -26,20 +26,3 @@ extern void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter, extern void debug_mutex_unlock(struct mutex *lock); extern void debug_mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key); - -#define spin_lock_mutex(lock, flags) \ - do { \ - struct mutex *l = container_of(lock, struct mutex, wait_lock); \ - \ - DEBUG_LOCKS_WARN_ON(in_interrupt()); \ - local_irq_save(flags); \ - arch_spin_lock(&(lock)->rlock.raw_lock);\ - DEBUG_LOCKS_WARN_ON(l->magic != l); \ - } while (0) - -#define spin_unlock_mutex(lock, flags) \ - do { \ - arch_spin_unlock(&(lock)->rlock.raw_lock); \ - local_irq_restore(flags); \ - preempt_check_resched(); \ - } while (0) diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index 8464a5cbab97..ad2d9e22697b 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c @@ -50,16 +50,17 @@ EXPORT_SYMBOL(__mutex_init); /* * @owner: contains: 'struct task_struct *' to the current lock owner, * NULL means not owned. Since task_struct pointers are aligned at - * ARCH_MIN_TASKALIGN (which is at least sizeof(void *)), we have low - * bits to store extra state. + * at least L1_CACHE_BYTES, we have low bits to store extra state. 
* * Bit0 indicates a non-empty waiter list; unlock must issue a wakeup. * Bit1 indicates unlock needs to hand the lock to the top-waiter + * Bit2 indicates handoff has been done and we're waiting for pickup. */ #define MUTEX_FLAG_WAITERS 0x01 #define MUTEX_FLAG_HANDOFF 0x02 +#define MUTEX_FLAG_PICKUP 0x04 -#define MUTEX_FLAGS 0x03 +#define MUTEX_FLAGS 0x07 static inline struct task_struct *__owner_task(unsigned long owner) { @@ -72,38 +73,29 @@ static inline unsigned long __owner_flags(unsigned long owner) } /* - * Actual trylock that will work on any unlocked state. - * - * When setting the owner field, we must preserve the low flag bits. - * - * Be careful with @handoff, only set that in a wait-loop (where you set - * HANDOFF) to avoid recursive lock attempts. + * Trylock variant that retuns the owning task on failure. */ -static inline bool __mutex_trylock(struct mutex *lock, const bool handoff) +static inline struct task_struct *__mutex_trylock_or_owner(struct mutex *lock) { unsigned long owner, curr = (unsigned long)current; owner = atomic_long_read(&lock->owner); for (;;) { /* must loop, can race against a flag */ unsigned long old, flags = __owner_flags(owner); + unsigned long task = owner & ~MUTEX_FLAGS; - if (__owner_task(owner)) { - if (handoff && unlikely(__owner_task(owner) == current)) { - /* - * Provide ACQUIRE semantics for the lock-handoff. - * - * We cannot easily use load-acquire here, since - * the actual load is a failed cmpxchg, which - * doesn't imply any barriers. - * - * Also, this is a fairly unlikely scenario, and - * this contains the cost. 
- */ - smp_mb(); /* ACQUIRE */ - return true; - } + if (task) { + if (likely(task != curr)) + break; - return false; + if (likely(!(flags & MUTEX_FLAG_PICKUP))) + break; + + flags &= ~MUTEX_FLAG_PICKUP; + } else { +#ifdef CONFIG_DEBUG_MUTEXES + DEBUG_LOCKS_WARN_ON(flags & MUTEX_FLAG_PICKUP); +#endif } /* @@ -111,15 +103,24 @@ static inline bool __mutex_trylock(struct mutex *lock, const bool handoff) * past the point where we acquire it. This would be possible * if we (accidentally) set the bit on an unlocked mutex. */ - if (handoff) - flags &= ~MUTEX_FLAG_HANDOFF; + flags &= ~MUTEX_FLAG_HANDOFF; old = atomic_long_cmpxchg_acquire(&lock->owner, owner, curr | flags); if (old == owner) - return true; + return NULL; owner = old; } + + return __owner_task(owner); +} + +/* + * Actual trylock that will work on any unlocked state. + */ +static inline bool __mutex_trylock(struct mutex *lock) +{ + return !__mutex_trylock_or_owner(lock); } #ifndef CONFIG_DEBUG_LOCK_ALLOC @@ -171,9 +172,9 @@ static inline bool __mutex_waiter_is_first(struct mutex *lock, struct mutex_wait /* * Give up ownership to a specific task, when @task = NULL, this is equivalent - * to a regular unlock. Clears HANDOFF, preserves WAITERS. Provides RELEASE - * semantics like a regular unlock, the __mutex_trylock() provides matching - * ACQUIRE semantics for the handoff. + * to a regular unlock. Sets PICKUP on a handoff, clears HANDOF, preserves + * WAITERS. Provides RELEASE semantics like a regular unlock, the + * __mutex_trylock() provides a matching ACQUIRE semantics for the handoff. 
*/ static void __mutex_handoff(struct mutex *lock, struct task_struct *task) { @@ -184,10 +185,13 @@ static void __mutex_handoff(struct mutex *lock, struct task_struct *task) #ifdef CONFIG_DEBUG_MUTEXES DEBUG_LOCKS_WARN_ON(__owner_task(owner) != current); + DEBUG_LOCKS_WARN_ON(owner & MUTEX_FLAG_PICKUP); #endif new = (owner & MUTEX_FLAG_WAITERS); new |= (unsigned long)task; + if (task) + new |= MUTEX_FLAG_PICKUP; old = atomic_long_cmpxchg_release(&lock->owner, owner, new); if (old == owner) @@ -237,8 +241,8 @@ void __sched mutex_lock(struct mutex *lock) EXPORT_SYMBOL(mutex_lock); #endif -static __always_inline void ww_mutex_lock_acquired(struct ww_mutex *ww, - struct ww_acquire_ctx *ww_ctx) +static __always_inline void +ww_mutex_lock_acquired(struct ww_mutex *ww, struct ww_acquire_ctx *ww_ctx) { #ifdef CONFIG_DEBUG_MUTEXES /* @@ -277,17 +281,50 @@ static __always_inline void ww_mutex_lock_acquired(struct ww_mutex *ww, ww_ctx->acquired++; } +static inline bool __sched +__ww_ctx_stamp_after(struct ww_acquire_ctx *a, struct ww_acquire_ctx *b) +{ + return a->stamp - b->stamp <= LONG_MAX && + (a->stamp != b->stamp || a > b); +} + +/* + * Wake up any waiters that may have to back off when the lock is held by the + * given context. + * + * Due to the invariants on the wait list, this can only affect the first + * waiter with a context. + * + * The current task must not be on the wait list. + */ +static void __sched +__ww_mutex_wakeup_for_backoff(struct mutex *lock, struct ww_acquire_ctx *ww_ctx) +{ + struct mutex_waiter *cur; + + lockdep_assert_held(&lock->wait_lock); + + list_for_each_entry(cur, &lock->wait_list, list) { + if (!cur->ww_ctx) + continue; + + if (cur->ww_ctx->acquired > 0 && + __ww_ctx_stamp_after(cur->ww_ctx, ww_ctx)) { + debug_mutex_wake_waiter(lock, cur); + wake_up_process(cur->task); + } + + break; + } +} + /* * After acquiring lock with fastpath or when we lost out in contested * slowpath, set ctx and wake up any waiters so they can recheck. 
*/ static __always_inline void -ww_mutex_set_context_fastpath(struct ww_mutex *lock, - struct ww_acquire_ctx *ctx) +ww_mutex_set_context_fastpath(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) { - unsigned long flags; - struct mutex_waiter *cur; - ww_mutex_lock_acquired(lock, ctx); lock->ctx = ctx; @@ -311,46 +348,79 @@ ww_mutex_set_context_fastpath(struct ww_mutex *lock, * Uh oh, we raced in fastpath, wake up everyone in this case, * so they can see the new lock->ctx. */ - spin_lock_mutex(&lock->base.wait_lock, flags); - list_for_each_entry(cur, &lock->base.wait_list, list) { - debug_mutex_wake_waiter(&lock->base, cur); - wake_up_process(cur->task); - } - spin_unlock_mutex(&lock->base.wait_lock, flags); + spin_lock(&lock->base.wait_lock); + __ww_mutex_wakeup_for_backoff(&lock->base, ctx); + spin_unlock(&lock->base.wait_lock); } /* - * After acquiring lock in the slowpath set ctx and wake up any - * waiters so they can recheck. + * After acquiring lock in the slowpath set ctx. + * + * Unlike for the fast path, the caller ensures that waiters are woken up where + * necessary. * * Callers must hold the mutex wait_lock. */ static __always_inline void -ww_mutex_set_context_slowpath(struct ww_mutex *lock, - struct ww_acquire_ctx *ctx) +ww_mutex_set_context_slowpath(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) { - struct mutex_waiter *cur; - ww_mutex_lock_acquired(lock, ctx); lock->ctx = ctx; +} + +#ifdef CONFIG_MUTEX_SPIN_ON_OWNER + +static inline +bool ww_mutex_spin_on_owner(struct mutex *lock, struct ww_acquire_ctx *ww_ctx, + struct mutex_waiter *waiter) +{ + struct ww_mutex *ww; + + ww = container_of(lock, struct ww_mutex, base); /* - * Give any possible sleeping processes the chance to wake up, - * so they can recheck if they have to back off. + * If ww->ctx is set the contents are undefined, only + * by acquiring wait_lock there is a guarantee that + * they are not invalid when reading. 
+ * + * As such, when deadlock detection needs to be + * performed the optimistic spinning cannot be done. + * + * Check this in every inner iteration because we may + * be racing against another thread's ww_mutex_lock. */ - list_for_each_entry(cur, &lock->base.wait_list, list) { - debug_mutex_wake_waiter(&lock->base, cur); - wake_up_process(cur->task); - } + if (ww_ctx->acquired > 0 && READ_ONCE(ww->ctx)) + return false; + + /* + * If we aren't on the wait list yet, cancel the spin + * if there are waiters. We want to avoid stealing the + * lock from a waiter with an earlier stamp, since the + * other thread may already own a lock that we also + * need. + */ + if (!waiter && (atomic_long_read(&lock->owner) & MUTEX_FLAG_WAITERS)) + return false; + + /* + * Similarly, stop spinning if we are no longer the + * first waiter. + */ + if (waiter && !__mutex_waiter_is_first(lock, waiter)) + return false; + + return true; } -#ifdef CONFIG_MUTEX_SPIN_ON_OWNER /* - * Look out! "owner" is an entirely speculative pointer - * access and not reliable. + * Look out! "owner" is an entirely speculative pointer access and not + * reliable. + * + * "noinline" so that this function shows up on perf profiles. */ static noinline -bool mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner) +bool mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner, + struct ww_acquire_ctx *ww_ctx, struct mutex_waiter *waiter) { bool ret = true; @@ -373,6 +443,11 @@ bool mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner) break; } + if (ww_ctx && !ww_mutex_spin_on_owner(lock, ww_ctx, waiter)) { + ret = false; + break; + } + cpu_relax(); } rcu_read_unlock(); @@ -431,12 +506,10 @@ static inline int mutex_can_spin_on_owner(struct mutex *lock) * with the spinner at the head of the OSQ, if present, until the owner is * changed to itself. 
*/ -static bool mutex_optimistic_spin(struct mutex *lock, - struct ww_acquire_ctx *ww_ctx, - const bool use_ww_ctx, const bool waiter) +static __always_inline bool +mutex_optimistic_spin(struct mutex *lock, struct ww_acquire_ctx *ww_ctx, + const bool use_ww_ctx, struct mutex_waiter *waiter) { - struct task_struct *task = current; - if (!waiter) { /* * The purpose of the mutex_can_spin_on_owner() function is @@ -460,40 +533,17 @@ static bool mutex_optimistic_spin(struct mutex *lock, for (;;) { struct task_struct *owner; - if (use_ww_ctx && ww_ctx->acquired > 0) { - struct ww_mutex *ww; - - ww = container_of(lock, struct ww_mutex, base); - /* - * If ww->ctx is set the contents are undefined, only - * by acquiring wait_lock there is a guarantee that - * they are not invalid when reading. - * - * As such, when deadlock detection needs to be - * performed the optimistic spinning cannot be done. - */ - if (READ_ONCE(ww->ctx)) - goto fail_unlock; - } + /* Try to acquire the mutex... */ + owner = __mutex_trylock_or_owner(lock); + if (!owner) + break; /* - * If there's an owner, wait for it to either + * There's an owner, wait for it to either * release the lock or go to sleep. */ - owner = __mutex_owner(lock); - if (owner) { - if (waiter && owner == task) { - smp_mb(); /* ACQUIRE */ - break; - } - - if (!mutex_spin_on_owner(lock, owner)) - goto fail_unlock; - } - - /* Try to acquire the mutex if it is unlocked. 
*/ - if (__mutex_trylock(lock, waiter)) - break; + if (!mutex_spin_on_owner(lock, owner, ww_ctx, waiter)) + goto fail_unlock; /* * The cpu_relax() call is a compiler barrier which forces @@ -532,9 +582,9 @@ fail: return false; } #else -static bool mutex_optimistic_spin(struct mutex *lock, - struct ww_acquire_ctx *ww_ctx, - const bool use_ww_ctx, const bool waiter) +static __always_inline bool +mutex_optimistic_spin(struct mutex *lock, struct ww_acquire_ctx *ww_ctx, + const bool use_ww_ctx, struct mutex_waiter *waiter) { return false; } @@ -594,23 +644,88 @@ void __sched ww_mutex_unlock(struct ww_mutex *lock) EXPORT_SYMBOL(ww_mutex_unlock); static inline int __sched -__ww_mutex_lock_check_stamp(struct mutex *lock, struct ww_acquire_ctx *ctx) +__ww_mutex_lock_check_stamp(struct mutex *lock, struct mutex_waiter *waiter, + struct ww_acquire_ctx *ctx) { struct ww_mutex *ww = container_of(lock, struct ww_mutex, base); struct ww_acquire_ctx *hold_ctx = READ_ONCE(ww->ctx); + struct mutex_waiter *cur; + + if (hold_ctx && __ww_ctx_stamp_after(ctx, hold_ctx)) + goto deadlock; - if (!hold_ctx) + /* + * If there is a waiter in front of us that has a context, then its + * stamp is earlier than ours and we must back off. + */ + cur = waiter; + list_for_each_entry_continue_reverse(cur, &lock->wait_list, list) { + if (cur->ww_ctx) + goto deadlock; + } + + return 0; + +deadlock: +#ifdef CONFIG_DEBUG_MUTEXES + DEBUG_LOCKS_WARN_ON(ctx->contending_lock); + ctx->contending_lock = ww; +#endif + return -EDEADLK; +} + +static inline int __sched +__ww_mutex_add_waiter(struct mutex_waiter *waiter, + struct mutex *lock, + struct ww_acquire_ctx *ww_ctx) +{ + struct mutex_waiter *cur; + struct list_head *pos; + + if (!ww_ctx) { + list_add_tail(&waiter->list, &lock->wait_list); return 0; + } - if (ctx->stamp - hold_ctx->stamp <= LONG_MAX && - (ctx->stamp != hold_ctx->stamp || ctx > hold_ctx)) { + /* + * Add the waiter before the first waiter with a higher stamp. 
+ * Waiters without a context are skipped to avoid starving + * them. + */ + pos = &lock->wait_list; + list_for_each_entry_reverse(cur, &lock->wait_list, list) { + if (!cur->ww_ctx) + continue; + + if (__ww_ctx_stamp_after(ww_ctx, cur->ww_ctx)) { + /* Back off immediately if necessary. */ + if (ww_ctx->acquired > 0) { #ifdef CONFIG_DEBUG_MUTEXES - DEBUG_LOCKS_WARN_ON(ctx->contending_lock); - ctx->contending_lock = ww; + struct ww_mutex *ww; + + ww = container_of(lock, struct ww_mutex, base); + DEBUG_LOCKS_WARN_ON(ww_ctx->contending_lock); + ww_ctx->contending_lock = ww; #endif - return -EDEADLK; + return -EDEADLK; + } + + break; + } + + pos = &cur->list; + + /* + * Wake up the waiter so that it gets a chance to back + * off. + */ + if (cur->ww_ctx->acquired > 0) { + debug_mutex_wake_waiter(lock, cur); + wake_up_process(cur->task); + } } + list_add_tail(&waiter->list, pos); return 0; } @@ -622,15 +737,15 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, struct lockdep_map *nest_lock, unsigned long ip, struct ww_acquire_ctx *ww_ctx, const bool use_ww_ctx) { - struct task_struct *task = current; struct mutex_waiter waiter; - unsigned long flags; bool first = false; struct ww_mutex *ww; int ret; - if (use_ww_ctx) { - ww = container_of(lock, struct ww_mutex, base); + might_sleep(); + + ww = container_of(lock, struct ww_mutex, base); + if (use_ww_ctx && ww_ctx) { if (unlikely(ww_ctx == READ_ONCE(ww->ctx))) return -EALREADY; } @@ -638,36 +753,54 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, preempt_disable(); mutex_acquire_nest(&lock->dep_map, subclass, 0, nest_lock, ip); - if (__mutex_trylock(lock, false) || - mutex_optimistic_spin(lock, ww_ctx, use_ww_ctx, false)) { + if (__mutex_trylock(lock) || + mutex_optimistic_spin(lock, ww_ctx, use_ww_ctx, NULL)) { /* got the lock, yay! 
*/ lock_acquired(&lock->dep_map, ip); - if (use_ww_ctx) + if (use_ww_ctx && ww_ctx) ww_mutex_set_context_fastpath(ww, ww_ctx); preempt_enable(); return 0; } - spin_lock_mutex(&lock->wait_lock, flags); + spin_lock(&lock->wait_lock); /* * After waiting to acquire the wait_lock, try again. */ - if (__mutex_trylock(lock, false)) + if (__mutex_trylock(lock)) { + if (use_ww_ctx && ww_ctx) + __ww_mutex_wakeup_for_backoff(lock, ww_ctx); + goto skip_wait; + } debug_mutex_lock_common(lock, &waiter); - debug_mutex_add_waiter(lock, &waiter, task); + debug_mutex_add_waiter(lock, &waiter, current); - /* add waiting tasks to the end of the waitqueue (FIFO): */ - list_add_tail(&waiter.list, &lock->wait_list); - waiter.task = task; + lock_contended(&lock->dep_map, ip); + + if (!use_ww_ctx) { + /* add waiting tasks to the end of the waitqueue (FIFO): */ + list_add_tail(&waiter.list, &lock->wait_list); + +#ifdef CONFIG_DEBUG_MUTEXES + waiter.ww_ctx = MUTEX_POISON_WW_CTX; +#endif + } else { + /* Add in stamp order, waking up waiters that must back off. */ + ret = __ww_mutex_add_waiter(&waiter, lock, ww_ctx); + if (ret) + goto err_early_backoff; + + waiter.ww_ctx = ww_ctx; + } + + waiter.task = current; if (__mutex_waiter_is_first(lock, &waiter)) __mutex_set_flag(lock, MUTEX_FLAG_WAITERS); - lock_contended(&lock->dep_map, ip); - - set_task_state(task, state); + set_current_state(state); for (;;) { /* * Once we hold wait_lock, we're serialized against @@ -675,7 +808,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, * before testing the error conditions to make sure we pick up * the handoff. */ - if (__mutex_trylock(lock, first)) + if (__mutex_trylock(lock)) goto acquired; /* @@ -683,42 +816,47 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, * wait_lock. This ensures the lock cancellation is ordered * against mutex_unlock() and wake-ups do not go missing. 
*/ - if (unlikely(signal_pending_state(state, task))) { + if (unlikely(signal_pending_state(state, current))) { ret = -EINTR; goto err; } - if (use_ww_ctx && ww_ctx->acquired > 0) { - ret = __ww_mutex_lock_check_stamp(lock, ww_ctx); + if (use_ww_ctx && ww_ctx && ww_ctx->acquired > 0) { + ret = __ww_mutex_lock_check_stamp(lock, &waiter, ww_ctx); if (ret) goto err; } - spin_unlock_mutex(&lock->wait_lock, flags); + spin_unlock(&lock->wait_lock); schedule_preempt_disabled(); - if (!first && __mutex_waiter_is_first(lock, &waiter)) { - first = true; - __mutex_set_flag(lock, MUTEX_FLAG_HANDOFF); + /* + * ww_mutex needs to always recheck its position since its waiter + * list is not FIFO ordered. + */ + if ((use_ww_ctx && ww_ctx) || !first) { + first = __mutex_waiter_is_first(lock, &waiter); + if (first) + __mutex_set_flag(lock, MUTEX_FLAG_HANDOFF); } - set_task_state(task, state); + set_current_state(state); /* * Here we order against unlock; we must either see it change * state back to RUNNING and fall through the next schedule(), * or we must see its unlock and acquire. */ - if ((first && mutex_optimistic_spin(lock, ww_ctx, use_ww_ctx, true)) || - __mutex_trylock(lock, first)) + if (__mutex_trylock(lock) || + (first && mutex_optimistic_spin(lock, ww_ctx, use_ww_ctx, &waiter))) break; - spin_lock_mutex(&lock->wait_lock, flags); + spin_lock(&lock->wait_lock); } - spin_lock_mutex(&lock->wait_lock, flags); + spin_lock(&lock->wait_lock); acquired: - __set_task_state(task, TASK_RUNNING); + __set_current_state(TASK_RUNNING); - mutex_remove_waiter(lock, &waiter, task); + mutex_remove_waiter(lock, &waiter, current); if (likely(list_empty(&lock->wait_list))) __mutex_clear_flag(lock, MUTEX_FLAGS); @@ -728,30 +866,44 @@ skip_wait: /* got the lock - cleanup and rejoice! 
*/ lock_acquired(&lock->dep_map, ip); - if (use_ww_ctx) + if (use_ww_ctx && ww_ctx) ww_mutex_set_context_slowpath(ww, ww_ctx); - spin_unlock_mutex(&lock->wait_lock, flags); + spin_unlock(&lock->wait_lock); preempt_enable(); return 0; err: - __set_task_state(task, TASK_RUNNING); - mutex_remove_waiter(lock, &waiter, task); - spin_unlock_mutex(&lock->wait_lock, flags); + __set_current_state(TASK_RUNNING); + mutex_remove_waiter(lock, &waiter, current); +err_early_backoff: + spin_unlock(&lock->wait_lock); debug_mutex_free_waiter(&waiter); mutex_release(&lock->dep_map, 1, ip); preempt_enable(); return ret; } +static int __sched +__mutex_lock(struct mutex *lock, long state, unsigned int subclass, + struct lockdep_map *nest_lock, unsigned long ip) +{ + return __mutex_lock_common(lock, state, subclass, nest_lock, ip, NULL, false); +} + +static int __sched +__ww_mutex_lock(struct mutex *lock, long state, unsigned int subclass, + struct lockdep_map *nest_lock, unsigned long ip, + struct ww_acquire_ctx *ww_ctx) +{ + return __mutex_lock_common(lock, state, subclass, nest_lock, ip, ww_ctx, true); +} + #ifdef CONFIG_DEBUG_LOCK_ALLOC void __sched mutex_lock_nested(struct mutex *lock, unsigned int subclass) { - might_sleep(); - __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, - subclass, NULL, _RET_IP_, NULL, 0); + __mutex_lock(lock, TASK_UNINTERRUPTIBLE, subclass, NULL, _RET_IP_); } EXPORT_SYMBOL_GPL(mutex_lock_nested); @@ -759,27 +911,21 @@ EXPORT_SYMBOL_GPL(mutex_lock_nested); void __sched _mutex_lock_nest_lock(struct mutex *lock, struct lockdep_map *nest) { - might_sleep(); - __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, - 0, nest, _RET_IP_, NULL, 0); + __mutex_lock(lock, TASK_UNINTERRUPTIBLE, 0, nest, _RET_IP_); } EXPORT_SYMBOL_GPL(_mutex_lock_nest_lock); int __sched mutex_lock_killable_nested(struct mutex *lock, unsigned int subclass) { - might_sleep(); - return __mutex_lock_common(lock, TASK_KILLABLE, - subclass, NULL, _RET_IP_, NULL, 0); + return __mutex_lock(lock, 
TASK_KILLABLE, subclass, NULL, _RET_IP_); } EXPORT_SYMBOL_GPL(mutex_lock_killable_nested); int __sched mutex_lock_interruptible_nested(struct mutex *lock, unsigned int subclass) { - might_sleep(); - return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, - subclass, NULL, _RET_IP_, NULL, 0); + return __mutex_lock(lock, TASK_INTERRUPTIBLE, subclass, NULL, _RET_IP_); } EXPORT_SYMBOL_GPL(mutex_lock_interruptible_nested); @@ -824,35 +970,37 @@ ww_mutex_deadlock_injection(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) } int __sched -__ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) +ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) { int ret; might_sleep(); - ret = __mutex_lock_common(&lock->base, TASK_UNINTERRUPTIBLE, - 0, &ctx->dep_map, _RET_IP_, ctx, 1); - if (!ret && ctx->acquired > 1) + ret = __ww_mutex_lock(&lock->base, TASK_UNINTERRUPTIBLE, + 0, ctx ? &ctx->dep_map : NULL, _RET_IP_, + ctx); + if (!ret && ctx && ctx->acquired > 1) return ww_mutex_deadlock_injection(lock, ctx); return ret; } -EXPORT_SYMBOL_GPL(__ww_mutex_lock); +EXPORT_SYMBOL_GPL(ww_mutex_lock); int __sched -__ww_mutex_lock_interruptible(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) +ww_mutex_lock_interruptible(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) { int ret; might_sleep(); - ret = __mutex_lock_common(&lock->base, TASK_INTERRUPTIBLE, - 0, &ctx->dep_map, _RET_IP_, ctx, 1); + ret = __ww_mutex_lock(&lock->base, TASK_INTERRUPTIBLE, + 0, ctx ? 
&ctx->dep_map : NULL, _RET_IP_, + ctx); - if (!ret && ctx->acquired > 1) + if (!ret && ctx && ctx->acquired > 1) return ww_mutex_deadlock_injection(lock, ctx); return ret; } -EXPORT_SYMBOL_GPL(__ww_mutex_lock_interruptible); +EXPORT_SYMBOL_GPL(ww_mutex_lock_interruptible); #endif @@ -862,8 +1010,8 @@ EXPORT_SYMBOL_GPL(__ww_mutex_lock_interruptible); static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigned long ip) { struct task_struct *next = NULL; - unsigned long owner, flags; DEFINE_WAKE_Q(wake_q); + unsigned long owner; mutex_release(&lock->dep_map, 1, ip); @@ -880,6 +1028,7 @@ static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigne #ifdef CONFIG_DEBUG_MUTEXES DEBUG_LOCKS_WARN_ON(__owner_task(owner) != current); + DEBUG_LOCKS_WARN_ON(owner & MUTEX_FLAG_PICKUP); #endif if (owner & MUTEX_FLAG_HANDOFF) @@ -897,7 +1046,7 @@ static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigne owner = old; } - spin_lock_mutex(&lock->wait_lock, flags); + spin_lock(&lock->wait_lock); debug_mutex_unlock(lock); if (!list_empty(&lock->wait_list)) { /* get the first entry from the wait-list: */ @@ -914,7 +1063,7 @@ static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigne if (owner & MUTEX_FLAG_HANDOFF) __mutex_handoff(lock, next); - spin_unlock_mutex(&lock->wait_lock, flags); + spin_unlock(&lock->wait_lock); wake_up_q(&wake_q); } @@ -977,37 +1126,34 @@ EXPORT_SYMBOL_GPL(mutex_lock_io); static noinline void __sched __mutex_lock_slowpath(struct mutex *lock) { - __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0, - NULL, _RET_IP_, NULL, 0); + __mutex_lock(lock, TASK_UNINTERRUPTIBLE, 0, NULL, _RET_IP_); } static noinline int __sched __mutex_lock_killable_slowpath(struct mutex *lock) { - return __mutex_lock_common(lock, TASK_KILLABLE, 0, - NULL, _RET_IP_, NULL, 0); + return __mutex_lock(lock, TASK_KILLABLE, 0, NULL, _RET_IP_); } static noinline int __sched 
__mutex_lock_interruptible_slowpath(struct mutex *lock) { - return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, 0, - NULL, _RET_IP_, NULL, 0); + return __mutex_lock(lock, TASK_INTERRUPTIBLE, 0, NULL, _RET_IP_); } static noinline int __sched __ww_mutex_lock_slowpath(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) { - return __mutex_lock_common(&lock->base, TASK_UNINTERRUPTIBLE, 0, - NULL, _RET_IP_, ctx, 1); + return __ww_mutex_lock(&lock->base, TASK_UNINTERRUPTIBLE, 0, NULL, + _RET_IP_, ctx); } static noinline int __sched __ww_mutex_lock_interruptible_slowpath(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) { - return __mutex_lock_common(&lock->base, TASK_INTERRUPTIBLE, 0, - NULL, _RET_IP_, ctx, 1); + return __ww_mutex_lock(&lock->base, TASK_INTERRUPTIBLE, 0, NULL, + _RET_IP_, ctx); } #endif @@ -1028,7 +1174,7 @@ __ww_mutex_lock_interruptible_slowpath(struct ww_mutex *lock, */ int __sched mutex_trylock(struct mutex *lock) { - bool locked = __mutex_trylock(lock, false); + bool locked = __mutex_trylock(lock); if (locked) mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_); @@ -1039,32 +1185,34 @@ EXPORT_SYMBOL(mutex_trylock); #ifndef CONFIG_DEBUG_LOCK_ALLOC int __sched -__ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) +ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) { might_sleep(); if (__mutex_trylock_fast(&lock->base)) { - ww_mutex_set_context_fastpath(lock, ctx); + if (ctx) + ww_mutex_set_context_fastpath(lock, ctx); return 0; } return __ww_mutex_lock_slowpath(lock, ctx); } -EXPORT_SYMBOL(__ww_mutex_lock); +EXPORT_SYMBOL(ww_mutex_lock); int __sched -__ww_mutex_lock_interruptible(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) +ww_mutex_lock_interruptible(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) { might_sleep(); if (__mutex_trylock_fast(&lock->base)) { - ww_mutex_set_context_fastpath(lock, ctx); + if (ctx) + ww_mutex_set_context_fastpath(lock, ctx); return 0; } return __ww_mutex_lock_interruptible_slowpath(lock, ctx); 
} -EXPORT_SYMBOL(__ww_mutex_lock_interruptible); +EXPORT_SYMBOL(ww_mutex_lock_interruptible); #endif diff --git a/kernel/locking/mutex.h b/kernel/locking/mutex.h index 4410a4af42a3..6ebc1902f779 100644 --- a/kernel/locking/mutex.h +++ b/kernel/locking/mutex.h @@ -9,10 +9,6 @@ * !CONFIG_DEBUG_MUTEXES case. Most of them are NOPs: */ -#define spin_lock_mutex(lock, flags) \ - do { spin_lock(lock); (void)(flags); } while (0) -#define spin_unlock_mutex(lock, flags) \ - do { spin_unlock(lock); (void)(flags); } while (0) #define mutex_remove_waiter(lock, waiter, task) \ __list_del((waiter)->list.prev, (waiter)->list.next) diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c index ce182599cf2e..883cf1b92d90 100644 --- a/kernel/locking/percpu-rwsem.c +++ b/kernel/locking/percpu-rwsem.c @@ -1,7 +1,6 @@ #include <linux/atomic.h> #include <linux/rwsem.h> #include <linux/percpu.h> -#include <linux/wait.h> #include <linux/lockdep.h> #include <linux/percpu-rwsem.h> #include <linux/rcupdate.h> @@ -18,7 +17,7 @@ int __percpu_init_rwsem(struct percpu_rw_semaphore *sem, /* ->rw_sem represents the whole percpu_rw_semaphore for lockdep */ rcu_sync_init(&sem->rss, RCU_SCHED_SYNC); __init_rwsem(&sem->rw_sem, name, rwsem_key); - init_waitqueue_head(&sem->writer); + rcuwait_init(&sem->writer); sem->readers_block = 0; return 0; } @@ -103,7 +102,7 @@ void __percpu_up_read(struct percpu_rw_semaphore *sem) __this_cpu_dec(*sem->read_count); /* Prod writer to recheck readers_active */ - wake_up(&sem->writer); + rcuwait_wake_up(&sem->writer); } EXPORT_SYMBOL_GPL(__percpu_up_read); @@ -160,7 +159,7 @@ void percpu_down_write(struct percpu_rw_semaphore *sem) */ /* Wait for all now active readers to complete. 
*/ - wait_event(sem->writer, readers_active_check(sem)); + rcuwait_wait_event(&sem->writer, readers_active_check(sem)); } EXPORT_SYMBOL_GPL(percpu_down_write); diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h index e3b5520005db..e6b2f7ad3e51 100644 --- a/kernel/locking/qspinlock_paravirt.h +++ b/kernel/locking/qspinlock_paravirt.h @@ -263,7 +263,7 @@ pv_wait_early(struct pv_node *prev, int loop) if ((loop & PV_PREV_CHECK_MASK) != 0) return false; - return READ_ONCE(prev->state) != vcpu_running; + return READ_ONCE(prev->state) != vcpu_running || vcpu_is_preempted(prev->cpu); } /* diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index 2f443ed2320a..d340be3a488f 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -1179,7 +1179,7 @@ __rt_mutex_slowlock(struct rt_mutex *lock, int state, * TASK_INTERRUPTIBLE checks for signals and * timeout. Ignored otherwise. */ - if (unlikely(state == TASK_INTERRUPTIBLE)) { + if (likely(state == TASK_INTERRUPTIBLE)) { /* Signal pending? 
*/ if (signal_pending(current)) ret = -EINTR; diff --git a/kernel/locking/rwsem-spinlock.c b/kernel/locking/rwsem-spinlock.c index 1591f6b3539f..5eacab880f67 100644 --- a/kernel/locking/rwsem-spinlock.c +++ b/kernel/locking/rwsem-spinlock.c @@ -128,7 +128,6 @@ __rwsem_wake_one_writer(struct rw_semaphore *sem) void __sched __down_read(struct rw_semaphore *sem) { struct rwsem_waiter waiter; - struct task_struct *tsk; unsigned long flags; raw_spin_lock_irqsave(&sem->wait_lock, flags); @@ -140,13 +139,12 @@ void __sched __down_read(struct rw_semaphore *sem) goto out; } - tsk = current; - set_task_state(tsk, TASK_UNINTERRUPTIBLE); + set_current_state(TASK_UNINTERRUPTIBLE); /* set up my own style of waitqueue */ - waiter.task = tsk; + waiter.task = current; waiter.type = RWSEM_WAITING_FOR_READ; - get_task_struct(tsk); + get_task_struct(current); list_add_tail(&waiter.list, &sem->wait_list); @@ -158,10 +156,10 @@ void __sched __down_read(struct rw_semaphore *sem) if (!waiter.task) break; schedule(); - set_task_state(tsk, TASK_UNINTERRUPTIBLE); + set_current_state(TASK_UNINTERRUPTIBLE); } - __set_task_state(tsk, TASK_RUNNING); + __set_current_state(TASK_RUNNING); out: ; } @@ -194,15 +192,13 @@ int __down_read_trylock(struct rw_semaphore *sem) int __sched __down_write_common(struct rw_semaphore *sem, int state) { struct rwsem_waiter waiter; - struct task_struct *tsk; unsigned long flags; int ret = 0; raw_spin_lock_irqsave(&sem->wait_lock, flags); /* set up my own style of waitqueue */ - tsk = current; - waiter.task = tsk; + waiter.task = current; waiter.type = RWSEM_WAITING_FOR_WRITE; list_add_tail(&waiter.list, &sem->wait_list); @@ -220,7 +216,7 @@ int __sched __down_write_common(struct rw_semaphore *sem, int state) ret = -EINTR; goto out; } - set_task_state(tsk, state); + set_current_state(state); raw_spin_unlock_irqrestore(&sem->wait_lock, flags); schedule(); raw_spin_lock_irqsave(&sem->wait_lock, flags); diff --git a/kernel/locking/rwsem-xadd.c 
b/kernel/locking/rwsem-xadd.c index 631506004f9e..2ad8d8dc3bb1 100644 --- a/kernel/locking/rwsem-xadd.c +++ b/kernel/locking/rwsem-xadd.c @@ -224,10 +224,9 @@ struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem) { long count, adjustment = -RWSEM_ACTIVE_READ_BIAS; struct rwsem_waiter waiter; - struct task_struct *tsk = current; DEFINE_WAKE_Q(wake_q); - waiter.task = tsk; + waiter.task = current; waiter.type = RWSEM_WAITING_FOR_READ; raw_spin_lock_irq(&sem->wait_lock); @@ -254,13 +253,13 @@ struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem) /* wait to be given the lock */ while (true) { - set_task_state(tsk, TASK_UNINTERRUPTIBLE); + set_current_state(TASK_UNINTERRUPTIBLE); if (!waiter.task) break; schedule(); } - __set_task_state(tsk, TASK_RUNNING); + __set_current_state(TASK_RUNNING); return sem; } EXPORT_SYMBOL(rwsem_down_read_failed); @@ -503,8 +502,6 @@ __rwsem_down_write_failed_common(struct rw_semaphore *sem, int state) * wake any read locks that were queued ahead of us. */ if (count > RWSEM_WAITING_BIAS) { - DEFINE_WAKE_Q(wake_q); - __rwsem_mark_wake(sem, RWSEM_WAKE_READERS, &wake_q); /* * The wakeup is normally called _after_ the wait_lock @@ -514,6 +511,11 @@ __rwsem_down_write_failed_common(struct rw_semaphore *sem, int state) * for attempting rwsem_try_write_lock(). */ wake_up_q(&wake_q); + + /* + * Reinitialize wake_q after use. 
+ */ + wake_q_init(&wake_q); } } else diff --git a/kernel/locking/semaphore.c b/kernel/locking/semaphore.c index b8120abe594b..9512e37637dc 100644 --- a/kernel/locking/semaphore.c +++ b/kernel/locking/semaphore.c @@ -204,19 +204,18 @@ struct semaphore_waiter { static inline int __sched __down_common(struct semaphore *sem, long state, long timeout) { - struct task_struct *task = current; struct semaphore_waiter waiter; list_add_tail(&waiter.list, &sem->wait_list); - waiter.task = task; + waiter.task = current; waiter.up = false; for (;;) { - if (signal_pending_state(state, task)) + if (signal_pending_state(state, current)) goto interrupted; if (unlikely(timeout <= 0)) goto timed_out; - __set_task_state(task, state); + __set_current_state(state); raw_spin_unlock_irq(&sem->lock); timeout = schedule_timeout(timeout); raw_spin_lock_irq(&sem->lock); diff --git a/kernel/locking/spinlock.c b/kernel/locking/spinlock.c index db3ccb1dd614..4b082b5cac9e 100644 --- a/kernel/locking/spinlock.c +++ b/kernel/locking/spinlock.c @@ -363,14 +363,6 @@ void __lockfunc _raw_spin_lock_nested(raw_spinlock_t *lock, int subclass) } EXPORT_SYMBOL(_raw_spin_lock_nested); -void __lockfunc _raw_spin_lock_bh_nested(raw_spinlock_t *lock, int subclass) -{ - __local_bh_disable_ip(_RET_IP_, SOFTIRQ_LOCK_OFFSET); - spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_); - LOCK_CONTENDED(lock, do_raw_spin_trylock, do_raw_spin_lock); -} -EXPORT_SYMBOL(_raw_spin_lock_bh_nested); - unsigned long __lockfunc _raw_spin_lock_irqsave_nested(raw_spinlock_t *lock, int subclass) { diff --git a/kernel/locking/spinlock_debug.c b/kernel/locking/spinlock_debug.c index 0374a596cffa..9aa0fccd5d43 100644 --- a/kernel/locking/spinlock_debug.c +++ b/kernel/locking/spinlock_debug.c @@ -103,38 +103,14 @@ static inline void debug_spin_unlock(raw_spinlock_t *lock) lock->owner_cpu = -1; } -static void __spin_lock_debug(raw_spinlock_t *lock) -{ - u64 i; - u64 loops = loops_per_jiffy * HZ; - - for (i = 0; i < loops; i++) { - if 
(arch_spin_trylock(&lock->raw_lock)) - return; - __delay(1); - } - /* lockup suspected: */ - spin_dump(lock, "lockup suspected"); -#ifdef CONFIG_SMP - trigger_all_cpu_backtrace(); -#endif - - /* - * The trylock above was causing a livelock. Give the lower level arch - * specific lock code a chance to acquire the lock. We have already - * printed a warning/backtrace at this point. The non-debug arch - * specific code might actually succeed in acquiring the lock. If it is - * not successful, the end-result is the same - there is no forward - * progress. - */ - arch_spin_lock(&lock->raw_lock); -} - +/* + * We are now relying on the NMI watchdog to detect lockup instead of doing + * the detection here with an unfair lock which can cause problem of its own. + */ void do_raw_spin_lock(raw_spinlock_t *lock) { debug_spin_lock_before(lock); - if (unlikely(!arch_spin_trylock(&lock->raw_lock))) - __spin_lock_debug(lock); + arch_spin_lock(&lock->raw_lock); debug_spin_lock_after(lock); } @@ -172,32 +148,6 @@ static void rwlock_bug(rwlock_t *lock, const char *msg) #define RWLOCK_BUG_ON(cond, lock, msg) if (unlikely(cond)) rwlock_bug(lock, msg) -#if 0 /* __write_lock_debug() can lock up - maybe this can too? 
*/ -static void __read_lock_debug(rwlock_t *lock) -{ - u64 i; - u64 loops = loops_per_jiffy * HZ; - int print_once = 1; - - for (;;) { - for (i = 0; i < loops; i++) { - if (arch_read_trylock(&lock->raw_lock)) - return; - __delay(1); - } - /* lockup suspected: */ - if (print_once) { - print_once = 0; - printk(KERN_EMERG "BUG: read-lock lockup on CPU#%d, " - "%s/%d, %p\n", - raw_smp_processor_id(), current->comm, - current->pid, lock); - dump_stack(); - } - } -} -#endif - void do_raw_read_lock(rwlock_t *lock) { RWLOCK_BUG_ON(lock->magic != RWLOCK_MAGIC, lock, "bad magic"); @@ -247,32 +197,6 @@ static inline void debug_write_unlock(rwlock_t *lock) lock->owner_cpu = -1; } -#if 0 /* This can cause lockups */ -static void __write_lock_debug(rwlock_t *lock) -{ - u64 i; - u64 loops = loops_per_jiffy * HZ; - int print_once = 1; - - for (;;) { - for (i = 0; i < loops; i++) { - if (arch_write_trylock(&lock->raw_lock)) - return; - __delay(1); - } - /* lockup suspected: */ - if (print_once) { - print_once = 0; - printk(KERN_EMERG "BUG: write-lock lockup on CPU#%d, " - "%s/%d, %p\n", - raw_smp_processor_id(), current->comm, - current->pid, lock); - dump_stack(); - } - } -} -#endif - void do_raw_write_lock(rwlock_t *lock) { debug_write_lock_before(lock); diff --git a/kernel/locking/test-ww_mutex.c b/kernel/locking/test-ww_mutex.c new file mode 100644 index 000000000000..da6c9a34f62f --- /dev/null +++ b/kernel/locking/test-ww_mutex.c @@ -0,0 +1,646 @@ +/* + * Module-based API test facility for ww_mutexes + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + */ + +#include <linux/kernel.h> + +#include <linux/completion.h> +#include <linux/delay.h> +#include <linux/kthread.h> +#include <linux/module.h> +#include <linux/random.h> +#include <linux/slab.h> +#include <linux/ww_mutex.h> + +static DEFINE_WW_CLASS(ww_class); +struct workqueue_struct *wq; + +struct test_mutex { + struct work_struct work; + struct ww_mutex mutex; + struct completion ready, go, done; + unsigned int flags; +}; + +#define TEST_MTX_SPIN BIT(0) +#define TEST_MTX_TRY BIT(1) +#define TEST_MTX_CTX BIT(2) +#define __TEST_MTX_LAST BIT(3) + +static void test_mutex_work(struct work_struct *work) +{ + struct test_mutex *mtx = container_of(work, typeof(*mtx), work); + + complete(&mtx->ready); + wait_for_completion(&mtx->go); + + if (mtx->flags & TEST_MTX_TRY) { + while (!ww_mutex_trylock(&mtx->mutex)) + cpu_relax(); + } else { + ww_mutex_lock(&mtx->mutex, NULL); + } + complete(&mtx->done); + ww_mutex_unlock(&mtx->mutex); +} + +static int __test_mutex(unsigned int flags) +{ +#define TIMEOUT (HZ / 16) + struct test_mutex mtx; + struct ww_acquire_ctx ctx; + int ret; + + ww_mutex_init(&mtx.mutex, &ww_class); + ww_acquire_init(&ctx, &ww_class); + + INIT_WORK_ONSTACK(&mtx.work, test_mutex_work); + init_completion(&mtx.ready); + init_completion(&mtx.go); + init_completion(&mtx.done); + mtx.flags = flags; + + schedule_work(&mtx.work); + + wait_for_completion(&mtx.ready); + ww_mutex_lock(&mtx.mutex, (flags & TEST_MTX_CTX) ? 
&ctx : NULL); + complete(&mtx.go); + if (flags & TEST_MTX_SPIN) { + unsigned long timeout = jiffies + TIMEOUT; + + ret = 0; + do { + if (completion_done(&mtx.done)) { + ret = -EINVAL; + break; + } + cpu_relax(); + } while (time_before(jiffies, timeout)); + } else { + ret = wait_for_completion_timeout(&mtx.done, TIMEOUT); + } + ww_mutex_unlock(&mtx.mutex); + ww_acquire_fini(&ctx); + + if (ret) { + pr_err("%s(flags=%x): mutual exclusion failure\n", + __func__, flags); + ret = -EINVAL; + } + + flush_work(&mtx.work); + destroy_work_on_stack(&mtx.work); + return ret; +#undef TIMEOUT +} + +static int test_mutex(void) +{ + int ret; + int i; + + for (i = 0; i < __TEST_MTX_LAST; i++) { + ret = __test_mutex(i); + if (ret) + return ret; + } + + return 0; +} + +static int test_aa(void) +{ + struct ww_mutex mutex; + struct ww_acquire_ctx ctx; + int ret; + + ww_mutex_init(&mutex, &ww_class); + ww_acquire_init(&ctx, &ww_class); + + ww_mutex_lock(&mutex, &ctx); + + if (ww_mutex_trylock(&mutex)) { + pr_err("%s: trylocked itself!\n", __func__); + ww_mutex_unlock(&mutex); + ret = -EINVAL; + goto out; + } + + ret = ww_mutex_lock(&mutex, &ctx); + if (ret != -EALREADY) { + pr_err("%s: missed deadlock for recursing, ret=%d\n", + __func__, ret); + if (!ret) + ww_mutex_unlock(&mutex); + ret = -EINVAL; + goto out; + } + + ret = 0; +out: + ww_mutex_unlock(&mutex); + ww_acquire_fini(&ctx); + return ret; +} + +struct test_abba { + struct work_struct work; + struct ww_mutex a_mutex; + struct ww_mutex b_mutex; + struct completion a_ready; + struct completion b_ready; + bool resolve; + int result; +}; + +static void test_abba_work(struct work_struct *work) +{ + struct test_abba *abba = container_of(work, typeof(*abba), work); + struct ww_acquire_ctx ctx; + int err; + + ww_acquire_init(&ctx, &ww_class); + ww_mutex_lock(&abba->b_mutex, &ctx); + + complete(&abba->b_ready); + wait_for_completion(&abba->a_ready); + + err = ww_mutex_lock(&abba->a_mutex, &ctx); + if (abba->resolve && err == -EDEADLK) { 
+ ww_mutex_unlock(&abba->b_mutex); + ww_mutex_lock_slow(&abba->a_mutex, &ctx); + err = ww_mutex_lock(&abba->b_mutex, &ctx); + } + + if (!err) + ww_mutex_unlock(&abba->a_mutex); + ww_mutex_unlock(&abba->b_mutex); + ww_acquire_fini(&ctx); + + abba->result = err; +} + +static int test_abba(bool resolve) +{ + struct test_abba abba; + struct ww_acquire_ctx ctx; + int err, ret; + + ww_mutex_init(&abba.a_mutex, &ww_class); + ww_mutex_init(&abba.b_mutex, &ww_class); + INIT_WORK_ONSTACK(&abba.work, test_abba_work); + init_completion(&abba.a_ready); + init_completion(&abba.b_ready); + abba.resolve = resolve; + + schedule_work(&abba.work); + + ww_acquire_init(&ctx, &ww_class); + ww_mutex_lock(&abba.a_mutex, &ctx); + + complete(&abba.a_ready); + wait_for_completion(&abba.b_ready); + + err = ww_mutex_lock(&abba.b_mutex, &ctx); + if (resolve && err == -EDEADLK) { + ww_mutex_unlock(&abba.a_mutex); + ww_mutex_lock_slow(&abba.b_mutex, &ctx); + err = ww_mutex_lock(&abba.a_mutex, &ctx); + } + + if (!err) + ww_mutex_unlock(&abba.b_mutex); + ww_mutex_unlock(&abba.a_mutex); + ww_acquire_fini(&ctx); + + flush_work(&abba.work); + destroy_work_on_stack(&abba.work); + + ret = 0; + if (resolve) { + if (err || abba.result) { + pr_err("%s: failed to resolve ABBA deadlock, A err=%d, B err=%d\n", + __func__, err, abba.result); + ret = -EINVAL; + } + } else { + if (err != -EDEADLK && abba.result != -EDEADLK) { + pr_err("%s: missed ABBA deadlock, A err=%d, B err=%d\n", + __func__, err, abba.result); + ret = -EINVAL; + } + } + return ret; +} + +struct test_cycle { + struct work_struct work; + struct ww_mutex a_mutex; + struct ww_mutex *b_mutex; + struct completion *a_signal; + struct completion b_signal; + int result; +}; + +static void test_cycle_work(struct work_struct *work) +{ + struct test_cycle *cycle = container_of(work, typeof(*cycle), work); + struct ww_acquire_ctx ctx; + int err; + + ww_acquire_init(&ctx, &ww_class); + ww_mutex_lock(&cycle->a_mutex, &ctx); + + complete(cycle->a_signal); + 
wait_for_completion(&cycle->b_signal); + + err = ww_mutex_lock(cycle->b_mutex, &ctx); + if (err == -EDEADLK) { + ww_mutex_unlock(&cycle->a_mutex); + ww_mutex_lock_slow(cycle->b_mutex, &ctx); + err = ww_mutex_lock(&cycle->a_mutex, &ctx); + } + + if (!err) + ww_mutex_unlock(cycle->b_mutex); + ww_mutex_unlock(&cycle->a_mutex); + ww_acquire_fini(&ctx); + + cycle->result = err; +} + +static int __test_cycle(unsigned int nthreads) +{ + struct test_cycle *cycles; + unsigned int n, last = nthreads - 1; + int ret; + + cycles = kmalloc_array(nthreads, sizeof(*cycles), GFP_KERNEL); + if (!cycles) + return -ENOMEM; + + for (n = 0; n < nthreads; n++) { + struct test_cycle *cycle = &cycles[n]; + + ww_mutex_init(&cycle->a_mutex, &ww_class); + if (n == last) + cycle->b_mutex = &cycles[0].a_mutex; + else + cycle->b_mutex = &cycles[n + 1].a_mutex; + + if (n == 0) + cycle->a_signal = &cycles[last].b_signal; + else + cycle->a_signal = &cycles[n - 1].b_signal; + init_completion(&cycle->b_signal); + + INIT_WORK(&cycle->work, test_cycle_work); + cycle->result = 0; + } + + for (n = 0; n < nthreads; n++) + queue_work(wq, &cycles[n].work); + + flush_workqueue(wq); + + ret = 0; + for (n = 0; n < nthreads; n++) { + struct test_cycle *cycle = &cycles[n]; + + if (!cycle->result) + continue; + + pr_err("cylic deadlock not resolved, ret[%d/%d] = %d\n", + n, nthreads, cycle->result); + ret = -EINVAL; + break; + } + + for (n = 0; n < nthreads; n++) + ww_mutex_destroy(&cycles[n].a_mutex); + kfree(cycles); + return ret; +} + +static int test_cycle(unsigned int ncpus) +{ + unsigned int n; + int ret; + + for (n = 2; n <= ncpus + 1; n++) { + ret = __test_cycle(n); + if (ret) + return ret; + } + + return 0; +} + +struct stress { + struct work_struct work; + struct ww_mutex *locks; + int nlocks; + int nloops; +}; + +static int *get_random_order(int count) +{ + int *order; + int n, r, tmp; + + order = kmalloc_array(count, sizeof(*order), GFP_TEMPORARY); + if (!order) + return order; + + for (n = 0; n < 
count; n++) + order[n] = n; + + for (n = count - 1; n > 1; n--) { + r = get_random_int() % (n + 1); + if (r != n) { + tmp = order[n]; + order[n] = order[r]; + order[r] = tmp; + } + } + + return order; +} + +static void dummy_load(struct stress *stress) +{ + usleep_range(1000, 2000); +} + +static void stress_inorder_work(struct work_struct *work) +{ + struct stress *stress = container_of(work, typeof(*stress), work); + const int nlocks = stress->nlocks; + struct ww_mutex *locks = stress->locks; + struct ww_acquire_ctx ctx; + int *order; + + order = get_random_order(nlocks); + if (!order) + return; + + ww_acquire_init(&ctx, &ww_class); + + do { + int contended = -1; + int n, err; + +retry: + err = 0; + for (n = 0; n < nlocks; n++) { + if (n == contended) + continue; + + err = ww_mutex_lock(&locks[order[n]], &ctx); + if (err < 0) + break; + } + if (!err) + dummy_load(stress); + + if (contended > n) + ww_mutex_unlock(&locks[order[contended]]); + contended = n; + while (n--) + ww_mutex_unlock(&locks[order[n]]); + + if (err == -EDEADLK) { + ww_mutex_lock_slow(&locks[order[contended]], &ctx); + goto retry; + } + + if (err) { + pr_err_once("stress (%s) failed with %d\n", + __func__, err); + break; + } + } while (--stress->nloops); + + ww_acquire_fini(&ctx); + + kfree(order); + kfree(stress); +} + +struct reorder_lock { + struct list_head link; + struct ww_mutex *lock; +}; + +static void stress_reorder_work(struct work_struct *work) +{ + struct stress *stress = container_of(work, typeof(*stress), work); + LIST_HEAD(locks); + struct ww_acquire_ctx ctx; + struct reorder_lock *ll, *ln; + int *order; + int n, err; + + order = get_random_order(stress->nlocks); + if (!order) + return; + + for (n = 0; n < stress->nlocks; n++) { + ll = kmalloc(sizeof(*ll), GFP_KERNEL); + if (!ll) + goto out; + + ll->lock = &stress->locks[order[n]]; + list_add(&ll->link, &locks); + } + kfree(order); + order = NULL; + + ww_acquire_init(&ctx, &ww_class); + + do { + list_for_each_entry(ll, &locks, 
link) { + err = ww_mutex_lock(ll->lock, &ctx); + if (!err) + continue; + + ln = ll; + list_for_each_entry_continue_reverse(ln, &locks, link) + ww_mutex_unlock(ln->lock); + + if (err != -EDEADLK) { + pr_err_once("stress (%s) failed with %d\n", + __func__, err); + break; + } + + ww_mutex_lock_slow(ll->lock, &ctx); + list_move(&ll->link, &locks); /* restarts iteration */ + } + + dummy_load(stress); + list_for_each_entry(ll, &locks, link) + ww_mutex_unlock(ll->lock); + } while (--stress->nloops); + + ww_acquire_fini(&ctx); + +out: + list_for_each_entry_safe(ll, ln, &locks, link) + kfree(ll); + kfree(order); + kfree(stress); +} + +static void stress_one_work(struct work_struct *work) +{ + struct stress *stress = container_of(work, typeof(*stress), work); + const int nlocks = stress->nlocks; + struct ww_mutex *lock = stress->locks + (get_random_int() % nlocks); + int err; + + do { + err = ww_mutex_lock(lock, NULL); + if (!err) { + dummy_load(stress); + ww_mutex_unlock(lock); + } else { + pr_err_once("stress (%s) failed with %d\n", + __func__, err); + break; + } + } while (--stress->nloops); + + kfree(stress); +} + +#define STRESS_INORDER BIT(0) +#define STRESS_REORDER BIT(1) +#define STRESS_ONE BIT(2) +#define STRESS_ALL (STRESS_INORDER | STRESS_REORDER | STRESS_ONE) + +static int stress(int nlocks, int nthreads, int nloops, unsigned int flags) +{ + struct ww_mutex *locks; + int n; + + locks = kmalloc_array(nlocks, sizeof(*locks), GFP_KERNEL); + if (!locks) + return -ENOMEM; + + for (n = 0; n < nlocks; n++) + ww_mutex_init(&locks[n], &ww_class); + + for (n = 0; nthreads; n++) { + struct stress *stress; + void (*fn)(struct work_struct *work); + + fn = NULL; + switch (n & 3) { + case 0: + if (flags & STRESS_INORDER) + fn = stress_inorder_work; + break; + case 1: + if (flags & STRESS_REORDER) + fn = stress_reorder_work; + break; + case 2: + if (flags & STRESS_ONE) + fn = stress_one_work; + break; + } + + if (!fn) + continue; + + stress = kmalloc(sizeof(*stress), 
GFP_KERNEL); + if (!stress) + break; + + INIT_WORK(&stress->work, fn); + stress->locks = locks; + stress->nlocks = nlocks; + stress->nloops = nloops; + + queue_work(wq, &stress->work); + nthreads--; + } + + flush_workqueue(wq); + + for (n = 0; n < nlocks; n++) + ww_mutex_destroy(&locks[n]); + kfree(locks); + + return 0; +} + +static int __init test_ww_mutex_init(void) +{ + int ncpus = num_online_cpus(); + int ret; + + wq = alloc_workqueue("test-ww_mutex", WQ_UNBOUND, 0); + if (!wq) + return -ENOMEM; + + ret = test_mutex(); + if (ret) + return ret; + + ret = test_aa(); + if (ret) + return ret; + + ret = test_abba(false); + if (ret) + return ret; + + ret = test_abba(true); + if (ret) + return ret; + + ret = test_cycle(ncpus); + if (ret) + return ret; + + ret = stress(16, 2*ncpus, 1<<10, STRESS_INORDER); + if (ret) + return ret; + + ret = stress(16, 2*ncpus, 1<<10, STRESS_REORDER); + if (ret) + return ret; + + ret = stress(4096, hweight32(STRESS_ALL)*ncpus, 1<<12, STRESS_ALL); + if (ret) + return ret; + + return 0; +} + +static void __exit test_ww_mutex_exit(void) +{ + destroy_workqueue(wq); +} + +module_init(test_ww_mutex_init); +module_exit(test_ww_mutex_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Intel Corporation"); diff --git a/kernel/pid.c b/kernel/pid.c index f66162f2359b..0291804151b5 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -68,9 +68,7 @@ static inline int mk_pid(struct pid_namespace *pid_ns, * the scheme scales to up to 4 million PIDs, runtime. */ struct pid_namespace init_pid_ns = { - .kref = { - .refcount = ATOMIC_INIT(2), - }, + .kref = KREF_INIT(2), .pidmap = { [ 0 ... 
PIDMAP_ENTRIES-1] = { ATOMIC_INIT(BITS_PER_PAGE), NULL } }, diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 07c89e4b5d60..acedbe626d47 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -716,6 +716,19 @@ source "lib/Kconfig.kmemcheck" source "lib/Kconfig.kasan" +config DEBUG_REFCOUNT + bool "Verbose refcount checks" + help + Say Y here if you want reference counters (refcount_t and kref) to + generate WARNs on dubious usage. Without this refcount_t will still + be a saturating counter and avoid Use-After-Free by turning it into + a resource leak Denial-Of-Service. + + Use of this option will increase kernel text size but will alert the + admin of potential abuse. + + If in doubt, say "N". + endmenu # "Memory Debugging" config ARCH_HAS_KCOV @@ -1166,6 +1179,18 @@ config LOCK_TORTURE_TEST Say M if you want these torture tests to build as a module. Say N if you are unsure. +config WW_MUTEX_SELFTEST + tristate "Wait/wound mutex selftests" + help + This option provides a kernel module that runs tests on the + on the struct ww_mutex locking API. + + It is recommended to enable DEBUG_WW_MUTEX_SLOWPATH in conjunction + with this test harness. + + Say M if you want these self tests to build as a module. + Say N if you are unsure. + endmenu # lock debugging config TRACE_IRQFLAGS diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c index 1904a93f47d5..d491529332f4 100644 --- a/net/bluetooth/6lowpan.c +++ b/net/bluetooth/6lowpan.c @@ -920,7 +920,7 @@ static void chan_close_cb(struct l2cap_chan *chan) BT_DBG("dev %p removing %speer %p", dev, last ? 
"last " : "1 ", peer); BT_DBG("chan %p orig refcnt %d", chan, - atomic_read(&chan->kref.refcount)); + kref_read(&chan->kref)); l2cap_chan_put(chan); break; diff --git a/net/bluetooth/a2mp.c b/net/bluetooth/a2mp.c index 5f123c3320a7..f0095fd79818 100644 --- a/net/bluetooth/a2mp.c +++ b/net/bluetooth/a2mp.c @@ -810,7 +810,7 @@ static struct l2cap_chan *a2mp_chan_open(struct l2cap_conn *conn, bool locked) /* AMP Manager functions */ struct amp_mgr *amp_mgr_get(struct amp_mgr *mgr) { - BT_DBG("mgr %p orig refcnt %d", mgr, atomic_read(&mgr->kref.refcount)); + BT_DBG("mgr %p orig refcnt %d", mgr, kref_read(&mgr->kref)); kref_get(&mgr->kref); @@ -833,7 +833,7 @@ static void amp_mgr_destroy(struct kref *kref) int amp_mgr_put(struct amp_mgr *mgr) { - BT_DBG("mgr %p orig refcnt %d", mgr, atomic_read(&mgr->kref.refcount)); + BT_DBG("mgr %p orig refcnt %d", mgr, kref_read(&mgr->kref)); return kref_put(&mgr->kref, &_mgr_destroy); } diff --git a/net/bluetooth/amp.c b/net/bluetooth/amp.c index e32f34189007..02a4ccc04e1e 100644 --- a/net/bluetooth/amp.c +++ b/net/bluetooth/amp.c @@ -24,7 +24,7 @@ void amp_ctrl_get(struct amp_ctrl *ctrl) { BT_DBG("ctrl %p orig refcnt %d", ctrl, - atomic_read(&ctrl->kref.refcount)); + kref_read(&ctrl->kref)); kref_get(&ctrl->kref); } @@ -42,7 +42,7 @@ static void amp_ctrl_destroy(struct kref *kref) int amp_ctrl_put(struct amp_ctrl *ctrl) { BT_DBG("ctrl %p orig refcnt %d", ctrl, - atomic_read(&ctrl->kref.refcount)); + kref_read(&ctrl->kref)); return kref_put(&ctrl->kref, &_ctrl_destroy); } diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index ce0b5dd01953..fc7f321a3823 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -481,14 +481,14 @@ static void l2cap_chan_destroy(struct kref *kref) void l2cap_chan_hold(struct l2cap_chan *c) { - BT_DBG("chan %p orig refcnt %d", c, atomic_read(&c->kref.refcount)); + BT_DBG("chan %p orig refcnt %d", c, kref_read(&c->kref)); kref_get(&c->kref); } void 
l2cap_chan_put(struct l2cap_chan *c) { - BT_DBG("chan %p orig refcnt %d", c, atomic_read(&c->kref.refcount)); + BT_DBG("chan %p orig refcnt %d", c, kref_read(&c->kref)); kref_put(&c->kref, l2cap_chan_destroy); } diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 770c52701efa..bad3d4ae43f6 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -3425,7 +3425,7 @@ static void ceph_msg_release(struct kref *kref) struct ceph_msg *ceph_msg_get(struct ceph_msg *msg) { dout("%s %p (was %d)\n", __func__, msg, - atomic_read(&msg->kref.refcount)); + kref_read(&msg->kref)); kref_get(&msg->kref); return msg; } @@ -3434,7 +3434,7 @@ EXPORT_SYMBOL(ceph_msg_get); void ceph_msg_put(struct ceph_msg *msg) { dout("%s %p (was %d)\n", __func__, msg, - atomic_read(&msg->kref.refcount)); + kref_read(&msg->kref)); kref_put(&msg->kref, ceph_msg_release); } EXPORT_SYMBOL(ceph_msg_put); diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 842f049abb86..f3378ba1a828 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -438,7 +438,7 @@ static void ceph_osdc_release_request(struct kref *kref) void ceph_osdc_get_request(struct ceph_osd_request *req) { dout("%s %p (was %d)\n", __func__, req, - atomic_read(&req->r_kref.refcount)); + kref_read(&req->r_kref)); kref_get(&req->r_kref); } EXPORT_SYMBOL(ceph_osdc_get_request); @@ -447,7 +447,7 @@ void ceph_osdc_put_request(struct ceph_osd_request *req) { if (req) { dout("%s %p (was %d)\n", __func__, req, - atomic_read(&req->r_kref.refcount)); + kref_read(&req->r_kref)); kref_put(&req->r_kref, ceph_osdc_release_request); } } @@ -487,11 +487,11 @@ static void request_reinit(struct ceph_osd_request *req) struct ceph_msg *reply_msg = req->r_reply; dout("%s req %p\n", __func__, req); - WARN_ON(atomic_read(&req->r_kref.refcount) != 1); + WARN_ON(kref_read(&req->r_kref) != 1); request_release_checks(req); - WARN_ON(atomic_read(&request_msg->kref.refcount) != 1); - WARN_ON(atomic_read(&reply_msg->kref.refcount) != 
1); + WARN_ON(kref_read(&request_msg->kref) != 1); + WARN_ON(kref_read(&reply_msg->kref) != 1); target_destroy(&req->r_t); request_init(req); diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 8147e8d56eb2..f39e3e11f9aa 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -1358,7 +1358,7 @@ static int c_show(struct seq_file *m, void *p) ifdebug(CACHE) seq_printf(m, "# expiry=%ld refcnt=%d flags=%lx\n", convert_to_wallclock(cp->expiry_time), - atomic_read(&cp->ref.refcount), cp->flags); + kref_read(&cp->ref), cp->flags); cache_get(cp); if (cache_check(cd, cp, NULL)) /* cache_check does a cache_put on failure */ diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 9c9db55a0c1e..7bfe1fb42add 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -490,7 +490,7 @@ static struct svc_xprt *svc_xprt_dequeue(struct svc_pool *pool) svc_xprt_get(xprt); dprintk("svc: transport %p dequeued, inuse=%d\n", - xprt, atomic_read(&xprt->xpt_ref.refcount)); + xprt, kref_read(&xprt->xpt_ref)); } spin_unlock_bh(&pool->sp_lock); out: @@ -822,7 +822,7 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt) /* XPT_DATA|XPT_DEFERRED case: */ dprintk("svc: server %p, pool %u, transport %p, inuse=%d\n", rqstp, rqstp->rq_pool->sp_id, xprt, - atomic_read(&xprt->xpt_ref.refcount)); + kref_read(&xprt->xpt_ref)); rqstp->rq_deferred = svc_deferred_dequeue(xprt); if (rqstp->rq_deferred) len = svc_deferred_recv(rqstp); @@ -980,7 +980,7 @@ static void svc_age_temp_xprts(unsigned long closure) * through, close it. 
*/ if (!test_and_set_bit(XPT_OLD, &xprt->xpt_flags)) continue; - if (atomic_read(&xprt->xpt_ref.refcount) > 1 || + if (kref_read(&xprt->xpt_ref) > 1 || test_bit(XPT_BUSY, &xprt->xpt_flags)) continue; list_del_init(le); diff --git a/net/sunrpc/svcauth.c b/net/sunrpc/svcauth.c index e112da8005b5..bb8db3cb8032 100644 --- a/net/sunrpc/svcauth.c +++ b/net/sunrpc/svcauth.c @@ -126,13 +126,18 @@ EXPORT_SYMBOL_GPL(svc_auth_unregister); static struct hlist_head auth_domain_table[DN_HASHMAX]; static DEFINE_SPINLOCK(auth_domain_lock); +static void auth_domain_release(struct kref *kref) +{ + struct auth_domain *dom = container_of(kref, struct auth_domain, ref); + + hlist_del(&dom->hash); + dom->flavour->domain_release(dom); + spin_unlock(&auth_domain_lock); +} + void auth_domain_put(struct auth_domain *dom) { - if (atomic_dec_and_lock(&dom->ref.refcount, &auth_domain_lock)) { - hlist_del(&dom->hash); - dom->flavour->domain_release(dom); - spin_unlock(&auth_domain_lock); - } + kref_put_lock(&dom->ref, auth_domain_release, &auth_domain_lock); } EXPORT_SYMBOL_GPL(auth_domain_put); diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index ca2799af05a6..39652d390a9c 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -1201,9 +1201,9 @@ static void __svc_rdma_free(struct work_struct *work) ib_drain_qp(rdma->sc_qp); /* We should only be called from kref_put */ - if (atomic_read(&xprt->xpt_ref.refcount) != 0) + if (kref_read(&xprt->xpt_ref) != 0) pr_err("svcrdma: sc_xprt still in use? (%d)\n", - atomic_read(&xprt->xpt_ref.refcount)); + kref_read(&xprt->xpt_ref)); /* * Destroy queued, but not processed read completions. 
Note diff --git a/security/apparmor/include/apparmor.h b/security/apparmor/include/apparmor.h index 5d721e990876..f067be814626 100644 --- a/security/apparmor/include/apparmor.h +++ b/security/apparmor/include/apparmor.h @@ -78,12 +78,6 @@ static inline void *kvzalloc(size_t size) return __aa_kvmalloc(size, __GFP_ZERO); } -/* returns 0 if kref not incremented */ -static inline int kref_get_not0(struct kref *kref) -{ - return atomic_inc_not_zero(&kref->refcount); -} - /** * aa_strneq - compare null terminated @str to a non null terminated substring * @str: a null terminated string diff --git a/security/apparmor/include/policy.h b/security/apparmor/include/policy.h index 52275f040a5f..46467aaa557b 100644 --- a/security/apparmor/include/policy.h +++ b/security/apparmor/include/policy.h @@ -287,7 +287,7 @@ static inline struct aa_profile *aa_get_profile(struct aa_profile *p) */ static inline struct aa_profile *aa_get_profile_not0(struct aa_profile *p) { - if (p && kref_get_not0(&p->count)) + if (p && kref_get_unless_zero(&p->count)) return p; return NULL; @@ -307,7 +307,7 @@ static inline struct aa_profile *aa_get_profile_rcu(struct aa_profile __rcu **p) rcu_read_lock(); do { c = rcu_dereference(*p); - } while (c && !kref_get_not0(&c->count)); + } while (c && !kref_get_unless_zero(&c->count)); rcu_read_unlock(); return c; diff --git a/tools/testing/selftests/locking/ww_mutex.sh b/tools/testing/selftests/locking/ww_mutex.sh new file mode 100644 index 000000000000..6905da965f3b --- /dev/null +++ b/tools/testing/selftests/locking/ww_mutex.sh @@ -0,0 +1,10 @@ +#!/bin/sh +# Runs API tests for struct ww_mutex (Wait/Wound mutexes) + +if /sbin/modprobe -q test-ww_mutex; then + /sbin/modprobe -q -r test-ww_mutex + echo "locking/ww_mutex: ok" +else + echo "locking/ww_mutex: [FAIL]" + exit 1 +fi diff --git a/tools/testing/selftests/rcutorture/configs/lock/CFLIST b/tools/testing/selftests/rcutorture/configs/lock/CFLIST index b9611c523723..41bae5824339 100644 --- 
a/tools/testing/selftests/rcutorture/configs/lock/CFLIST +++ b/tools/testing/selftests/rcutorture/configs/lock/CFLIST @@ -4,3 +4,4 @@ LOCK03 LOCK04 LOCK05 LOCK06 +LOCK07 diff --git a/tools/testing/selftests/rcutorture/configs/lock/LOCK07 b/tools/testing/selftests/rcutorture/configs/lock/LOCK07 new file mode 100644 index 000000000000..1d1da1477fc3 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/lock/LOCK07 @@ -0,0 +1,6 @@ +CONFIG_SMP=y +CONFIG_NR_CPUS=4 +CONFIG_HOTPLUG_CPU=y +CONFIG_PREEMPT_NONE=n +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=y diff --git a/tools/testing/selftests/rcutorture/configs/lock/LOCK07.boot b/tools/testing/selftests/rcutorture/configs/lock/LOCK07.boot new file mode 100644 index 000000000000..97dadd1a9e45 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/lock/LOCK07.boot @@ -0,0 +1 @@ +locktorture.torture_type=ww_mutex_lock |