Diffstat (limited to 'kernel/kernel/locking')
-rw-r--r--   kernel/kernel/locking/mcs_spinlock.h    |   8
-rw-r--r--   kernel/kernel/locking/mutex.c           |   9
-rw-r--r--   kernel/kernel/locking/qspinlock.c       |  60
-rw-r--r--   kernel/kernel/locking/rtmutex.c         | 121
-rw-r--r--   kernel/kernel/locking/rtmutex_common.h  |   5
5 files changed, 188 insertions, 15 deletions
diff --git a/kernel/kernel/locking/mcs_spinlock.h b/kernel/kernel/locking/mcs_spinlock.h
index 5b9102a47..c835270f0 100644
--- a/kernel/kernel/locking/mcs_spinlock.h
+++ b/kernel/kernel/locking/mcs_spinlock.h
@@ -67,7 +67,13 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
 	node->locked = 0;
 	node->next   = NULL;
 
-	prev = xchg_acquire(lock, node);
+	/*
+	 * We rely on the full barrier with global transitivity implied by the
+	 * below xchg() to order the initialization stores above against any
+	 * observation of @node. And to provide the ACQUIRE ordering associated
+	 * with a LOCK primitive.
+	 */
+	prev = xchg(lock, node);
 	if (likely(prev == NULL)) {
 		/*
 		 * Lock acquired, don't need to set node->locked to 1. Threads
diff --git a/kernel/kernel/locking/mutex.c b/kernel/kernel/locking/mutex.c
index 0551c219c..89350f924 100644
--- a/kernel/kernel/locking/mutex.c
+++ b/kernel/kernel/locking/mutex.c
@@ -486,9 +486,6 @@ __ww_mutex_lock_check_stamp(struct mutex *lock, struct ww_acquire_ctx *ctx)
 	if (!hold_ctx)
 		return 0;
 
-	if (unlikely(ctx == hold_ctx))
-		return -EALREADY;
-
 	if (ctx->stamp - hold_ctx->stamp <= LONG_MAX &&
 	    (ctx->stamp != hold_ctx->stamp || ctx > hold_ctx)) {
 #ifdef CONFIG_DEBUG_MUTEXES
@@ -514,6 +511,12 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
 	unsigned long flags;
 	int ret;
 
+	if (use_ww_ctx) {
+		struct ww_mutex *ww = container_of(lock, struct ww_mutex, base);
+		if (unlikely(ww_ctx == READ_ONCE(ww->ctx)))
+			return -EALREADY;
+	}
+
 	preempt_disable();
 	mutex_acquire_nest(&lock->dep_map, subclass, 0, nest_lock, ip);
 
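The relocated -EALREADY check in the mutex.c hunk above reads ww->ctx before any real locking work is done, so a task that tries to re-acquire a ww_mutex with the same acquire context fails fast even when it would otherwise hit the fast path. The following standalone C sketch models only that idea; struct ww_mutex_model, struct model_ctx and model_lock() are made-up names for illustration and are not the kernel API.

#include <stdio.h>
#include <errno.h>

struct model_ctx {
	unsigned long stamp;
};

struct ww_mutex_model {
	struct model_ctx *ctx;	/* acquire context of the owner, NULL when unlocked */
};

static int model_lock(struct ww_mutex_model *lock, struct model_ctx *ctx)
{
	/*
	 * Recursive acquisition with the same context is a caller bug:
	 * report it up front, before doing any locking work at all.
	 */
	if (ctx && lock->ctx == ctx)
		return -EALREADY;

	/* Real acquisition (fast path, slow path, wait/wound logic) omitted. */
	lock->ctx = ctx;
	return 0;
}

int main(void)
{
	struct model_ctx ctx = { .stamp = 1 };
	struct ww_mutex_model lock = { .ctx = NULL };

	printf("first lock:  %d\n", model_lock(&lock, &ctx));	/* 0 */
	printf("second lock: %d\n", model_lock(&lock, &ctx));	/* -EALREADY */
	return 0;
}

Placing the check at the top of the common lock path (rather than only in the stamp comparison, as before) is what lets the error be reported even when no other waiter is involved.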
diff --git a/kernel/kernel/locking/qspinlock.c b/kernel/kernel/locking/qspinlock.c
index 87e9ce6a6..8173bc7fe 100644
--- a/kernel/kernel/locking/qspinlock.c
+++ b/kernel/kernel/locking/qspinlock.c
@@ -255,6 +255,66 @@ static __always_inline void __pv_wait_head(struct qspinlock *lock,
 #define queued_spin_lock_slowpath	native_queued_spin_lock_slowpath
 #endif
 
+/*
+ * queued_spin_lock_slowpath() can (load-)ACQUIRE the lock before
+ * issuing an _unordered_ store to set _Q_LOCKED_VAL.
+ *
+ * This means that the store can be delayed, but no later than the
+ * store-release from the unlock. This means that simply observing
+ * _Q_LOCKED_VAL is not sufficient to determine if the lock is acquired.
+ *
+ * There are two paths that can issue the unordered store:
+ *
+ *  (1) clear_pending_set_locked():	*,1,0 -> *,0,1
+ *
+ *  (2) set_locked():			t,0,0 -> t,0,1 ; t != 0
+ *      atomic_cmpxchg_relaxed():	t,0,0 -> 0,0,1
+ *
+ * However, in both cases we have other !0 state we've set before to queue
+ * ourselves:
+ *
+ * For (1) we have the atomic_cmpxchg_acquire() that set _Q_PENDING_VAL, our
+ * load is constrained by that ACQUIRE to not pass before that, and thus must
+ * observe the store.
+ *
+ * For (2) we have a more interesting scenario. We enqueue ourselves using
+ * xchg_tail(), which ends up being a RELEASE. This in itself is not
+ * sufficient, however that is followed by an smp_cond_acquire() on the same
+ * word, giving a RELEASE->ACQUIRE ordering. This again constrains our load and
+ * guarantees we must observe that store.
+ *
+ * Therefore both cases have other !0 state that is observable before the
+ * unordered locked byte store comes through. This means we can use that to
+ * wait for the lock store, and then wait for an unlock.
+ */
+#ifndef queued_spin_unlock_wait
+void queued_spin_unlock_wait(struct qspinlock *lock)
+{
+	u32 val;
+
+	for (;;) {
+		val = atomic_read(&lock->val);
+
+		if (!val) /* not locked, we're done */
+			goto done;
+
+		if (val & _Q_LOCKED_MASK) /* locked, go wait for unlock */
+			break;
+
+		/* not locked, but pending, wait until we observe the lock */
+		cpu_relax();
+	}
+
+	/* any unlock is good */
+	while (atomic_read(&lock->val) & _Q_LOCKED_MASK)
+		cpu_relax();
+
+done:
+	smp_rmb(); /* CTRL + RMB -> ACQUIRE */
+}
+EXPORT_SYMBOL(queued_spin_unlock_wait);
+#endif
+
 #endif /* _GEN_PV_LOCK_SLOWPATH */
 
 /**
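The comment and queued_spin_unlock_wait() above describe a two-phase wait: first spin until the lock word is either zero or has the locked byte set (so the delayed, unordered locked-byte store has become visible), then spin until that locked byte clears again. Below is a minimal userspace model of that control flow using C11 atomics; MODEL_LOCKED_MASK and model_unlock_wait() are simplified stand-ins for illustration, not the kernel's qspinlock implementation, and the single-threaded main() only exercises the trivial "already unlocked" case.

#include <stdatomic.h>
#include <stdio.h>

#define MODEL_LOCKED_MASK	0x000000ffU	/* stand-in for _Q_LOCKED_MASK */

/*
 * Two-phase wait modelled after the comment above: wait until the lock word
 * is either 0 (fully unlocked) or shows the locked byte, then wait for that
 * locked byte to clear again.
 */
static void model_unlock_wait(atomic_uint *val)
{
	unsigned int v;

	for (;;) {
		v = atomic_load(val);

		if (!v)				/* not locked, we're done */
			goto done;

		if (v & MODEL_LOCKED_MASK)	/* locked, go wait for unlock */
			break;

		/* pending/queued, locked byte not visible yet: keep polling */
	}

	while (atomic_load(val) & MODEL_LOCKED_MASK)
		;				/* any unlock is good */

done:
	atomic_thread_fence(memory_order_acquire);	/* control dependency + fence -> ACQUIRE */
}

int main(void)
{
	atomic_uint val = 0;

	atomic_store(&val, 1);		/* pretend the lock was taken ... */
	atomic_store(&val, 0);		/* ... and released again */
	model_unlock_wait(&val);	/* returns immediately, val is 0 */
	printf("lock word now: %u\n", atomic_load(&val));
	return 0;
}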
diff --git a/kernel/kernel/locking/rtmutex.c b/kernel/kernel/locking/rtmutex.c
index 30777e813..86a78c068 100644
--- a/kernel/kernel/locking/rtmutex.c
+++ b/kernel/kernel/locking/rtmutex.c
@@ -71,8 +71,72 @@ static inline void clear_rt_mutex_waiters(struct rt_mutex *lock)
 
 static void fixup_rt_mutex_waiters(struct rt_mutex *lock)
 {
-	if (!rt_mutex_has_waiters(lock))
-		clear_rt_mutex_waiters(lock);
+	unsigned long owner, *p = (unsigned long *) &lock->owner;
+
+	if (rt_mutex_has_waiters(lock))
+		return;
+
+	/*
+	 * The rbtree has no waiters enqueued, now make sure that the
+	 * lock->owner still has the waiters bit set, otherwise the
+	 * following can happen:
+	 *
+	 * CPU 0	CPU 1			CPU2
+	 * l->owner=T1
+	 *		rt_mutex_lock(l)
+	 *		lock(l->lock)
+	 *		l->owner = T1 | HAS_WAITERS;
+	 *		enqueue(T2)
+	 *		boost()
+	 *		  unlock(l->lock)
+	 *		block()
+	 *
+	 *				rt_mutex_lock(l)
+	 *				lock(l->lock)
+	 *				l->owner = T1 | HAS_WAITERS;
+	 *				enqueue(T3)
+	 *				boost()
+	 *				  unlock(l->lock)
+	 *				block()
+	 *	signal(->T2)	signal(->T3)
+	 *		lock(l->lock)
+	 *		dequeue(T2)
+	 *		deboost()
+	 *		  unlock(l->lock)
+	 *				lock(l->lock)
+	 *				dequeue(T3)
+	 *				 ==> wait list is empty
+	 *				deboost()
+	 *				 unlock(l->lock)
+	 *		lock(l->lock)
+	 *		fixup_rt_mutex_waiters()
+	 *		  if (wait_list_empty(l) {
+	 *		    owner = l->owner & ~HAS_WAITERS;
+	 *		    l->owner = owner
+	 *		    ==> l->owner = T1
+	 *		  }
+	 *				lock(l->lock)
+	 * rt_mutex_unlock(l)		fixup_rt_mutex_waiters()
+	 *				  if (wait_list_empty(l) {
+	 *				    owner = l->owner & ~HAS_WAITERS;
+	 * cmpxchg(l->owner, T1, NULL)
+	 *  ===> Success (l->owner = NULL)
+	 *
+	 *				    l->owner = owner
+	 *				     ==> l->owner = T1
+	 *				  }
+	 *
+	 * With the check for the waiter bit in place T3 on CPU2 will not
+	 * overwrite. All tasks fiddling with the waiters bit are
+	 * serialized by l->lock, so nothing else can modify the waiters
+	 * bit. If the bit is set then nothing can change l->owner either
+	 * so the simple RMW is safe. The cmpxchg() will simply fail if it
+	 * happens in the middle of the RMW because the waiters bit is
+	 * still set.
+	 */
+	owner = READ_ONCE(*p);
+	if (owner & RT_MUTEX_HAS_WAITERS)
+		WRITE_ONCE(*p, owner & ~RT_MUTEX_HAS_WAITERS);
 }
 
 static int rt_mutex_real_waiter(struct rt_mutex_waiter *waiter)
@@ -939,13 +1003,14 @@ static inline void rt_spin_lock_fastlock(struct rt_mutex *lock,
 		slowfn(lock, do_mig_dis);
 }
 
-static inline void rt_spin_lock_fastunlock(struct rt_mutex *lock,
-					   void (*slowfn)(struct rt_mutex *lock))
+static inline int rt_spin_lock_fastunlock(struct rt_mutex *lock,
+					  int (*slowfn)(struct rt_mutex *lock))
 {
-	if (likely(rt_mutex_cmpxchg_release(lock, current, NULL)))
+	if (likely(rt_mutex_cmpxchg_release(lock, current, NULL))) {
 		rt_mutex_deadlock_account_unlock(current);
-	else
-		slowfn(lock);
+		return 0;
+	}
+	return slowfn(lock);
 }
 #ifdef CONFIG_SMP
 /*
@@ -1086,7 +1151,7 @@ static void mark_wakeup_next_waiter(struct wake_q_head *wake_q,
 /*
  * Slow path to release a rt_mutex spin_lock style
  */
-static void noinline __sched rt_spin_lock_slowunlock(struct rt_mutex *lock)
+static int noinline __sched rt_spin_lock_slowunlock(struct rt_mutex *lock)
 {
 	unsigned long flags;
 	WAKE_Q(wake_q);
@@ -1101,7 +1166,7 @@ static void noinline __sched rt_spin_lock_slowunlock(struct rt_mutex *lock)
 	if (!rt_mutex_has_waiters(lock)) {
 		lock->owner = NULL;
 		raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
-		return;
+		return 0;
 	}
 
 	mark_wakeup_next_waiter(&wake_q, &wake_sleeper_q, lock);
@@ -1112,6 +1177,33 @@ static void noinline __sched rt_spin_lock_slowunlock(struct rt_mutex *lock)
 
 	/* Undo pi boosting when necessary */
 	rt_mutex_adjust_prio(current);
+	return 0;
+}
+
+static int noinline __sched rt_spin_lock_slowunlock_no_deboost(struct rt_mutex *lock)
+{
+	unsigned long flags;
+	WAKE_Q(wake_q);
+	WAKE_Q(wake_sleeper_q);
+
+	raw_spin_lock_irqsave(&lock->wait_lock, flags);
+
+	debug_rt_mutex_unlock(lock);
+
+	rt_mutex_deadlock_account_unlock(current);
+
+	if (!rt_mutex_has_waiters(lock)) {
+		lock->owner = NULL;
+		raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
+		return 0;
+	}
+
+	mark_wakeup_next_waiter(&wake_q, &wake_sleeper_q, lock);
+
+	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
+	wake_up_q(&wake_q);
+	wake_up_q_sleeper(&wake_sleeper_q);
+	return 1;
 }
 
 void __lockfunc rt_spin_lock__no_mg(spinlock_t *lock)
@@ -1166,6 +1258,17 @@ void __lockfunc rt_spin_unlock(spinlock_t *lock)
 }
 EXPORT_SYMBOL(rt_spin_unlock);
 
+int __lockfunc rt_spin_unlock_no_deboost(spinlock_t *lock)
+{
+	int ret;
+
+	/* NOTE: we always pass in '1' for nested, for simplicity */
+	spin_release(&lock->dep_map, 1, _RET_IP_);
+	ret = rt_spin_lock_fastunlock(&lock->lock, rt_spin_lock_slowunlock_no_deboost);
+	migrate_enable();
+	return ret;
+}
+
 void __lockfunc __rt_spin_unlock(struct rt_mutex *lock)
 {
 	rt_spin_lock_fastunlock(lock, rt_spin_lock_slowunlock);
diff --git a/kernel/kernel/locking/rtmutex_common.h b/kernel/kernel/locking/rtmutex_common.h
index 289f062f2..f457c7574 100644
--- a/kernel/kernel/locking/rtmutex_common.h
+++ b/kernel/kernel/locking/rtmutex_common.h
@@ -76,8 +76,9 @@ task_top_pi_waiter(struct task_struct *p)
 
 static inline struct task_struct *rt_mutex_owner(struct rt_mutex *lock)
 {
-	return (struct task_struct *)
-		((unsigned long)lock->owner & ~RT_MUTEX_OWNER_MASKALL);
+	unsigned long owner = (unsigned long) READ_ONCE(lock->owner);
+
+	return (struct task_struct *) (owner & ~RT_MUTEX_OWNER_MASKALL);
 }
 
 /*
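The fixup_rt_mutex_waiters() change above only clears RT_MUTEX_HAS_WAITERS when it is actually set, so the unlock path's cmpxchg(l->owner, T1, NULL) keeps failing until the bit is really gone and the owner field can never be overwritten with a stale value. The following standalone C sketch models that interaction with C11 atomics; HAS_WAITERS, model_fixup_waiters() and the fake task pointer T1 are made-up stand-ins for illustration, not the kernel's cmpxchg() machinery.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define HAS_WAITERS	0x1UL	/* stand-in for RT_MUTEX_HAS_WAITERS */

/* Only clear the waiters bit when it is actually set (the fixed behaviour). */
static void model_fixup_waiters(atomic_ulong *owner_word)
{
	unsigned long owner = atomic_load(owner_word);

	if (owner & HAS_WAITERS)
		atomic_store(owner_word, owner & ~HAS_WAITERS);
}

int main(void)
{
	unsigned long T1 = 0x1000UL;	/* stand-in for a task_struct pointer */
	atomic_ulong owner = T1 | HAS_WAITERS;
	unsigned long expected = T1;
	bool ok;

	/* The unlock path's cmpxchg(owner, T1, NULL) fails while the waiters bit is set ... */
	ok = atomic_compare_exchange_strong(&owner, &expected, 0UL);
	printf("cmpxchg with waiters bit set: %s\n", ok ? "success" : "fail");

	/* ... and only succeeds once the bit has been cleared. */
	model_fixup_waiters(&owner);
	expected = T1;
	ok = atomic_compare_exchange_strong(&owner, &expected, 0UL);
	printf("cmpxchg after fixup:          %s\n", ok ? "success" : "fail");
	return 0;
}

The same reasoning is why rt_mutex_owner() in rtmutex_common.h now uses READ_ONCE(): the owner word is read once, and the bits are masked off from that single snapshot rather than from a possibly re-read value.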