summaryrefslogtreecommitdiffstats
path: root/kernel/kernel/locking
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/kernel/locking')
-rw-r--r--kernel/kernel/locking/mcs_spinlock.h8
-rw-r--r--kernel/kernel/locking/mutex.c9
-rw-r--r--kernel/kernel/locking/qspinlock.c60
-rw-r--r--kernel/kernel/locking/rtmutex.c121
-rw-r--r--kernel/kernel/locking/rtmutex_common.h5
5 files changed, 188 insertions, 15 deletions
diff --git a/kernel/kernel/locking/mcs_spinlock.h b/kernel/kernel/locking/mcs_spinlock.h
index 5b9102a47..c835270f0 100644
--- a/kernel/kernel/locking/mcs_spinlock.h
+++ b/kernel/kernel/locking/mcs_spinlock.h
@@ -67,7 +67,13 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
node->locked = 0;
node->next = NULL;
- prev = xchg_acquire(lock, node);
+ /*
+ * We rely on the full barrier with global transitivity implied by the
+ * below xchg() to order the initialization stores above against any
+ * observation of @node. And to provide the ACQUIRE ordering associated
+ * with a LOCK primitive.
+ */
+ prev = xchg(lock, node);
if (likely(prev == NULL)) {
/*
* Lock acquired, don't need to set node->locked to 1. Threads
diff --git a/kernel/kernel/locking/mutex.c b/kernel/kernel/locking/mutex.c
index 0551c219c..89350f924 100644
--- a/kernel/kernel/locking/mutex.c
+++ b/kernel/kernel/locking/mutex.c
@@ -486,9 +486,6 @@ __ww_mutex_lock_check_stamp(struct mutex *lock, struct ww_acquire_ctx *ctx)
if (!hold_ctx)
return 0;
- if (unlikely(ctx == hold_ctx))
- return -EALREADY;
-
if (ctx->stamp - hold_ctx->stamp <= LONG_MAX &&
(ctx->stamp != hold_ctx->stamp || ctx > hold_ctx)) {
#ifdef CONFIG_DEBUG_MUTEXES
@@ -514,6 +511,12 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
unsigned long flags;
int ret;
+ if (use_ww_ctx) {
+ struct ww_mutex *ww = container_of(lock, struct ww_mutex, base);
+ if (unlikely(ww_ctx == READ_ONCE(ww->ctx)))
+ return -EALREADY;
+ }
+
preempt_disable();
mutex_acquire_nest(&lock->dep_map, subclass, 0, nest_lock, ip);
diff --git a/kernel/kernel/locking/qspinlock.c b/kernel/kernel/locking/qspinlock.c
index 87e9ce6a6..8173bc7fe 100644
--- a/kernel/kernel/locking/qspinlock.c
+++ b/kernel/kernel/locking/qspinlock.c
@@ -255,6 +255,66 @@ static __always_inline void __pv_wait_head(struct qspinlock *lock,
#define queued_spin_lock_slowpath native_queued_spin_lock_slowpath
#endif
+/*
+ * queued_spin_lock_slowpath() can (load-)ACQUIRE the lock before
+ * issuing an _unordered_ store to set _Q_LOCKED_VAL.
+ *
+ * This means that the store can be delayed, but no later than the
+ * store-release from the unlock. This means that simply observing
+ * _Q_LOCKED_VAL is not sufficient to determine if the lock is acquired.
+ *
+ * There are two paths that can issue the unordered store:
+ *
+ * (1) clear_pending_set_locked(): *,1,0 -> *,0,1
+ *
+ * (2) set_locked(): t,0,0 -> t,0,1 ; t != 0
+ * atomic_cmpxchg_relaxed(): t,0,0 -> 0,0,1
+ *
+ * However, in both cases we have other !0 state we've set before to queue
+ * ourseves:
+ *
+ * For (1) we have the atomic_cmpxchg_acquire() that set _Q_PENDING_VAL, our
+ * load is constrained by that ACQUIRE to not pass before that, and thus must
+ * observe the store.
+ *
+ * For (2) we have a more intersting scenario. We enqueue ourselves using
+ * xchg_tail(), which ends up being a RELEASE. This in itself is not
+ * sufficient, however that is followed by an smp_cond_acquire() on the same
+ * word, giving a RELEASE->ACQUIRE ordering. This again constrains our load and
+ * guarantees we must observe that store.
+ *
+ * Therefore both cases have other !0 state that is observable before the
+ * unordered locked byte store comes through. This means we can use that to
+ * wait for the lock store, and then wait for an unlock.
+ */
+#ifndef queued_spin_unlock_wait
+void queued_spin_unlock_wait(struct qspinlock *lock)
+{
+ u32 val;
+
+ for (;;) {
+ val = atomic_read(&lock->val);
+
+ if (!val) /* not locked, we're done */
+ goto done;
+
+ if (val & _Q_LOCKED_MASK) /* locked, go wait for unlock */
+ break;
+
+ /* not locked, but pending, wait until we observe the lock */
+ cpu_relax();
+ }
+
+ /* any unlock is good */
+ while (atomic_read(&lock->val) & _Q_LOCKED_MASK)
+ cpu_relax();
+
+done:
+ smp_rmb(); /* CTRL + RMB -> ACQUIRE */
+}
+EXPORT_SYMBOL(queued_spin_unlock_wait);
+#endif
+
#endif /* _GEN_PV_LOCK_SLOWPATH */
/**
diff --git a/kernel/kernel/locking/rtmutex.c b/kernel/kernel/locking/rtmutex.c
index 30777e813..86a78c068 100644
--- a/kernel/kernel/locking/rtmutex.c
+++ b/kernel/kernel/locking/rtmutex.c
@@ -71,8 +71,72 @@ static inline void clear_rt_mutex_waiters(struct rt_mutex *lock)
static void fixup_rt_mutex_waiters(struct rt_mutex *lock)
{
- if (!rt_mutex_has_waiters(lock))
- clear_rt_mutex_waiters(lock);
+ unsigned long owner, *p = (unsigned long *) &lock->owner;
+
+ if (rt_mutex_has_waiters(lock))
+ return;
+
+ /*
+ * The rbtree has no waiters enqueued, now make sure that the
+ * lock->owner still has the waiters bit set, otherwise the
+ * following can happen:
+ *
+ * CPU 0 CPU 1 CPU2
+ * l->owner=T1
+ * rt_mutex_lock(l)
+ * lock(l->lock)
+ * l->owner = T1 | HAS_WAITERS;
+ * enqueue(T2)
+ * boost()
+ * unlock(l->lock)
+ * block()
+ *
+ * rt_mutex_lock(l)
+ * lock(l->lock)
+ * l->owner = T1 | HAS_WAITERS;
+ * enqueue(T3)
+ * boost()
+ * unlock(l->lock)
+ * block()
+ * signal(->T2) signal(->T3)
+ * lock(l->lock)
+ * dequeue(T2)
+ * deboost()
+ * unlock(l->lock)
+ * lock(l->lock)
+ * dequeue(T3)
+ * ==> wait list is empty
+ * deboost()
+ * unlock(l->lock)
+ * lock(l->lock)
+ * fixup_rt_mutex_waiters()
+ * if (wait_list_empty(l) {
+ * l->owner = owner
+ * owner = l->owner & ~HAS_WAITERS;
+ * ==> l->owner = T1
+ * }
+ * lock(l->lock)
+ * rt_mutex_unlock(l) fixup_rt_mutex_waiters()
+ * if (wait_list_empty(l) {
+ * owner = l->owner & ~HAS_WAITERS;
+ * cmpxchg(l->owner, T1, NULL)
+ * ===> Success (l->owner = NULL)
+ *
+ * l->owner = owner
+ * ==> l->owner = T1
+ * }
+ *
+ * With the check for the waiter bit in place T3 on CPU2 will not
+ * overwrite. All tasks fiddling with the waiters bit are
+ * serialized by l->lock, so nothing else can modify the waiters
+ * bit. If the bit is set then nothing can change l->owner either
+ * so the simple RMW is safe. The cmpxchg() will simply fail if it
+ * happens in the middle of the RMW because the waiters bit is
+ * still set.
+ */
+ owner = READ_ONCE(*p);
+ if (owner & RT_MUTEX_HAS_WAITERS)
+ WRITE_ONCE(*p, owner & ~RT_MUTEX_HAS_WAITERS);
}
static int rt_mutex_real_waiter(struct rt_mutex_waiter *waiter)
@@ -939,13 +1003,14 @@ static inline void rt_spin_lock_fastlock(struct rt_mutex *lock,
slowfn(lock, do_mig_dis);
}
-static inline void rt_spin_lock_fastunlock(struct rt_mutex *lock,
- void (*slowfn)(struct rt_mutex *lock))
+static inline int rt_spin_lock_fastunlock(struct rt_mutex *lock,
+ int (*slowfn)(struct rt_mutex *lock))
{
- if (likely(rt_mutex_cmpxchg_release(lock, current, NULL)))
+ if (likely(rt_mutex_cmpxchg_release(lock, current, NULL))) {
rt_mutex_deadlock_account_unlock(current);
- else
- slowfn(lock);
+ return 0;
+ }
+ return slowfn(lock);
}
#ifdef CONFIG_SMP
/*
@@ -1086,7 +1151,7 @@ static void mark_wakeup_next_waiter(struct wake_q_head *wake_q,
/*
* Slow path to release a rt_mutex spin_lock style
*/
-static void noinline __sched rt_spin_lock_slowunlock(struct rt_mutex *lock)
+static int noinline __sched rt_spin_lock_slowunlock(struct rt_mutex *lock)
{
unsigned long flags;
WAKE_Q(wake_q);
@@ -1101,7 +1166,7 @@ static void noinline __sched rt_spin_lock_slowunlock(struct rt_mutex *lock)
if (!rt_mutex_has_waiters(lock)) {
lock->owner = NULL;
raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
- return;
+ return 0;
}
mark_wakeup_next_waiter(&wake_q, &wake_sleeper_q, lock);
@@ -1112,6 +1177,33 @@ static void noinline __sched rt_spin_lock_slowunlock(struct rt_mutex *lock)
/* Undo pi boosting.when necessary */
rt_mutex_adjust_prio(current);
+ return 0;
+}
+
+static int noinline __sched rt_spin_lock_slowunlock_no_deboost(struct rt_mutex *lock)
+{
+ unsigned long flags;
+ WAKE_Q(wake_q);
+ WAKE_Q(wake_sleeper_q);
+
+ raw_spin_lock_irqsave(&lock->wait_lock, flags);
+
+ debug_rt_mutex_unlock(lock);
+
+ rt_mutex_deadlock_account_unlock(current);
+
+ if (!rt_mutex_has_waiters(lock)) {
+ lock->owner = NULL;
+ raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
+ return 0;
+ }
+
+ mark_wakeup_next_waiter(&wake_q, &wake_sleeper_q, lock);
+
+ raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
+ wake_up_q(&wake_q);
+ wake_up_q_sleeper(&wake_sleeper_q);
+ return 1;
}
void __lockfunc rt_spin_lock__no_mg(spinlock_t *lock)
@@ -1166,6 +1258,17 @@ void __lockfunc rt_spin_unlock(spinlock_t *lock)
}
EXPORT_SYMBOL(rt_spin_unlock);
+int __lockfunc rt_spin_unlock_no_deboost(spinlock_t *lock)
+{
+ int ret;
+
+ /* NOTE: we always pass in '1' for nested, for simplicity */
+ spin_release(&lock->dep_map, 1, _RET_IP_);
+ ret = rt_spin_lock_fastunlock(&lock->lock, rt_spin_lock_slowunlock_no_deboost);
+ migrate_enable();
+ return ret;
+}
+
void __lockfunc __rt_spin_unlock(struct rt_mutex *lock)
{
rt_spin_lock_fastunlock(lock, rt_spin_lock_slowunlock);
diff --git a/kernel/kernel/locking/rtmutex_common.h b/kernel/kernel/locking/rtmutex_common.h
index 289f062f2..f457c7574 100644
--- a/kernel/kernel/locking/rtmutex_common.h
+++ b/kernel/kernel/locking/rtmutex_common.h
@@ -76,8 +76,9 @@ task_top_pi_waiter(struct task_struct *p)
static inline struct task_struct *rt_mutex_owner(struct rt_mutex *lock)
{
- return (struct task_struct *)
- ((unsigned long)lock->owner & ~RT_MUTEX_OWNER_MASKALL);
+ unsigned long owner = (unsigned long) READ_ONCE(lock->owner);
+
+ return (struct task_struct *) (owner & ~RT_MUTEX_OWNER_MASKALL);
}
/*