* Copyright (C) 2005-2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com>
* Copyright (C) 2005 Kihon Technologies Inc., Steven Rostedt
* Copyright (C) 2006 Esben Nielsen
+ * Adaptive Spinlocks:
+ * Copyright (C) 2008 Novell, Inc., Gregory Haskins, Sven Dietrich,
+ * and Peter Morreale,
+ * Adaptive Spinlocks simplification:
+ * Copyright (C) 2008 Red Hat, Inc., Steven Rostedt <srostedt@redhat.com>
*
* See Documentation/locking/rt-mutex-design.rst for details.
*/
#include <linux/sched/signal.h>
#include <linux/sched/rt.h>
#include <linux/sched/wake_q.h>
+#include <linux/ww_mutex.h>
#include "rtmutex_common.h"
+#ifndef WW_RT
+# define build_ww_mutex() (false)
+# define ww_container_of(rtm) NULL
+
+static inline int __ww_mutex_add_waiter(struct rt_mutex_waiter *waiter,
+ struct rt_mutex *lock,
+ struct ww_acquire_ctx *ww_ctx)
+{
+ return 0;
+}
+
+static inline void __ww_mutex_check_waiters(struct rt_mutex *lock,
+ struct ww_acquire_ctx *ww_ctx)
+{
+}
+
+static inline void ww_mutex_lock_acquired(struct ww_mutex *lock,
+ struct ww_acquire_ctx *ww_ctx)
+{
+}
+
+static inline int __ww_mutex_check_kill(struct rt_mutex *lock,
+ struct rt_mutex_waiter *waiter,
+ struct ww_acquire_ctx *ww_ctx)
+{
+ return 0;
+}
+
+#else
+# define build_ww_mutex() (true)
+# define ww_container_of(rtm) container_of(rtm, struct ww_mutex, base)
+# include "ww_mutex.h"
+#endif
+
/*
* lock->owner state tracking:
*
return 1;
}
+static inline bool rt_mutex_steal(struct rt_mutex_waiter *waiter,
+ struct rt_mutex_waiter *top_waiter)
+{
+ if (rt_mutex_waiter_less(waiter, top_waiter))
+ return true;
+
+#ifdef RT_MUTEX_BUILD_SPINLOCKS
+ /*
+ * Note that RT tasks are excluded from same priority (lateral)
+ * steals to prevent the introduction of an unbounded latency.
+ */
+ if (rt_prio(waiter->prio) || dl_prio(waiter->prio))
+ return false;
+
+ return rt_mutex_waiter_equal(waiter, top_waiter);
+#else
+ return false;
+#endif
+}
+
#define __node_2_waiter(node) \
rb_entry((node), struct rt_mutex_waiter, tree_entry)
static __always_inline bool __waiter_less(struct rb_node *a, const struct rb_node *b)
{
- return rt_mutex_waiter_less(__node_2_waiter(a), __node_2_waiter(b));
+ struct rt_mutex_waiter *aw = __node_2_waiter(a);
+ struct rt_mutex_waiter *bw = __node_2_waiter(b);
+
+ if (rt_mutex_waiter_less(aw, bw))
+ return 1;
+
+ if (!build_ww_mutex())
+ return 0;
+
+ if (rt_mutex_waiter_less(bw, aw))
+ return 0;
+
+ /* NOTE: relies on waiter->ww_ctx being set before insertion */
+ if (aw->ww_ctx) {
+ if (!bw->ww_ctx)
+ return 1;
+
+ return (signed long)(aw->ww_ctx->stamp -
+ bw->ww_ctx->stamp) < 0;
+ }
+
+ return 0;
}
static __always_inline void
if (next_lock != waiter->lock)
goto out_unlock_pi;
+ /*
+ * There could be 'spurious' loops in the lock graph due to ww_mutex,
+ * consider:
+ *
+ * P1: A, ww_A, ww_B
+ * P2: ww_B, ww_A
+ * P3: A
+ *
+ * P3 should not return -EDEADLK because it gets trapped in the cycle
+ * created by P1 and P2 (which will resolve -- and runs into
+ * max_lock_depth above). Therefore disable detect_deadlock such that
+ * the below termination condition can trigger once all relevant tasks
+ * are boosted.
+ *
+ * Even when we start with ww_mutex we can disable deadlock detection,
+ * since we would supress a ww_mutex induced deadlock at [6] anyway.
+ * Supressing it here however is not sufficient since we might still
+ * hit [6] due to adjustment driven iteration.
+ *
+ * NOTE: if someone were to create a deadlock between 2 ww_classes we'd
+ * utterly fail to report it; lockdep should.
+ */
+ if (IS_ENABLED(CONFIG_PREEMPT_RT) && waiter->ww_ctx && detect_deadlock)
+ detect_deadlock = false;
+
/*
* Drop out, when the task has no waiters. Note,
* top_waiter can be NULL, when we are in the deboosting
* walk, we detected a deadlock.
*/
if (lock == orig_lock || rt_mutex_owner(lock) == top_task) {
- raw_spin_unlock(&lock->wait_lock);
ret = -EDEADLK;
+
+ /*
+ * When the deadlock is due to ww_mutex; also see above. Don't
+ * report the deadlock and instead let the ww_mutex wound/die
+ * logic pick which of the contending threads gets -EDEADLK.
+ *
+ * NOTE: assumes the cycle only contains a single ww_class; any
+ * other configuration and we fail to report; also, see
+ * lockdep.
+ */
+ if (IS_ENABLED(CONFIG_PREEMPT_RT) && orig_waiter && orig_waiter->ww_ctx)
+ ret = 0;
+
+ raw_spin_unlock(&lock->wait_lock);
goto out_unlock_pi;
}
* trylock attempt.
*/
if (waiter) {
- /*
- * If waiter is not the highest priority waiter of
- * @lock, give up.
- */
- if (waiter != rt_mutex_top_waiter(lock))
- return 0;
+ struct rt_mutex_waiter *top_waiter = rt_mutex_top_waiter(lock);
/*
- * We can acquire the lock. Remove the waiter from the
- * lock waiters tree.
+ * If waiter is the highest priority waiter of @lock,
+ * or allowed to steal it, take it over.
*/
- rt_mutex_dequeue(lock, waiter);
-
+ if (waiter == top_waiter || rt_mutex_steal(waiter, top_waiter)) {
+ /*
+ * We can acquire the lock. Remove the waiter from the
+ * lock waiters tree.
+ */
+ rt_mutex_dequeue(lock, waiter);
+ } else {
+ return 0;
+ }
} else {
/*
* If the lock has waiters already we check whether @task is
* not need to be dequeued.
*/
if (rt_mutex_has_waiters(lock)) {
- /*
- * If @task->prio is greater than or equal to
- * the top waiter priority (kernel view),
- * @task lost.
- */
- if (!rt_mutex_waiter_less(task_to_waiter(task),
- rt_mutex_top_waiter(lock)))
+ /* Check whether the trylock can steal it. */
+ if (!rt_mutex_steal(task_to_waiter(task),
+ rt_mutex_top_waiter(lock)))
return 0;
/*
static int __sched task_blocks_on_rt_mutex(struct rt_mutex_base *lock,
struct rt_mutex_waiter *waiter,
struct task_struct *task,
+ struct ww_acquire_ctx *ww_ctx,
enum rtmutex_chainwalk chwalk)
{
struct task_struct *owner = rt_mutex_owner(lock);
raw_spin_unlock(&task->pi_lock);
+ if (build_ww_mutex() && ww_ctx) {
+ struct rt_mutex *rtm;
+
+ /* Check whether the waiter should back out immediately */
+ rtm = container_of(lock, struct rt_mutex, rtmutex);
+ res = __ww_mutex_add_waiter(waiter, rtm, ww_ctx);
+ if (res) {
+ raw_spin_lock(&task->pi_lock);
+ rt_mutex_dequeue(lock, waiter);
+ task->pi_blocked_on = NULL;
+ raw_spin_unlock(&task->pi_lock);
+ return res;
+ }
+ }
+
if (!owner)
return 0;
rt_mutex_slowunlock(lock);
}
+#ifdef CONFIG_SMP
+static bool rtmutex_spin_on_owner(struct rt_mutex_base *lock,
+ struct rt_mutex_waiter *waiter,
+ struct task_struct *owner)
+{
+ bool res = true;
+
+ rcu_read_lock();
+ for (;;) {
+ /* If owner changed, trylock again. */
+ if (owner != rt_mutex_owner(lock))
+ break;
+ /*
+ * Ensure that @owner is dereferenced after checking that
+ * the lock owner still matches @owner. If that fails,
+ * @owner might point to freed memory. If it still matches,
+ * the rcu_read_lock() ensures the memory stays valid.
+ */
+ barrier();
+ /*
+ * Stop spinning when:
+ * - the lock owner has been scheduled out
+ * - current is not longer the top waiter
+ * - current is requested to reschedule (redundant
+ * for CONFIG_PREEMPT_RCU=y)
+ * - the VCPU on which owner runs is preempted
+ */
+ if (!owner->on_cpu || need_resched() ||
+ rt_mutex_waiter_is_top_waiter(lock, waiter) ||
+ vcpu_is_preempted(task_cpu(owner))) {
+ res = false;
+ break;
+ }
+ cpu_relax();
+ }
+ rcu_read_unlock();
+ return res;
+}
+#else
+static bool rtmutex_spin_on_owner(struct rt_mutex_base *lock,
+ struct rt_mutex_waiter *waiter,
+ struct task_struct *owner)
+{
+ return false;
+}
+#endif
+
#ifdef RT_MUTEX_BUILD_MUTEX
/*
* Functions required for:
/**
* rt_mutex_slowlock_block() - Perform the wait-wake-try-to-take loop
* @lock: the rt_mutex to take
+ * @ww_ctx: WW mutex context pointer
* @state: the state the task should block in (TASK_INTERRUPTIBLE
* or TASK_UNINTERRUPTIBLE)
* @timeout: the pre-initialized and started timer, or NULL for none
* Must be called with lock->wait_lock held and interrupts disabled
*/
static int __sched rt_mutex_slowlock_block(struct rt_mutex_base *lock,
+ struct ww_acquire_ctx *ww_ctx,
unsigned int state,
struct hrtimer_sleeper *timeout,
struct rt_mutex_waiter *waiter)
{
+ struct rt_mutex *rtm = container_of(lock, struct rt_mutex, rtmutex);
+ struct task_struct *owner;
int ret = 0;
for (;;) {
break;
}
+ if (build_ww_mutex() && ww_ctx) {
+ ret = __ww_mutex_check_kill(rtm, waiter, ww_ctx);
+ if (ret)
+ break;
+ }
+
+ if (waiter == rt_mutex_top_waiter(lock))
+ owner = rt_mutex_owner(lock);
+ else
+ owner = NULL;
raw_spin_unlock_irq(&lock->wait_lock);
- schedule();
+ if (!owner || !rtmutex_spin_on_owner(lock, waiter, owner))
+ schedule();
raw_spin_lock_irq(&lock->wait_lock);
set_current_state(state);
if (res != -EDEADLOCK || detect_deadlock)
return;
+ if (build_ww_mutex() && w->ww_ctx)
+ return;
+
/*
* Yell loudly and stop the task right here.
*/
/**
* __rt_mutex_slowlock - Locking slowpath invoked with lock::wait_lock held
* @lock: The rtmutex to block lock
+ * @ww_ctx: WW mutex context pointer
* @state: The task state for sleeping
* @chwalk: Indicator whether full or partial chainwalk is requested
* @waiter: Initializer waiter for blocking
*/
static int __sched __rt_mutex_slowlock(struct rt_mutex_base *lock,
+ struct ww_acquire_ctx *ww_ctx,
unsigned int state,
enum rtmutex_chainwalk chwalk,
struct rt_mutex_waiter *waiter)
{
+ struct rt_mutex *rtm = container_of(lock, struct rt_mutex, rtmutex);
+ struct ww_mutex *ww = ww_container_of(rtm);
int ret;
lockdep_assert_held(&lock->wait_lock);
/* Try to acquire the lock again: */
- if (try_to_take_rt_mutex(lock, current, NULL))
+ if (try_to_take_rt_mutex(lock, current, NULL)) {
+ if (build_ww_mutex() && ww_ctx) {
+ __ww_mutex_check_waiters(rtm, ww_ctx);
+ ww_mutex_lock_acquired(ww, ww_ctx);
+ }
return 0;
+ }
set_current_state(state);
- ret = task_blocks_on_rt_mutex(lock, waiter, current, chwalk);
-
+ ret = task_blocks_on_rt_mutex(lock, waiter, current, ww_ctx, chwalk);
if (likely(!ret))
- ret = rt_mutex_slowlock_block(lock, state, NULL, waiter);
-
- if (unlikely(ret)) {
+ ret = rt_mutex_slowlock_block(lock, ww_ctx, state, NULL, waiter);
+
+ if (likely(!ret)) {
+ /* acquired the lock */
+ if (build_ww_mutex() && ww_ctx) {
+ if (!ww_ctx->is_wait_die)
+ __ww_mutex_check_waiters(rtm, ww_ctx);
+ ww_mutex_lock_acquired(ww, ww_ctx);
+ }
+ } else {
__set_current_state(TASK_RUNNING);
remove_waiter(lock, waiter);
rt_mutex_handle_deadlock(ret, chwalk, waiter);
}
static inline int __rt_mutex_slowlock_locked(struct rt_mutex_base *lock,
+ struct ww_acquire_ctx *ww_ctx,
unsigned int state)
{
struct rt_mutex_waiter waiter;
int ret;
rt_mutex_init_waiter(&waiter);
+ waiter.ww_ctx = ww_ctx;
- ret = __rt_mutex_slowlock(lock, state, RT_MUTEX_MIN_CHAINWALK, &waiter);
+ ret = __rt_mutex_slowlock(lock, ww_ctx, state, RT_MUTEX_MIN_CHAINWALK,
+ &waiter);
debug_rt_mutex_free_waiter(&waiter);
return ret;
/*
* rt_mutex_slowlock - Locking slowpath invoked when fast path fails
* @lock: The rtmutex to block lock
+ * @ww_ctx: WW mutex context pointer
* @state: The task state for sleeping
*/
static int __sched rt_mutex_slowlock(struct rt_mutex_base *lock,
+ struct ww_acquire_ctx *ww_ctx,
unsigned int state)
{
unsigned long flags;
* irqsave/restore variants.
*/
raw_spin_lock_irqsave(&lock->wait_lock, flags);
- ret = __rt_mutex_slowlock_locked(lock, state);
+ ret = __rt_mutex_slowlock_locked(lock, ww_ctx, state);
raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
return ret;
if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current)))
return 0;
- return rt_mutex_slowlock(lock, state);
+ return rt_mutex_slowlock(lock, NULL, state);
}
#endif /* RT_MUTEX_BUILD_MUTEX */
static void __sched rtlock_slowlock_locked(struct rt_mutex_base *lock)
{
struct rt_mutex_waiter waiter;
+ struct task_struct *owner;
lockdep_assert_held(&lock->wait_lock);
/* Save current state and set state to TASK_RTLOCK_WAIT */
current_save_and_set_rtlock_wait_state();
- task_blocks_on_rt_mutex(lock, &waiter, current, RT_MUTEX_MIN_CHAINWALK);
+ task_blocks_on_rt_mutex(lock, &waiter, current, NULL, RT_MUTEX_MIN_CHAINWALK);
for (;;) {
/* Try to acquire the lock again */
if (try_to_take_rt_mutex(lock, current, &waiter))
break;
+ if (&waiter == rt_mutex_top_waiter(lock))
+ owner = rt_mutex_owner(lock);
+ else
+ owner = NULL;
raw_spin_unlock_irq(&lock->wait_lock);
- schedule_rtlock();
+ if (!owner || !rtmutex_spin_on_owner(lock, &waiter, owner))
+ schedule_rtlock();
raw_spin_lock_irq(&lock->wait_lock);
set_current_state(TASK_RTLOCK_WAIT);