Merge tag 'perf-tools-for-v5.15-2021-09-11' of git://git.kernel.org/pub/scm/linux...
[linux-2.6-microblaze.git] / kernel / locking / rtmutex.c
index ac8fb2f..6bb116c 100644 (file)
@@ -8,6 +8,11 @@
  *  Copyright (C) 2005-2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com>
  *  Copyright (C) 2005 Kihon Technologies Inc., Steven Rostedt
  *  Copyright (C) 2006 Esben Nielsen
+ * Adaptive Spinlocks:
+ *  Copyright (C) 2008 Novell, Inc., Gregory Haskins, Sven Dietrich,
+ *                                  and Peter Morreale,
+ * Adaptive Spinlocks simplification:
+ *  Copyright (C) 2008 Red Hat, Inc., Steven Rostedt <srostedt@redhat.com>
  *
  *  See Documentation/locking/rt-mutex-design.rst for details.
  */
 #include <linux/sched/signal.h>
 #include <linux/sched/rt.h>
 #include <linux/sched/wake_q.h>
+#include <linux/ww_mutex.h>
 
 #include "rtmutex_common.h"
 
+#ifndef WW_RT
+# define build_ww_mutex()      (false)
+# define ww_container_of(rtm)  NULL
+
+static inline int __ww_mutex_add_waiter(struct rt_mutex_waiter *waiter,
+                                       struct rt_mutex *lock,
+                                       struct ww_acquire_ctx *ww_ctx)
+{
+       return 0;
+}
+
+static inline void __ww_mutex_check_waiters(struct rt_mutex *lock,
+                                           struct ww_acquire_ctx *ww_ctx)
+{
+}
+
+static inline void ww_mutex_lock_acquired(struct ww_mutex *lock,
+                                         struct ww_acquire_ctx *ww_ctx)
+{
+}
+
+static inline int __ww_mutex_check_kill(struct rt_mutex *lock,
+                                       struct rt_mutex_waiter *waiter,
+                                       struct ww_acquire_ctx *ww_ctx)
+{
+       return 0;
+}
+
+#else
+# define build_ww_mutex()      (true)
+# define ww_container_of(rtm)  container_of(rtm, struct ww_mutex, base)
+# include "ww_mutex.h"
+#endif
+
 /*
  * lock->owner state tracking:
  *
@@ -303,12 +343,53 @@ static __always_inline int rt_mutex_waiter_equal(struct rt_mutex_waiter *left,
        return 1;
 }
 
+static inline bool rt_mutex_steal(struct rt_mutex_waiter *waiter,
+                                 struct rt_mutex_waiter *top_waiter)
+{
+       if (rt_mutex_waiter_less(waiter, top_waiter))
+               return true;
+
+#ifdef RT_MUTEX_BUILD_SPINLOCKS
+       /*
+        * Note that RT tasks are excluded from same priority (lateral)
+        * steals to prevent the introduction of an unbounded latency.
+        */
+       if (rt_prio(waiter->prio) || dl_prio(waiter->prio))
+               return false;
+
+       return rt_mutex_waiter_equal(waiter, top_waiter);
+#else
+       return false;
+#endif
+}
+
 #define __node_2_waiter(node) \
        rb_entry((node), struct rt_mutex_waiter, tree_entry)
 
 static __always_inline bool __waiter_less(struct rb_node *a, const struct rb_node *b)
 {
-       return rt_mutex_waiter_less(__node_2_waiter(a), __node_2_waiter(b));
+       struct rt_mutex_waiter *aw = __node_2_waiter(a);
+       struct rt_mutex_waiter *bw = __node_2_waiter(b);
+
+       if (rt_mutex_waiter_less(aw, bw))
+               return 1;
+
+       if (!build_ww_mutex())
+               return 0;
+
+       if (rt_mutex_waiter_less(bw, aw))
+               return 0;
+
+       /* NOTE: relies on waiter->ww_ctx being set before insertion */
+       if (aw->ww_ctx) {
+               if (!bw->ww_ctx)
+                       return 1;
+
+               return (signed long)(aw->ww_ctx->stamp -
+                                    bw->ww_ctx->stamp) < 0;
+       }
+
+       return 0;
 }
 
 static __always_inline void
@@ -575,6 +656,31 @@ static int __sched rt_mutex_adjust_prio_chain(struct task_struct *task,
        if (next_lock != waiter->lock)
                goto out_unlock_pi;
 
+       /*
+        * There could be 'spurious' loops in the lock graph due to ww_mutex,
+        * consider:
+        *
+        *   P1: A, ww_A, ww_B
+        *   P2: ww_B, ww_A
+        *   P3: A
+        *
+        * P3 should not return -EDEADLK because it gets trapped in the cycle
+        * created by P1 and P2 (which will resolve -- and runs into
+        * max_lock_depth above). Therefore disable detect_deadlock such that
+        * the below termination condition can trigger once all relevant tasks
+        * are boosted.
+        *
+        * Even when we start with ww_mutex we can disable deadlock detection,
+        * since we would suppress a ww_mutex induced deadlock at [6] anyway.
+        * Suppressing it here however is not sufficient since we might still
+        * hit [6] due to adjustment driven iteration.
+        *
+        * NOTE: if someone were to create a deadlock between 2 ww_classes we'd
+        * utterly fail to report it; lockdep should.
+        */
+       if (IS_ENABLED(CONFIG_PREEMPT_RT) && waiter->ww_ctx && detect_deadlock)
+               detect_deadlock = false;
+
        /*
         * Drop out, when the task has no waiters. Note,
         * top_waiter can be NULL, when we are in the deboosting
@@ -636,8 +742,21 @@ static int __sched rt_mutex_adjust_prio_chain(struct task_struct *task,
         * walk, we detected a deadlock.
         */
        if (lock == orig_lock || rt_mutex_owner(lock) == top_task) {
-               raw_spin_unlock(&lock->wait_lock);
                ret = -EDEADLK;
+
+               /*
+                * When the deadlock is due to ww_mutex; also see above. Don't
+                * report the deadlock and instead let the ww_mutex wound/die
+                * logic pick which of the contending threads gets -EDEADLK.
+                *
+                * NOTE: assumes the cycle only contains a single ww_class; any
+                * other configuration and we fail to report; also, see
+                * lockdep.
+                */
+               if (IS_ENABLED(CONFIG_PREEMPT_RT) && orig_waiter && orig_waiter->ww_ctx)
+                       ret = 0;
+
+               raw_spin_unlock(&lock->wait_lock);
                goto out_unlock_pi;
        }
 
@@ -876,19 +995,21 @@ try_to_take_rt_mutex(struct rt_mutex_base *lock, struct task_struct *task,
         * trylock attempt.
         */
        if (waiter) {
-               /*
-                * If waiter is not the highest priority waiter of
-                * @lock, give up.
-                */
-               if (waiter != rt_mutex_top_waiter(lock))
-                       return 0;
+               struct rt_mutex_waiter *top_waiter = rt_mutex_top_waiter(lock);
 
                /*
-                * We can acquire the lock. Remove the waiter from the
-                * lock waiters tree.
+                * If waiter is the highest priority waiter of @lock,
+                * or allowed to steal it, take it over.
                 */
-               rt_mutex_dequeue(lock, waiter);
-
+               if (waiter == top_waiter || rt_mutex_steal(waiter, top_waiter)) {
+                       /*
+                        * We can acquire the lock. Remove the waiter from the
+                        * lock waiters tree.
+                        */
+                       rt_mutex_dequeue(lock, waiter);
+               } else {
+                       return 0;
+               }
        } else {
                /*
                 * If the lock has waiters already we check whether @task is
@@ -899,13 +1020,9 @@ try_to_take_rt_mutex(struct rt_mutex_base *lock, struct task_struct *task,
                 * not need to be dequeued.
                 */
                if (rt_mutex_has_waiters(lock)) {
-                       /*
-                        * If @task->prio is greater than or equal to
-                        * the top waiter priority (kernel view),
-                        * @task lost.
-                        */
-                       if (!rt_mutex_waiter_less(task_to_waiter(task),
-                                                 rt_mutex_top_waiter(lock)))
+                       /* Check whether the trylock can steal it. */
+                       if (!rt_mutex_steal(task_to_waiter(task),
+                                           rt_mutex_top_waiter(lock)))
                                return 0;
 
                        /*
@@ -961,6 +1078,7 @@ takeit:
 static int __sched task_blocks_on_rt_mutex(struct rt_mutex_base *lock,
                                           struct rt_mutex_waiter *waiter,
                                           struct task_struct *task,
+                                          struct ww_acquire_ctx *ww_ctx,
                                           enum rtmutex_chainwalk chwalk)
 {
        struct task_struct *owner = rt_mutex_owner(lock);
@@ -996,6 +1114,21 @@ static int __sched task_blocks_on_rt_mutex(struct rt_mutex_base *lock,
 
        raw_spin_unlock(&task->pi_lock);
 
+       if (build_ww_mutex() && ww_ctx) {
+               struct rt_mutex *rtm;
+
+               /* Check whether the waiter should back out immediately */
+               rtm = container_of(lock, struct rt_mutex, rtmutex);
+               res = __ww_mutex_add_waiter(waiter, rtm, ww_ctx);
+               if (res) {
+                       raw_spin_lock(&task->pi_lock);
+                       rt_mutex_dequeue(lock, waiter);
+                       task->pi_blocked_on = NULL;
+                       raw_spin_unlock(&task->pi_lock);
+                       return res;
+               }
+       }
+
        if (!owner)
                return 0;
 
@@ -1212,6 +1345,53 @@ static __always_inline void __rt_mutex_unlock(struct rt_mutex_base *lock)
        rt_mutex_slowunlock(lock);
 }
 
+#ifdef CONFIG_SMP
+static bool rtmutex_spin_on_owner(struct rt_mutex_base *lock,
+                                 struct rt_mutex_waiter *waiter,
+                                 struct task_struct *owner)
+{
+       bool res = true;
+
+       rcu_read_lock();
+       for (;;) {
+               /* If owner changed, trylock again. */
+               if (owner != rt_mutex_owner(lock))
+                       break;
+               /*
+                * Ensure that @owner is dereferenced after checking that
+                * the lock owner still matches @owner. If that fails,
+                * @owner might point to freed memory. If it still matches,
+                * the rcu_read_lock() ensures the memory stays valid.
+                */
+               barrier();
+               /*
+                * Stop spinning when:
+                *  - the lock owner has been scheduled out
+                *  - current is no longer the top waiter
+                *  - current is requested to reschedule (redundant
+                *    for CONFIG_PREEMPT_RCU=y)
+                *  - the VCPU on which owner runs is preempted
+                */
+               if (!owner->on_cpu || need_resched() ||
+                   !rt_mutex_waiter_is_top_waiter(lock, waiter) ||
+                   vcpu_is_preempted(task_cpu(owner))) {
+                       res = false;
+                       break;
+               }
+               cpu_relax();
+       }
+       rcu_read_unlock();
+       return res;
+}
+#else
+static bool rtmutex_spin_on_owner(struct rt_mutex_base *lock,
+                                 struct rt_mutex_waiter *waiter,
+                                 struct task_struct *owner)
+{
+       return false;
+}
+#endif
+
 #ifdef RT_MUTEX_BUILD_MUTEX
 /*
  * Functions required for:
@@ -1281,6 +1461,7 @@ static void __sched remove_waiter(struct rt_mutex_base *lock,
 /**
  * rt_mutex_slowlock_block() - Perform the wait-wake-try-to-take loop
  * @lock:               the rt_mutex to take
+ * @ww_ctx:             WW mutex context pointer
  * @state:              the state the task should block in (TASK_INTERRUPTIBLE
  *                      or TASK_UNINTERRUPTIBLE)
  * @timeout:            the pre-initialized and started timer, or NULL for none
@@ -1289,10 +1470,13 @@ static void __sched remove_waiter(struct rt_mutex_base *lock,
  * Must be called with lock->wait_lock held and interrupts disabled
  */
 static int __sched rt_mutex_slowlock_block(struct rt_mutex_base *lock,
+                                          struct ww_acquire_ctx *ww_ctx,
                                           unsigned int state,
                                           struct hrtimer_sleeper *timeout,
                                           struct rt_mutex_waiter *waiter)
 {
+       struct rt_mutex *rtm = container_of(lock, struct rt_mutex, rtmutex);
+       struct task_struct *owner;
        int ret = 0;
 
        for (;;) {
@@ -1309,9 +1493,20 @@ static int __sched rt_mutex_slowlock_block(struct rt_mutex_base *lock,
                        break;
                }
 
+               if (build_ww_mutex() && ww_ctx) {
+                       ret = __ww_mutex_check_kill(rtm, waiter, ww_ctx);
+                       if (ret)
+                               break;
+               }
+
+               if (waiter == rt_mutex_top_waiter(lock))
+                       owner = rt_mutex_owner(lock);
+               else
+                       owner = NULL;
                raw_spin_unlock_irq(&lock->wait_lock);
 
-               schedule();
+               if (!owner || !rtmutex_spin_on_owner(lock, waiter, owner))
+                       schedule();
 
                raw_spin_lock_irq(&lock->wait_lock);
                set_current_state(state);
@@ -1331,6 +1526,9 @@ static void __sched rt_mutex_handle_deadlock(int res, int detect_deadlock,
        if (res != -EDEADLOCK || detect_deadlock)
                return;
 
+       if (build_ww_mutex() && w->ww_ctx)
+               return;
+
        /*
         * Yell loudly and stop the task right here.
         */
@@ -1344,31 +1542,46 @@ static void __sched rt_mutex_handle_deadlock(int res, int detect_deadlock,
 /**
  * __rt_mutex_slowlock - Locking slowpath invoked with lock::wait_lock held
  * @lock:      The rtmutex to block lock
+ * @ww_ctx:    WW mutex context pointer
  * @state:     The task state for sleeping
  * @chwalk:    Indicator whether full or partial chainwalk is requested
  * @waiter:    Initializer waiter for blocking
  */
 static int __sched __rt_mutex_slowlock(struct rt_mutex_base *lock,
+                                      struct ww_acquire_ctx *ww_ctx,
                                       unsigned int state,
                                       enum rtmutex_chainwalk chwalk,
                                       struct rt_mutex_waiter *waiter)
 {
+       struct rt_mutex *rtm = container_of(lock, struct rt_mutex, rtmutex);
+       struct ww_mutex *ww = ww_container_of(rtm);
        int ret;
 
        lockdep_assert_held(&lock->wait_lock);
 
        /* Try to acquire the lock again: */
-       if (try_to_take_rt_mutex(lock, current, NULL))
+       if (try_to_take_rt_mutex(lock, current, NULL)) {
+               if (build_ww_mutex() && ww_ctx) {
+                       __ww_mutex_check_waiters(rtm, ww_ctx);
+                       ww_mutex_lock_acquired(ww, ww_ctx);
+               }
                return 0;
+       }
 
        set_current_state(state);
 
-       ret = task_blocks_on_rt_mutex(lock, waiter, current, chwalk);
-
+       ret = task_blocks_on_rt_mutex(lock, waiter, current, ww_ctx, chwalk);
        if (likely(!ret))
-               ret = rt_mutex_slowlock_block(lock, state, NULL, waiter);
-
-       if (unlikely(ret)) {
+               ret = rt_mutex_slowlock_block(lock, ww_ctx, state, NULL, waiter);
+
+       if (likely(!ret)) {
+               /* acquired the lock */
+               if (build_ww_mutex() && ww_ctx) {
+                       if (!ww_ctx->is_wait_die)
+                               __ww_mutex_check_waiters(rtm, ww_ctx);
+                       ww_mutex_lock_acquired(ww, ww_ctx);
+               }
+       } else {
                __set_current_state(TASK_RUNNING);
                remove_waiter(lock, waiter);
                rt_mutex_handle_deadlock(ret, chwalk, waiter);
@@ -1383,14 +1596,17 @@ static int __sched __rt_mutex_slowlock(struct rt_mutex_base *lock,
 }
 
 static inline int __rt_mutex_slowlock_locked(struct rt_mutex_base *lock,
+                                            struct ww_acquire_ctx *ww_ctx,
                                             unsigned int state)
 {
        struct rt_mutex_waiter waiter;
        int ret;
 
        rt_mutex_init_waiter(&waiter);
+       waiter.ww_ctx = ww_ctx;
 
-       ret = __rt_mutex_slowlock(lock, state, RT_MUTEX_MIN_CHAINWALK, &waiter);
+       ret = __rt_mutex_slowlock(lock, ww_ctx, state, RT_MUTEX_MIN_CHAINWALK,
+                                 &waiter);
 
        debug_rt_mutex_free_waiter(&waiter);
        return ret;
@@ -1399,9 +1615,11 @@ static inline int __rt_mutex_slowlock_locked(struct rt_mutex_base *lock,
 /*
  * rt_mutex_slowlock - Locking slowpath invoked when fast path fails
  * @lock:      The rtmutex to block lock
+ * @ww_ctx:    WW mutex context pointer
  * @state:     The task state for sleeping
  */
 static int __sched rt_mutex_slowlock(struct rt_mutex_base *lock,
+                                    struct ww_acquire_ctx *ww_ctx,
                                     unsigned int state)
 {
        unsigned long flags;
@@ -1416,7 +1634,7 @@ static int __sched rt_mutex_slowlock(struct rt_mutex_base *lock,
         * irqsave/restore variants.
         */
        raw_spin_lock_irqsave(&lock->wait_lock, flags);
-       ret = __rt_mutex_slowlock_locked(lock, state);
+       ret = __rt_mutex_slowlock_locked(lock, ww_ctx, state);
        raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
 
        return ret;
@@ -1428,7 +1646,7 @@ static __always_inline int __rt_mutex_lock(struct rt_mutex_base *lock,
        if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current)))
                return 0;
 
-       return rt_mutex_slowlock(lock, state);
+       return rt_mutex_slowlock(lock, NULL, state);
 }
 #endif /* RT_MUTEX_BUILD_MUTEX */
 
@@ -1444,6 +1662,7 @@ static __always_inline int __rt_mutex_lock(struct rt_mutex_base *lock,
 static void __sched rtlock_slowlock_locked(struct rt_mutex_base *lock)
 {
        struct rt_mutex_waiter waiter;
+       struct task_struct *owner;
 
        lockdep_assert_held(&lock->wait_lock);
 
@@ -1455,16 +1674,21 @@ static void __sched rtlock_slowlock_locked(struct rt_mutex_base *lock)
        /* Save current state and set state to TASK_RTLOCK_WAIT */
        current_save_and_set_rtlock_wait_state();
 
-       task_blocks_on_rt_mutex(lock, &waiter, current, RT_MUTEX_MIN_CHAINWALK);
+       task_blocks_on_rt_mutex(lock, &waiter, current, NULL, RT_MUTEX_MIN_CHAINWALK);
 
        for (;;) {
                /* Try to acquire the lock again */
                if (try_to_take_rt_mutex(lock, current, &waiter))
                        break;
 
+               if (&waiter == rt_mutex_top_waiter(lock))
+                       owner = rt_mutex_owner(lock);
+               else
+                       owner = NULL;
                raw_spin_unlock_irq(&lock->wait_lock);
 
-               schedule_rtlock();
+               if (!owner || !rtmutex_spin_on_owner(lock, &waiter, owner))
+                       schedule_rtlock();
 
                raw_spin_lock_irq(&lock->wait_lock);
                set_current_state(TASK_RTLOCK_WAIT);