// SPDX-License-Identifier: GPL-2.0-only
/*
 * RT-Mutexes: simple blocking mutual exclusion locks with PI support
 *
 * started by Ingo Molnar and Thomas Gleixner.
 *
 * Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
 * Copyright (C) 2005-2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com>
 * Copyright (C) 2005 Kihon Technologies Inc., Steven Rostedt
 * Copyright (C) 2006 Esben Nielsen
 *
 * See Documentation/locking/rt-mutex-design.rst for details.
 */

#include <linux/sched.h>
#include <linux/sched/debug.h>
#include <linux/sched/deadline.h>
#include <linux/sched/signal.h>
#include <linux/sched/rt.h>
#include <linux/sched/wake_q.h>

#include "rtmutex_common.h"

/*
 * lock->owner state tracking:
 *
 * lock->owner holds the task_struct pointer of the owner. Bit 0
 * is used to keep track of the "lock has waiters" state.
 *
 * owner        bit0
 * NULL         0       lock is free (fast acquire possible)
 * NULL         1       lock is free and has waiters and the top waiter
 *                      is going to take the lock*
 * taskpointer  0       lock is held (fast release possible)
 * taskpointer  1       lock is held and has waiters**
 *
 * The fast atomic compare exchange based acquire and release is only
 * possible when bit 0 of lock->owner is 0.
 *
 * (*) It also can be a transitional state when grabbing the lock
 * while ->wait_lock is held. To prevent any fast path cmpxchg to the lock,
 * we need to set bit 0 before looking at the lock, and the owner may be
 * NULL in this small window, hence this can be a transitional state.
 *
 * (**) There is a small window when bit 0 is set but there are no
 * waiters. This can happen when grabbing the lock in the slow path.
 * To prevent a cmpxchg of the owner releasing the lock, we need to
 * set this bit before looking at the lock.
 */
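
/*
 * Illustrative sketch (not part of the kernel API): the owner field packs
 * the task pointer and the waiters bit into a single word, e.g.:
 *
 *      lock->owner = (struct task_struct *)
 *                      ((unsigned long)task | RT_MUTEX_HAS_WAITERS);
 *      has_waiters = (unsigned long)lock->owner & RT_MUTEX_HAS_WAITERS;
 *      owner       = (struct task_struct *)
 *                      ((unsigned long)lock->owner & ~RT_MUTEX_HAS_WAITERS);
 *
 * This works because task_struct is at least word aligned, so bit 0 of a
 * valid task pointer is always zero.
 */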

static __always_inline void
rt_mutex_set_owner(struct rt_mutex_base *lock, struct task_struct *owner)
{
        unsigned long val = (unsigned long)owner;

        if (rt_mutex_has_waiters(lock))
                val |= RT_MUTEX_HAS_WAITERS;

        WRITE_ONCE(lock->owner, (struct task_struct *)val);
}

static __always_inline void clear_rt_mutex_waiters(struct rt_mutex_base *lock)
{
        lock->owner = (struct task_struct *)
                        ((unsigned long)lock->owner & ~RT_MUTEX_HAS_WAITERS);
}

static __always_inline void fixup_rt_mutex_waiters(struct rt_mutex_base *lock)
{
        unsigned long owner, *p = (unsigned long *) &lock->owner;

        if (rt_mutex_has_waiters(lock))
                return;

        /*
         * The rbtree has no waiters enqueued, now make sure that the
         * lock->owner still has the waiters bit set, otherwise the
         * following can happen:
         *
         * CPU 0        CPU 1           CPU2
         * l->owner=T1
         *              rt_mutex_lock(l)
         *              lock(l->lock)
         *              l->owner = T1 | HAS_WAITERS;
         *              enqueue(T2)
         *              boost()
         *                unlock(l->lock)
         *              block()
         *
         *                              rt_mutex_lock(l)
         *                              lock(l->lock)
         *                              l->owner = T1 | HAS_WAITERS;
         *                              enqueue(T3)
         *                              boost()
         *                                unlock(l->lock)
         *                              block()
         *      signal(->T2)    signal(->T3)
         *      lock(l->lock)
         *      dequeue(T2)
         *      deboost()
         *        unlock(l->lock)
         *                              lock(l->lock)
         *                              dequeue(T3)
         *                               ==> wait list is empty
         *                              deboost()
         *                               unlock(l->lock)
         *      lock(l->lock)
         *      fixup_rt_mutex_waiters()
         *        if (wait_list_empty(l) {
         *          owner = l->owner & ~HAS_WAITERS;
         *          l->owner = owner
         *           ==> l->owner = T1
         *        }
         *                              lock(l->lock)
         * rt_mutex_unlock(l)   fixup_rt_mutex_waiters()
         *                        if (wait_list_empty(l) {
         *                          owner = l->owner & ~HAS_WAITERS;
         * cmpxchg(l->owner, T1, NULL)
         *  ===> Success (l->owner = NULL)
         *
         *                          l->owner = owner
         *                           ==> l->owner = T1
         *                        }
         *
         * With the check for the waiter bit in place T3 on CPU2 will not
         * overwrite. All tasks fiddling with the waiters bit are
         * serialized by l->lock, so nothing else can modify the waiters
         * bit. If the bit is set then nothing can change l->owner either
         * so the simple RMW is safe. The cmpxchg() will simply fail if it
         * happens in the middle of the RMW because the waiters bit is
         * still set.
         */
        owner = READ_ONCE(*p);
        if (owner & RT_MUTEX_HAS_WAITERS)
                WRITE_ONCE(*p, owner & ~RT_MUTEX_HAS_WAITERS);
}

/*
 * We can speed up the acquire/release, if there's no debugging state to be
 * set up.
 */
#ifndef CONFIG_DEBUG_RT_MUTEXES
static __always_inline bool rt_mutex_cmpxchg_acquire(struct rt_mutex_base *lock,
                                                     struct task_struct *old,
                                                     struct task_struct *new)
{
        return try_cmpxchg_acquire(&lock->owner, &old, new);
}

static __always_inline bool rt_mutex_cmpxchg_release(struct rt_mutex_base *lock,
                                                     struct task_struct *old,
                                                     struct task_struct *new)
{
        return try_cmpxchg_release(&lock->owner, &old, new);
}
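
/*
 * Illustrative fast path usage (mirroring __rt_mutex_trylock() and
 * __rt_mutex_unlock() below): an acquire can only succeed while
 * lock->owner is NULL with bit 0 clear, a release only while current
 * owns the lock and the waiters bit is clear:
 *
 *      if (rt_mutex_cmpxchg_acquire(lock, NULL, current))
 *              return;         // uncontended acquire
 *      // otherwise take the slow path
 */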

/*
 * Callers must hold the ->wait_lock -- which is the whole purpose as we force
 * all future threads that attempt to [Rmw] the lock to the slowpath. As such
 * relaxed semantics suffice.
 */
static __always_inline void mark_rt_mutex_waiters(struct rt_mutex_base *lock)
{
        unsigned long owner, *p = (unsigned long *) &lock->owner;

        do {
                owner = *p;
        } while (cmpxchg_relaxed(p, owner,
                                 owner | RT_MUTEX_HAS_WAITERS) != owner);
}

/*
 * Safe fastpath aware unlock:
 * 1) Clear the waiters bit
 * 2) Drop lock->wait_lock
 * 3) Try to unlock the lock with cmpxchg
 */
static __always_inline bool unlock_rt_mutex_safe(struct rt_mutex_base *lock,
                                                 unsigned long flags)
        __releases(lock->wait_lock)
{
        struct task_struct *owner = rt_mutex_owner(lock);

        clear_rt_mutex_waiters(lock);
        raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
        /*
         * If a new waiter comes in between the unlock and the cmpxchg
         * we have two situations:
         *
         * unlock(wait_lock);
         *                                      lock(wait_lock);
         * cmpxchg(p, owner, 0) == owner
         *                                      mark_rt_mutex_waiters(lock);
         *                                      acquire(lock);
         * or:
         *
         * unlock(wait_lock);
         *                                      lock(wait_lock);
         *                                      mark_rt_mutex_waiters(lock);
         *
         * cmpxchg(p, owner, 0) != owner
         *                                      enqueue_waiter();
         *                                      unlock(wait_lock);
         * lock(wait_lock);
         * wake waiter();
         * unlock(wait_lock);
         *                                      lock(wait_lock);
         *                                      acquire(lock);
         */
        return rt_mutex_cmpxchg_release(lock, owner, NULL);
}

#else
static __always_inline bool rt_mutex_cmpxchg_acquire(struct rt_mutex_base *lock,
                                                     struct task_struct *old,
                                                     struct task_struct *new)
{
        return false;
}

static __always_inline bool rt_mutex_cmpxchg_release(struct rt_mutex_base *lock,
                                                     struct task_struct *old,
                                                     struct task_struct *new)
{
        return false;
}

static __always_inline void mark_rt_mutex_waiters(struct rt_mutex_base *lock)
{
        lock->owner = (struct task_struct *)
                        ((unsigned long)lock->owner | RT_MUTEX_HAS_WAITERS);
}

/*
 * Simple slow path only version: lock->owner is protected by lock->wait_lock.
 */
static __always_inline bool unlock_rt_mutex_safe(struct rt_mutex_base *lock,
                                                 unsigned long flags)
        __releases(lock->wait_lock)
{
        lock->owner = NULL;
        raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
        return true;
}
#endif

static __always_inline int __waiter_prio(struct task_struct *task)
{
        int prio = task->prio;

        if (!rt_prio(prio))
                return DEFAULT_PRIO;

        return prio;
}

static __always_inline void
waiter_update_prio(struct rt_mutex_waiter *waiter, struct task_struct *task)
{
        waiter->prio = __waiter_prio(task);
        waiter->deadline = task->dl.deadline;
}

/*
 * Only use with rt_mutex_waiter_{less,equal}()
 */
#define task_to_waiter(p)       \
        &(struct rt_mutex_waiter){ .prio = __waiter_prio(p), .deadline = (p)->dl.deadline }
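
/*
 * Illustrative use of task_to_waiter(): build a stack-local waiter view
 * of a task for a one-shot comparison, e.g. to check whether @task
 * outranks the current top waiter of @lock:
 *
 *      if (rt_mutex_waiter_less(task_to_waiter(task),
 *                               rt_mutex_top_waiter(lock)))
 *              // @task has higher priority than the top waiter
 *
 * The compound literal only lives for the duration of the full
 * expression, hence the restriction to the two comparison helpers.
 */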

static __always_inline int rt_mutex_waiter_less(struct rt_mutex_waiter *left,
                                                struct rt_mutex_waiter *right)
{
        if (left->prio < right->prio)
                return 1;

        /*
         * If both waiters have dl_prio(), we check the deadlines of the
         * associated tasks.
         * If left waiter has a dl_prio(), and we didn't return 1 above,
         * then right waiter has a dl_prio() too.
         */
        if (dl_prio(left->prio))
                return dl_time_before(left->deadline, right->deadline);

        return 0;
}

static __always_inline int rt_mutex_waiter_equal(struct rt_mutex_waiter *left,
                                                 struct rt_mutex_waiter *right)
{
        if (left->prio != right->prio)
                return 0;

        /*
         * If both waiters have dl_prio(), we check the deadlines of the
         * associated tasks.
         * If left waiter has a dl_prio(), and we didn't return 0 above,
         * then right waiter has a dl_prio() too.
         */
        if (dl_prio(left->prio))
                return left->deadline == right->deadline;

        return 1;
}
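
/*
 * Worked example (kernel view, lower number == higher priority): a
 * SCHED_FIFO rtprio 10 waiter has kernel prio 89 and a SCHED_FIFO
 * rtprio 5 waiter has kernel prio 94, so the rtprio 10 waiter is
 * "less" and sorts first in the waiter tree. Two SCHED_DEADLINE
 * waiters both have prio -1, so the tie is broken by dl_time_before()
 * on their absolute deadlines: the earlier deadline wins.
 */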

#define __node_2_waiter(node) \
        rb_entry((node), struct rt_mutex_waiter, tree_entry)

static __always_inline bool __waiter_less(struct rb_node *a, const struct rb_node *b)
{
        return rt_mutex_waiter_less(__node_2_waiter(a), __node_2_waiter(b));
}

static __always_inline void
rt_mutex_enqueue(struct rt_mutex_base *lock, struct rt_mutex_waiter *waiter)
{
        rb_add_cached(&waiter->tree_entry, &lock->waiters, __waiter_less);
}

static __always_inline void
rt_mutex_dequeue(struct rt_mutex_base *lock, struct rt_mutex_waiter *waiter)
{
        if (RB_EMPTY_NODE(&waiter->tree_entry))
                return;

        rb_erase_cached(&waiter->tree_entry, &lock->waiters);
        RB_CLEAR_NODE(&waiter->tree_entry);
}

#define __node_2_pi_waiter(node) \
        rb_entry((node), struct rt_mutex_waiter, pi_tree_entry)

static __always_inline bool
__pi_waiter_less(struct rb_node *a, const struct rb_node *b)
{
        return rt_mutex_waiter_less(__node_2_pi_waiter(a), __node_2_pi_waiter(b));
}

static __always_inline void
rt_mutex_enqueue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter)
{
        rb_add_cached(&waiter->pi_tree_entry, &task->pi_waiters, __pi_waiter_less);
}

static __always_inline void
rt_mutex_dequeue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter)
{
        if (RB_EMPTY_NODE(&waiter->pi_tree_entry))
                return;

        rb_erase_cached(&waiter->pi_tree_entry, &task->pi_waiters);
        RB_CLEAR_NODE(&waiter->pi_tree_entry);
}

static __always_inline void rt_mutex_adjust_prio(struct task_struct *p)
{
        struct task_struct *pi_task = NULL;

        lockdep_assert_held(&p->pi_lock);

        if (task_has_pi_waiters(p))
                pi_task = task_top_pi_waiter(p)->task;

        rt_mutex_setprio(p, pi_task);
}
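
/*
 * Illustrative (de)boost sequence: when a kernel prio 5 task blocks on
 * a lock owned by a kernel prio 20 task, the blocking path enqueues the
 * new top waiter into the owner's pi_waiters tree and this function
 * then feeds that waiter's task to rt_mutex_setprio(), boosting the
 * owner to prio 5. Once the waiter is removed again, pi_task becomes
 * NULL (or the next best waiter) and the same call deboosts the owner.
 */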

/* RT mutex specific wake_q wrappers */
static __always_inline void rt_mutex_wake_q_add(struct rt_wake_q_head *wqh,
                                                struct rt_mutex_waiter *w)
{
        if (IS_ENABLED(CONFIG_PREEMPT_RT) && w->wake_state != TASK_NORMAL) {
                if (IS_ENABLED(CONFIG_PROVE_LOCKING))
                        WARN_ON_ONCE(wqh->rtlock_task);
                get_task_struct(w->task);
                wqh->rtlock_task = w->task;
        } else {
                wake_q_add(&wqh->head, w->task);
        }
}

static __always_inline void rt_mutex_wake_up_q(struct rt_wake_q_head *wqh)
{
        if (IS_ENABLED(CONFIG_PREEMPT_RT) && wqh->rtlock_task) {
                wake_up_state(wqh->rtlock_task, TASK_RTLOCK_WAIT);
                put_task_struct(wqh->rtlock_task);
                wqh->rtlock_task = NULL;
        }

        if (!wake_q_empty(&wqh->head))
                wake_up_q(&wqh->head);

        /* Pairs with preempt_disable() in mark_wakeup_next_waiter() */
        preempt_enable();
}

/*
 * Deadlock detection is conditional:
 *
 * If CONFIG_DEBUG_RT_MUTEXES=n, deadlock detection is only conducted
 * if the detect argument is == RT_MUTEX_FULL_CHAINWALK.
 *
 * If CONFIG_DEBUG_RT_MUTEXES=y, deadlock detection is always
 * conducted independent of the detect argument.
 *
 * If the waiter argument is NULL this indicates the deboost path and
 * deadlock detection is disabled independent of the detect argument
 * and the config settings.
 */
static __always_inline bool
rt_mutex_cond_detect_deadlock(struct rt_mutex_waiter *waiter,
                              enum rtmutex_chainwalk chwalk)
{
        if (IS_ENABLED(CONFIG_DEBUG_RT_MUTEXES))
                return waiter != NULL;
        return chwalk == RT_MUTEX_FULL_CHAINWALK;
}
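
/*
 * Decision table for the above (w == waiter argument):
 *
 *      DEBUG_RT_MUTEXES   w       chwalk           deadlock detection
 *      y                  !NULL   (any)            yes
 *      y                  NULL    (any)            no
 *      n                  (any)   FULL_CHAINWALK   yes
 *      n                  (any)   MIN_CHAINWALK    no
 */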

static __always_inline struct rt_mutex_base *task_blocked_on_lock(struct task_struct *p)
{
        return p->pi_blocked_on ? p->pi_blocked_on->lock : NULL;
}

/*
 * Adjust the priority chain. Also used for deadlock detection.
 * Decreases task's usage by one - may thus free the task.
 *
 * @task:	the task owning the mutex (owner) for which a chain walk is
 *		probably needed
 * @chwalk:	do we have to carry out deadlock detection?
 * @orig_lock:	the mutex (can be NULL if we are walking the chain to recheck
 *		things for a task that has just got its priority adjusted, and
 *		is waiting on a mutex)
 * @next_lock:	the mutex on which the owner of @orig_lock was blocked before
 *		we dropped its pi_lock. Is never dereferenced, only used for
 *		comparison to detect lock chain changes.
 * @orig_waiter: rt_mutex_waiter struct for the task that has just donated
 *		its priority to the mutex owner (can be NULL in the case
 *		depicted above or if the top waiter is gone away and we are
 *		actually deboosting the owner)
 * @top_task:	the current top waiter
 *
 * Returns 0 or -EDEADLK.
 *
 * Chain walk basics and protection scope
 *
 * [R] refcount on task
 * [P] task->pi_lock held
 * [L] rtmutex->wait_lock held
 *
 * Step Description                             Protected by
 *      function arguments:
 *      @task                                   [R]
 *      @orig_lock if != NULL                   @top_task is blocked on it
 *      @next_lock                              Unprotected. Cannot be
 *                                              dereferenced. Only used for
 *                                              comparison.
 *      @orig_waiter if != NULL                 @top_task is blocked on it
 *      @top_task                               current, or in case of proxy
 *                                              locking protected by calling
 *                                              code
 *
 *      again:
 *        loop_sanity_check();
 *      retry:
 * [1]    lock(task->pi_lock);                  [R] acquire [P]
 * [2]    waiter = task->pi_blocked_on;         [P]
 * [3]    check_exit_conditions_1();            [P]
 * [4]    lock = waiter->lock;                  [P]
 * [5]    if (!try_lock(lock->wait_lock)) {     [P] try to acquire [L]
 *          unlock(task->pi_lock);              release [P]
 *          goto retry;
 *        }
 * [6]    check_exit_conditions_2();            [P] + [L]
 * [7]    requeue_lock_waiter(lock, waiter);    [P] + [L]
 * [8]    unlock(task->pi_lock);                release [P]
 *        put_task_struct(task);                release [R]
 * [9]    check_exit_conditions_3();            [L]
 * [10]   task = owner(lock);                   [L]
 *        get_task_struct(task);                [L] acquire [R]
 *        lock(task->pi_lock);                  [L] acquire [P]
 * [11]   requeue_pi_waiter(tsk, waiters(lock));[P] + [L]
 * [12]   check_exit_conditions_4();            [P] + [L]
 * [13]   unlock(task->pi_lock);                release [P]
 *        unlock(lock->wait_lock);              release [L]
 *        goto again;
 */
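
/*
 * Illustrative chain for the walk above: T1 blocks on L1, L1 is owned
 * by T2, and T2 itself is blocked on L2, which is owned by T3. After T1
 * boosted T2, the walk is invoked on T2: T2's waiter is requeued in
 * L2's waiter tree, T3's priority is adjusted accordingly, and the walk
 * continues with T3 until it finds a lock without an owner or an owner
 * which is not blocked itself. If the walk runs into @orig_lock again,
 * the chain is circular and -EDEADLK is returned.
 */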

static int __sched rt_mutex_adjust_prio_chain(struct task_struct *task,
                                              enum rtmutex_chainwalk chwalk,
                                              struct rt_mutex_base *orig_lock,
                                              struct rt_mutex_base *next_lock,
                                              struct rt_mutex_waiter *orig_waiter,
                                              struct task_struct *top_task)
{
        struct rt_mutex_waiter *waiter, *top_waiter = orig_waiter;
        struct rt_mutex_waiter *prerequeue_top_waiter;
        int ret = 0, depth = 0;
        struct rt_mutex_base *lock;
        bool detect_deadlock;
        bool requeue = true;

        detect_deadlock = rt_mutex_cond_detect_deadlock(orig_waiter, chwalk);

        /*
         * The (de)boosting is a step by step approach with a lot of
         * pitfalls. We want this to be preemptible and we want to hold
         * a maximum of two locks per step. So we have to check
         * carefully whether things change under us.
         */
 again:
        /*
         * We limit the lock chain length for each invocation.
         */
        if (++depth > max_lock_depth) {
                static int prev_max;

                /*
                 * Print this only once. If the admin changes the limit,
                 * print a new message when reaching the limit again.
                 */
                if (prev_max != max_lock_depth) {
                        prev_max = max_lock_depth;
                        printk(KERN_WARNING "Maximum lock depth %d reached "
                               "task: %s (%d)\n", max_lock_depth,
                               top_task->comm, task_pid_nr(top_task));
                }
                put_task_struct(task);

                return -EDEADLK;
        }

        /*
         * We are fully preemptible here and only hold the refcount on
         * @task. So everything can have changed under us since the
         * caller or our own code below (goto retry/again) dropped all
         * locks.
         */
 retry:
        /*
         * [1] Task cannot go away as we did a get_task() before !
         */
        raw_spin_lock_irq(&task->pi_lock);

        /*
         * [2] Get the waiter on which @task is blocked on.
         */
        waiter = task->pi_blocked_on;

        /*
         * [3] check_exit_conditions_1() protected by task->pi_lock.
         */

        /*
         * Check whether the end of the boosting chain has been
         * reached or the state of the chain has changed while we
         * dropped the locks.
         */
        if (!waiter)
                goto out_unlock_pi;

        /*
         * Check the orig_waiter state. After we dropped the locks,
         * the previous owner of the lock might have released the lock.
         */
        if (orig_waiter && !rt_mutex_owner(orig_lock))
                goto out_unlock_pi;

        /*
         * We dropped all locks after taking a refcount on @task, so
         * the task might have moved on in the lock chain or even left
         * the chain completely and blocks now on an unrelated lock or
         * on @orig_lock.
         *
         * We stored the lock on which @task was blocked in @next_lock,
         * so we can detect the chain change.
         */
        if (next_lock != waiter->lock)
                goto out_unlock_pi;

        /*
         * Drop out, when the task has no waiters. Note,
         * top_waiter can be NULL, when we are in the deboosting
         * mode!
         */
        if (top_waiter) {
                if (!task_has_pi_waiters(task))
                        goto out_unlock_pi;
                /*
                 * If deadlock detection is off, we stop here if we
                 * are not the top pi waiter of the task. If deadlock
                 * detection is enabled we continue, but stop the
                 * requeueing in the chain walk.
                 */
                if (top_waiter != task_top_pi_waiter(task)) {
                        if (!detect_deadlock)
                                goto out_unlock_pi;
                        else
                                requeue = false;
                }
        }

        /*
         * If the waiter priority is the same as the task priority
         * then there is no further priority adjustment necessary. If
         * deadlock detection is off, we stop the chain walk. If it's
         * enabled we continue, but stop the requeueing in the chain
         * walk.
         */
        if (rt_mutex_waiter_equal(waiter, task_to_waiter(task))) {
                if (!detect_deadlock)
                        goto out_unlock_pi;
                else
                        requeue = false;
        }

        /*
         * [4] Get the next lock
         */
        lock = waiter->lock;
        /*
         * [5] We need to trylock here as we are holding task->pi_lock,
         * which is the reverse lock order versus the other rtmutex
         * operations.
         */
        if (!raw_spin_trylock(&lock->wait_lock)) {
                raw_spin_unlock_irq(&task->pi_lock);
                cpu_relax();
                goto retry;
        }

        /*
         * [6] check_exit_conditions_2() protected by task->pi_lock and
         * lock->wait_lock.
         *
         * Deadlock detection. If the lock is the same as the original
         * lock which caused us to walk the lock chain or if the
         * current lock is owned by the task which initiated the chain
         * walk, we detected a deadlock.
         */
        if (lock == orig_lock || rt_mutex_owner(lock) == top_task) {
                ret = -EDEADLK;
                raw_spin_unlock(&lock->wait_lock);
                goto out_unlock_pi;
        }

        /*
         * If we just follow the lock chain for deadlock detection, no
         * need to do all the requeue operations. To avoid a truckload
         * of conditionals around the various places below, just do the
         * minimum chain walk checks.
         */
        if (!requeue) {
                /*
                 * No requeue[7] here. Just release @task [8]
                 */
                raw_spin_unlock(&task->pi_lock);
                put_task_struct(task);

                /*
                 * [9] check_exit_conditions_3 protected by lock->wait_lock.
                 * If there is no owner of the lock, end of chain.
                 */
                if (!rt_mutex_owner(lock)) {
                        raw_spin_unlock_irq(&lock->wait_lock);
                        return 0;
                }

                /* [10] Grab the next task, i.e. owner of @lock */
                task = get_task_struct(rt_mutex_owner(lock));
                raw_spin_lock(&task->pi_lock);

                /*
                 * No requeue [11] here. We just do deadlock detection.
                 *
                 * [12] Store whether owner is blocked
                 * itself. Decision is made after dropping the locks
                 */
                next_lock = task_blocked_on_lock(task);
                /*
                 * Get the top waiter for the next iteration
                 */
                top_waiter = rt_mutex_top_waiter(lock);

                /* [13] Drop locks */
                raw_spin_unlock(&task->pi_lock);
                raw_spin_unlock_irq(&lock->wait_lock);

                /* If owner is not blocked, end of chain. */
                if (!next_lock)
                        goto out_put_task;
                goto again;
        }

        /*
         * Store the current top waiter before doing the requeue
         * operation on @lock. We need it for the boost/deboost
         * decision below.
         */
        prerequeue_top_waiter = rt_mutex_top_waiter(lock);

        /* [7] Requeue the waiter in the lock waiter tree. */
        rt_mutex_dequeue(lock, waiter);

        /*
         * Update the waiter prio fields now that we're dequeued.
         *
         * These values can have changed through either:
         *
         *   sys_sched_set_scheduler() / sys_sched_setattr()
         *
         * or
         *
         *   DL CBS enforcement advancing the effective deadline.
         *
         * Even though pi_waiters also uses these fields, and that tree is only
         * updated in [11], we can do this here, since we hold [L], which
         * serializes all pi_waiters access and rb_erase() does not care about
         * the values of the node being removed.
         */
        waiter_update_prio(waiter, task);

        rt_mutex_enqueue(lock, waiter);

        /* [8] Release the task */
        raw_spin_unlock(&task->pi_lock);
        put_task_struct(task);

        /*
         * [9] check_exit_conditions_3 protected by lock->wait_lock.
         *
         * We must abort the chain walk if there is no lock owner even
         * in the dead lock detection case, as we have nothing to
         * follow here. This is the end of the chain we are walking.
         */
        if (!rt_mutex_owner(lock)) {
                /*
                 * If the requeue [7] above changed the top waiter,
                 * then we need to wake the new top waiter up to try
                 * to get the lock.
                 */
                if (prerequeue_top_waiter != rt_mutex_top_waiter(lock))
                        wake_up_state(waiter->task, waiter->wake_state);
                raw_spin_unlock_irq(&lock->wait_lock);
                return 0;
        }

        /* [10] Grab the next task, i.e. the owner of @lock */
        task = get_task_struct(rt_mutex_owner(lock));
        raw_spin_lock(&task->pi_lock);

        /* [11] requeue the pi waiters if necessary */
        if (waiter == rt_mutex_top_waiter(lock)) {
                /*
                 * The waiter became the new top (highest priority)
                 * waiter on the lock. Replace the previous top waiter
                 * in the owner tasks pi waiters tree with this waiter
                 * and adjust the priority of the owner.
                 */
                rt_mutex_dequeue_pi(task, prerequeue_top_waiter);
                rt_mutex_enqueue_pi(task, waiter);
                rt_mutex_adjust_prio(task);

        } else if (prerequeue_top_waiter == waiter) {
                /*
                 * The waiter was the top waiter on the lock, but is
                 * no longer the top priority waiter. Replace waiter in
                 * the owner tasks pi waiters tree with the new top
                 * (highest priority) waiter and adjust the priority
                 * of the owner.
                 *
                 * The new top waiter is stored in @waiter so that
                 * @waiter == @top_waiter evaluates to true below and
                 * we continue to deboost the rest of the chain.
                 */
                rt_mutex_dequeue_pi(task, waiter);
                waiter = rt_mutex_top_waiter(lock);
                rt_mutex_enqueue_pi(task, waiter);
                rt_mutex_adjust_prio(task);
        } else {
                /*
                 * Nothing changed. No need to do any priority
                 * adjustment.
                 */
        }

        /*
         * [12] check_exit_conditions_4() protected by task->pi_lock
         * and lock->wait_lock. The actual decisions are made after we
         * dropped the locks.
         *
         * Check whether the task which owns the current lock is pi
         * blocked itself. If yes we store a pointer to the lock for
         * the lock chain change detection above. After we dropped
         * task->pi_lock next_lock cannot be dereferenced anymore.
         */
        next_lock = task_blocked_on_lock(task);
        /*
         * Store the top waiter of @lock for the end of chain walk
         * decision below.
         */
        top_waiter = rt_mutex_top_waiter(lock);

        /* [13] Drop the locks */
        raw_spin_unlock(&task->pi_lock);
        raw_spin_unlock_irq(&lock->wait_lock);

        /*
         * Make the actual exit decisions [12], based on the stored
         * values.
         *
         * We reached the end of the lock chain. Stop right here. No
         * point to go back just to figure that out.
         */
        if (!next_lock)
                goto out_put_task;

        /*
         * If the current waiter is not the top waiter on the lock,
         * then we can stop the chain walk here if we are not in full
         * deadlock detection mode.
         */
        if (!detect_deadlock && waiter != top_waiter)
                goto out_put_task;

        goto again;

 out_unlock_pi:
        raw_spin_unlock_irq(&task->pi_lock);
 out_put_task:
        put_task_struct(task);

        return ret;
}

/*
 * Try to take an rt-mutex
 *
 * Must be called with lock->wait_lock held and interrupts disabled
 *
 * @lock:   The lock to be acquired.
 * @task:   The task which wants to acquire the lock
 * @waiter: The waiter that is queued to the lock's wait tree if the
 *          callsite called task_blocked_on_lock(), otherwise NULL
 */
static int __sched
try_to_take_rt_mutex(struct rt_mutex_base *lock, struct task_struct *task,
                     struct rt_mutex_waiter *waiter)
{
        lockdep_assert_held(&lock->wait_lock);

        /*
         * Before testing whether we can acquire @lock, we set the
         * RT_MUTEX_HAS_WAITERS bit in @lock->owner. This forces all
         * other tasks which try to modify @lock into the slow path
         * and they serialize on @lock->wait_lock.
         *
         * The RT_MUTEX_HAS_WAITERS bit can have a transitional state
         * as explained at the top of this file if and only if:
         *
         * - There is a lock owner. The caller must fixup the
         *   transient state if it does a trylock or leaves the lock
         *   function due to a signal or timeout.
         *
         * - @task acquires the lock and there are no other
         *   waiters. This is undone in rt_mutex_set_owner(@task) at
         *   the end of this function.
         */
        mark_rt_mutex_waiters(lock);

        /*
         * If @lock has an owner, give up.
         */
        if (rt_mutex_owner(lock))
                return 0;

        /*
         * If @waiter != NULL, @task has already enqueued the waiter
         * into @lock waiter tree. If @waiter == NULL then this is a
         * trylock attempt.
         */
        if (waiter) {
                /*
                 * If waiter is not the highest priority waiter of
                 * @lock, give up.
                 */
                if (waiter != rt_mutex_top_waiter(lock))
                        return 0;

                /*
                 * We can acquire the lock. Remove the waiter from the
                 * lock waiters tree.
                 */
                rt_mutex_dequeue(lock, waiter);

        } else {
                /*
                 * If the lock has waiters already we check whether @task is
                 * eligible to take over the lock.
                 *
                 * If there are no other waiters, @task can acquire
                 * the lock. @task->pi_blocked_on is NULL, so it does
                 * not need to be dequeued.
                 */
                if (rt_mutex_has_waiters(lock)) {
                        /*
                         * If @task->prio is greater than or equal to
                         * the top waiter priority (kernel view),
                         * @task lost.
                         */
                        if (!rt_mutex_waiter_less(task_to_waiter(task),
                                                  rt_mutex_top_waiter(lock)))
                                return 0;

                        /*
                         * The current top waiter stays enqueued. We
                         * don't have to change anything in the lock
                         * waiters order.
                         */
                } else {
                        /*
                         * No waiters. Take the lock without the
                         * pi_lock dance. @task->pi_blocked_on is NULL
                         * and we have no waiters to enqueue in @task
                         * pi waiters tree.
                         */
                        goto takeit;
                }
        }

        /*
         * Clear @task->pi_blocked_on. Requires protection by
         * @task->pi_lock. Redundant operation for the @waiter == NULL
         * case, but conditionals are more expensive than a redundant
         * store.
         */
        raw_spin_lock(&task->pi_lock);
        task->pi_blocked_on = NULL;
        /*
         * Finish the lock acquisition. @task is the new owner. If
         * other waiters exist we have to insert the highest priority
         * waiter into @task->pi_waiters tree.
         */
        if (rt_mutex_has_waiters(lock))
                rt_mutex_enqueue_pi(task, rt_mutex_top_waiter(lock));
        raw_spin_unlock(&task->pi_lock);

takeit:
        /*
         * This either preserves the RT_MUTEX_HAS_WAITERS bit if there
         * are still waiters or clears it.
         */
        rt_mutex_set_owner(lock, task);

        return 1;
}
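
/*
 * Illustrative trylock outcomes (@waiter == NULL) for a lock with no
 * owner but an enqueued top waiter of kernel prio 89: a prio 94 caller
 * fails the rt_mutex_waiter_less() check above and leaves the waiter
 * bit set for the top waiter, while a prio 50 caller takes the lock
 * over; the top waiter then simply stays enqueued on the lock and is
 * inserted into the new owner's pi_waiters tree.
 */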

/*
 * Task blocks on lock.
 *
 * Prepare waiter and propagate pi chain
 *
 * This must be called with lock->wait_lock held and interrupts disabled
 */
static int __sched task_blocks_on_rt_mutex(struct rt_mutex_base *lock,
                                           struct rt_mutex_waiter *waiter,
                                           struct task_struct *task,
                                           enum rtmutex_chainwalk chwalk)
{
        struct task_struct *owner = rt_mutex_owner(lock);
        struct rt_mutex_waiter *top_waiter = waiter;
        struct rt_mutex_base *next_lock;
        int chain_walk = 0, res;

        lockdep_assert_held(&lock->wait_lock);

        /*
         * Early deadlock detection. We really don't want the task to
         * enqueue on itself just to untangle the mess later. It's not
         * only an optimization. We drop the locks, so another waiter
         * can come in before the chain walk detects the deadlock. So
         * the other will detect the deadlock and return -EDEADLOCK,
         * which is wrong, as the other waiter is not in a deadlock
         * situation.
         */
        if (owner == task)
                return -EDEADLK;

        raw_spin_lock(&task->pi_lock);
        waiter->task = task;
        waiter->lock = lock;
        waiter_update_prio(waiter, task);

        /* Get the top priority waiter on the lock */
        if (rt_mutex_has_waiters(lock))
                top_waiter = rt_mutex_top_waiter(lock);
        rt_mutex_enqueue(lock, waiter);

        task->pi_blocked_on = waiter;

        raw_spin_unlock(&task->pi_lock);

        if (!owner)
                return 0;

        raw_spin_lock(&owner->pi_lock);
        if (waiter == rt_mutex_top_waiter(lock)) {
                rt_mutex_dequeue_pi(owner, top_waiter);
                rt_mutex_enqueue_pi(owner, waiter);

                rt_mutex_adjust_prio(owner);
                if (owner->pi_blocked_on)
                        chain_walk = 1;
        } else if (rt_mutex_cond_detect_deadlock(waiter, chwalk)) {
                chain_walk = 1;
        }

        /* Store the lock on which owner is blocked or NULL */
        next_lock = task_blocked_on_lock(owner);

        raw_spin_unlock(&owner->pi_lock);
        /*
         * Even if full deadlock detection is on, if the owner is not
         * blocked itself, we can avoid finding this out in the chain
         * walk.
         */
        if (!chain_walk || !next_lock)
                return 0;

        /*
         * The owner can't disappear while holding a lock,
         * so the owner struct is protected by wait_lock.
         * Gets dropped in rt_mutex_adjust_prio_chain()!
         */
        get_task_struct(owner);

        raw_spin_unlock_irq(&lock->wait_lock);

        res = rt_mutex_adjust_prio_chain(owner, chwalk, lock,
                                         next_lock, waiter, task);

        raw_spin_lock_irq(&lock->wait_lock);

        return res;
}

/*
 * Remove the top waiter from the current task's pi waiter tree and
 * queue it up.
 *
 * Called with lock->wait_lock held and interrupts disabled.
 */
static void __sched mark_wakeup_next_waiter(struct rt_wake_q_head *wqh,
                                            struct rt_mutex_base *lock)
{
        struct rt_mutex_waiter *waiter;

        raw_spin_lock(&current->pi_lock);

        waiter = rt_mutex_top_waiter(lock);

        /*
         * Remove it from current->pi_waiters and deboost.
         *
         * We must in fact deboost here in order to ensure we call
         * rt_mutex_setprio() to update p->pi_top_task before the
         * task unblocks.
         */
        rt_mutex_dequeue_pi(current, waiter);
        rt_mutex_adjust_prio(current);

        /*
         * As we are waking up the top waiter, and the waiter stays
         * queued on the lock until it gets the lock, this lock
         * obviously has waiters. Just set the bit here and this has
         * the added benefit of forcing all new tasks into the
         * slow path making sure no task of lower priority than
         * the top waiter can steal this lock.
         */
        lock->owner = (void *) RT_MUTEX_HAS_WAITERS;

        /*
         * We deboosted before waking the top waiter task such that we don't
         * run two tasks with the 'same' priority (and ensure the
         * p->pi_top_task pointer points to a blocked task). This however can
         * lead to priority inversion if we would get preempted after the
         * deboost but before waking our donor task, hence the preempt_disable()
         * before unlock.
         *
         * Pairs with preempt_enable() in rt_mutex_wake_up_q();
         */
        preempt_disable();
        rt_mutex_wake_q_add(wqh, waiter);
        raw_spin_unlock(&current->pi_lock);
}

static int __sched __rt_mutex_slowtrylock(struct rt_mutex_base *lock)
{
        int ret = try_to_take_rt_mutex(lock, current, NULL);

        /*
         * try_to_take_rt_mutex() sets the lock waiters bit
         * unconditionally. Clean this up.
         */
        fixup_rt_mutex_waiters(lock);

        return ret;
}

/*
 * Slow path try-lock function:
 */
static int __sched rt_mutex_slowtrylock(struct rt_mutex_base *lock)
{
        unsigned long flags;
        int ret;

        /*
         * If the lock already has an owner we fail to get the lock.
         * This can be done without taking the @lock->wait_lock as
         * it is only being read, and this is a trylock anyway.
         */
        if (rt_mutex_owner(lock))
                return 0;

        /*
         * The mutex has currently no owner. Lock the wait lock and try to
         * acquire the lock. We use irqsave here to support early boot calls.
         */
        raw_spin_lock_irqsave(&lock->wait_lock, flags);

        ret = __rt_mutex_slowtrylock(lock);

        raw_spin_unlock_irqrestore(&lock->wait_lock, flags);

        return ret;
}

static __always_inline int __rt_mutex_trylock(struct rt_mutex_base *lock)
{
        if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current)))
                return 1;

        return rt_mutex_slowtrylock(lock);
}

/*
 * Slow path to release a rt-mutex.
 */
static void __sched rt_mutex_slowunlock(struct rt_mutex_base *lock)
{
        DEFINE_RT_WAKE_Q(wqh);
        unsigned long flags;

        /* irqsave required to support early boot calls */
        raw_spin_lock_irqsave(&lock->wait_lock, flags);

        debug_rt_mutex_unlock(lock);

        /*
         * We must be careful here if the fast path is enabled. If we
         * have no waiters queued we cannot set owner to NULL here
         * because of:
         *
         * foo->lock->owner = NULL;
         *                      rtmutex_lock(foo->lock);   <- fast path
         *                      free = atomic_dec_and_test(foo->refcnt);
         *                      rtmutex_unlock(foo->lock); <- fast path
         *                      if (free)
         *                              kfree(foo);
         * raw_spin_unlock(foo->lock->wait_lock);
         *
         * So for the fastpath enabled kernel:
         *
         * Nothing can set the waiters bit as long as we hold
         * lock->wait_lock. So we do the following sequence:
         *
         *      owner = rt_mutex_owner(lock);
         *      clear_rt_mutex_waiters(lock);
         *      raw_spin_unlock(&lock->wait_lock);
         *      if (cmpxchg(&lock->owner, owner, 0) == owner)
         *              return;
         *      goto retry;
         *
         * The fastpath disabled variant is simple as all access to
         * lock->owner is serialized by lock->wait_lock:
         *
         *      lock->owner = NULL;
         *      raw_spin_unlock(&lock->wait_lock);
         */
        while (!rt_mutex_has_waiters(lock)) {
                /* Drops lock->wait_lock ! */
                if (unlock_rt_mutex_safe(lock, flags) == true)
                        return;
                /* Relock the rtmutex and try again */
                raw_spin_lock_irqsave(&lock->wait_lock, flags);
        }

        /*
         * The wakeup next waiter path does not suffer from the above
         * race. See the comments there.
         *
         * Queue the next waiter for wakeup once we release the wait_lock.
         */
        mark_wakeup_next_waiter(&wqh, lock);
        raw_spin_unlock_irqrestore(&lock->wait_lock, flags);

        rt_mutex_wake_up_q(&wqh);
}

static __always_inline void __rt_mutex_unlock(struct rt_mutex_base *lock)
{
        if (likely(rt_mutex_cmpxchg_release(lock, current, NULL)))
                return;

        rt_mutex_slowunlock(lock);
}
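
/*
 * Note how the paths above pair up: a fully uncontended trylock/unlock
 * cycle never touches wait_lock at all,
 *
 *      __rt_mutex_trylock() -> cmpxchg(NULL -> current) succeeds
 *      __rt_mutex_unlock()  -> cmpxchg(current -> NULL) succeeds
 *
 * while a waiter setting RT_MUTEX_HAS_WAITERS makes both cmpxchg()s
 * fail and forces lock and unlock into the wait_lock protected slow
 * paths.
 */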

#ifdef RT_MUTEX_BUILD_MUTEX
/*
 * Functions required for:
 *      - rtmutex, futex on all kernels
 *      - mutex and rwsem substitutions on RT kernels
 */

/*
 * Remove a waiter from a lock and give up
 *
 * Must be called with lock->wait_lock held and interrupts disabled. It must
 * have just failed to try_to_take_rt_mutex().
 */
static void __sched remove_waiter(struct rt_mutex_base *lock,
                                  struct rt_mutex_waiter *waiter)
{
        bool is_top_waiter = (waiter == rt_mutex_top_waiter(lock));
        struct task_struct *owner = rt_mutex_owner(lock);
        struct rt_mutex_base *next_lock;

        lockdep_assert_held(&lock->wait_lock);

        raw_spin_lock(&current->pi_lock);
        rt_mutex_dequeue(lock, waiter);
        current->pi_blocked_on = NULL;
        raw_spin_unlock(&current->pi_lock);

        /*
         * Only update priority if the waiter was the highest priority
         * waiter of the lock and there is an owner to update.
         */
        if (!owner || !is_top_waiter)
                return;

        raw_spin_lock(&owner->pi_lock);

        rt_mutex_dequeue_pi(owner, waiter);

        if (rt_mutex_has_waiters(lock))
                rt_mutex_enqueue_pi(owner, rt_mutex_top_waiter(lock));

        rt_mutex_adjust_prio(owner);

        /* Store the lock on which owner is blocked or NULL */
        next_lock = task_blocked_on_lock(owner);

        raw_spin_unlock(&owner->pi_lock);

        /*
         * Don't walk the chain if the owner task is not blocked
         * itself.
         */
        if (!next_lock)
                return;

        /* gets dropped in rt_mutex_adjust_prio_chain()! */
        get_task_struct(owner);

        raw_spin_unlock_irq(&lock->wait_lock);

        rt_mutex_adjust_prio_chain(owner, RT_MUTEX_MIN_CHAINWALK, lock,
                                   next_lock, NULL, current);

        raw_spin_lock_irq(&lock->wait_lock);
}

/**
 * rt_mutex_slowlock_block() - Perform the wait-wake-try-to-take loop
 * @lock:	 the rt_mutex to take
 * @state:	 the state the task should block in (TASK_INTERRUPTIBLE
 *		 or TASK_UNINTERRUPTIBLE)
 * @timeout:	 the pre-initialized and started timer, or NULL for none
 * @waiter:	 the pre-initialized rt_mutex_waiter
 *
 * Must be called with lock->wait_lock held and interrupts disabled
 */
static int __sched rt_mutex_slowlock_block(struct rt_mutex_base *lock,
                                           unsigned int state,
                                           struct hrtimer_sleeper *timeout,
                                           struct rt_mutex_waiter *waiter)
{
        int ret = 0;

        for (;;) {
                /* Try to acquire the lock: */
                if (try_to_take_rt_mutex(lock, current, waiter))
                        break;

                if (timeout && !timeout->task) {
                        ret = -ETIMEDOUT;
                        break;
                }
                if (signal_pending_state(state, current)) {
                        ret = -EINTR;
                        break;
                }

                raw_spin_unlock_irq(&lock->wait_lock);

                schedule();

                raw_spin_lock_irq(&lock->wait_lock);
                set_current_state(state);
        }

        __set_current_state(TASK_RUNNING);
        return ret;
}

static void __sched rt_mutex_handle_deadlock(int res, int detect_deadlock,
                                             struct rt_mutex_waiter *w)
{
        /*
         * If the result is not -EDEADLOCK or the caller requested
         * deadlock detection, nothing to do here.
         */
        if (res != -EDEADLOCK || detect_deadlock)
                return;

        /*
         * Yell loudly and stop the task right here.
         */
        WARN(1, "rtmutex deadlock detected\n");
        while (1) {
                set_current_state(TASK_INTERRUPTIBLE);
                schedule();
        }
}

/**
 * __rt_mutex_slowlock - Locking slowpath invoked with lock::wait_lock held
 * @lock:	The rtmutex to block lock
 * @state:	The task state for sleeping
 * @chwalk:	Indicator whether full or partial chainwalk is requested
 * @waiter:	Initialized waiter for blocking
 */
static int __sched __rt_mutex_slowlock(struct rt_mutex_base *lock,
                                       unsigned int state,
                                       enum rtmutex_chainwalk chwalk,
                                       struct rt_mutex_waiter *waiter)
{
        int ret;

        lockdep_assert_held(&lock->wait_lock);

        /* Try to acquire the lock again: */
        if (try_to_take_rt_mutex(lock, current, NULL))
                return 0;

        set_current_state(state);

        ret = task_blocks_on_rt_mutex(lock, waiter, current, chwalk);

        if (likely(!ret))
                ret = rt_mutex_slowlock_block(lock, state, NULL, waiter);

        if (unlikely(ret)) {
                __set_current_state(TASK_RUNNING);
                remove_waiter(lock, waiter);
                rt_mutex_handle_deadlock(ret, chwalk, waiter);
        }

        /*
         * try_to_take_rt_mutex() sets the waiter bit
         * unconditionally. We might have to fix that up.
         */
        fixup_rt_mutex_waiters(lock);
        return ret;
}

static inline int __rt_mutex_slowlock_locked(struct rt_mutex_base *lock,
                                             unsigned int state)
{
        struct rt_mutex_waiter waiter;
        int ret;

        rt_mutex_init_waiter(&waiter);

        ret = __rt_mutex_slowlock(lock, state, RT_MUTEX_MIN_CHAINWALK, &waiter);

        debug_rt_mutex_free_waiter(&waiter);
        return ret;
}

/*
 * rt_mutex_slowlock - Locking slowpath invoked when fast path fails
 * @lock:	The rtmutex to block lock
 * @state:	The task state for sleeping
 */
static int __sched rt_mutex_slowlock(struct rt_mutex_base *lock,
                                     unsigned int state)
{
        unsigned long flags;
        int ret;

        /*
         * Technically we could use raw_spin_[un]lock_irq() here, but this can
         * be called in early boot if the cmpxchg() fast path is disabled
         * (debug, no architecture support). In this case we will acquire the
         * rtmutex with lock->wait_lock held. But we cannot unconditionally
         * enable interrupts in that early boot case. So we need to use the
         * irqsave/restore variants.
         */
        raw_spin_lock_irqsave(&lock->wait_lock, flags);
        ret = __rt_mutex_slowlock_locked(lock, state);
        raw_spin_unlock_irqrestore(&lock->wait_lock, flags);

        return ret;
}

static __always_inline int __rt_mutex_lock(struct rt_mutex_base *lock,
                                           unsigned int state)
{
        if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current)))
                return 0;

        return rt_mutex_slowlock(lock, state);
}
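
/*
 * Illustrative caller, a sketch only: the actual lock type wrappers live
 * outside this file. Assuming the usual struct rt_mutex { struct
 * rt_mutex_base rtmutex; ... } layout, a blocking lock operation boils
 * down to:
 *
 *	__rt_mutex_lock(&lock->rtmutex, TASK_UNINTERRUPTIBLE);
 *
 * where TASK_UNINTERRUPTIBLE yields rt_mutex_lock() semantics and
 * TASK_INTERRUPTIBLE yields rt_mutex_lock_interruptible() semantics
 * (the latter returning -EINTR from the slow path on a signal).
 */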

#endif /* RT_MUTEX_BUILD_MUTEX */

#ifdef RT_MUTEX_BUILD_SPINLOCKS
/*
 * Functions required for spin/rw_lock substitution on RT kernels
 */

/**
 * rtlock_slowlock_locked - Slow path lock acquisition for RT locks
 * @lock:	The underlying RT mutex
 */
static void __sched rtlock_slowlock_locked(struct rt_mutex_base *lock)
{
        struct rt_mutex_waiter waiter;

        lockdep_assert_held(&lock->wait_lock);

        if (try_to_take_rt_mutex(lock, current, NULL))
                return;

        rt_mutex_init_rtlock_waiter(&waiter);

        /* Save current state and set state to TASK_RTLOCK_WAIT */
        current_save_and_set_rtlock_wait_state();

        task_blocks_on_rt_mutex(lock, &waiter, current, RT_MUTEX_MIN_CHAINWALK);

        for (;;) {
                /* Try to acquire the lock again */
                if (try_to_take_rt_mutex(lock, current, &waiter))
                        break;

                raw_spin_unlock_irq(&lock->wait_lock);

                schedule_rtlock();

                raw_spin_lock_irq(&lock->wait_lock);
                set_current_state(TASK_RTLOCK_WAIT);
        }

        /* Restore the task state */
        current_restore_rtlock_saved_state();

        /*
         * try_to_take_rt_mutex() sets the waiter bit unconditionally.
         * We might have to fix that up:
         */
        fixup_rt_mutex_waiters(lock);
        debug_rt_mutex_free_waiter(&waiter);
}

static __always_inline void __sched rtlock_slowlock(struct rt_mutex_base *lock)
{
        unsigned long flags;

        raw_spin_lock_irqsave(&lock->wait_lock, flags);
        rtlock_slowlock_locked(lock);
        raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
}

#endif /* RT_MUTEX_BUILD_SPINLOCKS */