futex: Restructure futex_requeue()
[linux-2.6-microblaze.git] / kernel / futex.c
index 2ecb075..6cb6b5d 100644 (file)
@@ -179,7 +179,7 @@ struct futex_pi_state {
        /*
         * The PI object:
         */
-       struct rt_mutex pi_mutex;
+       struct rt_mutex_base pi_mutex;
 
        struct task_struct *owner;
        refcount_t refcount;
@@ -1299,27 +1299,6 @@ static int attach_to_pi_owner(u32 __user *uaddr, u32 uval, union futex_key *key,
        return 0;
 }
 
-static int lookup_pi_state(u32 __user *uaddr, u32 uval,
-                          struct futex_hash_bucket *hb,
-                          union futex_key *key, struct futex_pi_state **ps,
-                          struct task_struct **exiting)
-{
-       struct futex_q *top_waiter = futex_top_waiter(hb, key);
-
-       /*
-        * If there is a waiter on that futex, validate it and
-        * attach to the pi_state when the validation succeeds.
-        */
-       if (top_waiter)
-               return attach_to_pi_state(uaddr, uval, top_waiter->pi_state, ps);
-
-       /*
-        * We are the first waiter - try to look up the owner based on
-        * @uval and attach to it.
-        */
-       return attach_to_pi_owner(uaddr, uval, key, ps, exiting);
-}
-
 static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval)
 {
        int err;
@@ -1354,7 +1333,7 @@ static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval)
  *  -  1 - acquired the lock;
  *  - <0 - error
  *
- * The hb->lock and futex_key refs shall be held by the caller.
+ * The hb->lock must be held by the caller.
  *
  * @exiting is only set when the return value is -EBUSY. If so, this holds
  * a refcount on the exiting task on return and the caller needs to drop it
@@ -1493,11 +1472,11 @@ static void mark_wake_futex(struct wake_q_head *wake_q, struct futex_q *q)
  */
 static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_pi_state *pi_state)
 {
-       u32 curval, newval;
        struct rt_mutex_waiter *top_waiter;
        struct task_struct *new_owner;
        bool postunlock = false;
-       DEFINE_WAKE_Q(wake_q);
+       DEFINE_RT_WAKE_Q(wqh);
+       u32 curval, newval;
        int ret = 0;
 
        top_waiter = rt_mutex_top_waiter(&pi_state->pi_mutex);
@@ -1549,14 +1528,14 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_pi_state *pi_
                 * not fail.
                 */
                pi_state_update_owner(pi_state, new_owner);
-               postunlock = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q);
+               postunlock = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wqh);
        }
 
 out_unlock:
        raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
 
        if (postunlock)
-               rt_mutex_postunlock(&wake_q);
+               rt_mutex_postunlock(&wqh);
 
        return ret;
 }
@@ -1879,6 +1858,13 @@ futex_proxy_trylock_atomic(u32 __user *pifutex, struct futex_hash_bucket *hb1,
        if (!top_waiter)
                return 0;
 
+       /*
+        * Only a waiter parked in futex_wait_requeue_pi() on the (always
+        * !PI) 'waitqueue' futex is valid here; reject anything else.
+        */
+       if (!top_waiter->rt_waiter || top_waiter->pi_state)
+               return -EINVAL;
+
        /* Ensure we requeue to the expected futex. */
        if (!match_futex(top_waiter->requeue_pi_key, key2))
                return -EINVAL;
@@ -2014,7 +2000,7 @@ retry_private:
                }
        }
 
-       if (requeue_pi && (task_count - nr_wake < nr_requeue)) {
+       if (requeue_pi) {
                struct task_struct *exiting = NULL;
 
                /*
@@ -2031,8 +2017,8 @@ retry_private:
                 * At this point the top_waiter has either taken uaddr2 or is
                 * waiting on it.  If the former, then the pi_state will not
                 * exist yet, look it up one more time to ensure we have a
-                * reference to it. If the lock was taken, ret contains the
-                * vpid of the top waiter task.
+                * reference to it. If the lock was taken, @ret contains the
+                * VPID of the top waiter task.
                 * If the lock was not taken, we have pi_state and an initial
                 * refcount on it. In case of an error we have nothing.
                 */
@@ -2040,19 +2026,25 @@ retry_private:
                        WARN_ON(pi_state);
                        task_count++;
                        /*
-                        * If we acquired the lock, then the user space value
-                        * of uaddr2 should be vpid. It cannot be changed by
-                        * the top waiter as it is blocked on hb2 lock if it
-                        * tries to do so. If something fiddled with it behind
-                        * our back the pi state lookup might unearth it. So
-                        * we rather use the known value than rereading and
-                        * handing potential crap to lookup_pi_state.
+                        * If futex_proxy_trylock_atomic() acquired the
+                        * user space futex, then the user space value
+                        * @uaddr2 has been set to the @hb1's top waiter
+                        * task VPID. This task is guaranteed to be alive
+                        * and cannot be exiting because it is either
+                        * sleeping or blocked on @hb2 lock.
                         *
-                        * If that call succeeds then we have pi_state and an
-                        * initial refcount on it.
+                        * The @uaddr2 futex cannot have waiters either as
+                        * otherwise futex_proxy_trylock_atomic() would not
+                        * have succeeded.
+                        *
+                        * In order to requeue waiters to @hb2, pi state is
+                        * required. Hand in the VPID value (@ret) and
+                        * allocate PI state with an initial refcount on
+                        * it.
                         */
-                       ret = lookup_pi_state(uaddr2, ret, hb2, &key2,
-                                             &pi_state, &exiting);
+                       ret = attach_to_pi_owner(uaddr2, ret, &key2, &pi_state,
+                                                &exiting);
+                       WARN_ON(ret);
                }
 
                switch (ret) {
@@ -2112,18 +2104,17 @@ retry_private:
                        break;
                }
 
-               /*
-                * Wake nr_wake waiters.  For requeue_pi, if we acquired the
-                * lock, we already woke the top_waiter.  If not, it will be
-                * woken by futex_unlock_pi().
-                */
-               if (++task_count <= nr_wake && !requeue_pi) {
-                       mark_wake_futex(&wake_q, this);
+               /* Plain futexes just wake or requeue and are done */
+               if (!requeue_pi) {
+                       if (++task_count <= nr_wake)
+                               mark_wake_futex(&wake_q, this);
+                       else
+                               requeue_futex(this, hb1, hb2, &key2);
                        continue;
                }
 
                /* Ensure we requeue to the expected futex for requeue_pi. */
-               if (requeue_pi && !match_futex(this->requeue_pi_key, &key2)) {
+               if (!match_futex(this->requeue_pi_key, &key2)) {
                        ret = -EINVAL;
                        break;
                }
@@ -2131,54 +2122,51 @@ retry_private:
                /*
                 * Requeue nr_requeue waiters and possibly one more in the case
                 * of requeue_pi if we couldn't acquire the lock atomically.
+                *
+                * Prepare the waiter to take the rt_mutex. Take a refcount
+                * on the pi_state and store the pointer in the futex_q
+                * object of the waiter.
                 */
-               if (requeue_pi) {
+               get_pi_state(pi_state);
+               this->pi_state = pi_state;
+               ret = rt_mutex_start_proxy_lock(&pi_state->pi_mutex,
+                                               this->rt_waiter, this->task);
+               if (ret == 1) {
                        /*
-                        * Prepare the waiter to take the rt_mutex. Take a
-                        * refcount on the pi_state and store the pointer in
-                        * the futex_q object of the waiter.
+                        * We got the lock. We neither drop the refcount
+                        * on pi_state nor clear this->pi_state because the
+                        * waiter needs the pi_state for cleaning up the
+                        * user space value. It will drop the refcount
+                        * after doing so.
                         */
-                       get_pi_state(pi_state);
-                       this->pi_state = pi_state;
-                       ret = rt_mutex_start_proxy_lock(&pi_state->pi_mutex,
-                                                       this->rt_waiter,
-                                                       this->task);
-                       if (ret == 1) {
-                               /*
-                                * We got the lock. We do neither drop the
-                                * refcount on pi_state nor clear
-                                * this->pi_state because the waiter needs the
-                                * pi_state for cleaning up the user space
-                                * value. It will drop the refcount after
-                                * doing so.
-                                */
-                               requeue_pi_wake_futex(this, &key2, hb2);
-                               continue;
-                       } else if (ret) {
-                               /*
-                                * rt_mutex_start_proxy_lock() detected a
-                                * potential deadlock when we tried to queue
-                                * that waiter. Drop the pi_state reference
-                                * which we took above and remove the pointer
-                                * to the state from the waiters futex_q
-                                * object.
-                                */
-                               this->pi_state = NULL;
-                               put_pi_state(pi_state);
-                               /*
-                                * We stop queueing more waiters and let user
-                                * space deal with the mess.
-                                */
-                               break;
-                       }
+                       requeue_pi_wake_futex(this, &key2, hb2);
+                       task_count++;
+                       continue;
+               } else if (ret) {
+                       /*
+                        * rt_mutex_start_proxy_lock() detected a potential
+                        * deadlock when we tried to queue that waiter.
+                        * Drop the pi_state reference which we took above
+                        * and remove the pointer to the state from the
+                        * waiters futex_q object.
+                        */
+                       this->pi_state = NULL;
+                       put_pi_state(pi_state);
+                       /*
+                        * We stop queueing more waiters and let user space
+                        * deal with the mess.
+                        */
+                       break;
                }
+               /* Waiter is queued, move it to hb2 */
                requeue_futex(this, hb1, hb2, &key2);
+               task_count++;
        }
 
        /*
-        * We took an extra initial reference to the pi_state either
-        * in futex_proxy_trylock_atomic() or in lookup_pi_state(). We
-        * need to drop it here again.
+        * We took an extra initial reference to the pi_state either in
+        * futex_proxy_trylock_atomic() or in attach_to_pi_owner(). We need
+        * to drop it here again.
         */
        put_pi_state(pi_state);
 
@@ -2357,7 +2345,7 @@ static int __fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
         * Modifying pi_state _before_ the user space value would leave the
         * pi_state in an inconsistent state when we fault here, because we
         * need to drop the locks to handle the fault. This might be observed
-        * in the PID check in lookup_pi_state.
+        * in the PID checks when attaching to PI state.
         */
 retry:
        if (!argowner) {
@@ -2614,8 +2602,7 @@ static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q,
  *
  * Setup the futex_q and locate the hash_bucket.  Get the futex value and
  * compare it with the expected value.  Handle atomic faults internally.
- * Return with the hb lock held and a q.key reference on success, and unlocked
- * with no q.key reference on failure.
+ * Return with the hb lock held on success, and unlocked on failure.
  *
  * Return:
  *  -  0 - uaddr contains val and hb has been locked;
@@ -2693,8 +2680,8 @@ static int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val,
                               current->timer_slack_ns);
 retry:
        /*
-        * Prepare to wait on uaddr. On success, holds hb lock and increments
-        * q.key refs.
+        * Prepare to wait on uaddr. On success, it holds hb->lock and q
+        * is initialized.
         */
        ret = futex_wait_setup(uaddr, val, flags, &q, &hb);
        if (ret)
@@ -2705,7 +2692,6 @@ retry:
 
        /* If we were woken (and unqueued), we succeeded, whatever. */
        ret = 0;
-       /* unqueue_me() drops q.key ref */
        if (!unqueue_me(&q))
                goto out;
        ret = -ETIMEDOUT;
@@ -3198,8 +3184,8 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
        q.requeue_pi_key = &key2;
 
        /*
-        * Prepare to wait on uaddr. On success, increments q.key (key1) ref
-        * count.
+        * Prepare to wait on uaddr. On success, it holds hb->lock and q
+        * is initialized.
         */
        ret = futex_wait_setup(uaddr, val, flags, &q, &hb);
        if (ret)
@@ -3228,9 +3214,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
         * In order for us to be here, we know our q.key == key2, and since
         * we took the hb->lock above, we also know that futex_requeue() has
         * completed and we no longer have to concern ourselves with a wakeup
-        * race with the atomic proxy lock acquisition by the requeue code. The
-        * futex_requeue dropped our key1 reference and incremented our key2
-        * reference count.
+        * race with the atomic proxy lock acquisition by the requeue code.
         */
 
        /*
@@ -3254,7 +3238,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
                        ret = ret < 0 ? ret : 0;
                }
        } else {
-               struct rt_mutex *pi_mutex;
+               struct rt_mutex_base *pi_mutex;
 
                /*
                 * We have been woken up by futex_unlock_pi(), a timeout, or a