futex: Allow FUTEX_CLOCK_REALTIME with FUTEX_WAIT op
[linux-2.6-microblaze.git] / kernel / futex.c
index dfc86e9..8a310e2 100644 (file)
@@ -276,10 +276,10 @@ static struct {
 static struct {
        struct fault_attr attr;
 
-       u32 ignore_private;
+       bool ignore_private;
 } fail_futex = {
        .attr = FAULT_ATTR_INITIALIZER,
-       .ignore_private = 0,
+       .ignore_private = false,
 };
 
 static int __init setup_fail_futex(char *str)
@@ -725,9 +725,12 @@ static struct futex_pi_state * alloc_pi_state(void)
 }
 
 /*
+ * Drops a reference to the pi_state object and frees or caches it
+ * when the last reference is gone.
+ *
  * Must be called with the hb lock held.
  */
-static void free_pi_state(struct futex_pi_state *pi_state)
+static void put_pi_state(struct futex_pi_state *pi_state)
 {
        if (!pi_state)
                return;
@@ -1706,31 +1709,35 @@ retry_private:
                 * exist yet, look it up one more time to ensure we have a
                 * reference to it. If the lock was taken, ret contains the
                 * vpid of the top waiter task.
+                * If the lock was not taken, we have pi_state and an initial
+                * refcount on it. In case of an error we have nothing.
                 */
                if (ret > 0) {
                        WARN_ON(pi_state);
                        drop_count++;
                        task_count++;
                        /*
-                        * If we acquired the lock, then the user
-                        * space value of uaddr2 should be vpid. It
-                        * cannot be changed by the top waiter as it
-                        * is blocked on hb2 lock if it tries to do
-                        * so. If something fiddled with it behind our
-                        * back the pi state lookup might unearth
-                        * it. So we rather use the known value than
-                        * rereading and handing potential crap to
-                        * lookup_pi_state.
+                        * If we acquired the lock, then the user space value
+                        * of uaddr2 should be vpid. It cannot be changed by
+                        * the top waiter as it is blocked on hb2 lock if it
+                        * tries to do so. If something fiddled with it behind
+                        * our back the pi state lookup might unearth it. So
+                        * we rather use the known value than rereading and
+                        * handing potential crap to lookup_pi_state.
+                        *
+                        * If that call succeeds then we have pi_state and an
+                        * initial refcount on it.
                         */
                        ret = lookup_pi_state(ret, hb2, &key2, &pi_state);
                }
 
                switch (ret) {
                case 0:
+                       /* We hold a reference on the pi state. */
                        break;
+
+                       /* If the above failed, then pi_state is NULL */
                case -EFAULT:
-                       free_pi_state(pi_state);
-                       pi_state = NULL;
                        double_unlock_hb(hb1, hb2);
                        hb_waiters_dec(hb2);
                        put_futex_key(&key2);
@@ -1746,8 +1753,6 @@ retry_private:
                         *   exit to complete.
                         * - The user space value changed.
                         */
-                       free_pi_state(pi_state);
-                       pi_state = NULL;
                        double_unlock_hb(hb1, hb2);
                        hb_waiters_dec(hb2);
                        put_futex_key(&key2);
@@ -1801,30 +1806,58 @@ retry_private:
                 * of requeue_pi if we couldn't acquire the lock atomically.
                 */
                if (requeue_pi) {
-                       /* Prepare the waiter to take the rt_mutex. */
+                       /*
+                        * Prepare the waiter to take the rt_mutex. Take a
+                        * refcount on the pi_state and store the pointer in
+                        * the futex_q object of the waiter.
+                        */
                        atomic_inc(&pi_state->refcount);
                        this->pi_state = pi_state;
                        ret = rt_mutex_start_proxy_lock(&pi_state->pi_mutex,
                                                        this->rt_waiter,
                                                        this->task);
                        if (ret == 1) {
-                               /* We got the lock. */
+                               /*
+                                * We got the lock. We neither drop the
+                                * refcount on pi_state nor clear
+                                * this->pi_state because the waiter needs the
+                                * pi_state for cleaning up the user space
+                                * value. It will drop the refcount after
+                                * doing so.
+                                */
                                requeue_pi_wake_futex(this, &key2, hb2);
                                drop_count++;
                                continue;
                        } else if (ret) {
-                               /* -EDEADLK */
+                               /*
+                                * rt_mutex_start_proxy_lock() detected a
+                                * potential deadlock when we tried to queue
+                                * that waiter. Drop the pi_state reference
+                                * which we took above and remove the pointer
+                                * to the state from the waiter's futex_q
+                                * object.
+                                */
                                this->pi_state = NULL;
-                               free_pi_state(pi_state);
-                               goto out_unlock;
+                               put_pi_state(pi_state);
+                               /*
+                                * We stop queueing more waiters and let user
+                                * space deal with the mess.
+                                */
+                               break;
                        }
                }
                requeue_futex(this, hb1, hb2, &key2);
                drop_count++;
        }
 
+       /*
+        * We took an extra initial reference to the pi_state either
+        * in futex_proxy_trylock_atomic() or in lookup_pi_state(). We
+        * need to drop it here again.
+        */
+       put_pi_state(pi_state);
+
 out_unlock:
-       free_pi_state(pi_state);
        double_unlock_hb(hb1, hb2);
        wake_up_q(&wake_q);
        hb_waiters_dec(hb2);
@@ -1973,7 +2006,7 @@ static void unqueue_me_pi(struct futex_q *q)
        __unqueue_futex(q);
 
        BUG_ON(!q->pi_state);
-       free_pi_state(q->pi_state);
+       put_pi_state(q->pi_state);
        q->pi_state = NULL;
 
        spin_unlock(q->lock_ptr);
@@ -2755,6 +2788,11 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
                if (q.pi_state && (q.pi_state->owner != current)) {
                        spin_lock(q.lock_ptr);
                        ret = fixup_pi_state_owner(uaddr2, &q, current);
+                       /*
+                        * Drop the reference to the pi state which
+                        * the requeue_pi() code acquired for us.
+                        */
+                       put_pi_state(q.pi_state);
                        spin_unlock(q.lock_ptr);
                }
        } else {
@@ -3046,7 +3084,8 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
 
        if (op & FUTEX_CLOCK_REALTIME) {
                flags |= FLAGS_CLOCKRT;
-               if (cmd != FUTEX_WAIT_BITSET && cmd != FUTEX_WAIT_REQUEUE_PI)
+               if (cmd != FUTEX_WAIT && cmd != FUTEX_WAIT_BITSET && \
+                   cmd != FUTEX_WAIT_REQUEUE_PI)
                        return -ENOSYS;
        }