Merge branch 'locking-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel...
authorLinus Torvalds <torvalds@linux-foundation.org>
Thu, 16 May 2019 17:54:19 +0000 (10:54 -0700)
committerLinus Torvalds <torvalds@linux-foundation.org>
Thu, 16 May 2019 17:54:19 +0000 (10:54 -0700)
Pull locking fix from Ingo Molnar:
 "A single rwsem fix"

* 'locking-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  locking/rwsem: Prevent decrement of reader count before increment

kernel/locking/rwsem-xadd.c

index 6b3ee99..0b1f779 100644 (file)
@@ -130,6 +130,7 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem,
 {
        struct rwsem_waiter *waiter, *tmp;
        long oldcount, woken = 0, adjustment = 0;
+       struct list_head wlist;
 
        /*
         * Take a peek at the queue head waiter such that we can determine
@@ -188,18 +189,43 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem,
         * of the queue. We know that woken will be at least 1 as we accounted
         * for above. Note we increment the 'active part' of the count by the
         * number of readers before waking any processes up.
+        *
+        * We have to do wakeup in 2 passes to prevent the possibility that
+        * the reader count may be decremented before it is incremented. It
+        * is because the to-be-woken waiter may not have slept yet. So it
+        * may see waiter->task got cleared, finish its critical section and
+        * do an unlock before the reader count increment.
+        *
+        * 1) Collect the read-waiters in a separate list, count them and
+        *    fully increment the reader count in rwsem.
+        * 2) For each waiters in the new list, clear waiter->task and
+        *    put them into wake_q to be woken up later.
         */
-       list_for_each_entry_safe(waiter, tmp, &sem->wait_list, list) {
-               struct task_struct *tsk;
-
+       list_for_each_entry(waiter, &sem->wait_list, list) {
                if (waiter->type == RWSEM_WAITING_FOR_WRITE)
                        break;
 
                woken++;
-               tsk = waiter->task;
+       }
+       list_cut_before(&wlist, &sem->wait_list, &waiter->list);
+
+       adjustment = woken * RWSEM_ACTIVE_READ_BIAS - adjustment;
+       lockevent_cond_inc(rwsem_wake_reader, woken);
+       if (list_empty(&sem->wait_list)) {
+               /* hit end of list above */
+               adjustment -= RWSEM_WAITING_BIAS;
+       }
+
+       if (adjustment)
+               atomic_long_add(adjustment, &sem->count);
+
+       /* 2nd pass */
+       list_for_each_entry_safe(waiter, tmp, &wlist, list) {
+               struct task_struct *tsk;
 
+               tsk = waiter->task;
                get_task_struct(tsk);
-               list_del(&waiter->list);
+
                /*
                 * Ensure calling get_task_struct() before setting the reader
                 * waiter to nil such that rwsem_down_read_failed() cannot
@@ -213,16 +239,6 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem,
                 */
                wake_q_add_safe(wake_q, tsk);
        }
-
-       adjustment = woken * RWSEM_ACTIVE_READ_BIAS - adjustment;
-       lockevent_cond_inc(rwsem_wake_reader, woken);
-       if (list_empty(&sem->wait_list)) {
-               /* hit end of list above */
-               adjustment -= RWSEM_WAITING_BIAS;
-       }
-
-       if (adjustment)
-               atomic_long_add(adjustment, &sem->count);
 }
 
 /*