tasks, sched/core: With a grace period after finish_task_switch(), remove unnecessary...
author Eric W. Biederman <ebiederm@xmission.com>
Sat, 14 Sep 2019 12:34:30 +0000 (07:34 -0500)
committer Ingo Molnar <mingo@kernel.org>
Wed, 25 Sep 2019 15:42:29 +0000 (17:42 +0200)
Remove workarounds that were written before there was a grace period
after tasks left the runqueue in finish_task_switch().

In particular, now that tasks exiting the runqueue experience an RCU
grace period, none of the work performed by task_rcu_dereference()
except the rcu_dereference() itself is necessary, so replace
task_rcu_dereference() with rcu_dereference().
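
For illustration only (not part of the diff below), a minimal sketch of
the caller pattern this simplification allows; do_something_with() is a
hypothetical helper, the remaining symbols are existing kernel APIs:

    rcu_read_lock();
    p = rcu_dereference(cpu_rq(cpu)->curr);     /* plain RCU load suffices */
    if (p && !(p->flags & PF_EXITING))
            do_something_with(p);               /* hypothetical per-task work */
    rcu_read_unlock();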

Remove the code in rcuwait_wait_event() that checks that the current
task has not exited.  It is no longer necessary, as it is now
guaranteed that any running task will experience an RCU grace period
after it leaves the runqueue.
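
As a usage sketch (again not part of the diff), an rcuwait pair now
looks like the following; 'w' (a struct rcuwait) and 'done' (an
atomic_t) are hypothetical objects, while rcuwait_wait_event() and
rcuwait_wake_up() are the real APIs:

    /* Waiter: block until the hypothetical condition becomes true. */
    rcuwait_wait_event(&w, atomic_read(&done));

    /*
     * Waker: publish the condition, then wake.  The waiter's task_struct
     * remains valid for a full RCU grace period after it leaves the
     * runqueue, so no exit_state check is needed on either side.
     */
    atomic_set(&done, 1);
    rcuwait_wake_up(&w);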

Remove the comment in rcuwait_wake_up() as it is no longer relevant.

Ref: 8f95c90ceb54 ("sched/wait, RCU: Introduce rcuwait machinery")
Ref: 150593bf8693 ("sched/api: Introduce task_rcu_dereference() and try_get_task_struct()")
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Chris Metcalf <cmetcalf@ezchip.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: Kirill Tkhai <tkhai@yandex.ru>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Paul E. McKenney <paulmck@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Russell King - ARM Linux admin <linux@armlinux.org.uk>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: https://lkml.kernel.org/r/87lfurdpk9.fsf_-_@x220.int.ebiederm.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
include/linux/rcuwait.h
include/linux/sched/task.h
kernel/exit.c
kernel/sched/fair.c
kernel/sched/membarrier.c

index 563290f..75c97e4 100644 (file)
@@ -6,16 +6,11 @@
 
 /*
  * rcuwait provides a way of blocking and waking up a single
- * task in an rcu-safe manner; where it is forbidden to use
- * after exit_notify(). task_struct is not properly rcu protected,
- * unless dealing with rcu-aware lists, ie: find_task_by_*().
+ * task in an rcu-safe manner.
  *
- * Alternatively we have task_rcu_dereference(), but the return
- * semantics have different implications which would break the
- * wakeup side. The only time @task is non-nil is when a user is
- * blocked (or checking if it needs to) on a condition, and reset
- * as soon as we know that the condition has succeeded and are
- * awoken.
+ * The only time @task is non-nil is when a user is blocked (or
+ * checking if it needs to) on a condition, and reset as soon as we
+ * know that the condition has succeeded and are awoken.
  */
 struct rcuwait {
        struct task_struct __rcu *task;
@@ -37,13 +32,6 @@ extern void rcuwait_wake_up(struct rcuwait *w);
  */
 #define rcuwait_wait_event(w, condition)                               \
 ({                                                                     \
-       /*                                                              \
-        * Complain if we are called after do_exit()/exit_notify(),     \
-        * as we cannot rely on the rcu critical region for the         \
-        * wakeup side.                                                 \
-        */                                                             \
-       WARN_ON(current->exit_state);                                   \
-                                                                       \
        rcu_assign_pointer((w)->task, current);                         \
        for (;;) {                                                      \
                /*                                                      \
index 153a683..4b1c3b6 100644 (file)
@@ -119,7 +119,6 @@ static inline void put_task_struct(struct task_struct *t)
                __put_task_struct(t);
 }
 
-struct task_struct *task_rcu_dereference(struct task_struct **ptask);
 void put_task_struct_rcu_user(struct task_struct *task);
 
 #ifdef CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT
index 3bcaec2..a46a50d 100644 (file)
@@ -234,69 +234,6 @@ repeat:
                goto repeat;
 }
 
-/*
- * Note that if this function returns a valid task_struct pointer (!NULL)
- * task->usage must remain >0 for the duration of the RCU critical section.
- */
-struct task_struct *task_rcu_dereference(struct task_struct **ptask)
-{
-       struct sighand_struct *sighand;
-       struct task_struct *task;
-
-       /*
-        * We need to verify that release_task() was not called and thus
-        * delayed_put_task_struct() can't run and drop the last reference
-        * before rcu_read_unlock(). We check task->sighand != NULL,
-        * but we can read the already freed and reused memory.
-        */
-retry:
-       task = rcu_dereference(*ptask);
-       if (!task)
-               return NULL;
-
-       probe_kernel_address(&task->sighand, sighand);
-
-       /*
-        * Pairs with atomic_dec_and_test() in put_task_struct(). If this task
-        * was already freed we can not miss the preceding update of this
-        * pointer.
-        */
-       smp_rmb();
-       if (unlikely(task != READ_ONCE(*ptask)))
-               goto retry;
-
-       /*
-        * We've re-checked that "task == *ptask", now we have two different
-        * cases:
-        *
-        * 1. This is actually the same task/task_struct. In this case
-        *    sighand != NULL tells us it is still alive.
-        *
-        * 2. This is another task which got the same memory for task_struct.
-        *    We can't know this of course, and we can not trust
-        *    sighand != NULL.
-        *
-        *    In this case we actually return a random value, but this is
-        *    correct.
-        *
-        *    If we return NULL - we can pretend that we actually noticed that
-        *    *ptask was updated when the previous task has exited. Or pretend
-        *    that probe_slab_address(&sighand) reads NULL.
-        *
-        *    If we return the new task (because sighand is not NULL for any
-        *    reason) - this is fine too. This (new) task can't go away before
-        *    another gp pass.
-        *
-        *    And note: We could even eliminate the false positive if re-read
-        *    task->sighand once again to avoid the falsely NULL. But this case
-        *    is very unlikely so we don't care.
-        */
-       if (!sighand)
-               return NULL;
-
-       return task;
-}
-
 void rcuwait_wake_up(struct rcuwait *w)
 {
        struct task_struct *task;
@@ -316,10 +253,6 @@ void rcuwait_wake_up(struct rcuwait *w)
         */
        smp_mb(); /* (B) */
 
-       /*
-        * Avoid using task_rcu_dereference() magic as long as we are careful,
-        * see comment in rcuwait_wait_event() regarding ->exit_state.
-        */
        task = rcu_dereference(w->task);
        if (task)
                wake_up_process(task);
index 3101c66..5bc2399 100644 (file)
@@ -1602,7 +1602,7 @@ static void task_numa_compare(struct task_numa_env *env,
                return;
 
        rcu_read_lock();
-       cur = task_rcu_dereference(&dst_rq->curr);
+       cur = rcu_dereference(dst_rq->curr);
        if (cur && ((cur->flags & PF_EXITING) || is_idle_task(cur)))
                cur = NULL;
 
index aa8d758..b14250a 100644 (file)
@@ -71,7 +71,7 @@ static int membarrier_global_expedited(void)
                        continue;
 
                rcu_read_lock();
-               p = task_rcu_dereference(&cpu_rq(cpu)->curr);
+               p = rcu_dereference(cpu_rq(cpu)->curr);
                if (p && p->mm && (atomic_read(&p->mm->membarrier_state) &
                                   MEMBARRIER_STATE_GLOBAL_EXPEDITED)) {
                        if (!fallback)
@@ -150,7 +150,7 @@ static int membarrier_private_expedited(int flags)
                if (cpu == raw_smp_processor_id())
                        continue;
                rcu_read_lock();
-               p = task_rcu_dereference(&cpu_rq(cpu)->curr);
+               p = rcu_dereference(cpu_rq(cpu)->curr);
                if (p && p->mm == current->mm) {
                        if (!fallback)
                                __cpumask_set_cpu(cpu, tmpmask);