Merge tag 'x86-urgent-2022-08-06' of git://git.kernel.org/pub/scm/linux/kernel/git...
[linux-2.6-microblaze.git] / kernel / context_tracking.c
index e485b6b..77978e3 100644 (file)
@@ -1,18 +1,20 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
- * Context tracking: Probe on high level context boundaries such as kernel
- * and userspace. This includes syscalls and exceptions entry/exit.
+ * Context tracking: Probe on high level context boundaries such as kernel,
+ * userspace, guest or idle.
  *
  * This is used by RCU to remove its dependency on the timer tick while a CPU
- * runs in userspace.
+ * runs in idle, userspace or guest mode.
  *
- *  Started by Frederic Weisbecker:
+ * User/guest tracking started by Frederic Weisbecker:
  *
- * Copyright (C) 2012 Red Hat, Inc., Frederic Weisbecker <fweisbec@redhat.com>
+ * Copyright (C) 2012 Red Hat, Inc., Frederic Weisbecker
  *
  * Many thanks to Gilad Ben-Yossef, Paul McKenney, Ingo Molnar, Andrew Morton,
  * Steven Rostedt, Peter Zijlstra for suggestions and improvements.
  *
+ * RCU extended quiescent state bits imported from kernel/rcu/tree.c
+ * where the relevant authorship may be found.
  */
 
 #include <linux/context_tracking.h>
@@ -28,8 +30,8 @@ DEFINE_PER_CPU(struct context_tracking, context_tracking) = {
 #ifdef CONFIG_CONTEXT_TRACKING_IDLE
        .dynticks_nesting = 1,
        .dynticks_nmi_nesting = DYNTICK_IRQ_NONIDLE,
-       .dynticks = ATOMIC_INIT(1),
 #endif
+       .state = ATOMIC_INIT(RCU_DYNTICKS_IDX),
 };
 EXPORT_SYMBOL_GPL(context_tracking);
 
@@ -76,7 +78,7 @@ static __always_inline void rcu_dynticks_task_trace_exit(void)
  * RCU is watching prior to the call to this function and is no longer
  * watching upon return.
  */
-static noinstr void rcu_dynticks_eqs_enter(void)
+static noinstr void ct_kernel_exit_state(int offset)
 {
        int seq;
 
@@ -86,9 +88,9 @@ static noinstr void rcu_dynticks_eqs_enter(void)
         * next idle sojourn.
         */
        rcu_dynticks_task_trace_enter();  // Before ->dynticks update!
-       seq = rcu_dynticks_inc(1);
+       seq = ct_state_inc(offset);
        // RCU is no longer watching.  Better be in extended quiescent state!
-       WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && (seq & 0x1));
+       WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && (seq & RCU_DYNTICKS_IDX));
 }
 
 /*
@@ -96,7 +98,7 @@ static noinstr void rcu_dynticks_eqs_enter(void)
  * called from an extended quiescent state, that is, RCU is not watching
  * prior to the call to this function and is watching upon return.
  */
-static noinstr void rcu_dynticks_eqs_exit(void)
+static noinstr void ct_kernel_enter_state(int offset)
 {
        int seq;
 
@@ -105,10 +107,10 @@ static noinstr void rcu_dynticks_eqs_exit(void)
         * and we also must force ordering with the next RCU read-side
         * critical section.
         */
-       seq = rcu_dynticks_inc(1);
+       seq = ct_state_inc(offset);
        // RCU is now watching.  Better not be in an extended quiescent state!
        rcu_dynticks_task_trace_exit();  // After ->dynticks update!
-       WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !(seq & 0x1));
+       WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !(seq & RCU_DYNTICKS_IDX));
 }
 
 /*
@@ -119,7 +121,7 @@ static noinstr void rcu_dynticks_eqs_exit(void)
  * the possibility of usermode upcalls having messed up our count
  * of interrupt nesting level during the prior busy period.
  */
-static void noinstr rcu_eqs_enter(bool user)
+static void noinstr ct_kernel_exit(bool user, int offset)
 {
        struct context_tracking *ct = this_cpu_ptr(&context_tracking);
 
@@ -139,13 +141,13 @@ static void noinstr rcu_eqs_enter(bool user)
        WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current));
        rcu_preempt_deferred_qs(current);
 
-       // instrumentation for the noinstr rcu_dynticks_eqs_enter()
-       instrument_atomic_write(&ct->dynticks, sizeof(ct->dynticks));
+       // instrumentation for the noinstr ct_kernel_exit_state()
+       instrument_atomic_write(&ct->state, sizeof(ct->state));
 
        instrumentation_end();
        WRITE_ONCE(ct->dynticks_nesting, 0); /* Avoid irq-access tearing. */
        // RCU is watching here ...
-       rcu_dynticks_eqs_enter();
+       ct_kernel_exit_state(offset);
        // ... but is no longer watching here.
        rcu_dynticks_task_enter();
 }
@@ -158,7 +160,7 @@ static void noinstr rcu_eqs_enter(bool user)
  * allow for the possibility of usermode upcalls messing up our count of
  * interrupt nesting level during the busy period that is just now starting.
  */
-static void noinstr rcu_eqs_exit(bool user)
+static void noinstr ct_kernel_enter(bool user, int offset)
 {
        struct context_tracking *ct = this_cpu_ptr(&context_tracking);
        long oldval;
@@ -173,12 +175,12 @@ static void noinstr rcu_eqs_exit(bool user)
        }
        rcu_dynticks_task_exit();
        // RCU is not watching here ...
-       rcu_dynticks_eqs_exit();
+       ct_kernel_enter_state(offset);
        // ... but is watching here.
        instrumentation_begin();
 
-       // instrumentation for the noinstr rcu_dynticks_eqs_exit()
-       instrument_atomic_write(&ct->dynticks, sizeof(ct->dynticks));
+       // instrumentation for the noinstr ct_kernel_enter_state()
+       instrument_atomic_write(&ct->state, sizeof(ct->state));
 
        trace_rcu_dyntick(TPS("End"), ct_dynticks_nesting(), 1, ct_dynticks());
        WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current));
@@ -192,7 +194,7 @@ static void noinstr rcu_eqs_exit(bool user)
  * ct_nmi_exit - inform RCU of exit from NMI context
  *
  * If we are returning from the outermost NMI handler that interrupted an
- * RCU-idle period, update ct->dynticks and ct->dynticks_nmi_nesting
+ * RCU-idle period, update ct->state and ct->dynticks_nmi_nesting
  * to let the RCU grace-period handling know that the CPU is back to
  * being RCU-idle.
  *
@@ -229,12 +231,12 @@ void noinstr ct_nmi_exit(void)
        trace_rcu_dyntick(TPS("Startirq"), ct_dynticks_nmi_nesting(), 0, ct_dynticks());
        WRITE_ONCE(ct->dynticks_nmi_nesting, 0); /* Avoid store tearing. */
 
-       // instrumentation for the noinstr rcu_dynticks_eqs_enter()
-       instrument_atomic_write(&ct->dynticks, sizeof(ct->dynticks));
+       // instrumentation for the noinstr ct_kernel_exit_state()
+       instrument_atomic_write(&ct->state, sizeof(ct->state));
        instrumentation_end();
 
        // RCU is watching here ...
-       rcu_dynticks_eqs_enter();
+       ct_kernel_exit_state(RCU_DYNTICKS_IDX);
        // ... but is no longer watching here.
 
        if (!in_nmi())
@@ -244,7 +246,7 @@ void noinstr ct_nmi_exit(void)
 /**
  * ct_nmi_enter - inform RCU of entry to NMI context
  *
- * If the CPU was idle from RCU's viewpoint, update ct->dynticks and
+ * If the CPU was idle from RCU's viewpoint, update ct->state and
  * ct->dynticks_nmi_nesting to let the RCU grace-period handling know
  * that the CPU is active.  This implementation permits nested NMIs, as
  * long as the nesting level does not overflow an int.  (You will probably
@@ -275,14 +277,14 @@ void noinstr ct_nmi_enter(void)
                        rcu_dynticks_task_exit();
 
                // RCU is not watching here ...
-               rcu_dynticks_eqs_exit();
+               ct_kernel_enter_state(RCU_DYNTICKS_IDX);
                // ... but is watching here.
 
                instrumentation_begin();
                // instrumentation for the noinstr rcu_dynticks_curr_cpu_in_eqs()
-               instrument_atomic_read(&ct->dynticks, sizeof(ct->dynticks));
-               // instrumentation for the noinstr rcu_dynticks_eqs_exit()
-               instrument_atomic_write(&ct->dynticks, sizeof(ct->dynticks));
+               instrument_atomic_read(&ct->state, sizeof(ct->state));
+               // instrumentation for the noinstr ct_kernel_enter_state()
+               instrument_atomic_write(&ct->state, sizeof(ct->state));
 
                incby = 1;
        } else if (!in_nmi()) {
@@ -315,7 +317,7 @@ void noinstr ct_nmi_enter(void)
 void noinstr ct_idle_enter(void)
 {
        WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !raw_irqs_disabled());
-       rcu_eqs_enter(false);
+       ct_kernel_exit(false, RCU_DYNTICKS_IDX + CONTEXT_IDLE);
 }
 EXPORT_SYMBOL_GPL(ct_idle_enter);
 
@@ -333,7 +335,7 @@ void noinstr ct_idle_exit(void)
        unsigned long flags;
 
        raw_local_irq_save(flags);
-       rcu_eqs_exit(false);
+       ct_kernel_enter(false, RCU_DYNTICKS_IDX - CONTEXT_IDLE);
        raw_local_irq_restore(flags);
 }
 EXPORT_SYMBOL_GPL(ct_idle_exit);
@@ -421,8 +423,8 @@ void ct_irq_exit_irqson(void)
        local_irq_restore(flags);
 }
 #else
-static __always_inline void rcu_eqs_enter(bool user) { }
-static __always_inline void rcu_eqs_exit(bool user) { }
+static __always_inline void ct_kernel_exit(bool user, int offset) { }
+static __always_inline void ct_kernel_enter(bool user, int offset) { }
 #endif /* #ifdef CONFIG_CONTEXT_TRACKING_IDLE */
 
 #ifdef CONFIG_CONTEXT_TRACKING_USER
@@ -463,6 +465,7 @@ static __always_inline void context_tracking_recursion_exit(void)
  */
 void noinstr __ct_user_enter(enum ctx_state state)
 {
+       struct context_tracking *ct = this_cpu_ptr(&context_tracking);
        lockdep_assert_irqs_disabled();
 
        /* Kernel threads aren't supposed to go to userspace */
@@ -471,8 +474,8 @@ void noinstr __ct_user_enter(enum ctx_state state)
        if (!context_tracking_recursion_enter())
                return;
 
-       if ( __this_cpu_read(context_tracking.state) != state) {
-               if (__this_cpu_read(context_tracking.active)) {
+       if (__ct_state() != state) {
+               if (ct->active) {
                        /*
                         * At this stage, only low level arch entry code remains and
                         * then we'll run in userspace. We can assume there won't be
@@ -492,28 +495,49 @@ void noinstr __ct_user_enter(enum ctx_state state)
                         * that will fire and reschedule once we resume in user/guest mode.
                         */
                        rcu_irq_work_resched();
+
                        /*
                         * Enter RCU idle mode right before resuming userspace.  No use of RCU
                         * is permitted between this call and rcu_eqs_exit(). This way the
                         * CPU doesn't need to maintain the tick for RCU maintenance purposes
                         * when the CPU runs in userspace.
                         */
-                       rcu_eqs_enter(true);
+                       ct_kernel_exit(true, RCU_DYNTICKS_IDX + state);
+
+                       /*
+                        * Special case if we only track user <-> kernel transitions for tickless
+                        * cputime accounting but we don't support RCU extended quiescent state.
+                        * In this case we don't care about any concurrency/ordering.
+                        */
+                       if (!IS_ENABLED(CONFIG_CONTEXT_TRACKING_IDLE))
+                               atomic_set(&ct->state, state);
+               } else {
+                       /*
+                        * Even if context tracking is disabled on this CPU, because it's outside
+                        * the full dynticks mask for example, we still have to keep track of the
+                        * context transitions and states to prevent inconsistency on those of
+                        * other CPUs.
+                        * If a task triggers an exception in userspace, sleeps on the exception
+                        * handler and then migrates to another CPU, that new CPU must know where
+                        * the exception returns by the time we call exception_exit().
+                        * This information can only be provided by the previous CPU when it called
+                        * exception_enter().
+                        * OTOH we can spare the calls to vtime and RCU when context_tracking.active
+                        * is false because we know that CPU is not tickless.
+                        */
+                       if (!IS_ENABLED(CONFIG_CONTEXT_TRACKING_IDLE)) {
+                               /* Tracking for vtime only, no concurrent RCU EQS accounting */
+                               atomic_set(&ct->state, state);
+                       } else {
+                               /*
+                                * Tracking for vtime and RCU EQS. Make sure we don't race
+                                * with NMIs. OTOH we don't care about ordering here since
+                                * RCU only requires RCU_DYNTICKS_IDX increments to be fully
+                                * ordered.
+                                */
+                               atomic_add(state, &ct->state);
+                       }
                }
-               /*
-                * Even if context tracking is disabled on this CPU, because it's outside
-                * the full dynticks mask for example, we still have to keep track of the
-                * context transitions and states to prevent inconsistency on those of
-                * other CPUs.
-                * If a task triggers an exception in userspace, sleep on the exception
-                * handler and then migrate to another CPU, that new CPU must know where
-                * the exception returns by the time we call exception_exit().
-                * This information can only be provided by the previous CPU when it called
-                * exception_enter().
-                * OTOH we can spare the calls to vtime and RCU when context_tracking.active
-                * is false because we know that CPU is not tickless.
-                */
-               __this_cpu_write(context_tracking.state, state);
        }
        context_tracking_recursion_exit();
 }
@@ -581,24 +605,47 @@ NOKPROBE_SYMBOL(user_enter_callable);
  */
 void noinstr __ct_user_exit(enum ctx_state state)
 {
+       struct context_tracking *ct = this_cpu_ptr(&context_tracking);
+
        if (!context_tracking_recursion_enter())
                return;
 
-       if (__this_cpu_read(context_tracking.state) == state) {
-               if (__this_cpu_read(context_tracking.active)) {
+       if (__ct_state() == state) {
+               if (ct->active) {
                        /*
                         * Exit RCU idle mode while entering the kernel because it can
                         * run a RCU read side critical section anytime.
                         */
-                       rcu_eqs_exit(true);
+                       ct_kernel_enter(true, RCU_DYNTICKS_IDX - state);
                        if (state == CONTEXT_USER) {
                                instrumentation_begin();
                                vtime_user_exit(current);
                                trace_user_exit(0);
                                instrumentation_end();
                        }
+
+                       /*
+                        * Special case if we only track user <-> kernel transitions for tickless
+                        * cputime accounting but we don't support RCU extended quiescent state.
+                        * In this case we don't care about any concurrency/ordering.
+                        */
+                       if (!IS_ENABLED(CONFIG_CONTEXT_TRACKING_IDLE))
+                               atomic_set(&ct->state, CONTEXT_KERNEL);
+
+               } else {
+                       if (!IS_ENABLED(CONFIG_CONTEXT_TRACKING_IDLE)) {
+                               /* Tracking for vtime only, no concurrent RCU EQS accounting */
+                               atomic_set(&ct->state, CONTEXT_KERNEL);
+                       } else {
+                               /*
+                                * Tracking for vtime and RCU EQS. Make sure we don't race
+                                * with NMIs. OTOH we don't care about ordering here since
+                                * RCU only requires RCU_DYNTICKS_IDX increments to be fully
+                                * ordered.
+                                */
+                               atomic_sub(state, &ct->state);
+                       }
                }
-               __this_cpu_write(context_tracking.state, CONTEXT_KERNEL);
        }
        context_tracking_recursion_exit();
 }