cpuidle: Fix ct_idle_*() usage
authorPeter Zijlstra <peterz@infradead.org>
Thu, 12 Jan 2023 19:43:27 +0000 (20:43 +0100)
committerIngo Molnar <mingo@kernel.org>
Fri, 13 Jan 2023 10:48:15 +0000 (11:48 +0100)
The whole disable-RCU, enable-IRQS dance is very intricate since
changing IRQ state is traced, which depends on RCU.

Add two helpers for the cpuidle case that mirror the entry code:

  ct_cpuidle_enter()
  ct_cpuidle_exit()

And fix all the cases where the enter/exit dance was buggy.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Tested-by: Tony Lindgren <tony@atomide.com>
Tested-by: Ulf Hansson <ulf.hansson@linaro.org>
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Frederic Weisbecker <frederic@kernel.org>
Link: https://lore.kernel.org/r/20230112195540.130014793@infradead.org
15 files changed:
arch/arm/mach-imx/cpuidle-imx6q.c
arch/arm/mach-imx/cpuidle-imx6sx.c
arch/arm/mach-omap2/cpuidle34xx.c
arch/arm/mach-omap2/cpuidle44xx.c
drivers/acpi/processor_idle.c
drivers/cpuidle/cpuidle-big_little.c
drivers/cpuidle/cpuidle-mvebu-v7.c
drivers/cpuidle/cpuidle-psci.c
drivers/cpuidle/cpuidle-riscv-sbi.c
drivers/cpuidle/cpuidle-tegra.c
drivers/cpuidle/cpuidle.c
include/linux/clockchips.h
include/linux/cpuidle.h
kernel/sched/idle.c
kernel/time/tick-broadcast.c

index d086cba..c24c78a 100644 (file)
@@ -25,9 +25,9 @@ static int imx6q_enter_wait(struct cpuidle_device *dev,
                imx6_set_lpm(WAIT_UNCLOCKED);
        raw_spin_unlock(&cpuidle_lock);
 
-       ct_idle_enter();
+       ct_cpuidle_enter();
        cpu_do_idle();
-       ct_idle_exit();
+       ct_cpuidle_exit();
 
        raw_spin_lock(&cpuidle_lock);
        if (num_idle_cpus-- == num_online_cpus())
index 1dc01f6..479f062 100644 (file)
@@ -47,9 +47,9 @@ static int imx6sx_enter_wait(struct cpuidle_device *dev,
                cpu_pm_enter();
                cpu_cluster_pm_enter();
 
-               ct_idle_enter();
+               ct_cpuidle_enter();
                cpu_suspend(0, imx6sx_idle_finish);
-               ct_idle_exit();
+               ct_cpuidle_exit();
 
                cpu_cluster_pm_exit();
                cpu_pm_exit();
index cedf5cb..6d63769 100644 (file)
@@ -133,9 +133,9 @@ static int omap3_enter_idle(struct cpuidle_device *dev,
        }
 
        /* Execute ARM wfi */
-       ct_idle_enter();
+       ct_cpuidle_enter();
        omap_sram_idle();
-       ct_idle_exit();
+       ct_cpuidle_exit();
 
        /*
         * Call idle CPU PM enter notifier chain to restore
index 953ad88..3c97d56 100644 (file)
@@ -105,9 +105,9 @@ static int omap_enter_idle_smp(struct cpuidle_device *dev,
        }
        raw_spin_unlock_irqrestore(&mpu_lock, flag);
 
-       ct_idle_enter();
+       ct_cpuidle_enter();
        omap4_enter_lowpower(dev->cpu, cx->cpu_state);
-       ct_idle_exit();
+       ct_cpuidle_exit();
 
        raw_spin_lock_irqsave(&mpu_lock, flag);
        if (cx->mpu_state_vote == num_online_cpus())
@@ -186,10 +186,10 @@ static int omap_enter_idle_coupled(struct cpuidle_device *dev,
                }
        }
 
-       ct_idle_enter();
+       ct_cpuidle_enter();
        omap4_enter_lowpower(dev->cpu, cx->cpu_state);
        cpu_done[dev->cpu] = true;
-       ct_idle_exit();
+       ct_cpuidle_exit();
 
        /* Wakeup CPU1 only if it is not offlined */
        if (dev->cpu == 0 && cpumask_test_cpu(1, cpu_online_mask)) {
index 566f7db..a589cfa 100644 (file)
@@ -642,6 +642,8 @@ static int __cpuidle acpi_idle_enter_bm(struct cpuidle_driver *drv,
         */
        bool dis_bm = pr->flags.bm_control;
 
+       instrumentation_begin();
+
        /* If we can skip BM, demote to a safe state. */
        if (!cx->bm_sts_skip && acpi_idle_bm_check()) {
                dis_bm = false;
@@ -663,11 +665,11 @@ static int __cpuidle acpi_idle_enter_bm(struct cpuidle_driver *drv,
                raw_spin_unlock(&c3_lock);
        }
 
-       ct_idle_enter();
+       ct_cpuidle_enter();
 
        acpi_idle_do_entry(cx);
 
-       ct_idle_exit();
+       ct_cpuidle_exit();
 
        /* Re-enable bus master arbitration */
        if (dis_bm) {
@@ -677,6 +679,8 @@ static int __cpuidle acpi_idle_enter_bm(struct cpuidle_driver *drv,
                raw_spin_unlock(&c3_lock);
        }
 
+       instrumentation_end();
+
        return index;
 }
 
index fffd4ed..5858db2 100644 (file)
@@ -126,13 +126,13 @@ static int bl_enter_powerdown(struct cpuidle_device *dev,
                                struct cpuidle_driver *drv, int idx)
 {
        cpu_pm_enter();
-       ct_idle_enter();
+       ct_cpuidle_enter();
 
        cpu_suspend(0, bl_powerdown_finisher);
 
        /* signals the MCPM core that CPU is out of low power state */
        mcpm_cpu_powered_up();
-       ct_idle_exit();
+       ct_cpuidle_exit();
 
        cpu_pm_exit();
 
index c9568aa..20bfb26 100644 (file)
@@ -36,9 +36,9 @@ static int mvebu_v7_enter_idle(struct cpuidle_device *dev,
        if (drv->states[index].flags & MVEBU_V7_FLAG_DEEP_IDLE)
                deepidle = true;
 
-       ct_idle_enter();
+       ct_cpuidle_enter();
        ret = mvebu_v7_cpu_suspend(deepidle);
-       ct_idle_exit();
+       ct_cpuidle_exit();
 
        cpu_pm_exit();
 
index 969808c..58b2cbb 100644 (file)
@@ -74,7 +74,7 @@ static int __psci_enter_domain_idle_state(struct cpuidle_device *dev,
        else
                pm_runtime_put_sync_suspend(pd_dev);
 
-       ct_idle_enter();
+       ct_cpuidle_enter();
 
        state = psci_get_domain_state();
        if (!state)
@@ -82,7 +82,7 @@ static int __psci_enter_domain_idle_state(struct cpuidle_device *dev,
 
        ret = psci_cpu_suspend_enter(state) ? -1 : idx;
 
-       ct_idle_exit();
+       ct_cpuidle_exit();
 
        if (s2idle)
                dev_pm_genpd_resume(pd_dev);
index cbdbb11..0a480f5 100644 (file)
@@ -126,7 +126,7 @@ static int __sbi_enter_domain_idle_state(struct cpuidle_device *dev,
        else
                pm_runtime_put_sync_suspend(pd_dev);
 
-       ct_idle_enter();
+       ct_cpuidle_enter();
 
        if (sbi_is_domain_state_available())
                state = sbi_get_domain_state();
@@ -135,7 +135,7 @@ static int __sbi_enter_domain_idle_state(struct cpuidle_device *dev,
 
        ret = sbi_suspend(state) ? -1 : idx;
 
-       ct_idle_exit();
+       ct_cpuidle_exit();
 
        if (s2idle)
                dev_pm_genpd_resume(pd_dev);
index 3ca5cfb..9c2903c 100644 (file)
@@ -183,7 +183,7 @@ static int tegra_cpuidle_state_enter(struct cpuidle_device *dev,
        tegra_pm_set_cpu_in_lp2();
        cpu_pm_enter();
 
-       ct_idle_enter();
+       ct_cpuidle_enter();
 
        switch (index) {
        case TEGRA_C7:
@@ -199,7 +199,7 @@ static int tegra_cpuidle_state_enter(struct cpuidle_device *dev,
                break;
        }
 
-       ct_idle_exit();
+       ct_cpuidle_exit();
 
        cpu_pm_exit();
        tegra_pm_clear_cpu_in_lp2();
@@ -240,10 +240,10 @@ static int tegra_cpuidle_enter(struct cpuidle_device *dev,
 
        if (index == TEGRA_C1) {
                if (do_rcu)
-                       ct_idle_enter();
+                       ct_cpuidle_enter();
                ret = arm_cpuidle_simple_enter(dev, drv, index);
                if (do_rcu)
-                       ct_idle_exit();
+                       ct_cpuidle_exit();
        } else
                ret = tegra_cpuidle_state_enter(dev, index, cpu);
 
index 95c801f..08374c7 100644 (file)
@@ -14,6 +14,7 @@
 #include <linux/mutex.h>
 #include <linux/sched.h>
 #include <linux/sched/clock.h>
+#include <linux/sched/idle.h>
 #include <linux/notifier.h>
 #include <linux/pm_qos.h>
 #include <linux/cpu.h>
@@ -152,12 +153,12 @@ static void enter_s2idle_proper(struct cpuidle_driver *drv,
         */
        stop_critical_timings();
        if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE))
-               ct_idle_enter();
+               ct_cpuidle_enter();
        target_state->enter_s2idle(dev, drv, index);
        if (WARN_ON_ONCE(!irqs_disabled()))
-               local_irq_disable();
+               raw_local_irq_disable();
        if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE))
-               ct_idle_exit();
+               ct_cpuidle_exit();
        tick_unfreeze();
        start_critical_timings();
 
@@ -235,14 +236,14 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
 
        stop_critical_timings();
        if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE))
-               ct_idle_enter();
+               ct_cpuidle_enter();
 
        entered_state = target_state->enter(dev, drv, index);
        if (WARN_ONCE(!irqs_disabled(), "%ps leaked IRQ state", target_state->enter))
                raw_local_irq_disable();
 
        if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE))
-               ct_idle_exit();
+               ct_cpuidle_exit();
        start_critical_timings();
 
        sched_clock_idle_wakeup_event();
index 8ae9a95..9aac31d 100644 (file)
@@ -211,7 +211,7 @@ extern int tick_receive_broadcast(void);
 extern void tick_setup_hrtimer_broadcast(void);
 extern int tick_check_broadcast_expired(void);
 # else
-static inline int tick_check_broadcast_expired(void) { return 0; }
+static __always_inline int tick_check_broadcast_expired(void) { return 0; }
 static inline void tick_setup_hrtimer_broadcast(void) { }
 # endif
 
@@ -219,7 +219,7 @@ static inline void tick_setup_hrtimer_broadcast(void) { }
 
 static inline void clockevents_suspend(void) { }
 static inline void clockevents_resume(void) { }
-static inline int tick_check_broadcast_expired(void) { return 0; }
+static __always_inline int tick_check_broadcast_expired(void) { return 0; }
 static inline void tick_setup_hrtimer_broadcast(void) { }
 
 #endif /* !CONFIG_GENERIC_CLOCKEVENTS */
index 0ddc11e..630c879 100644 (file)
@@ -14,6 +14,7 @@
 #include <linux/percpu.h>
 #include <linux/list.h>
 #include <linux/hrtimer.h>
+#include <linux/context_tracking.h>
 
 #define CPUIDLE_STATE_MAX      10
 #define CPUIDLE_NAME_LEN       16
@@ -115,6 +116,35 @@ struct cpuidle_device {
 DECLARE_PER_CPU(struct cpuidle_device *, cpuidle_devices);
 DECLARE_PER_CPU(struct cpuidle_device, cpuidle_dev);
 
+static __always_inline void ct_cpuidle_enter(void)
+{
+       lockdep_assert_irqs_disabled();
+       /*
+        * Idle is allowed to (temporary) enable IRQs. It
+        * will return with IRQs disabled.
+        *
+        * Trace IRQs enable here, then switch off RCU, and have
+        * arch_cpu_idle() use raw_local_irq_enable(). Note that
+        * ct_idle_enter() relies on lockdep IRQ state, so switch that
+        * last -- this is very similar to the entry code.
+        */
+       trace_hardirqs_on_prepare();
+       lockdep_hardirqs_on_prepare();
+       instrumentation_end();
+       ct_idle_enter();
+       lockdep_hardirqs_on(_RET_IP_);
+}
+
+static __always_inline void ct_cpuidle_exit(void)
+{
+       /*
+        * Carefully undo the above.
+        */
+       lockdep_hardirqs_off(_RET_IP_);
+       ct_idle_exit();
+       instrumentation_begin();
+}
+
 /****************************
  * CPUIDLE DRIVER INTERFACE *
  ****************************/
@@ -289,9 +319,9 @@ extern s64 cpuidle_governor_latency_req(unsigned int cpu);
        if (!is_retention)                                              \
                __ret =  cpu_pm_enter();                                \
        if (!__ret) {                                                   \
-               ct_idle_enter();                                        \
+               ct_cpuidle_enter();                                     \
                __ret = low_level_idle_enter(state);                    \
-               ct_idle_exit();                                         \
+               ct_cpuidle_exit();                                      \
                if (!is_retention)                                      \
                        cpu_pm_exit();                                  \
        }                                                               \
index f26ab26..e924602 100644 (file)
@@ -51,18 +51,22 @@ __setup("hlt", cpu_idle_nopoll_setup);
 
 static noinline int __cpuidle cpu_idle_poll(void)
 {
+       instrumentation_begin();
        trace_cpu_idle(0, smp_processor_id());
        stop_critical_timings();
-       ct_idle_enter();
-       local_irq_enable();
+       ct_cpuidle_enter();
 
+       raw_local_irq_enable();
        while (!tif_need_resched() &&
               (cpu_idle_force_poll || tick_check_broadcast_expired()))
                cpu_relax();
+       raw_local_irq_disable();
 
-       ct_idle_exit();
+       ct_cpuidle_exit();
        start_critical_timings();
        trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id());
+       local_irq_enable();
+       instrumentation_end();
 
        return 1;
 }
@@ -85,44 +89,21 @@ void __weak arch_cpu_idle(void)
  */
 void __cpuidle default_idle_call(void)
 {
-       if (current_clr_polling_and_test()) {
-               local_irq_enable();
-       } else {
-
+       instrumentation_begin();
+       if (!current_clr_polling_and_test()) {
                trace_cpu_idle(1, smp_processor_id());
                stop_critical_timings();
 
-               /*
-                * arch_cpu_idle() is supposed to enable IRQs, however
-                * we can't do that because of RCU and tracing.
-                *
-                * Trace IRQs enable here, then switch off RCU, and have
-                * arch_cpu_idle() use raw_local_irq_enable(). Note that
-                * ct_idle_enter() relies on lockdep IRQ state, so switch that
-                * last -- this is very similar to the entry code.
-                */
-               trace_hardirqs_on_prepare();
-               lockdep_hardirqs_on_prepare();
-               ct_idle_enter();
-               lockdep_hardirqs_on(_THIS_IP_);
-
+               ct_cpuidle_enter();
                arch_cpu_idle();
-
-               /*
-                * OK, so IRQs are enabled here, but RCU needs them disabled to
-                * turn itself back on.. funny thing is that disabling IRQs
-                * will cause tracing, which needs RCU. Jump through hoops to
-                * make it 'work'.
-                */
                raw_local_irq_disable();
-               lockdep_hardirqs_off(_THIS_IP_);
-               ct_idle_exit();
-               lockdep_hardirqs_on(_THIS_IP_);
-               raw_local_irq_enable();
+               ct_cpuidle_exit();
 
                start_critical_timings();
                trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id());
        }
+       local_irq_enable();
+       instrumentation_end();
 }
 
 static int call_cpuidle_s2idle(struct cpuidle_driver *drv,
index f7fe6fe..93bf2b4 100644 (file)
@@ -622,9 +622,13 @@ struct cpumask *tick_get_broadcast_oneshot_mask(void)
  * to avoid a deep idle transition as we are about to get the
  * broadcast IPI right away.
  */
-int tick_check_broadcast_expired(void)
+noinstr int tick_check_broadcast_expired(void)
 {
+#ifdef _ASM_GENERIC_BITOPS_INSTRUMENTED_NON_ATOMIC_H
+       return arch_test_bit(smp_processor_id(), cpumask_bits(tick_broadcast_force_mask));
+#else
        return cpumask_test_cpu(smp_processor_id(), tick_broadcast_force_mask);
+#endif
 }
 
 /*