powerpc/powernv/idle: add a basic stop 0-3 driver for POWER10
authorNicholas Piggin <npiggin@gmail.com>
Wed, 19 Aug 2020 09:47:00 +0000 (19:47 +1000)
committerMichael Ellerman <mpe@ellerman.id.au>
Tue, 15 Sep 2020 12:13:38 +0000 (22:13 +1000)
This driver does not restore stop > 3 state, so it limits itself
to states which do not lose full state or TB.

The POWER10 SPRs are sufficiently different from P9 that it seems
easier to split out the P10 code. The POWER10 deep sleep code
(e.g., the BHRB restore) has been taken out, but it can be re-added
when stop > 3 support is added.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Tested-by: Pratik Rajesh Sampat <psampat@linux.ibm.com>
Tested-by: Vaidyanathan Srinivasan <svaidy@linux.ibm.com>
Reviewed-by: Pratik Rajesh Sampat <psampat@linux.ibm.com>
Reviewed-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20200819094700.493399-1-npiggin@gmail.com
arch/powerpc/include/asm/machdep.h
arch/powerpc/include/asm/processor.h
arch/powerpc/include/asm/reg.h
arch/powerpc/platforms/powernv/idle.c
drivers/cpuidle/cpuidle-powernv.c

index a90b892..5082cd4 100644 (file)
@@ -222,8 +222,6 @@ struct machdep_calls {
 
 extern void e500_idle(void);
 extern void power4_idle(void);
-extern void power7_idle(void);
-extern void power9_idle(void);
 extern void ppc6xx_idle(void);
 extern void book3e_idle(void);
 
index 36a71cd..22ffe85 100644 (file)
@@ -432,7 +432,7 @@ enum idle_boot_override {IDLE_NO_OVERRIDE = 0, IDLE_POWERSAVE_OFF};
 extern int powersave_nap;      /* set if nap mode can be used in idle loop */
 
 extern void power7_idle_type(unsigned long type);
-extern void power9_idle_type(unsigned long stop_psscr_val,
+extern void arch300_idle_type(unsigned long stop_psscr_val,
                              unsigned long stop_psscr_mask);
 
 extern int fix_alignment(struct pt_regs *);
index 5647006..d25c357 100644 (file)
 #define PVR_POWER8NVL  0x004C
 #define PVR_POWER8     0x004D
 #define PVR_POWER9     0x004E
+#define PVR_POWER10    0x0080
 #define PVR_BE         0x0070
 #define PVR_PA6T       0x0090
 
index 345ab06..1ed7c52 100644 (file)
@@ -565,7 +565,7 @@ void power7_idle_type(unsigned long type)
        irq_set_pending_from_srr1(srr1);
 }
 
-void power7_idle(void)
+static void power7_idle(void)
 {
        if (!powersave_nap)
                return;
@@ -659,20 +659,6 @@ static unsigned long power9_idle_stop(unsigned long psscr, bool mmu_on)
                mmcr0           = mfspr(SPRN_MMCR0);
        }
 
-       if (cpu_has_feature(CPU_FTR_ARCH_31)) {
-               /*
-                * POWER10 uses MMCRA (BHRBRD) as BHRB disable bit.
-                * If the user hasn't asked for the BHRB to be
-                * written, the value of MMCRA[BHRBRD] is 1.
-                * On wakeup from stop, MMCRA[BHRBD] will be 0,
-                * since it is previleged resource and will be lost.
-                * Thus, if we do not save and restore the MMCRA[BHRBD],
-                * hardware will be needlessly writing to the BHRB
-                * in problem mode.
-                */
-               mmcra           = mfspr(SPRN_MMCRA);
-       }
-
        if ((psscr & PSSCR_RL_MASK) >= deep_spr_loss_state) {
                sprs.lpcr       = mfspr(SPRN_LPCR);
                sprs.hfscr      = mfspr(SPRN_HFSCR);
@@ -735,10 +721,6 @@ static unsigned long power9_idle_stop(unsigned long psscr, bool mmu_on)
                        mtspr(SPRN_MMCR0, mmcr0);
                }
 
-               /* Reload MMCRA to restore BHRB disable bit for POWER10 */
-               if (cpu_has_feature(CPU_FTR_ARCH_31))
-                       mtspr(SPRN_MMCRA, mmcra);
-
                /*
                 * DD2.2 and earlier need to set then clear bit 60 in MMCRA
                 * to ensure the PMU starts running.
@@ -823,73 +805,6 @@ out:
        return srr1;
 }
 
-#ifdef CONFIG_HOTPLUG_CPU
-static unsigned long power9_offline_stop(unsigned long psscr)
-{
-       unsigned long srr1;
-
-#ifndef CONFIG_KVM_BOOK3S_HV_POSSIBLE
-       __ppc64_runlatch_off();
-       srr1 = power9_idle_stop(psscr, true);
-       __ppc64_runlatch_on();
-#else
-       /*
-        * Tell KVM we're entering idle.
-        * This does not have to be done in real mode because the P9 MMU
-        * is independent per-thread. Some steppings share radix/hash mode
-        * between threads, but in that case KVM has a barrier sync in real
-        * mode before and after switching between radix and hash.
-        *
-        * kvm_start_guest must still be called in real mode though, hence
-        * the false argument.
-        */
-       local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_IDLE;
-
-       __ppc64_runlatch_off();
-       srr1 = power9_idle_stop(psscr, false);
-       __ppc64_runlatch_on();
-
-       local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_KERNEL;
-       /* Order setting hwthread_state vs. testing hwthread_req */
-       smp_mb();
-       if (local_paca->kvm_hstate.hwthread_req)
-               srr1 = idle_kvm_start_guest(srr1);
-       mtmsr(MSR_KERNEL);
-#endif
-
-       return srr1;
-}
-#endif
-
-void power9_idle_type(unsigned long stop_psscr_val,
-                                     unsigned long stop_psscr_mask)
-{
-       unsigned long psscr;
-       unsigned long srr1;
-
-       if (!prep_irq_for_idle_irqsoff())
-               return;
-
-       psscr = mfspr(SPRN_PSSCR);
-       psscr = (psscr & ~stop_psscr_mask) | stop_psscr_val;
-
-       __ppc64_runlatch_off();
-       srr1 = power9_idle_stop(psscr, true);
-       __ppc64_runlatch_on();
-
-       fini_irq_for_idle_irqsoff();
-
-       irq_set_pending_from_srr1(srr1);
-}
-
-/*
- * Used for ppc_md.power_save which needs a function with no parameters
- */
-void power9_idle(void)
-{
-       power9_idle_type(pnv_default_stop_val, pnv_default_stop_mask);
-}
-
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 /*
  * This is used in working around bugs in thread reconfiguration
@@ -962,6 +877,198 @@ void pnv_power9_force_smt4_release(void)
 EXPORT_SYMBOL_GPL(pnv_power9_force_smt4_release);
 #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
 
+struct p10_sprs {
+       /*
+        * SPRs that get lost in shallow states:
+        *
+        * P10 loses CR, LR, CTR, FPSCR, VSCR, XER, TAR, SPRG2, and HSPRG1
+        * isa300 idle routines restore CR, LR.
+        * CTR is volatile
+        * idle thread doesn't use FP or VEC
+        * kernel doesn't use TAR
+        * HSPRG1 is only live in HV interrupt entry
+        * SPRG2 is only live in KVM guests, KVM handles it.
+        */
+};
+
+static unsigned long power10_idle_stop(unsigned long psscr, bool mmu_on)
+{
+       int cpu = raw_smp_processor_id();
+       int first = cpu_first_thread_sibling(cpu);
+       unsigned long *state = &paca_ptrs[first]->idle_state;
+       unsigned long core_thread_mask = (1UL << threads_per_core) - 1;
+       unsigned long srr1;
+       unsigned long pls;
+//     struct p10_sprs sprs = {}; /* avoid false used-uninitialised */
+       bool sprs_saved = false;
+
+       if (!(psscr & (PSSCR_EC|PSSCR_ESL))) {
+               /* EC=ESL=0 case */
+
+               BUG_ON(!mmu_on);
+
+               /*
+                * Wake synchronously. SRESET via xscom may still cause
+                * a 0x100 powersave wakeup with SRR1 reason!
+                */
+               srr1 = isa300_idle_stop_noloss(psscr);          /* go idle */
+               if (likely(!srr1))
+                       return 0;
+
+               /*
+                * Registers not saved, can't recover!
+                * This would be a hardware bug
+                */
+               BUG_ON((srr1 & SRR1_WAKESTATE) != SRR1_WS_NOLOSS);
+
+               goto out;
+       }
+
+       /* EC=ESL=1 case */
+       if ((psscr & PSSCR_RL_MASK) >= deep_spr_loss_state) {
+               /* XXX: save SPRs for deep state loss here. */
+
+               sprs_saved = true;
+
+               atomic_start_thread_idle();
+       }
+
+       srr1 = isa300_idle_stop_mayloss(psscr);         /* go idle */
+
+       psscr = mfspr(SPRN_PSSCR);
+
+       WARN_ON_ONCE(!srr1);
+       WARN_ON_ONCE(mfmsr() & (MSR_IR|MSR_DR));
+
+       if (unlikely((srr1 & SRR1_WAKEMASK_P8) == SRR1_WAKEHMI))
+               hmi_exception_realmode(NULL);
+
+       /*
+        * On POWER10, SRR1 bits do not match exactly as expected.
+        * SRR1_WS_GPRLOSS (10b) can also result in SPR loss, so
+        * just always test PSSCR for SPR/TB state loss.
+        */
+       pls = (psscr & PSSCR_PLS) >> PSSCR_PLS_SHIFT;
+       if (likely(pls < deep_spr_loss_state)) {
+               if (sprs_saved)
+                       atomic_stop_thread_idle();
+               goto out;
+       }
+
+       /* HV state loss */
+       BUG_ON(!sprs_saved);
+
+       atomic_lock_thread_idle();
+
+       if ((*state & core_thread_mask) != 0)
+               goto core_woken;
+
+       /* XXX: restore per-core SPRs here */
+
+       if (pls >= pnv_first_tb_loss_level) {
+               /* TB loss */
+               if (opal_resync_timebase() != OPAL_SUCCESS)
+                       BUG();
+       }
+
+       /*
+        * isync after restoring shared SPRs and before unlocking. Unlock
+        * only contains hwsync which does not necessarily do the right
+        * thing for SPRs.
+        */
+       isync();
+
+core_woken:
+       atomic_unlock_and_stop_thread_idle();
+
+       /* XXX: restore per-thread SPRs here */
+
+       if (!radix_enabled())
+               __slb_restore_bolted_realmode();
+
+out:
+       if (mmu_on)
+               mtmsr(MSR_KERNEL);
+
+       return srr1;
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+static unsigned long arch300_offline_stop(unsigned long psscr)
+{
+       unsigned long srr1;
+
+#ifndef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+       __ppc64_runlatch_off();
+       if (cpu_has_feature(CPU_FTR_ARCH_31))
+               srr1 = power10_idle_stop(psscr, true);
+       else
+               srr1 = power9_idle_stop(psscr, true);
+       __ppc64_runlatch_on();
+#else
+       /*
+        * Tell KVM we're entering idle.
+        * This does not have to be done in real mode because the P9 MMU
+        * is independent per-thread. Some steppings share radix/hash mode
+        * between threads, but in that case KVM has a barrier sync in real
+        * mode before and after switching between radix and hash.
+        *
+        * kvm_start_guest must still be called in real mode though, hence
+        * the false argument.
+        */
+       local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_IDLE;
+
+       __ppc64_runlatch_off();
+       if (cpu_has_feature(CPU_FTR_ARCH_31))
+               srr1 = power10_idle_stop(psscr, false);
+       else
+               srr1 = power9_idle_stop(psscr, false);
+       __ppc64_runlatch_on();
+
+       local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_KERNEL;
+       /* Order setting hwthread_state vs. testing hwthread_req */
+       smp_mb();
+       if (local_paca->kvm_hstate.hwthread_req)
+               srr1 = idle_kvm_start_guest(srr1);
+       mtmsr(MSR_KERNEL);
+#endif
+
+       return srr1;
+}
+#endif
+
+void arch300_idle_type(unsigned long stop_psscr_val,
+                                     unsigned long stop_psscr_mask)
+{
+       unsigned long psscr;
+       unsigned long srr1;
+
+       if (!prep_irq_for_idle_irqsoff())
+               return;
+
+       psscr = mfspr(SPRN_PSSCR);
+       psscr = (psscr & ~stop_psscr_mask) | stop_psscr_val;
+
+       __ppc64_runlatch_off();
+       if (cpu_has_feature(CPU_FTR_ARCH_31))
+               srr1 = power10_idle_stop(psscr, true);
+       else
+               srr1 = power9_idle_stop(psscr, true);
+       __ppc64_runlatch_on();
+
+       fini_irq_for_idle_irqsoff();
+
+       irq_set_pending_from_srr1(srr1);
+}
+
+/*
+ * Used for ppc_md.power_save which needs a function with no parameters
+ */
+static void arch300_idle(void)
+{
+       arch300_idle_type(pnv_default_stop_val, pnv_default_stop_mask);
+}
+
 #ifdef CONFIG_HOTPLUG_CPU
 
 void pnv_program_cpu_hotplug_lpcr(unsigned int cpu, u64 lpcr_val)
@@ -995,7 +1102,7 @@ unsigned long pnv_cpu_offline(unsigned int cpu)
                psscr = mfspr(SPRN_PSSCR);
                psscr = (psscr & ~pnv_deepest_stop_psscr_mask) |
                                                pnv_deepest_stop_psscr_val;
-               srr1 = power9_offline_stop(psscr);
+               srr1 = arch300_offline_stop(psscr);
        } else if (cpu_has_feature(CPU_FTR_ARCH_206) && power7_offline_type) {
                srr1 = power7_offline();
        } else {
@@ -1093,11 +1200,15 @@ int validate_psscr_val_mask(u64 *psscr_val, u64 *psscr_mask, u32 flags)
  * @dt_idle_states: Number of idle state entries
  * Returns 0 on success
  */
-static void __init pnv_power9_idle_init(void)
+static void __init pnv_arch300_idle_init(void)
 {
        u64 max_residency_ns = 0;
        int i;
 
+       /* stop is not really architected, we only have p9,p10 drivers */
+       if (!pvr_version_is(PVR_POWER10) && !pvr_version_is(PVR_POWER9))
+               return;
+
        /*
         * pnv_deepest_stop_{val,mask} should be set to values corresponding to
         * the deepest stop state.
@@ -1112,6 +1223,11 @@ static void __init pnv_power9_idle_init(void)
                struct pnv_idle_states_t *state = &pnv_idle_states[i];
                u64 psscr_rl = state->psscr_val & PSSCR_RL_MASK;
 
+               /* No deep loss driver implemented for POWER10 yet */
+               if (pvr_version_is(PVR_POWER10) &&
+                               state->flags & (OPAL_PM_TIMEBASE_STOP|OPAL_PM_LOSE_FULL_CONTEXT))
+                       continue;
+
                if ((state->flags & OPAL_PM_TIMEBASE_STOP) &&
                     (pnv_first_tb_loss_level > psscr_rl))
                        pnv_first_tb_loss_level = psscr_rl;
@@ -1162,7 +1278,7 @@ static void __init pnv_power9_idle_init(void)
        if (unlikely(!default_stop_found)) {
                pr_warn("cpuidle-powernv: No suitable default stop state found. Disabling platform idle.\n");
        } else {
-               ppc_md.power_save = power9_idle;
+               ppc_md.power_save = arch300_idle;
                pr_info("cpuidle-powernv: Default stop: psscr = 0x%016llx,mask=0x%016llx\n",
                        pnv_default_stop_val, pnv_default_stop_mask);
        }
@@ -1224,7 +1340,7 @@ static void __init pnv_probe_idle_states(void)
        }
 
        if (cpu_has_feature(CPU_FTR_ARCH_300))
-               pnv_power9_idle_init();
+               pnv_arch300_idle_init();
 
        for (i = 0; i < nr_pnv_idle_states; i++)
                supported_cpuidle_states |= pnv_idle_states[i].flags;
@@ -1295,7 +1411,7 @@ static int pnv_parse_cpuidle_dt(void)
        for (i = 0; i < nr_idle_states; i++)
                pnv_idle_states[i].residency_ns = temp_u32[i];
 
-       /* For power9 */
+       /* For power9 and later */
        if (cpu_has_feature(CPU_FTR_ARCH_300)) {
                /* Read pm_crtl_val */
                if (of_property_read_u64_array(np, "ibm,cpu-idle-state-psscr",
@@ -1358,8 +1474,8 @@ static int __init pnv_init_idle_states(void)
                if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
                        /* P7/P8 nap */
                        p->thread_idle_state = PNV_THREAD_RUNNING;
-               } else {
-                       /* P9 stop */
+               } else if (pvr_version_is(PVR_POWER9)) {
+                       /* P9 stop workarounds */
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
                        p->requested_psscr = 0;
                        atomic_set(&p->dont_stop, 0);
index addaa6e..c32c600 100644 (file)
@@ -141,7 +141,7 @@ static int stop_loop(struct cpuidle_device *dev,
                     struct cpuidle_driver *drv,
                     int index)
 {
-       power9_idle_type(stop_psscr_table[index].val,
+       arch300_idle_type(stop_psscr_table[index].val,
                         stop_psscr_table[index].mask);
        return index;
 }