powerpc/mm/hash: Don't memset pgd table if not needed
[linux-2.6-microblaze.git] / arch / powerpc / platforms / powernv / idle.c
1 /*
2  * PowerNV cpuidle code
3  *
4  * Copyright 2015 IBM Corp.
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; either version
9  * 2 of the License, or (at your option) any later version.
10  */
11
12 #include <linux/types.h>
13 #include <linux/mm.h>
14 #include <linux/slab.h>
15 #include <linux/of.h>
16 #include <linux/device.h>
17 #include <linux/cpu.h>
18
19 #include <asm/firmware.h>
20 #include <asm/machdep.h>
21 #include <asm/opal.h>
22 #include <asm/cputhreads.h>
23 #include <asm/cpuidle.h>
24 #include <asm/code-patching.h>
25 #include <asm/smp.h>
26 #include <asm/runlatch.h>
27 #include <asm/dbell.h>
28
29 #include "powernv.h"
30 #include "subcore.h"
31
/* Power ISA 3.0 allows for stop states 0x0 - 0xF */
#define MAX_STOP_STATE  0xF

/*
 * Register identifiers passed to opal_slw_set_reg() on ISA 3.0 CPUs to
 * program the MSR and the PSSCR respectively.
 */
#define P9_STOP_SPR_MSR 2000
#define P9_STOP_SPR_PSSCR      855

/*
 * Bitwise OR of all "ibm,cpu-idle-state-flags" entries found in the
 * device tree, minus any bits cleared later when a class of states has
 * to be disabled.
 */
static u32 supported_cpuidle_states;

/*
 * The default stop state that will be used by ppc_md.power_save
 * function on platforms that support stop instruction.
 */
static u64 pnv_default_stop_val;
static u64 pnv_default_stop_mask;
/* True once a state flagged OPAL_PM_STOP_INST_FAST has been recorded. */
static bool default_stop_found;

/*
 * First deep stop state. Used to figure out when to save/restore
 * hypervisor context.
 */
u64 pnv_first_deep_stop_state = MAX_STOP_STATE;

/*
 * psscr value and mask of the deepest stop idle state.
 * Used when a cpu is offlined.
 */
static u64 pnv_deepest_stop_psscr_val;
static u64 pnv_deepest_stop_psscr_mask;
/* Device-tree flags of the state recorded as the deepest one. */
static u64 pnv_deepest_stop_flag;
/* True while a usable deepest stop state is recorded for CPU-Hotplug. */
static bool deepest_stop_found;
62
63 static int pnv_save_sprs_for_deep_states(void)
64 {
65         int cpu;
66         int rc;
67
68         /*
69          * hid0, hid1, hid4, hid5, hmeer and lpcr values are symmetric across
70          * all cpus at boot. Get these reg values of current cpu and use the
71          * same across all cpus.
72          */
73         uint64_t lpcr_val = mfspr(SPRN_LPCR);
74         uint64_t hid0_val = mfspr(SPRN_HID0);
75         uint64_t hid1_val = mfspr(SPRN_HID1);
76         uint64_t hid4_val = mfspr(SPRN_HID4);
77         uint64_t hid5_val = mfspr(SPRN_HID5);
78         uint64_t hmeer_val = mfspr(SPRN_HMEER);
79         uint64_t msr_val = MSR_IDLE;
80         uint64_t psscr_val = pnv_deepest_stop_psscr_val;
81
82         for_each_possible_cpu(cpu) {
83                 uint64_t pir = get_hard_smp_processor_id(cpu);
84                 uint64_t hsprg0_val = (uint64_t)&paca[cpu];
85
86                 rc = opal_slw_set_reg(pir, SPRN_HSPRG0, hsprg0_val);
87                 if (rc != 0)
88                         return rc;
89
90                 rc = opal_slw_set_reg(pir, SPRN_LPCR, lpcr_val);
91                 if (rc != 0)
92                         return rc;
93
94                 if (cpu_has_feature(CPU_FTR_ARCH_300)) {
95                         rc = opal_slw_set_reg(pir, P9_STOP_SPR_MSR, msr_val);
96                         if (rc)
97                                 return rc;
98
99                         rc = opal_slw_set_reg(pir,
100                                               P9_STOP_SPR_PSSCR, psscr_val);
101
102                         if (rc)
103                                 return rc;
104                 }
105
106                 /* HIDs are per core registers */
107                 if (cpu_thread_in_core(cpu) == 0) {
108
109                         rc = opal_slw_set_reg(pir, SPRN_HMEER, hmeer_val);
110                         if (rc != 0)
111                                 return rc;
112
113                         rc = opal_slw_set_reg(pir, SPRN_HID0, hid0_val);
114                         if (rc != 0)
115                                 return rc;
116
117                         /* Only p8 needs to set extra HID regiters */
118                         if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
119
120                                 rc = opal_slw_set_reg(pir, SPRN_HID1, hid1_val);
121                                 if (rc != 0)
122                                         return rc;
123
124                                 rc = opal_slw_set_reg(pir, SPRN_HID4, hid4_val);
125                                 if (rc != 0)
126                                         return rc;
127
128                                 rc = opal_slw_set_reg(pir, SPRN_HID5, hid5_val);
129                                 if (rc != 0)
130                                         return rc;
131                         }
132                 }
133         }
134
135         return 0;
136 }
137
/*
 * Allocate one core_idle_state word per core, point every thread's PACA
 * at it and initialise the per-thread idle bookkeeping. On POWER9 DD1 a
 * thread_sibling_pacas array is also allocated per thread (it is filled
 * in by the caller). Finally, if any supported idle state loses full
 * context, the SPRs are handed to OPAL; if that fails those states are
 * disabled and the CPU-Hotplug idle state is downgraded.
 */
static void pnv_alloc_idle_core_states(void)
{
        int i, j;
        int nr_cores = cpu_nr_cores();
        u32 *core_idle_state;

        /*
         * core_idle_state - The lower 8 bits track the idle state of
         * each thread of the core.
         *
         * The most significant bit is the lock bit.
         *
         * Initially all the bits corresponding to threads_per_core
         * are set. They are cleared when the thread enters deep idle
         * state like sleep and winkle/stop.
         *
         * Initially the lock bit is cleared.  The lock bit has 2
         * purposes:
         *      a. While the first thread in the core waking up from
         *         idle is restoring core state, it prevents other
         *         threads in the core from switching to process
         *         context.
         *      b. While the last thread in the core is saving the
         *         core state, it prevents a different thread from
         *         waking up.
         */
        for (i = 0; i < nr_cores; i++) {
                int first_cpu = i * threads_per_core;
                int node = cpu_to_node(first_cpu);
                size_t paca_ptr_array_size;

                /*
                 * NOTE(review): the result of kmalloc_node() is dereferenced
                 * on the next line without a NULL check.
                 */
                core_idle_state = kmalloc_node(sizeof(u32), GFP_KERNEL, node);
                *core_idle_state = (1 << threads_per_core) - 1;
                paca_ptr_array_size = (threads_per_core *
                                       sizeof(struct paca_struct *));

                for (j = 0; j < threads_per_core; j++) {
                        int cpu = first_cpu + j;

                        /* All threads of a core share one state word */
                        paca[cpu].core_idle_state_ptr = core_idle_state;
                        paca[cpu].thread_idle_state = PNV_THREAD_RUNNING;
                        paca[cpu].thread_mask = 1 << j;
                        /* The sibling PACA array is only needed on POWER9 DD1 */
                        if (!cpu_has_feature(CPU_FTR_POWER9_DD1))
                                continue;
                        /*
                         * NOTE(review): this allocation is not checked here
                         * either; pnv_init_idle_states() later writes through
                         * the stored pointer.
                         */
                        paca[cpu].thread_sibling_pacas =
                                kmalloc_node(paca_ptr_array_size,
                                             GFP_KERNEL, node);
                }
        }

        update_subcore_sibling_mask();

        if (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT) {
                int rc = pnv_save_sprs_for_deep_states();

                if (likely(!rc))
                        return;

                /*
                 * The stop-api is unable to restore hypervisor
                 * resources on wakeup from platform idle states which
                 * lose full context. So disable such states.
                 */
                supported_cpuidle_states &= ~OPAL_PM_LOSE_FULL_CONTEXT;
                pr_warn("cpuidle-powernv: Disabling idle states that lose full context\n");
                pr_warn("cpuidle-powernv: Idle power-savings, CPU-Hotplug affected\n");

                if (cpu_has_feature(CPU_FTR_ARCH_300) &&
                    (pnv_deepest_stop_flag & OPAL_PM_LOSE_FULL_CONTEXT)) {
                        /*
                         * Use the default stop state for CPU-Hotplug
                         * if available.
                         */
                        if (default_stop_found) {
                                pnv_deepest_stop_psscr_val =
                                        pnv_default_stop_val;
                                pnv_deepest_stop_psscr_mask =
                                        pnv_default_stop_mask;
                                pr_warn("cpuidle-powernv: Offlined CPUs will stop with psscr = 0x%016llx\n",
                                        pnv_deepest_stop_psscr_val);
                        } else { /* Fallback to snooze loop for CPU-Hotplug */
                                deepest_stop_found = false;
                                pr_warn("cpuidle-powernv: Offlined CPUs will busy wait\n");
                        }
                }
        }
}
225
/*
 * Return the current bitmask of supported idle-state flags: the OR of the
 * device-tree flags gathered at init, minus any bits cleared afterwards.
 */
u32 pnv_get_supported_cpuidle_states(void)
{
        return supported_cpuidle_states;
}
EXPORT_SYMBOL_GPL(pnv_get_supported_cpuidle_states);
231
232 static void pnv_fastsleep_workaround_apply(void *info)
233
234 {
235         int rc;
236         int *err = info;
237
238         rc = opal_config_cpu_idle_state(OPAL_CONFIG_IDLE_FASTSLEEP,
239                                         OPAL_CONFIG_IDLE_APPLY);
240         if (rc)
241                 *err = 1;
242 }
243
/*
 * Used to store fastsleep workaround state
 * 0 - Workaround applied/undone at fastsleep entry/exit path (Default)
 * 1 - Workaround applied once, never undone.
 */
static u8 fastsleep_workaround_applyonce;

/*
 * sysfs read handler: prints the current fastsleep_workaround_applyonce
 * value followed by a newline into @buf and returns the number of
 * characters written.
 */
static ssize_t show_fastsleep_workaround_applyonce(struct device *dev,
                struct device_attribute *attr, char *buf)
{
        return sprintf(buf, "%u\n", fastsleep_workaround_applyonce);
}
256
257 static ssize_t store_fastsleep_workaround_applyonce(struct device *dev,
258                 struct device_attribute *attr, const char *buf,
259                 size_t count)
260 {
261         cpumask_t primary_thread_mask;
262         int err;
263         u8 val;
264
265         if (kstrtou8(buf, 0, &val) || val != 1)
266                 return -EINVAL;
267
268         if (fastsleep_workaround_applyonce == 1)
269                 return count;
270
271         /*
272          * fastsleep_workaround_applyonce = 1 implies
273          * fastsleep workaround needs to be left in 'applied' state on all
274          * the cores. Do this by-
275          * 1. Patching out the call to 'undo' workaround in fastsleep exit path
276          * 2. Sending ipi to all the cores which have at least one online thread
277          * 3. Patching out the call to 'apply' workaround in fastsleep entry
278          * path
279          * There is no need to send ipi to cores which have all threads
280          * offlined, as last thread of the core entering fastsleep or deeper
281          * state would have applied workaround.
282          */
283         err = patch_instruction(
284                 (unsigned int *)pnv_fastsleep_workaround_at_exit,
285                 PPC_INST_NOP);
286         if (err) {
287                 pr_err("fastsleep_workaround_applyonce change failed while patching pnv_fastsleep_workaround_at_exit");
288                 goto fail;
289         }
290
291         get_online_cpus();
292         primary_thread_mask = cpu_online_cores_map();
293         on_each_cpu_mask(&primary_thread_mask,
294                                 pnv_fastsleep_workaround_apply,
295                                 &err, 1);
296         put_online_cpus();
297         if (err) {
298                 pr_err("fastsleep_workaround_applyonce change failed while running pnv_fastsleep_workaround_apply");
299                 goto fail;
300         }
301
302         err = patch_instruction(
303                 (unsigned int *)pnv_fastsleep_workaround_at_entry,
304                 PPC_INST_NOP);
305         if (err) {
306                 pr_err("fastsleep_workaround_applyonce change failed while patching pnv_fastsleep_workaround_at_entry");
307                 goto fail;
308         }
309
310         fastsleep_workaround_applyonce = 1;
311
312         return count;
313 fail:
314         return -EIO;
315 }
316
317 static DEVICE_ATTR(fastsleep_workaround_applyonce, 0600,
318                         show_fastsleep_workaround_applyonce,
319                         store_fastsleep_workaround_applyonce);
320
/*
 * Enter the POWER7-style idle state @type with the run latch off.
 *
 * Returns the value produced by power7_idle_insn(), or 0 without idling
 * when prep_irq_for_idle_irqsoff() refuses.
 */
static unsigned long __power7_idle_type(unsigned long type)
{
        unsigned long wake_srr1 = 0;

        if (prep_irq_for_idle_irqsoff()) {
                __ppc64_runlatch_off();
                wake_srr1 = power7_idle_insn(type);
                __ppc64_runlatch_on();

                fini_irq_for_idle_irqsoff();
        }

        return wake_srr1;
}
336
/*
 * Enter idle state @type and pass the resulting SRR1 value on to
 * irq_set_pending_from_srr1().
 */
void power7_idle_type(unsigned long type)
{
        irq_set_pending_from_srr1(__power7_idle_type(type));
}
344
345 void power7_idle(void)
346 {
347         if (!powersave_nap)
348                 return;
349
350         power7_idle_type(PNV_THREAD_NAP);
351 }
352
353 static unsigned long __power9_idle_type(unsigned long stop_psscr_val,
354                                       unsigned long stop_psscr_mask)
355 {
356         unsigned long psscr;
357         unsigned long srr1;
358
359         if (!prep_irq_for_idle_irqsoff())
360                 return 0;
361
362         psscr = mfspr(SPRN_PSSCR);
363         psscr = (psscr & ~stop_psscr_mask) | stop_psscr_val;
364
365         __ppc64_runlatch_off();
366         srr1 = power9_idle_stop(psscr);
367         __ppc64_runlatch_on();
368
369         fini_irq_for_idle_irqsoff();
370
371         return srr1;
372 }
373
/*
 * Enter the stop state described by @stop_psscr_val / @stop_psscr_mask
 * and pass the resulting SRR1 value on to irq_set_pending_from_srr1().
 */
void power9_idle_type(unsigned long stop_psscr_val,
                                      unsigned long stop_psscr_mask)
{
        irq_set_pending_from_srr1(__power9_idle_type(stop_psscr_val,
                                                     stop_psscr_mask));
}
382
/*
 * Used for ppc_md.power_save which needs a function with no parameters.
 * Enters the default stop state recorded in pnv_default_stop_{val,mask}.
 */
void power9_idle(void)
{
        power9_idle_type(pnv_default_stop_val, pnv_default_stop_mask);
}
390
391 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
/*
 * This is used in working around bugs in thread reconfiguration
 * on POWER9 (at least up to Nimbus DD2.2) relating to transactional
 * memory and the way that XER[SO] is checkpointed.
 * This function forces the core into SMT4 by asking
 * all other threads not to stop, and sending a message to any
 * that are in a stop state.
 * Must be called with preemption disabled.
 *
 * DO NOT call this unless cpu_has_feature(CPU_FTR_P9_TM_XER_SO_BUG) is
 * true; otherwise this function will hang the system, due to the
 * optimization in power9_idle_stop.
 */
void pnv_power9_force_smt4_catch(void)
{
        int cpu, cpu0, thr;
        struct paca_struct *tpaca;
        int awake_threads = 1;          /* this thread is awake */
        int poke_threads = 0;           /* bitmask of threads seen stopped */
        int need_awake = threads_per_core;

        cpu = smp_processor_id();
        /* first thread of this core; relies on threads_per_core being 2^n */
        cpu0 = cpu & ~(threads_per_core - 1);
        tpaca = &paca[cpu0];
        /* ask every sibling (not ourselves) to refrain from stopping */
        for (thr = 0; thr < threads_per_core; ++thr) {
                if (cpu != cpu0 + thr)
                        atomic_inc(&tpaca[thr].dont_stop);
        }
        /* order setting dont_stop vs testing requested_psscr */
        mb();
        /*
         * NOTE(review): this scan also visits the calling thread, so
         * presumably it is counted a second time here (its
         * requested_psscr should be 0 while it runs). That would
         * reconcile need_awake == threads_per_core with the "at least
         * 3 threads" wording below - confirm.
         */
        for (thr = 0; thr < threads_per_core; ++thr) {
                if (!tpaca[thr].requested_psscr)
                        ++awake_threads;
                else
                        poke_threads |= (1 << thr);
        }

        /* If at least 3 threads are awake, the core is in SMT4 already */
        if (awake_threads < need_awake) {
                /* We have to wake some threads; we'll use msgsnd */
                for (thr = 0; thr < threads_per_core; ++thr) {
                        if (poke_threads & (1 << thr)) {
                                ppc_msgsnd_sync();
                                ppc_msgsnd(PPC_DBELL_MSGTYPE, 0,
                                           tpaca[thr].hw_cpu_id);
                        }
                }
                /* now spin until at least 3 threads are awake */
                do {
                        for (thr = 0; thr < threads_per_core; ++thr) {
                                /* count each poked thread once, when it clears requested_psscr */
                                if ((poke_threads & (1 << thr)) &&
                                    !tpaca[thr].requested_psscr) {
                                        ++awake_threads;
                                        poke_threads &= ~(1 << thr);
                                }
                        }
                } while (awake_threads < need_awake);
        }
}
EXPORT_SYMBOL_GPL(pnv_power9_force_smt4_catch);
452
453 void pnv_power9_force_smt4_release(void)
454 {
455         int cpu, cpu0, thr;
456         struct paca_struct *tpaca;
457
458         cpu = smp_processor_id();
459         cpu0 = cpu & ~(threads_per_core - 1);
460         tpaca = &paca[cpu0];
461
462         /* clear all the dont_stop flags */
463         for (thr = 0; thr < threads_per_core; ++thr) {
464                 if (cpu != cpu0 + thr)
465                         atomic_dec(&tpaca[thr].dont_stop);
466         }
467 }
468 EXPORT_SYMBOL_GPL(pnv_power9_force_smt4_release);
469 #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
470
471 #ifdef CONFIG_HOTPLUG_CPU
472 static void pnv_program_cpu_hotplug_lpcr(unsigned int cpu, u64 lpcr_val)
473 {
474         u64 pir = get_hard_smp_processor_id(cpu);
475
476         mtspr(SPRN_LPCR, lpcr_val);
477
478         /*
479          * Program the LPCR via stop-api only if the deepest stop state
480          * can lose hypervisor context.
481          */
482         if (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT)
483                 opal_slw_set_reg(pir, SPRN_LPCR, lpcr_val);
484 }
485
/*
 * pnv_cpu_offline: A function that puts the CPU into the deepest
 * available platform idle state on a CPU-Offline.
 * interrupts hard disabled and no lazy irq pending.
 *
 * @cpu: logical number of the cpu going offline; used to look up its
 *       hardware id and to poll for a restart request in the fallback.
 *
 * Returns the value handed back by the idle instruction that was used,
 * or 0 when the snooze fallback loop was taken.
 */
unsigned long pnv_cpu_offline(unsigned int cpu)
{
        unsigned long srr1;
        u32 idle_states = pnv_get_supported_cpuidle_states();
        u64 lpcr_val;

        /*
         * We don't want to take decrementer interrupts while we are
         * offline, so clear LPCR:PECE1. We keep PECE2 (and
         * LPCR_PECE_HVEE on P9) enabled as to let IPIs in.
         *
         * If the CPU gets woken up by a special wakeup, ensure that
         * the SLW engine sets LPCR with decrementer bit cleared, else
         * the CPU will come back to the kernel due to a spurious
         * wakeup.
         */
        lpcr_val = mfspr(SPRN_LPCR) & ~(u64)LPCR_PECE1;
        pnv_program_cpu_hotplug_lpcr(cpu, lpcr_val);

        __ppc64_runlatch_off();

        /*
         * Pick the idle mechanism, deepest first: the recorded deepest
         * stop state on ISA 3.0, then winkle, sleep and nap, and finally
         * a busy-wait loop when nothing else is available.
         */
        if (cpu_has_feature(CPU_FTR_ARCH_300) && deepest_stop_found) {
                unsigned long psscr;

                psscr = mfspr(SPRN_PSSCR);
                psscr = (psscr & ~pnv_deepest_stop_psscr_mask) |
                                                pnv_deepest_stop_psscr_val;
                srr1 = power9_idle_stop(psscr);

        } else if ((idle_states & OPAL_PM_WINKLE_ENABLED) &&
                   (idle_states & OPAL_PM_LOSE_FULL_CONTEXT)) {
                srr1 = power7_idle_insn(PNV_THREAD_WINKLE);
        } else if ((idle_states & OPAL_PM_SLEEP_ENABLED) ||
                   (idle_states & OPAL_PM_SLEEP_ENABLED_ER1)) {
                srr1 = power7_idle_insn(PNV_THREAD_SLEEP);
        } else if (idle_states & OPAL_PM_NAP_ENABLED) {
                srr1 = power7_idle_insn(PNV_THREAD_NAP);
        } else {
                /* This is the fallback method. We emulate snooze */
                while (!generic_check_cpu_restart(cpu)) {
                        HMT_low();
                        HMT_very_low();
                }
                srr1 = 0;
                HMT_medium();
        }

        __ppc64_runlatch_on();

        /*
         * Re-enable decrementer interrupts in LPCR.
         *
         * Further, we want stop states to be woken up by decrementer
         * for non-hotplug cases. So program the LPCR via stop api as
         * well.
         */
        lpcr_val = mfspr(SPRN_LPCR) | (u64)LPCR_PECE1;
        pnv_program_cpu_hotplug_lpcr(cpu, lpcr_val);

        return srr1;
}
552 #endif
553
554 /*
555  * Power ISA 3.0 idle initialization.
556  *
557  * POWER ISA 3.0 defines a new SPR Processor stop Status and Control
558  * Register (PSSCR) to control idle behavior.
559  *
560  * PSSCR layout:
561  * ----------------------------------------------------------
562  * | PLS | /// | SD | ESL | EC | PSLL | /// | TR | MTL | RL |
563  * ----------------------------------------------------------
564  * 0      4     41   42    43   44     48    54   56    60
565  *
566  * PSSCR key fields:
567  *      Bits 0:3  - Power-Saving Level Status (PLS). This field indicates the
568  *      lowest power-saving state the thread entered since stop instruction was
569  *      last executed.
570  *
571  *      Bit 41 - Status Disable(SD)
572  *      0 - Shows PLS entries
573  *      1 - PLS entries are all 0
574  *
575  *      Bit 42 - Enable State Loss
576  *      0 - No state is lost irrespective of other fields
577  *      1 - Allows state loss
578  *
579  *      Bit 43 - Exit Criterion
580  *      0 - Exit from power-save mode on any interrupt
581  *      1 - Exit from power-save mode controlled by LPCR's PECE bits
582  *
583  *      Bits 44:47 - Power-Saving Level Limit
584  *      This limits the power-saving level that can be entered into.
585  *
586  *      Bits 60:63 - Requested Level
587  *      Used to specify which power-saving level must be entered on executing
588  *      stop instruction
589  */
590
591 int validate_psscr_val_mask(u64 *psscr_val, u64 *psscr_mask, u32 flags)
592 {
593         int err = 0;
594
595         /*
596          * psscr_mask == 0xf indicates an older firmware.
597          * Set remaining fields of psscr to the default values.
598          * See NOTE above definition of PSSCR_HV_DEFAULT_VAL
599          */
600         if (*psscr_mask == 0xf) {
601                 *psscr_val = *psscr_val | PSSCR_HV_DEFAULT_VAL;
602                 *psscr_mask = PSSCR_HV_DEFAULT_MASK;
603                 return err;
604         }
605
606         /*
607          * New firmware is expected to set the psscr_val bits correctly.
608          * Validate that the following invariants are correctly maintained by
609          * the new firmware.
610          * - ESL bit value matches the EC bit value.
611          * - ESL bit is set for all the deep stop states.
612          */
613         if (GET_PSSCR_ESL(*psscr_val) != GET_PSSCR_EC(*psscr_val)) {
614                 err = ERR_EC_ESL_MISMATCH;
615         } else if ((flags & OPAL_PM_LOSE_FULL_CONTEXT) &&
616                 GET_PSSCR_ESL(*psscr_val) == 0) {
617                 err = ERR_DEEP_STATE_ESL_MISMATCH;
618         }
619
620         return err;
621 }
622
623 /*
624  * pnv_arch300_idle_init: Initializes the default idle state, first
625  *                        deep idle state and deepest idle state on
626  *                        ISA 3.0 CPUs.
627  *
628  * @np: /ibm,opal/power-mgt device node
629  * @flags: cpu-idle-state-flags array
630  * @dt_idle_states: Number of idle state entries
631  * Returns 0 on success
632  */
633 static int __init pnv_power9_idle_init(struct device_node *np, u32 *flags,
634                                         int dt_idle_states)
635 {
636         u64 *psscr_val = NULL;
637         u64 *psscr_mask = NULL;
638         u32 *residency_ns = NULL;
639         u64 max_residency_ns = 0;
640         int rc = 0, i;
641
642         psscr_val = kcalloc(dt_idle_states, sizeof(*psscr_val), GFP_KERNEL);
643         psscr_mask = kcalloc(dt_idle_states, sizeof(*psscr_mask), GFP_KERNEL);
644         residency_ns = kcalloc(dt_idle_states, sizeof(*residency_ns),
645                                GFP_KERNEL);
646
647         if (!psscr_val || !psscr_mask || !residency_ns) {
648                 rc = -1;
649                 goto out;
650         }
651
652         if (of_property_read_u64_array(np,
653                 "ibm,cpu-idle-state-psscr",
654                 psscr_val, dt_idle_states)) {
655                 pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-psscr in DT\n");
656                 rc = -1;
657                 goto out;
658         }
659
660         if (of_property_read_u64_array(np,
661                                        "ibm,cpu-idle-state-psscr-mask",
662                                        psscr_mask, dt_idle_states)) {
663                 pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-psscr-mask in DT\n");
664                 rc = -1;
665                 goto out;
666         }
667
668         if (of_property_read_u32_array(np,
669                                        "ibm,cpu-idle-state-residency-ns",
670                                         residency_ns, dt_idle_states)) {
671                 pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-residency-ns in DT\n");
672                 rc = -1;
673                 goto out;
674         }
675
676         /*
677          * Set pnv_first_deep_stop_state, pnv_deepest_stop_psscr_{val,mask},
678          * and the pnv_default_stop_{val,mask}.
679          *
680          * pnv_first_deep_stop_state should be set to the first stop
681          * level to cause hypervisor state loss.
682          *
683          * pnv_deepest_stop_{val,mask} should be set to values corresponding to
684          * the deepest stop state.
685          *
686          * pnv_default_stop_{val,mask} should be set to values corresponding to
687          * the shallowest (OPAL_PM_STOP_INST_FAST) loss-less stop state.
688          */
689         pnv_first_deep_stop_state = MAX_STOP_STATE;
690         for (i = 0; i < dt_idle_states; i++) {
691                 int err;
692                 u64 psscr_rl = psscr_val[i] & PSSCR_RL_MASK;
693
694                 if ((flags[i] & OPAL_PM_LOSE_FULL_CONTEXT) &&
695                      (pnv_first_deep_stop_state > psscr_rl))
696                         pnv_first_deep_stop_state = psscr_rl;
697
698                 err = validate_psscr_val_mask(&psscr_val[i], &psscr_mask[i],
699                                               flags[i]);
700                 if (err) {
701                         report_invalid_psscr_val(psscr_val[i], err);
702                         continue;
703                 }
704
705                 if (max_residency_ns < residency_ns[i]) {
706                         max_residency_ns = residency_ns[i];
707                         pnv_deepest_stop_psscr_val = psscr_val[i];
708                         pnv_deepest_stop_psscr_mask = psscr_mask[i];
709                         pnv_deepest_stop_flag = flags[i];
710                         deepest_stop_found = true;
711                 }
712
713                 if (!default_stop_found &&
714                     (flags[i] & OPAL_PM_STOP_INST_FAST)) {
715                         pnv_default_stop_val = psscr_val[i];
716                         pnv_default_stop_mask = psscr_mask[i];
717                         default_stop_found = true;
718                 }
719         }
720
721         if (unlikely(!default_stop_found)) {
722                 pr_warn("cpuidle-powernv: No suitable default stop state found. Disabling platform idle.\n");
723         } else {
724                 ppc_md.power_save = power9_idle;
725                 pr_info("cpuidle-powernv: Default stop: psscr = 0x%016llx,mask=0x%016llx\n",
726                         pnv_default_stop_val, pnv_default_stop_mask);
727         }
728
729         if (unlikely(!deepest_stop_found)) {
730                 pr_warn("cpuidle-powernv: No suitable stop state for CPU-Hotplug. Offlined CPUs will busy wait");
731         } else {
732                 pr_info("cpuidle-powernv: Deepest stop: psscr = 0x%016llx,mask=0x%016llx\n",
733                         pnv_deepest_stop_psscr_val,
734                         pnv_deepest_stop_psscr_mask);
735         }
736
737         pr_info("cpuidle-powernv: Requested Level (RL) value of first deep stop = 0x%llx\n",
738                 pnv_first_deep_stop_state);
739 out:
740         kfree(psscr_val);
741         kfree(psscr_mask);
742         kfree(residency_ns);
743         return rc;
744 }
745
746 /*
747  * Probe device tree for supported idle states
748  */
749 static void __init pnv_probe_idle_states(void)
750 {
751         struct device_node *np;
752         int dt_idle_states;
753         u32 *flags = NULL;
754         int i;
755
756         np = of_find_node_by_path("/ibm,opal/power-mgt");
757         if (!np) {
758                 pr_warn("opal: PowerMgmt Node not found\n");
759                 goto out;
760         }
761         dt_idle_states = of_property_count_u32_elems(np,
762                         "ibm,cpu-idle-state-flags");
763         if (dt_idle_states < 0) {
764                 pr_warn("cpuidle-powernv: no idle states found in the DT\n");
765                 goto out;
766         }
767
768         flags = kcalloc(dt_idle_states, sizeof(*flags),  GFP_KERNEL);
769
770         if (of_property_read_u32_array(np,
771                         "ibm,cpu-idle-state-flags", flags, dt_idle_states)) {
772                 pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-flags in DT\n");
773                 goto out;
774         }
775
776         if (cpu_has_feature(CPU_FTR_ARCH_300)) {
777                 if (pnv_power9_idle_init(np, flags, dt_idle_states))
778                         goto out;
779         }
780
781         for (i = 0; i < dt_idle_states; i++)
782                 supported_cpuidle_states |= flags[i];
783
784 out:
785         kfree(flags);
786 }
787 static int __init pnv_init_idle_states(void)
788 {
789
790         supported_cpuidle_states = 0;
791
792         if (cpuidle_disable != IDLE_NO_OVERRIDE)
793                 goto out;
794
795         pnv_probe_idle_states();
796
797         if (!(supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED_ER1)) {
798                 patch_instruction(
799                         (unsigned int *)pnv_fastsleep_workaround_at_entry,
800                         PPC_INST_NOP);
801                 patch_instruction(
802                         (unsigned int *)pnv_fastsleep_workaround_at_exit,
803                         PPC_INST_NOP);
804         } else {
805                 /*
806                  * OPAL_PM_SLEEP_ENABLED_ER1 is set. It indicates that
807                  * workaround is needed to use fastsleep. Provide sysfs
808                  * control to choose how this workaround has to be applied.
809                  */
810                 device_create_file(cpu_subsys.dev_root,
811                                 &dev_attr_fastsleep_workaround_applyonce);
812         }
813
814         pnv_alloc_idle_core_states();
815
816         /*
817          * For each CPU, record its PACA address in each of it's
818          * sibling thread's PACA at the slot corresponding to this
819          * CPU's index in the core.
820          */
821         if (cpu_has_feature(CPU_FTR_POWER9_DD1)) {
822                 int cpu;
823
824                 pr_info("powernv: idle: Saving PACA pointers of all CPUs in their thread sibling PACA\n");
825                 for_each_possible_cpu(cpu) {
826                         int base_cpu = cpu_first_thread_sibling(cpu);
827                         int idx = cpu_thread_in_core(cpu);
828                         int i;
829
830                         for (i = 0; i < threads_per_core; i++) {
831                                 int j = base_cpu + i;
832
833                                 paca[j].thread_sibling_pacas[idx] = &paca[cpu];
834                         }
835                 }
836         }
837
838         if (supported_cpuidle_states & OPAL_PM_NAP_ENABLED)
839                 ppc_md.power_save = power7_idle;
840
841 out:
842         return 0;
843 }
844 machine_subsys_initcall(powernv, pnv_init_idle_states);