KVM: selftests: Restrict test region to 48-bit physical addresses when using nested
[linux-2.6-microblaze.git] / drivers / idle / intel_idle.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * intel_idle.c - native hardware idle loop for modern Intel processors
4  *
5  * Copyright (c) 2013 - 2020, Intel Corporation.
6  * Len Brown <len.brown@intel.com>
7  * Rafael J. Wysocki <rafael.j.wysocki@intel.com>
8  */
9
10 /*
11  * intel_idle is a cpuidle driver that loads on all Intel CPUs with MWAIT
12  * in lieu of the legacy ACPI processor_idle driver.  The intent is to
13  * make Linux more efficient on these processors, as intel_idle knows
14  * more than ACPI, as well as make Linux more immune to ACPI BIOS bugs.
15  */
16
17 /*
18  * Design Assumptions
19  *
20  * All CPUs have same idle states as boot CPU
21  *
22  * Chipset BM_STS (bus master status) bit is a NOP
23  *      for preventing entry into deep C-states
24  *
25  * CPU will flush caches as needed when entering a C-state via MWAIT
26  *      (in contrast to entering ACPI C3, in which case the WBINVD
27  *      instruction needs to be executed to flush the caches)
28  */
29
30 /*
31  * Known limitations
32  *
33  * ACPI has a .suspend hack to turn off deep c-states during suspend
34  * to avoid complications with the lapic timer workaround.
35  * Have not seen issues with suspend, but may need same workaround here.
36  *
37  */
38
39 /* un-comment DEBUG to enable pr_debug() statements */
40 /* #define DEBUG */
41
42 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
43
44 #include <linux/acpi.h>
45 #include <linux/kernel.h>
46 #include <linux/cpuidle.h>
47 #include <linux/tick.h>
48 #include <trace/events/power.h>
49 #include <linux/sched.h>
50 #include <linux/notifier.h>
51 #include <linux/cpu.h>
52 #include <linux/moduleparam.h>
53 #include <asm/cpu_device_id.h>
54 #include <asm/intel-family.h>
55 #include <asm/mwait.h>
56 #include <asm/msr.h>
57
58 #define INTEL_IDLE_VERSION "0.5.1"
59
60 static struct cpuidle_driver intel_idle_driver = {
61         .name = "intel_idle",
62         .owner = THIS_MODULE,
63 };
64 /* intel_idle.max_cstate=0 disables driver */
65 static int max_cstate = CPUIDLE_STATE_MAX - 1;
66 static unsigned int disabled_states_mask;
67 static unsigned int preferred_states_mask;
68
69 static struct cpuidle_device __percpu *intel_idle_cpuidle_devices;
70
71 static unsigned long auto_demotion_disable_flags;
72
/*
 * Requested handling of C1E promotion.  The default is to preserve the
 * current setting.
 */
static enum {
	C1E_PROMOTION_PRESERVE,
	C1E_PROMOTION_ENABLE,
	C1E_PROMOTION_DISABLE,
} c1e_promotion = C1E_PROMOTION_PRESERVE;
78
/*
 * Per-CPU-model description: the idle states table to use plus a few
 * model-specific tuning knobs.
 */
struct idle_cpu {
	/* Table of idle states for this CPU model. */
	struct cpuidle_state *state_table;

	/*
	 * Hardware C-state auto-demotion may not always be optimal.
	 * Indicate which enable bits to clear here.
	 */
	unsigned long auto_demotion_disable_flags;

	/* NOTE(review): the users of the flags below are outside this chunk. */
	bool byt_auto_demotion_disable_flag;
	bool disable_promotion_to_c1e;
	bool use_acpi;
};
91
92 static const struct idle_cpu *icpu __initdata;
93 static struct cpuidle_state *cpuidle_state_table __initdata;
94
95 static unsigned int mwait_substates __initdata;
96
/*
 * State flag: enable interrupts before entering the C-state.  On some
 * platforms and for some C-states, this may measurably decrease interrupt
 * latency.  Checked by intel_idle() before it executes MWAIT.
 */
#define CPUIDLE_FLAG_IRQ_ENABLE		BIT(14)

/*
 * State flag: enable this state by default even if the ACPI _CST does not
 * list it.
 */
#define CPUIDLE_FLAG_ALWAYS_ENABLE	BIT(15)
107
/*
 * MWAIT takes an 8-bit "hint" in EAX "suggesting"
 * the C-state (top nibble) and sub-state (bottom nibble)
 * 0x00 means "MWAIT(C1)", 0x10 means "MWAIT(C2)" etc.
 *
 * We store the hint at the top of our "flags" for each state, i.e. in
 * bits 31:24 of the flags word:
 *
 *   flg2MWAIT(flags) - extract the 8-bit hint from a flags word.
 *   MWAIT2flg(eax)   - place the low 8 bits of @eax into bits 31:24.
 *
 * Both macro arguments are fully parenthesized so that compound expressions
 * such as MWAIT2flg(a | b) are masked as a whole instead of being split up
 * by operator precedence.
 */
#define flg2MWAIT(flags) (((flags) >> 24) & 0xFF)
#define MWAIT2flg(eax) (((eax) & 0xFF) << 24)
117
118 /**
119  * intel_idle - Ask the processor to enter the given idle state.
120  * @dev: cpuidle device of the target CPU.
121  * @drv: cpuidle driver (assumed to point to intel_idle_driver).
122  * @index: Target idle state index.
123  *
124  * Use the MWAIT instruction to notify the processor that the CPU represented by
125  * @dev is idle and it can try to enter the idle state corresponding to @index.
126  *
127  * If the local APIC timer is not known to be reliable in the target idle state,
128  * enable one-shot tick broadcasting for the target CPU before executing MWAIT.
129  *
130  * Must be called under local_irq_disable().
131  */
132 static __cpuidle int intel_idle(struct cpuidle_device *dev,
133                                 struct cpuidle_driver *drv, int index)
134 {
135         struct cpuidle_state *state = &drv->states[index];
136         unsigned long eax = flg2MWAIT(state->flags);
137         unsigned long ecx = 1; /* break on interrupt flag */
138
139         if (state->flags & CPUIDLE_FLAG_IRQ_ENABLE)
140                 local_irq_enable();
141
142         mwait_idle_with_hints(eax, ecx);
143
144         return index;
145 }
146
147 /**
148  * intel_idle_s2idle - Ask the processor to enter the given idle state.
149  * @dev: cpuidle device of the target CPU.
150  * @drv: cpuidle driver (assumed to point to intel_idle_driver).
151  * @index: Target idle state index.
152  *
153  * Use the MWAIT instruction to notify the processor that the CPU represented by
154  * @dev is idle and it can try to enter the idle state corresponding to @index.
155  *
156  * Invoked as a suspend-to-idle callback routine with frozen user space, frozen
157  * scheduler tick and suspended scheduler clock on the target CPU.
158  */
159 static __cpuidle int intel_idle_s2idle(struct cpuidle_device *dev,
160                                        struct cpuidle_driver *drv, int index)
161 {
162         unsigned long eax = flg2MWAIT(drv->states[index].flags);
163         unsigned long ecx = 1; /* break on interrupt flag */
164
165         mwait_idle_with_hints(eax, ecx);
166
167         return 0;
168 }
169
170 /*
171  * States are indexed by the cstate number,
172  * which is also the index into the MWAIT hint array.
173  * Thus C0 is a dummy.
174  */
175 static struct cpuidle_state nehalem_cstates[] __initdata = {
176         {
177                 .name = "C1",
178                 .desc = "MWAIT 0x00",
179                 .flags = MWAIT2flg(0x00),
180                 .exit_latency = 3,
181                 .target_residency = 6,
182                 .enter = &intel_idle,
183                 .enter_s2idle = intel_idle_s2idle, },
184         {
185                 .name = "C1E",
186                 .desc = "MWAIT 0x01",
187                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
188                 .exit_latency = 10,
189                 .target_residency = 20,
190                 .enter = &intel_idle,
191                 .enter_s2idle = intel_idle_s2idle, },
192         {
193                 .name = "C3",
194                 .desc = "MWAIT 0x10",
195                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
196                 .exit_latency = 20,
197                 .target_residency = 80,
198                 .enter = &intel_idle,
199                 .enter_s2idle = intel_idle_s2idle, },
200         {
201                 .name = "C6",
202                 .desc = "MWAIT 0x20",
203                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
204                 .exit_latency = 200,
205                 .target_residency = 800,
206                 .enter = &intel_idle,
207                 .enter_s2idle = intel_idle_s2idle, },
208         {
209                 .enter = NULL }
210 };
211
212 static struct cpuidle_state snb_cstates[] __initdata = {
213         {
214                 .name = "C1",
215                 .desc = "MWAIT 0x00",
216                 .flags = MWAIT2flg(0x00),
217                 .exit_latency = 2,
218                 .target_residency = 2,
219                 .enter = &intel_idle,
220                 .enter_s2idle = intel_idle_s2idle, },
221         {
222                 .name = "C1E",
223                 .desc = "MWAIT 0x01",
224                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
225                 .exit_latency = 10,
226                 .target_residency = 20,
227                 .enter = &intel_idle,
228                 .enter_s2idle = intel_idle_s2idle, },
229         {
230                 .name = "C3",
231                 .desc = "MWAIT 0x10",
232                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
233                 .exit_latency = 80,
234                 .target_residency = 211,
235                 .enter = &intel_idle,
236                 .enter_s2idle = intel_idle_s2idle, },
237         {
238                 .name = "C6",
239                 .desc = "MWAIT 0x20",
240                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
241                 .exit_latency = 104,
242                 .target_residency = 345,
243                 .enter = &intel_idle,
244                 .enter_s2idle = intel_idle_s2idle, },
245         {
246                 .name = "C7",
247                 .desc = "MWAIT 0x30",
248                 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
249                 .exit_latency = 109,
250                 .target_residency = 345,
251                 .enter = &intel_idle,
252                 .enter_s2idle = intel_idle_s2idle, },
253         {
254                 .enter = NULL }
255 };
256
257 static struct cpuidle_state byt_cstates[] __initdata = {
258         {
259                 .name = "C1",
260                 .desc = "MWAIT 0x00",
261                 .flags = MWAIT2flg(0x00),
262                 .exit_latency = 1,
263                 .target_residency = 1,
264                 .enter = &intel_idle,
265                 .enter_s2idle = intel_idle_s2idle, },
266         {
267                 .name = "C6N",
268                 .desc = "MWAIT 0x58",
269                 .flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED,
270                 .exit_latency = 300,
271                 .target_residency = 275,
272                 .enter = &intel_idle,
273                 .enter_s2idle = intel_idle_s2idle, },
274         {
275                 .name = "C6S",
276                 .desc = "MWAIT 0x52",
277                 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
278                 .exit_latency = 500,
279                 .target_residency = 560,
280                 .enter = &intel_idle,
281                 .enter_s2idle = intel_idle_s2idle, },
282         {
283                 .name = "C7",
284                 .desc = "MWAIT 0x60",
285                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
286                 .exit_latency = 1200,
287                 .target_residency = 4000,
288                 .enter = &intel_idle,
289                 .enter_s2idle = intel_idle_s2idle, },
290         {
291                 .name = "C7S",
292                 .desc = "MWAIT 0x64",
293                 .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
294                 .exit_latency = 10000,
295                 .target_residency = 20000,
296                 .enter = &intel_idle,
297                 .enter_s2idle = intel_idle_s2idle, },
298         {
299                 .enter = NULL }
300 };
301
302 static struct cpuidle_state cht_cstates[] __initdata = {
303         {
304                 .name = "C1",
305                 .desc = "MWAIT 0x00",
306                 .flags = MWAIT2flg(0x00),
307                 .exit_latency = 1,
308                 .target_residency = 1,
309                 .enter = &intel_idle,
310                 .enter_s2idle = intel_idle_s2idle, },
311         {
312                 .name = "C6N",
313                 .desc = "MWAIT 0x58",
314                 .flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED,
315                 .exit_latency = 80,
316                 .target_residency = 275,
317                 .enter = &intel_idle,
318                 .enter_s2idle = intel_idle_s2idle, },
319         {
320                 .name = "C6S",
321                 .desc = "MWAIT 0x52",
322                 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
323                 .exit_latency = 200,
324                 .target_residency = 560,
325                 .enter = &intel_idle,
326                 .enter_s2idle = intel_idle_s2idle, },
327         {
328                 .name = "C7",
329                 .desc = "MWAIT 0x60",
330                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
331                 .exit_latency = 1200,
332                 .target_residency = 4000,
333                 .enter = &intel_idle,
334                 .enter_s2idle = intel_idle_s2idle, },
335         {
336                 .name = "C7S",
337                 .desc = "MWAIT 0x64",
338                 .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
339                 .exit_latency = 10000,
340                 .target_residency = 20000,
341                 .enter = &intel_idle,
342                 .enter_s2idle = intel_idle_s2idle, },
343         {
344                 .enter = NULL }
345 };
346
347 static struct cpuidle_state ivb_cstates[] __initdata = {
348         {
349                 .name = "C1",
350                 .desc = "MWAIT 0x00",
351                 .flags = MWAIT2flg(0x00),
352                 .exit_latency = 1,
353                 .target_residency = 1,
354                 .enter = &intel_idle,
355                 .enter_s2idle = intel_idle_s2idle, },
356         {
357                 .name = "C1E",
358                 .desc = "MWAIT 0x01",
359                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
360                 .exit_latency = 10,
361                 .target_residency = 20,
362                 .enter = &intel_idle,
363                 .enter_s2idle = intel_idle_s2idle, },
364         {
365                 .name = "C3",
366                 .desc = "MWAIT 0x10",
367                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
368                 .exit_latency = 59,
369                 .target_residency = 156,
370                 .enter = &intel_idle,
371                 .enter_s2idle = intel_idle_s2idle, },
372         {
373                 .name = "C6",
374                 .desc = "MWAIT 0x20",
375                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
376                 .exit_latency = 80,
377                 .target_residency = 300,
378                 .enter = &intel_idle,
379                 .enter_s2idle = intel_idle_s2idle, },
380         {
381                 .name = "C7",
382                 .desc = "MWAIT 0x30",
383                 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
384                 .exit_latency = 87,
385                 .target_residency = 300,
386                 .enter = &intel_idle,
387                 .enter_s2idle = intel_idle_s2idle, },
388         {
389                 .enter = NULL }
390 };
391
392 static struct cpuidle_state ivt_cstates[] __initdata = {
393         {
394                 .name = "C1",
395                 .desc = "MWAIT 0x00",
396                 .flags = MWAIT2flg(0x00),
397                 .exit_latency = 1,
398                 .target_residency = 1,
399                 .enter = &intel_idle,
400                 .enter_s2idle = intel_idle_s2idle, },
401         {
402                 .name = "C1E",
403                 .desc = "MWAIT 0x01",
404                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
405                 .exit_latency = 10,
406                 .target_residency = 80,
407                 .enter = &intel_idle,
408                 .enter_s2idle = intel_idle_s2idle, },
409         {
410                 .name = "C3",
411                 .desc = "MWAIT 0x10",
412                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
413                 .exit_latency = 59,
414                 .target_residency = 156,
415                 .enter = &intel_idle,
416                 .enter_s2idle = intel_idle_s2idle, },
417         {
418                 .name = "C6",
419                 .desc = "MWAIT 0x20",
420                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
421                 .exit_latency = 82,
422                 .target_residency = 300,
423                 .enter = &intel_idle,
424                 .enter_s2idle = intel_idle_s2idle, },
425         {
426                 .enter = NULL }
427 };
428
429 static struct cpuidle_state ivt_cstates_4s[] __initdata = {
430         {
431                 .name = "C1",
432                 .desc = "MWAIT 0x00",
433                 .flags = MWAIT2flg(0x00),
434                 .exit_latency = 1,
435                 .target_residency = 1,
436                 .enter = &intel_idle,
437                 .enter_s2idle = intel_idle_s2idle, },
438         {
439                 .name = "C1E",
440                 .desc = "MWAIT 0x01",
441                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
442                 .exit_latency = 10,
443                 .target_residency = 250,
444                 .enter = &intel_idle,
445                 .enter_s2idle = intel_idle_s2idle, },
446         {
447                 .name = "C3",
448                 .desc = "MWAIT 0x10",
449                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
450                 .exit_latency = 59,
451                 .target_residency = 300,
452                 .enter = &intel_idle,
453                 .enter_s2idle = intel_idle_s2idle, },
454         {
455                 .name = "C6",
456                 .desc = "MWAIT 0x20",
457                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
458                 .exit_latency = 84,
459                 .target_residency = 400,
460                 .enter = &intel_idle,
461                 .enter_s2idle = intel_idle_s2idle, },
462         {
463                 .enter = NULL }
464 };
465
466 static struct cpuidle_state ivt_cstates_8s[] __initdata = {
467         {
468                 .name = "C1",
469                 .desc = "MWAIT 0x00",
470                 .flags = MWAIT2flg(0x00),
471                 .exit_latency = 1,
472                 .target_residency = 1,
473                 .enter = &intel_idle,
474                 .enter_s2idle = intel_idle_s2idle, },
475         {
476                 .name = "C1E",
477                 .desc = "MWAIT 0x01",
478                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
479                 .exit_latency = 10,
480                 .target_residency = 500,
481                 .enter = &intel_idle,
482                 .enter_s2idle = intel_idle_s2idle, },
483         {
484                 .name = "C3",
485                 .desc = "MWAIT 0x10",
486                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
487                 .exit_latency = 59,
488                 .target_residency = 600,
489                 .enter = &intel_idle,
490                 .enter_s2idle = intel_idle_s2idle, },
491         {
492                 .name = "C6",
493                 .desc = "MWAIT 0x20",
494                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
495                 .exit_latency = 88,
496                 .target_residency = 700,
497                 .enter = &intel_idle,
498                 .enter_s2idle = intel_idle_s2idle, },
499         {
500                 .enter = NULL }
501 };
502
503 static struct cpuidle_state hsw_cstates[] __initdata = {
504         {
505                 .name = "C1",
506                 .desc = "MWAIT 0x00",
507                 .flags = MWAIT2flg(0x00),
508                 .exit_latency = 2,
509                 .target_residency = 2,
510                 .enter = &intel_idle,
511                 .enter_s2idle = intel_idle_s2idle, },
512         {
513                 .name = "C1E",
514                 .desc = "MWAIT 0x01",
515                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
516                 .exit_latency = 10,
517                 .target_residency = 20,
518                 .enter = &intel_idle,
519                 .enter_s2idle = intel_idle_s2idle, },
520         {
521                 .name = "C3",
522                 .desc = "MWAIT 0x10",
523                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
524                 .exit_latency = 33,
525                 .target_residency = 100,
526                 .enter = &intel_idle,
527                 .enter_s2idle = intel_idle_s2idle, },
528         {
529                 .name = "C6",
530                 .desc = "MWAIT 0x20",
531                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
532                 .exit_latency = 133,
533                 .target_residency = 400,
534                 .enter = &intel_idle,
535                 .enter_s2idle = intel_idle_s2idle, },
536         {
537                 .name = "C7s",
538                 .desc = "MWAIT 0x32",
539                 .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED,
540                 .exit_latency = 166,
541                 .target_residency = 500,
542                 .enter = &intel_idle,
543                 .enter_s2idle = intel_idle_s2idle, },
544         {
545                 .name = "C8",
546                 .desc = "MWAIT 0x40",
547                 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
548                 .exit_latency = 300,
549                 .target_residency = 900,
550                 .enter = &intel_idle,
551                 .enter_s2idle = intel_idle_s2idle, },
552         {
553                 .name = "C9",
554                 .desc = "MWAIT 0x50",
555                 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
556                 .exit_latency = 600,
557                 .target_residency = 1800,
558                 .enter = &intel_idle,
559                 .enter_s2idle = intel_idle_s2idle, },
560         {
561                 .name = "C10",
562                 .desc = "MWAIT 0x60",
563                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
564                 .exit_latency = 2600,
565                 .target_residency = 7700,
566                 .enter = &intel_idle,
567                 .enter_s2idle = intel_idle_s2idle, },
568         {
569                 .enter = NULL }
570 };
571 static struct cpuidle_state bdw_cstates[] __initdata = {
572         {
573                 .name = "C1",
574                 .desc = "MWAIT 0x00",
575                 .flags = MWAIT2flg(0x00),
576                 .exit_latency = 2,
577                 .target_residency = 2,
578                 .enter = &intel_idle,
579                 .enter_s2idle = intel_idle_s2idle, },
580         {
581                 .name = "C1E",
582                 .desc = "MWAIT 0x01",
583                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
584                 .exit_latency = 10,
585                 .target_residency = 20,
586                 .enter = &intel_idle,
587                 .enter_s2idle = intel_idle_s2idle, },
588         {
589                 .name = "C3",
590                 .desc = "MWAIT 0x10",
591                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
592                 .exit_latency = 40,
593                 .target_residency = 100,
594                 .enter = &intel_idle,
595                 .enter_s2idle = intel_idle_s2idle, },
596         {
597                 .name = "C6",
598                 .desc = "MWAIT 0x20",
599                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
600                 .exit_latency = 133,
601                 .target_residency = 400,
602                 .enter = &intel_idle,
603                 .enter_s2idle = intel_idle_s2idle, },
604         {
605                 .name = "C7s",
606                 .desc = "MWAIT 0x32",
607                 .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED,
608                 .exit_latency = 166,
609                 .target_residency = 500,
610                 .enter = &intel_idle,
611                 .enter_s2idle = intel_idle_s2idle, },
612         {
613                 .name = "C8",
614                 .desc = "MWAIT 0x40",
615                 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
616                 .exit_latency = 300,
617                 .target_residency = 900,
618                 .enter = &intel_idle,
619                 .enter_s2idle = intel_idle_s2idle, },
620         {
621                 .name = "C9",
622                 .desc = "MWAIT 0x50",
623                 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
624                 .exit_latency = 600,
625                 .target_residency = 1800,
626                 .enter = &intel_idle,
627                 .enter_s2idle = intel_idle_s2idle, },
628         {
629                 .name = "C10",
630                 .desc = "MWAIT 0x60",
631                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
632                 .exit_latency = 2600,
633                 .target_residency = 7700,
634                 .enter = &intel_idle,
635                 .enter_s2idle = intel_idle_s2idle, },
636         {
637                 .enter = NULL }
638 };
639
640 static struct cpuidle_state skl_cstates[] __initdata = {
641         {
642                 .name = "C1",
643                 .desc = "MWAIT 0x00",
644                 .flags = MWAIT2flg(0x00),
645                 .exit_latency = 2,
646                 .target_residency = 2,
647                 .enter = &intel_idle,
648                 .enter_s2idle = intel_idle_s2idle, },
649         {
650                 .name = "C1E",
651                 .desc = "MWAIT 0x01",
652                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
653                 .exit_latency = 10,
654                 .target_residency = 20,
655                 .enter = &intel_idle,
656                 .enter_s2idle = intel_idle_s2idle, },
657         {
658                 .name = "C3",
659                 .desc = "MWAIT 0x10",
660                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
661                 .exit_latency = 70,
662                 .target_residency = 100,
663                 .enter = &intel_idle,
664                 .enter_s2idle = intel_idle_s2idle, },
665         {
666                 .name = "C6",
667                 .desc = "MWAIT 0x20",
668                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
669                 .exit_latency = 85,
670                 .target_residency = 200,
671                 .enter = &intel_idle,
672                 .enter_s2idle = intel_idle_s2idle, },
673         {
674                 .name = "C7s",
675                 .desc = "MWAIT 0x33",
676                 .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED,
677                 .exit_latency = 124,
678                 .target_residency = 800,
679                 .enter = &intel_idle,
680                 .enter_s2idle = intel_idle_s2idle, },
681         {
682                 .name = "C8",
683                 .desc = "MWAIT 0x40",
684                 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
685                 .exit_latency = 200,
686                 .target_residency = 800,
687                 .enter = &intel_idle,
688                 .enter_s2idle = intel_idle_s2idle, },
689         {
690                 .name = "C9",
691                 .desc = "MWAIT 0x50",
692                 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
693                 .exit_latency = 480,
694                 .target_residency = 5000,
695                 .enter = &intel_idle,
696                 .enter_s2idle = intel_idle_s2idle, },
697         {
698                 .name = "C10",
699                 .desc = "MWAIT 0x60",
700                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
701                 .exit_latency = 890,
702                 .target_residency = 5000,
703                 .enter = &intel_idle,
704                 .enter_s2idle = intel_idle_s2idle, },
705         {
706                 .enter = NULL }
707 };
708
709 static struct cpuidle_state skx_cstates[] __initdata = {
710         {
711                 .name = "C1",
712                 .desc = "MWAIT 0x00",
713                 .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_IRQ_ENABLE,
714                 .exit_latency = 2,
715                 .target_residency = 2,
716                 .enter = &intel_idle,
717                 .enter_s2idle = intel_idle_s2idle, },
718         {
719                 .name = "C1E",
720                 .desc = "MWAIT 0x01",
721                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
722                 .exit_latency = 10,
723                 .target_residency = 20,
724                 .enter = &intel_idle,
725                 .enter_s2idle = intel_idle_s2idle, },
726         {
727                 .name = "C6",
728                 .desc = "MWAIT 0x20",
729                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
730                 .exit_latency = 133,
731                 .target_residency = 600,
732                 .enter = &intel_idle,
733                 .enter_s2idle = intel_idle_s2idle, },
734         {
735                 .enter = NULL }
736 };
737
738 static struct cpuidle_state icx_cstates[] __initdata = {
739         {
740                 .name = "C1",
741                 .desc = "MWAIT 0x00",
742                 .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_IRQ_ENABLE,
743                 .exit_latency = 1,
744                 .target_residency = 1,
745                 .enter = &intel_idle,
746                 .enter_s2idle = intel_idle_s2idle, },
747         {
748                 .name = "C1E",
749                 .desc = "MWAIT 0x01",
750                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
751                 .exit_latency = 4,
752                 .target_residency = 4,
753                 .enter = &intel_idle,
754                 .enter_s2idle = intel_idle_s2idle, },
755         {
756                 .name = "C6",
757                 .desc = "MWAIT 0x20",
758                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
759                 .exit_latency = 170,
760                 .target_residency = 600,
761                 .enter = &intel_idle,
762                 .enter_s2idle = intel_idle_s2idle, },
763         {
764                 .enter = NULL }
765 };
766
767 /*
768  * On AlderLake C1 has to be disabled if C1E is enabled, and vice versa.
769  * C1E is enabled only if "C1E promotion" bit is set in MSR_IA32_POWER_CTL.
770  * But in this case there is effectively no C1, because C1 requests are
771  * promoted to C1E. If the "C1E promotion" bit is cleared, then both C1
772  * and C1E requests end up with C1, so there is effectively no C1E.
773  *
774  * By default we enable C1E and disable C1 by marking it with
775  * 'CPUIDLE_FLAG_UNUSABLE'.
776  */
777 static struct cpuidle_state adl_cstates[] __initdata = {
778         {
779                 .name = "C1",
780                 .desc = "MWAIT 0x00",
781                 .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_UNUSABLE,
782                 .exit_latency = 1,
783                 .target_residency = 1,
784                 .enter = &intel_idle,
785                 .enter_s2idle = intel_idle_s2idle, },
786         {
787                 .name = "C1E",
788                 .desc = "MWAIT 0x01",
789                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
790                 .exit_latency = 2,
791                 .target_residency = 4,
792                 .enter = &intel_idle,
793                 .enter_s2idle = intel_idle_s2idle, },
794         {
795                 .name = "C6",
796                 .desc = "MWAIT 0x20",
797                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
798                 .exit_latency = 220,
799                 .target_residency = 600,
800                 .enter = &intel_idle,
801                 .enter_s2idle = intel_idle_s2idle, },
802         {
803                 .name = "C8",
804                 .desc = "MWAIT 0x40",
805                 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
806                 .exit_latency = 280,
807                 .target_residency = 800,
808                 .enter = &intel_idle,
809                 .enter_s2idle = intel_idle_s2idle, },
810         {
811                 .name = "C10",
812                 .desc = "MWAIT 0x60",
813                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
814                 .exit_latency = 680,
815                 .target_residency = 2000,
816                 .enter = &intel_idle,
817                 .enter_s2idle = intel_idle_s2idle, },
818         {
819                 .enter = NULL }
820 };
821
822 static struct cpuidle_state adl_l_cstates[] __initdata = {
823         {
824                 .name = "C1",
825                 .desc = "MWAIT 0x00",
826                 .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_UNUSABLE,
827                 .exit_latency = 1,
828                 .target_residency = 1,
829                 .enter = &intel_idle,
830                 .enter_s2idle = intel_idle_s2idle, },
831         {
832                 .name = "C1E",
833                 .desc = "MWAIT 0x01",
834                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
835                 .exit_latency = 2,
836                 .target_residency = 4,
837                 .enter = &intel_idle,
838                 .enter_s2idle = intel_idle_s2idle, },
839         {
840                 .name = "C6",
841                 .desc = "MWAIT 0x20",
842                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
843                 .exit_latency = 170,
844                 .target_residency = 500,
845                 .enter = &intel_idle,
846                 .enter_s2idle = intel_idle_s2idle, },
847         {
848                 .name = "C8",
849                 .desc = "MWAIT 0x40",
850                 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
851                 .exit_latency = 200,
852                 .target_residency = 600,
853                 .enter = &intel_idle,
854                 .enter_s2idle = intel_idle_s2idle, },
855         {
856                 .name = "C10",
857                 .desc = "MWAIT 0x60",
858                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
859                 .exit_latency = 230,
860                 .target_residency = 700,
861                 .enter = &intel_idle,
862                 .enter_s2idle = intel_idle_s2idle, },
863         {
864                 .enter = NULL }
865 };
866
867 /*
868  * On Sapphire Rapids Xeon C1 has to be disabled if C1E is enabled, and vice
869  * versa. On SPR C1E is enabled only if "C1E promotion" bit is set in
870  * MSR_IA32_POWER_CTL. But in this case there effectively no C1, because C1
871  * requests are promoted to C1E. If the "C1E promotion" bit is cleared, then
872  * both C1 and C1E requests end up with C1, so there is effectively no C1E.
873  *
874  * By default we enable C1 and disable C1E by marking it with
875  * 'CPUIDLE_FLAG_UNUSABLE'.
876  */
877 static struct cpuidle_state spr_cstates[] __initdata = {
878         {
879                 .name = "C1",
880                 .desc = "MWAIT 0x00",
881                 .flags = MWAIT2flg(0x00),
882                 .exit_latency = 1,
883                 .target_residency = 1,
884                 .enter = &intel_idle,
885                 .enter_s2idle = intel_idle_s2idle, },
886         {
887                 .name = "C1E",
888                 .desc = "MWAIT 0x01",
889                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE |
890                                            CPUIDLE_FLAG_UNUSABLE,
891                 .exit_latency = 2,
892                 .target_residency = 4,
893                 .enter = &intel_idle,
894                 .enter_s2idle = intel_idle_s2idle, },
895         {
896                 .name = "C6",
897                 .desc = "MWAIT 0x20",
898                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
899                 .exit_latency = 290,
900                 .target_residency = 800,
901                 .enter = &intel_idle,
902                 .enter_s2idle = intel_idle_s2idle, },
903         {
904                 .enter = NULL }
905 };
906
907 static struct cpuidle_state atom_cstates[] __initdata = {
908         {
909                 .name = "C1E",
910                 .desc = "MWAIT 0x00",
911                 .flags = MWAIT2flg(0x00),
912                 .exit_latency = 10,
913                 .target_residency = 20,
914                 .enter = &intel_idle,
915                 .enter_s2idle = intel_idle_s2idle, },
916         {
917                 .name = "C2",
918                 .desc = "MWAIT 0x10",
919                 .flags = MWAIT2flg(0x10),
920                 .exit_latency = 20,
921                 .target_residency = 80,
922                 .enter = &intel_idle,
923                 .enter_s2idle = intel_idle_s2idle, },
924         {
925                 .name = "C4",
926                 .desc = "MWAIT 0x30",
927                 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
928                 .exit_latency = 100,
929                 .target_residency = 400,
930                 .enter = &intel_idle,
931                 .enter_s2idle = intel_idle_s2idle, },
932         {
933                 .name = "C6",
934                 .desc = "MWAIT 0x52",
935                 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
936                 .exit_latency = 140,
937                 .target_residency = 560,
938                 .enter = &intel_idle,
939                 .enter_s2idle = intel_idle_s2idle, },
940         {
941                 .enter = NULL }
942 };
943 static struct cpuidle_state tangier_cstates[] __initdata = {
944         {
945                 .name = "C1",
946                 .desc = "MWAIT 0x00",
947                 .flags = MWAIT2flg(0x00),
948                 .exit_latency = 1,
949                 .target_residency = 4,
950                 .enter = &intel_idle,
951                 .enter_s2idle = intel_idle_s2idle, },
952         {
953                 .name = "C4",
954                 .desc = "MWAIT 0x30",
955                 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
956                 .exit_latency = 100,
957                 .target_residency = 400,
958                 .enter = &intel_idle,
959                 .enter_s2idle = intel_idle_s2idle, },
960         {
961                 .name = "C6",
962                 .desc = "MWAIT 0x52",
963                 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
964                 .exit_latency = 140,
965                 .target_residency = 560,
966                 .enter = &intel_idle,
967                 .enter_s2idle = intel_idle_s2idle, },
968         {
969                 .name = "C7",
970                 .desc = "MWAIT 0x60",
971                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
972                 .exit_latency = 1200,
973                 .target_residency = 4000,
974                 .enter = &intel_idle,
975                 .enter_s2idle = intel_idle_s2idle, },
976         {
977                 .name = "C9",
978                 .desc = "MWAIT 0x64",
979                 .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
980                 .exit_latency = 10000,
981                 .target_residency = 20000,
982                 .enter = &intel_idle,
983                 .enter_s2idle = intel_idle_s2idle, },
984         {
985                 .enter = NULL }
986 };
987 static struct cpuidle_state avn_cstates[] __initdata = {
988         {
989                 .name = "C1",
990                 .desc = "MWAIT 0x00",
991                 .flags = MWAIT2flg(0x00),
992                 .exit_latency = 2,
993                 .target_residency = 2,
994                 .enter = &intel_idle,
995                 .enter_s2idle = intel_idle_s2idle, },
996         {
997                 .name = "C6",
998                 .desc = "MWAIT 0x51",
999                 .flags = MWAIT2flg(0x51) | CPUIDLE_FLAG_TLB_FLUSHED,
1000                 .exit_latency = 15,
1001                 .target_residency = 45,
1002                 .enter = &intel_idle,
1003                 .enter_s2idle = intel_idle_s2idle, },
1004         {
1005                 .enter = NULL }
1006 };
1007 static struct cpuidle_state knl_cstates[] __initdata = {
1008         {
1009                 .name = "C1",
1010                 .desc = "MWAIT 0x00",
1011                 .flags = MWAIT2flg(0x00),
1012                 .exit_latency = 1,
1013                 .target_residency = 2,
1014                 .enter = &intel_idle,
1015                 .enter_s2idle = intel_idle_s2idle },
1016         {
1017                 .name = "C6",
1018                 .desc = "MWAIT 0x10",
1019                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
1020                 .exit_latency = 120,
1021                 .target_residency = 500,
1022                 .enter = &intel_idle,
1023                 .enter_s2idle = intel_idle_s2idle },
1024         {
1025                 .enter = NULL }
1026 };
1027
1028 static struct cpuidle_state bxt_cstates[] __initdata = {
1029         {
1030                 .name = "C1",
1031                 .desc = "MWAIT 0x00",
1032                 .flags = MWAIT2flg(0x00),
1033                 .exit_latency = 2,
1034                 .target_residency = 2,
1035                 .enter = &intel_idle,
1036                 .enter_s2idle = intel_idle_s2idle, },
1037         {
1038                 .name = "C1E",
1039                 .desc = "MWAIT 0x01",
1040                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
1041                 .exit_latency = 10,
1042                 .target_residency = 20,
1043                 .enter = &intel_idle,
1044                 .enter_s2idle = intel_idle_s2idle, },
1045         {
1046                 .name = "C6",
1047                 .desc = "MWAIT 0x20",
1048                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
1049                 .exit_latency = 133,
1050                 .target_residency = 133,
1051                 .enter = &intel_idle,
1052                 .enter_s2idle = intel_idle_s2idle, },
1053         {
1054                 .name = "C7s",
1055                 .desc = "MWAIT 0x31",
1056                 .flags = MWAIT2flg(0x31) | CPUIDLE_FLAG_TLB_FLUSHED,
1057                 .exit_latency = 155,
1058                 .target_residency = 155,
1059                 .enter = &intel_idle,
1060                 .enter_s2idle = intel_idle_s2idle, },
1061         {
1062                 .name = "C8",
1063                 .desc = "MWAIT 0x40",
1064                 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
1065                 .exit_latency = 1000,
1066                 .target_residency = 1000,
1067                 .enter = &intel_idle,
1068                 .enter_s2idle = intel_idle_s2idle, },
1069         {
1070                 .name = "C9",
1071                 .desc = "MWAIT 0x50",
1072                 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
1073                 .exit_latency = 2000,
1074                 .target_residency = 2000,
1075                 .enter = &intel_idle,
1076                 .enter_s2idle = intel_idle_s2idle, },
1077         {
1078                 .name = "C10",
1079                 .desc = "MWAIT 0x60",
1080                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
1081                 .exit_latency = 10000,
1082                 .target_residency = 10000,
1083                 .enter = &intel_idle,
1084                 .enter_s2idle = intel_idle_s2idle, },
1085         {
1086                 .enter = NULL }
1087 };
1088
1089 static struct cpuidle_state dnv_cstates[] __initdata = {
1090         {
1091                 .name = "C1",
1092                 .desc = "MWAIT 0x00",
1093                 .flags = MWAIT2flg(0x00),
1094                 .exit_latency = 2,
1095                 .target_residency = 2,
1096                 .enter = &intel_idle,
1097                 .enter_s2idle = intel_idle_s2idle, },
1098         {
1099                 .name = "C1E",
1100                 .desc = "MWAIT 0x01",
1101                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
1102                 .exit_latency = 10,
1103                 .target_residency = 20,
1104                 .enter = &intel_idle,
1105                 .enter_s2idle = intel_idle_s2idle, },
1106         {
1107                 .name = "C6",
1108                 .desc = "MWAIT 0x20",
1109                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
1110                 .exit_latency = 50,
1111                 .target_residency = 500,
1112                 .enter = &intel_idle,
1113                 .enter_s2idle = intel_idle_s2idle, },
1114         {
1115                 .enter = NULL }
1116 };
1117
1118 /*
1119  * Note, depending on HW and FW revision, SnowRidge SoC may or may not support
1120  * C6, and this is indicated in the CPUID mwait leaf.
1121  */
1122 static struct cpuidle_state snr_cstates[] __initdata = {
1123         {
1124                 .name = "C1",
1125                 .desc = "MWAIT 0x00",
1126                 .flags = MWAIT2flg(0x00),
1127                 .exit_latency = 2,
1128                 .target_residency = 2,
1129                 .enter = &intel_idle,
1130                 .enter_s2idle = intel_idle_s2idle, },
1131         {
1132                 .name = "C1E",
1133                 .desc = "MWAIT 0x01",
1134                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
1135                 .exit_latency = 15,
1136                 .target_residency = 25,
1137                 .enter = &intel_idle,
1138                 .enter_s2idle = intel_idle_s2idle, },
1139         {
1140                 .name = "C6",
1141                 .desc = "MWAIT 0x20",
1142                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
1143                 .exit_latency = 130,
1144                 .target_residency = 500,
1145                 .enter = &intel_idle,
1146                 .enter_s2idle = intel_idle_s2idle, },
1147         {
1148                 .enter = NULL }
1149 };
1150
1151 static const struct idle_cpu idle_cpu_nehalem __initconst = {
1152         .state_table = nehalem_cstates,
1153         .auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE,
1154         .disable_promotion_to_c1e = true,
1155 };
1156
1157 static const struct idle_cpu idle_cpu_nhx __initconst = {
1158         .state_table = nehalem_cstates,
1159         .auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE,
1160         .disable_promotion_to_c1e = true,
1161         .use_acpi = true,
1162 };
1163
1164 static const struct idle_cpu idle_cpu_atom __initconst = {
1165         .state_table = atom_cstates,
1166 };
1167
1168 static const struct idle_cpu idle_cpu_tangier __initconst = {
1169         .state_table = tangier_cstates,
1170 };
1171
1172 static const struct idle_cpu idle_cpu_lincroft __initconst = {
1173         .state_table = atom_cstates,
1174         .auto_demotion_disable_flags = ATM_LNC_C6_AUTO_DEMOTE,
1175 };
1176
1177 static const struct idle_cpu idle_cpu_snb __initconst = {
1178         .state_table = snb_cstates,
1179         .disable_promotion_to_c1e = true,
1180 };
1181
1182 static const struct idle_cpu idle_cpu_snx __initconst = {
1183         .state_table = snb_cstates,
1184         .disable_promotion_to_c1e = true,
1185         .use_acpi = true,
1186 };
1187
1188 static const struct idle_cpu idle_cpu_byt __initconst = {
1189         .state_table = byt_cstates,
1190         .disable_promotion_to_c1e = true,
1191         .byt_auto_demotion_disable_flag = true,
1192 };
1193
1194 static const struct idle_cpu idle_cpu_cht __initconst = {
1195         .state_table = cht_cstates,
1196         .disable_promotion_to_c1e = true,
1197         .byt_auto_demotion_disable_flag = true,
1198 };
1199
1200 static const struct idle_cpu idle_cpu_ivb __initconst = {
1201         .state_table = ivb_cstates,
1202         .disable_promotion_to_c1e = true,
1203 };
1204
1205 static const struct idle_cpu idle_cpu_ivt __initconst = {
1206         .state_table = ivt_cstates,
1207         .disable_promotion_to_c1e = true,
1208         .use_acpi = true,
1209 };
1210
1211 static const struct idle_cpu idle_cpu_hsw __initconst = {
1212         .state_table = hsw_cstates,
1213         .disable_promotion_to_c1e = true,
1214 };
1215
1216 static const struct idle_cpu idle_cpu_hsx __initconst = {
1217         .state_table = hsw_cstates,
1218         .disable_promotion_to_c1e = true,
1219         .use_acpi = true,
1220 };
1221
1222 static const struct idle_cpu idle_cpu_bdw __initconst = {
1223         .state_table = bdw_cstates,
1224         .disable_promotion_to_c1e = true,
1225 };
1226
1227 static const struct idle_cpu idle_cpu_bdx __initconst = {
1228         .state_table = bdw_cstates,
1229         .disable_promotion_to_c1e = true,
1230         .use_acpi = true,
1231 };
1232
1233 static const struct idle_cpu idle_cpu_skl __initconst = {
1234         .state_table = skl_cstates,
1235         .disable_promotion_to_c1e = true,
1236 };
1237
1238 static const struct idle_cpu idle_cpu_skx __initconst = {
1239         .state_table = skx_cstates,
1240         .disable_promotion_to_c1e = true,
1241         .use_acpi = true,
1242 };
1243
1244 static const struct idle_cpu idle_cpu_icx __initconst = {
1245         .state_table = icx_cstates,
1246         .disable_promotion_to_c1e = true,
1247         .use_acpi = true,
1248 };
1249
1250 static const struct idle_cpu idle_cpu_adl __initconst = {
1251         .state_table = adl_cstates,
1252 };
1253
1254 static const struct idle_cpu idle_cpu_adl_l __initconst = {
1255         .state_table = adl_l_cstates,
1256 };
1257
1258 static const struct idle_cpu idle_cpu_spr __initconst = {
1259         .state_table = spr_cstates,
1260         .disable_promotion_to_c1e = true,
1261         .use_acpi = true,
1262 };
1263
1264 static const struct idle_cpu idle_cpu_avn __initconst = {
1265         .state_table = avn_cstates,
1266         .disable_promotion_to_c1e = true,
1267         .use_acpi = true,
1268 };
1269
1270 static const struct idle_cpu idle_cpu_knl __initconst = {
1271         .state_table = knl_cstates,
1272         .use_acpi = true,
1273 };
1274
1275 static const struct idle_cpu idle_cpu_bxt __initconst = {
1276         .state_table = bxt_cstates,
1277         .disable_promotion_to_c1e = true,
1278 };
1279
1280 static const struct idle_cpu idle_cpu_dnv __initconst = {
1281         .state_table = dnv_cstates,
1282         .disable_promotion_to_c1e = true,
1283         .use_acpi = true,
1284 };
1285
1286 static const struct idle_cpu idle_cpu_snr __initconst = {
1287         .state_table = snr_cstates,
1288         .disable_promotion_to_c1e = true,
1289         .use_acpi = true,
1290 };
1291
1292 static const struct x86_cpu_id intel_idle_ids[] __initconst = {
1293         X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EP,          &idle_cpu_nhx),
1294         X86_MATCH_INTEL_FAM6_MODEL(NEHALEM,             &idle_cpu_nehalem),
1295         X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_G,           &idle_cpu_nehalem),
1296         X86_MATCH_INTEL_FAM6_MODEL(WESTMERE,            &idle_cpu_nehalem),
1297         X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EP,         &idle_cpu_nhx),
1298         X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EX,          &idle_cpu_nhx),
1299         X86_MATCH_INTEL_FAM6_MODEL(ATOM_BONNELL,        &idle_cpu_atom),
1300         X86_MATCH_INTEL_FAM6_MODEL(ATOM_BONNELL_MID,    &idle_cpu_lincroft),
1301         X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EX,         &idle_cpu_nhx),
1302         X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE,         &idle_cpu_snb),
1303         X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE_X,       &idle_cpu_snx),
1304         X86_MATCH_INTEL_FAM6_MODEL(ATOM_SALTWELL,       &idle_cpu_atom),
1305         X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT,     &idle_cpu_byt),
1306         X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT_MID, &idle_cpu_tangier),
1307         X86_MATCH_INTEL_FAM6_MODEL(ATOM_AIRMONT,        &idle_cpu_cht),
1308         X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE,           &idle_cpu_ivb),
1309         X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE_X,         &idle_cpu_ivt),
1310         X86_MATCH_INTEL_FAM6_MODEL(HASWELL,             &idle_cpu_hsw),
1311         X86_MATCH_INTEL_FAM6_MODEL(HASWELL_X,           &idle_cpu_hsx),
1312         X86_MATCH_INTEL_FAM6_MODEL(HASWELL_L,           &idle_cpu_hsw),
1313         X86_MATCH_INTEL_FAM6_MODEL(HASWELL_G,           &idle_cpu_hsw),
1314         X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT_D,   &idle_cpu_avn),
1315         X86_MATCH_INTEL_FAM6_MODEL(BROADWELL,           &idle_cpu_bdw),
1316         X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_G,         &idle_cpu_bdw),
1317         X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X,         &idle_cpu_bdx),
1318         X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_D,         &idle_cpu_bdx),
1319         X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_L,           &idle_cpu_skl),
1320         X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE,             &idle_cpu_skl),
1321         X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE_L,          &idle_cpu_skl),
1322         X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE,            &idle_cpu_skl),
1323         X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X,           &idle_cpu_skx),
1324         X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X,           &idle_cpu_icx),
1325         X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D,           &idle_cpu_icx),
1326         X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE,           &idle_cpu_adl),
1327         X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L,         &idle_cpu_adl_l),
1328         X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X,    &idle_cpu_spr),
1329         X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL,        &idle_cpu_knl),
1330         X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM,        &idle_cpu_knl),
1331         X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT,       &idle_cpu_bxt),
1332         X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_PLUS,  &idle_cpu_bxt),
1333         X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_D,     &idle_cpu_dnv),
1334         X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D,      &idle_cpu_snr),
1335         {}
1336 };
1337
1338 static const struct x86_cpu_id intel_mwait_ids[] __initconst = {
1339         X86_MATCH_VENDOR_FAM_FEATURE(INTEL, 6, X86_FEATURE_MWAIT, NULL),
1340         {}
1341 };
1342
1343 static bool __init intel_idle_max_cstate_reached(int cstate)
1344 {
1345         if (cstate + 1 > max_cstate) {
1346                 pr_info("max_cstate %d reached\n", max_cstate);
1347                 return true;
1348         }
1349         return false;
1350 }
1351
1352 static bool __init intel_idle_state_needs_timer_stop(struct cpuidle_state *state)
1353 {
1354         unsigned long eax = flg2MWAIT(state->flags);
1355
1356         if (boot_cpu_has(X86_FEATURE_ARAT))
1357                 return false;
1358
1359         /*
1360          * Switch over to one-shot tick broadcast if the target C-state
1361          * is deeper than C1.
1362          */
1363         return !!((eax >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK);
1364 }
1365
1366 #ifdef CONFIG_ACPI_PROCESSOR_CSTATE
1367 #include <acpi/processor.h>
1368
1369 static bool no_acpi __read_mostly;
1370 module_param(no_acpi, bool, 0444);
1371 MODULE_PARM_DESC(no_acpi, "Do not use ACPI _CST for building the idle states list");
1372
1373 static bool force_use_acpi __read_mostly; /* No effect if no_acpi is set. */
1374 module_param_named(use_acpi, force_use_acpi, bool, 0444);
1375 MODULE_PARM_DESC(use_acpi, "Use ACPI _CST for building the idle states list");
1376
1377 static struct acpi_processor_power acpi_state_table __initdata;
1378
1379 /**
1380  * intel_idle_cst_usable - Check if the _CST information can be used.
1381  *
1382  * Check if all of the C-states listed by _CST in the max_cstate range are
1383  * ACPI_CSTATE_FFH, which means that they should be entered via MWAIT.
1384  */
1385 static bool __init intel_idle_cst_usable(void)
1386 {
1387         int cstate, limit;
1388
1389         limit = min_t(int, min_t(int, CPUIDLE_STATE_MAX, max_cstate + 1),
1390                       acpi_state_table.count);
1391
1392         for (cstate = 1; cstate < limit; cstate++) {
1393                 struct acpi_processor_cx *cx = &acpi_state_table.states[cstate];
1394
1395                 if (cx->entry_method != ACPI_CSTATE_FFH)
1396                         return false;
1397         }
1398
1399         return true;
1400 }
1401
1402 static bool __init intel_idle_acpi_cst_extract(void)
1403 {
1404         unsigned int cpu;
1405
1406         if (no_acpi) {
1407                 pr_debug("Not allowed to use ACPI _CST\n");
1408                 return false;
1409         }
1410
1411         for_each_possible_cpu(cpu) {
1412                 struct acpi_processor *pr = per_cpu(processors, cpu);
1413
1414                 if (!pr)
1415                         continue;
1416
1417                 if (acpi_processor_evaluate_cst(pr->handle, cpu, &acpi_state_table))
1418                         continue;
1419
1420                 acpi_state_table.count++;
1421
1422                 if (!intel_idle_cst_usable())
1423                         continue;
1424
1425                 if (!acpi_processor_claim_cst_control())
1426                         break;
1427
1428                 return true;
1429         }
1430
1431         acpi_state_table.count = 0;
1432         pr_debug("ACPI _CST not found or not usable\n");
1433         return false;
1434 }
1435
1436 static void __init intel_idle_init_cstates_acpi(struct cpuidle_driver *drv)
1437 {
1438         int cstate, limit = min_t(int, CPUIDLE_STATE_MAX, acpi_state_table.count);
1439
1440         /*
1441          * If limit > 0, intel_idle_cst_usable() has returned 'true', so all of
1442          * the interesting states are ACPI_CSTATE_FFH.
1443          */
1444         for (cstate = 1; cstate < limit; cstate++) {
1445                 struct acpi_processor_cx *cx;
1446                 struct cpuidle_state *state;
1447
1448                 if (intel_idle_max_cstate_reached(cstate - 1))
1449                         break;
1450
1451                 cx = &acpi_state_table.states[cstate];
1452
1453                 state = &drv->states[drv->state_count++];
1454
1455                 snprintf(state->name, CPUIDLE_NAME_LEN, "C%d_ACPI", cstate);
1456                 strlcpy(state->desc, cx->desc, CPUIDLE_DESC_LEN);
1457                 state->exit_latency = cx->latency;
1458                 /*
1459                  * For C1-type C-states use the same number for both the exit
1460                  * latency and target residency, because that is the case for
1461                  * C1 in the majority of the static C-states tables above.
1462                  * For the other types of C-states, however, set the target
1463                  * residency to 3 times the exit latency which should lead to
1464                  * a reasonable balance between energy-efficiency and
1465                  * performance in the majority of interesting cases.
1466                  */
1467                 state->target_residency = cx->latency;
1468                 if (cx->type > ACPI_STATE_C1)
1469                         state->target_residency *= 3;
1470
1471                 state->flags = MWAIT2flg(cx->address);
1472                 if (cx->type > ACPI_STATE_C2)
1473                         state->flags |= CPUIDLE_FLAG_TLB_FLUSHED;
1474
1475                 if (disabled_states_mask & BIT(cstate))
1476                         state->flags |= CPUIDLE_FLAG_OFF;
1477
1478                 if (intel_idle_state_needs_timer_stop(state))
1479                         state->flags |= CPUIDLE_FLAG_TIMER_STOP;
1480
1481                 state->enter = intel_idle;
1482                 state->enter_s2idle = intel_idle_s2idle;
1483         }
1484 }
1485
1486 static bool __init intel_idle_off_by_default(u32 mwait_hint)
1487 {
1488         int cstate, limit;
1489
1490         /*
1491          * If there are no _CST C-states, do not disable any C-states by
1492          * default.
1493          */
1494         if (!acpi_state_table.count)
1495                 return false;
1496
1497         limit = min_t(int, CPUIDLE_STATE_MAX, acpi_state_table.count);
1498         /*
1499          * If limit > 0, intel_idle_cst_usable() has returned 'true', so all of
1500          * the interesting states are ACPI_CSTATE_FFH.
1501          */
1502         for (cstate = 1; cstate < limit; cstate++) {
1503                 if (acpi_state_table.states[cstate].address == mwait_hint)
1504                         return false;
1505         }
1506         return true;
1507 }
1508 #else /* !CONFIG_ACPI_PROCESSOR_CSTATE */
1509 #define force_use_acpi  (false)
1510
1511 static inline bool intel_idle_acpi_cst_extract(void) { return false; }
1512 static inline void intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) { }
1513 static inline bool intel_idle_off_by_default(u32 mwait_hint) { return false; }
1514 #endif /* !CONFIG_ACPI_PROCESSOR_CSTATE */
1515
1516 /**
1517  * ivt_idle_state_table_update - Tune the idle states table for Ivy Town.
1518  *
1519  * Tune IVT multi-socket targets.
1520  * Assumption: num_sockets == (max_package_num + 1).
1521  */
1522 static void __init ivt_idle_state_table_update(void)
1523 {
1524         /* IVT uses a different table for 1-2, 3-4, and > 4 sockets */
1525         int cpu, package_num, num_sockets = 1;
1526
1527         for_each_online_cpu(cpu) {
1528                 package_num = topology_physical_package_id(cpu);
1529                 if (package_num + 1 > num_sockets) {
1530                         num_sockets = package_num + 1;
1531
1532                         if (num_sockets > 4) {
1533                                 cpuidle_state_table = ivt_cstates_8s;
1534                                 return;
1535                         }
1536                 }
1537         }
1538
1539         if (num_sockets > 2)
1540                 cpuidle_state_table = ivt_cstates_4s;
1541
1542         /* else, 1 and 2 socket systems use default ivt_cstates */
1543 }
1544
1545 /**
1546  * irtl_2_usec - IRTL to microseconds conversion.
1547  * @irtl: IRTL MSR value.
1548  *
1549  * Translate the IRTL (Interrupt Response Time Limit) MSR value to microseconds.
1550  */
1551 static unsigned long long __init irtl_2_usec(unsigned long long irtl)
1552 {
1553         static const unsigned int irtl_ns_units[] __initconst = {
1554                 1, 32, 1024, 32768, 1048576, 33554432, 0, 0
1555         };
1556         unsigned long long ns;
1557
1558         if (!irtl)
1559                 return 0;
1560
1561         ns = irtl_ns_units[(irtl >> 10) & 0x7];
1562
1563         return div_u64((irtl & 0x3FF) * ns, NSEC_PER_USEC);
1564 }
1565
1566 /**
1567  * bxt_idle_state_table_update - Fix up the Broxton idle states table.
1568  *
1569  * On BXT, trust the IRTL (Interrupt Response Time Limit) MSR to show the
1570  * definitive maximum latency and use the same value for target_residency.
1571  */
1572 static void __init bxt_idle_state_table_update(void)
1573 {
1574         unsigned long long msr;
1575         unsigned int usec;
1576
1577         rdmsrl(MSR_PKGC6_IRTL, msr);
1578         usec = irtl_2_usec(msr);
1579         if (usec) {
1580                 bxt_cstates[2].exit_latency = usec;
1581                 bxt_cstates[2].target_residency = usec;
1582         }
1583
1584         rdmsrl(MSR_PKGC7_IRTL, msr);
1585         usec = irtl_2_usec(msr);
1586         if (usec) {
1587                 bxt_cstates[3].exit_latency = usec;
1588                 bxt_cstates[3].target_residency = usec;
1589         }
1590
1591         rdmsrl(MSR_PKGC8_IRTL, msr);
1592         usec = irtl_2_usec(msr);
1593         if (usec) {
1594                 bxt_cstates[4].exit_latency = usec;
1595                 bxt_cstates[4].target_residency = usec;
1596         }
1597
1598         rdmsrl(MSR_PKGC9_IRTL, msr);
1599         usec = irtl_2_usec(msr);
1600         if (usec) {
1601                 bxt_cstates[5].exit_latency = usec;
1602                 bxt_cstates[5].target_residency = usec;
1603         }
1604
1605         rdmsrl(MSR_PKGC10_IRTL, msr);
1606         usec = irtl_2_usec(msr);
1607         if (usec) {
1608                 bxt_cstates[6].exit_latency = usec;
1609                 bxt_cstates[6].target_residency = usec;
1610         }
1611
1612 }
1613
1614 /**
1615  * sklh_idle_state_table_update - Fix up the Sky Lake idle states table.
1616  *
1617  * On SKL-H (model 0x5e) skip C8 and C9 if C10 is enabled and SGX disabled.
1618  */
1619 static void __init sklh_idle_state_table_update(void)
1620 {
1621         unsigned long long msr;
1622         unsigned int eax, ebx, ecx, edx;
1623
1624
1625         /* if PC10 disabled via cmdline intel_idle.max_cstate=7 or shallower */
1626         if (max_cstate <= 7)
1627                 return;
1628
1629         /* if PC10 not present in CPUID.MWAIT.EDX */
1630         if ((mwait_substates & (0xF << 28)) == 0)
1631                 return;
1632
1633         rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr);
1634
1635         /* PC10 is not enabled in PKG C-state limit */
1636         if ((msr & 0xF) != 8)
1637                 return;
1638
1639         ecx = 0;
1640         cpuid(7, &eax, &ebx, &ecx, &edx);
1641
1642         /* if SGX is present */
1643         if (ebx & (1 << 2)) {
1644
1645                 rdmsrl(MSR_IA32_FEAT_CTL, msr);
1646
1647                 /* if SGX is enabled */
1648                 if (msr & (1 << 18))
1649                         return;
1650         }
1651
1652         skl_cstates[5].flags |= CPUIDLE_FLAG_UNUSABLE;  /* C8-SKL */
1653         skl_cstates[6].flags |= CPUIDLE_FLAG_UNUSABLE;  /* C9-SKL */
1654 }
1655
1656 /**
1657  * skx_idle_state_table_update - Adjust the Sky Lake/Cascade Lake
1658  * idle states table.
1659  */
1660 static void __init skx_idle_state_table_update(void)
1661 {
1662         unsigned long long msr;
1663
1664         rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr);
1665
1666         /*
1667          * 000b: C0/C1 (no package C-state support)
1668          * 001b: C2
1669          * 010b: C6 (non-retention)
1670          * 011b: C6 (retention)
1671          * 111b: No Package C state limits.
1672          */
1673         if ((msr & 0x7) < 2) {
1674                 /*
1675                  * Uses the CC6 + PC0 latency and 3 times of
1676                  * latency for target_residency if the PC6
1677                  * is disabled in BIOS. This is consistent
1678                  * with how intel_idle driver uses _CST
1679                  * to set the target_residency.
1680                  */
1681                 skx_cstates[2].exit_latency = 92;
1682                 skx_cstates[2].target_residency = 276;
1683         }
1684 }
1685
1686 /**
1687  * adl_idle_state_table_update - Adjust AlderLake idle states table.
1688  */
1689 static void __init adl_idle_state_table_update(void)
1690 {
1691         /* Check if user prefers C1 over C1E. */
1692         if (preferred_states_mask & BIT(1) && !(preferred_states_mask & BIT(2))) {
1693                 cpuidle_state_table[0].flags &= ~CPUIDLE_FLAG_UNUSABLE;
1694                 cpuidle_state_table[1].flags |= CPUIDLE_FLAG_UNUSABLE;
1695
1696                 /* Disable C1E by clearing the "C1E promotion" bit. */
1697                 c1e_promotion = C1E_PROMOTION_DISABLE;
1698                 return;
1699         }
1700
1701         /* Make sure C1E is enabled by default */
1702         c1e_promotion = C1E_PROMOTION_ENABLE;
1703 }
1704
1705 /**
1706  * spr_idle_state_table_update - Adjust Sapphire Rapids idle states table.
1707  */
1708 static void __init spr_idle_state_table_update(void)
1709 {
1710         unsigned long long msr;
1711
1712         /* Check if user prefers C1E over C1. */
1713         if ((preferred_states_mask & BIT(2)) &&
1714             !(preferred_states_mask & BIT(1))) {
1715                 /* Disable C1 and enable C1E. */
1716                 spr_cstates[0].flags |= CPUIDLE_FLAG_UNUSABLE;
1717                 spr_cstates[1].flags &= ~CPUIDLE_FLAG_UNUSABLE;
1718
1719                 /* Enable C1E using the "C1E promotion" bit. */
1720                 c1e_promotion = C1E_PROMOTION_ENABLE;
1721         }
1722
1723         /*
1724          * By default, the C6 state assumes the worst-case scenario of package
1725          * C6. However, if PC6 is disabled, we update the numbers to match
1726          * core C6.
1727          */
1728         rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr);
1729
1730         /* Limit value 2 and above allow for PC6. */
1731         if ((msr & 0x7) < 2) {
1732                 spr_cstates[2].exit_latency = 190;
1733                 spr_cstates[2].target_residency = 600;
1734         }
1735 }
1736
1737 static bool __init intel_idle_verify_cstate(unsigned int mwait_hint)
1738 {
1739         unsigned int mwait_cstate = MWAIT_HINT2CSTATE(mwait_hint) + 1;
1740         unsigned int num_substates = (mwait_substates >> mwait_cstate * 4) &
1741                                         MWAIT_SUBSTATE_MASK;
1742
1743         /* Ignore the C-state if there are NO sub-states in CPUID for it. */
1744         if (num_substates == 0)
1745                 return false;
1746
1747         if (mwait_cstate > 2 && !boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
1748                 mark_tsc_unstable("TSC halts in idle states deeper than C2");
1749
1750         return true;
1751 }
1752
1753 static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv)
1754 {
1755         int cstate;
1756
1757         switch (boot_cpu_data.x86_model) {
1758         case INTEL_FAM6_IVYBRIDGE_X:
1759                 ivt_idle_state_table_update();
1760                 break;
1761         case INTEL_FAM6_ATOM_GOLDMONT:
1762         case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
1763                 bxt_idle_state_table_update();
1764                 break;
1765         case INTEL_FAM6_SKYLAKE:
1766                 sklh_idle_state_table_update();
1767                 break;
1768         case INTEL_FAM6_SKYLAKE_X:
1769                 skx_idle_state_table_update();
1770                 break;
1771         case INTEL_FAM6_SAPPHIRERAPIDS_X:
1772                 spr_idle_state_table_update();
1773                 break;
1774         case INTEL_FAM6_ALDERLAKE:
1775         case INTEL_FAM6_ALDERLAKE_L:
1776                 adl_idle_state_table_update();
1777                 break;
1778         }
1779
1780         for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) {
1781                 unsigned int mwait_hint;
1782
1783                 if (intel_idle_max_cstate_reached(cstate))
1784                         break;
1785
1786                 if (!cpuidle_state_table[cstate].enter &&
1787                     !cpuidle_state_table[cstate].enter_s2idle)
1788                         break;
1789
1790                 /* If marked as unusable, skip this state. */
1791                 if (cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_UNUSABLE) {
1792                         pr_debug("state %s is disabled\n",
1793                                  cpuidle_state_table[cstate].name);
1794                         continue;
1795                 }
1796
1797                 mwait_hint = flg2MWAIT(cpuidle_state_table[cstate].flags);
1798                 if (!intel_idle_verify_cstate(mwait_hint))
1799                         continue;
1800
1801                 /* Structure copy. */
1802                 drv->states[drv->state_count] = cpuidle_state_table[cstate];
1803
1804                 if ((disabled_states_mask & BIT(drv->state_count)) ||
1805                     ((icpu->use_acpi || force_use_acpi) &&
1806                      intel_idle_off_by_default(mwait_hint) &&
1807                      !(cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_ALWAYS_ENABLE)))
1808                         drv->states[drv->state_count].flags |= CPUIDLE_FLAG_OFF;
1809
1810                 if (intel_idle_state_needs_timer_stop(&drv->states[drv->state_count]))
1811                         drv->states[drv->state_count].flags |= CPUIDLE_FLAG_TIMER_STOP;
1812
1813                 drv->state_count++;
1814         }
1815
1816         if (icpu->byt_auto_demotion_disable_flag) {
1817                 wrmsrl(MSR_CC6_DEMOTION_POLICY_CONFIG, 0);
1818                 wrmsrl(MSR_MC6_DEMOTION_POLICY_CONFIG, 0);
1819         }
1820 }
1821
1822 /**
1823  * intel_idle_cpuidle_driver_init - Create the list of available idle states.
1824  * @drv: cpuidle driver structure to initialize.
1825  */
1826 static void __init intel_idle_cpuidle_driver_init(struct cpuidle_driver *drv)
1827 {
1828         cpuidle_poll_state_init(drv);
1829
1830         if (disabled_states_mask & BIT(0))
1831                 drv->states[0].flags |= CPUIDLE_FLAG_OFF;
1832
1833         drv->state_count = 1;
1834
1835         if (icpu)
1836                 intel_idle_init_cstates_icpu(drv);
1837         else
1838                 intel_idle_init_cstates_acpi(drv);
1839 }
1840
1841 static void auto_demotion_disable(void)
1842 {
1843         unsigned long long msr_bits;
1844
1845         rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits);
1846         msr_bits &= ~auto_demotion_disable_flags;
1847         wrmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits);
1848 }
1849
1850 static void c1e_promotion_enable(void)
1851 {
1852         unsigned long long msr_bits;
1853
1854         rdmsrl(MSR_IA32_POWER_CTL, msr_bits);
1855         msr_bits |= 0x2;
1856         wrmsrl(MSR_IA32_POWER_CTL, msr_bits);
1857 }
1858
1859 static void c1e_promotion_disable(void)
1860 {
1861         unsigned long long msr_bits;
1862
1863         rdmsrl(MSR_IA32_POWER_CTL, msr_bits);
1864         msr_bits &= ~0x2;
1865         wrmsrl(MSR_IA32_POWER_CTL, msr_bits);
1866 }
1867
1868 /**
1869  * intel_idle_cpu_init - Register the target CPU with the cpuidle core.
1870  * @cpu: CPU to initialize.
1871  *
1872  * Register a cpuidle device object for @cpu and update its MSRs in accordance
1873  * with the processor model flags.
1874  */
1875 static int intel_idle_cpu_init(unsigned int cpu)
1876 {
1877         struct cpuidle_device *dev;
1878
1879         dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu);
1880         dev->cpu = cpu;
1881
1882         if (cpuidle_register_device(dev)) {
1883                 pr_debug("cpuidle_register_device %d failed!\n", cpu);
1884                 return -EIO;
1885         }
1886
1887         if (auto_demotion_disable_flags)
1888                 auto_demotion_disable();
1889
1890         if (c1e_promotion == C1E_PROMOTION_ENABLE)
1891                 c1e_promotion_enable();
1892         else if (c1e_promotion == C1E_PROMOTION_DISABLE)
1893                 c1e_promotion_disable();
1894
1895         return 0;
1896 }
1897
1898 static int intel_idle_cpu_online(unsigned int cpu)
1899 {
1900         struct cpuidle_device *dev;
1901
1902         if (!boot_cpu_has(X86_FEATURE_ARAT))
1903                 tick_broadcast_enable();
1904
1905         /*
1906          * Some systems can hotplug a cpu at runtime after
1907          * the kernel has booted, we have to initialize the
1908          * driver in this case
1909          */
1910         dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu);
1911         if (!dev->registered)
1912                 return intel_idle_cpu_init(cpu);
1913
1914         return 0;
1915 }
1916
1917 /**
1918  * intel_idle_cpuidle_devices_uninit - Unregister all cpuidle devices.
1919  */
1920 static void __init intel_idle_cpuidle_devices_uninit(void)
1921 {
1922         int i;
1923
1924         for_each_online_cpu(i)
1925                 cpuidle_unregister_device(per_cpu_ptr(intel_idle_cpuidle_devices, i));
1926 }
1927
1928 static int __init intel_idle_init(void)
1929 {
1930         const struct x86_cpu_id *id;
1931         unsigned int eax, ebx, ecx;
1932         int retval;
1933
1934         /* Do not load intel_idle at all for now if idle= is passed */
1935         if (boot_option_idle_override != IDLE_NO_OVERRIDE)
1936                 return -ENODEV;
1937
1938         if (max_cstate == 0) {
1939                 pr_debug("disabled\n");
1940                 return -EPERM;
1941         }
1942
1943         id = x86_match_cpu(intel_idle_ids);
1944         if (id) {
1945                 if (!boot_cpu_has(X86_FEATURE_MWAIT)) {
1946                         pr_debug("Please enable MWAIT in BIOS SETUP\n");
1947                         return -ENODEV;
1948                 }
1949         } else {
1950                 id = x86_match_cpu(intel_mwait_ids);
1951                 if (!id)
1952                         return -ENODEV;
1953         }
1954
1955         if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF)
1956                 return -ENODEV;
1957
1958         cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &mwait_substates);
1959
1960         if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) ||
1961             !(ecx & CPUID5_ECX_INTERRUPT_BREAK) ||
1962             !mwait_substates)
1963                         return -ENODEV;
1964
1965         pr_debug("MWAIT substates: 0x%x\n", mwait_substates);
1966
1967         icpu = (const struct idle_cpu *)id->driver_data;
1968         if (icpu) {
1969                 cpuidle_state_table = icpu->state_table;
1970                 auto_demotion_disable_flags = icpu->auto_demotion_disable_flags;
1971                 if (icpu->disable_promotion_to_c1e)
1972                         c1e_promotion = C1E_PROMOTION_DISABLE;
1973                 if (icpu->use_acpi || force_use_acpi)
1974                         intel_idle_acpi_cst_extract();
1975         } else if (!intel_idle_acpi_cst_extract()) {
1976                 return -ENODEV;
1977         }
1978
1979         pr_debug("v" INTEL_IDLE_VERSION " model 0x%X\n",
1980                  boot_cpu_data.x86_model);
1981
1982         intel_idle_cpuidle_devices = alloc_percpu(struct cpuidle_device);
1983         if (!intel_idle_cpuidle_devices)
1984                 return -ENOMEM;
1985
1986         intel_idle_cpuidle_driver_init(&intel_idle_driver);
1987
1988         retval = cpuidle_register_driver(&intel_idle_driver);
1989         if (retval) {
1990                 struct cpuidle_driver *drv = cpuidle_get_driver();
1991                 printk(KERN_DEBUG pr_fmt("intel_idle yielding to %s\n"),
1992                        drv ? drv->name : "none");
1993                 goto init_driver_fail;
1994         }
1995
1996         retval = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "idle/intel:online",
1997                                    intel_idle_cpu_online, NULL);
1998         if (retval < 0)
1999                 goto hp_setup_fail;
2000
2001         pr_debug("Local APIC timer is reliable in %s\n",
2002                  boot_cpu_has(X86_FEATURE_ARAT) ? "all C-states" : "C1");
2003
2004         return 0;
2005
2006 hp_setup_fail:
2007         intel_idle_cpuidle_devices_uninit();
2008         cpuidle_unregister_driver(&intel_idle_driver);
2009 init_driver_fail:
2010         free_percpu(intel_idle_cpuidle_devices);
2011         return retval;
2012
2013 }
2014 device_initcall(intel_idle_init);
2015
2016 /*
2017  * We are not really modular, but we used to support that.  Meaning we also
2018  * support "intel_idle.max_cstate=..." at boot and also a read-only export of
2019  * it at /sys/module/intel_idle/parameters/max_cstate -- so using module_param
2020  * is the easiest way (currently) to continue doing that.
2021  */
2022 module_param(max_cstate, int, 0444);
2023 /*
2024  * The positions of the bits that are set in this number are the indices of the
2025  * idle states to be disabled by default (as reflected by the names of the
2026  * corresponding idle state directories in sysfs, "state0", "state1" ...
2027  * "state<i>" ..., where <i> is the index of the given state).
2028  */
2029 module_param_named(states_off, disabled_states_mask, uint, 0444);
2030 MODULE_PARM_DESC(states_off, "Mask of disabled idle states");
2031 /*
2032  * Some platforms come with mutually exclusive C-states, so that if one is
2033  * enabled, the other C-states must not be used. Example: C1 and C1E on
2034  * Sapphire Rapids platform. This parameter allows for selecting the
2035  * preferred C-states among the groups of mutually exclusive C-states - the
2036  * selected C-states will be registered, the other C-states from the mutually
2037  * exclusive group won't be registered. If the platform has no mutually
2038  * exclusive C-states, this parameter has no effect.
2039  */
2040 module_param_named(preferred_cstates, preferred_states_mask, uint, 0444);
2041 MODULE_PARM_DESC(preferred_cstates, "Mask of preferred idle states");