d79335506ecd3c3f5aa3beabde8e0dd74cef10a3
[linux-2.6-microblaze.git] / drivers / idle / intel_idle.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * intel_idle.c - native hardware idle loop for modern Intel processors
4  *
5  * Copyright (c) 2013 - 2020, Intel Corporation.
6  * Len Brown <len.brown@intel.com>
7  * Rafael J. Wysocki <rafael.j.wysocki@intel.com>
8  */
9
10 /*
11  * intel_idle is a cpuidle driver that loads on all Intel CPUs with MWAIT
12  * in lieu of the legacy ACPI processor_idle driver.  The intent is to
13  * make Linux more efficient on these processors, as intel_idle knows
14  * more than ACPI, as well as make Linux more immune to ACPI BIOS bugs.
15  */
16
17 /*
18  * Design Assumptions
19  *
20  * All CPUs have same idle states as boot CPU
21  *
22  * Chipset BM_STS (bus master status) bit is a NOP
23  *      for preventing entry into deep C-states
24  *
25  * CPU will flush caches as needed when entering a C-state via MWAIT
26  *      (in contrast to entering ACPI C3, in which case the WBINVD
27  *      instruction needs to be executed to flush the caches)
28  */
29
30 /*
31  * Known limitations
32  *
33  * ACPI has a .suspend hack to turn off deep C-states during suspend
34  * to avoid complications with the lapic timer workaround.
35  * Have not seen issues with suspend, but may need same workaround here.
36  *
37  */
38
39 /* DEBUG is defined below to enable pr_debug() statements; comment it out to disable them */
40 #define DEBUG
41
42 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
43
44 #include <linux/acpi.h>
45 #include <linux/kernel.h>
46 #include <linux/cpuidle.h>
47 #include <linux/tick.h>
48 #include <trace/events/power.h>
49 #include <linux/sched.h>
50 #include <linux/notifier.h>
51 #include <linux/cpu.h>
52 #include <linux/moduleparam.h>
53 #include <asm/cpu_device_id.h>
54 #include <asm/intel-family.h>
55 #include <asm/mwait.h>
56 #include <asm/msr.h>
57
58 #define INTEL_IDLE_VERSION "0.5.1"
59
/* The cpuidle driver object for this file; it is named "intel_idle". */
static struct cpuidle_driver intel_idle_driver = {
        .name = "intel_idle",
        .owner = THIS_MODULE,
};
/* intel_idle.max_cstate=0 disables driver */
static int max_cstate = CPUIDLE_STATE_MAX - 1;
/* Bit mask of disabled states (NOTE(review): bit-to-state mapping is not visible here). */
static unsigned int disabled_states_mask;

/* Per-CPU struct cpuidle_device storage (see the __percpu annotation). */
static struct cpuidle_device __percpu *intel_idle_cpuidle_devices;

/*
 * File-scope settings with the same names as members of struct idle_cpu;
 * presumably copied from the selected idle_cpu - confirm against the init code.
 */
static unsigned long auto_demotion_disable_flags;
static bool disable_promotion_to_c1e;
72
/*
 * struct idle_cpu - idle-state configuration record.
 *
 * Bundles a pointer to a cpuidle_state table with a few tuning switches.
 * NOTE(review): presumably one instance exists per supported CPU model;
 * the users of this type are not visible in this part of the file.
 */
struct idle_cpu {
        /* Table of struct cpuidle_state entries used with this configuration. */
        struct cpuidle_state *state_table;

        /*
         * Hardware C-state auto-demotion may not always be optimal.
         * Indicate which enable bits to clear here.
         */
        unsigned long auto_demotion_disable_flags;
        /* NOTE(review): presumably a separate auto-demotion switch for the "byt" tables - confirm. */
        bool byt_auto_demotion_disable_flag;
        /* NOTE(review): presumably requests that C1E promotion be turned off - confirm. */
        bool disable_promotion_to_c1e;
        /* NOTE(review): presumably selects use of ACPI-provided data - confirm. */
        bool use_acpi;
};
85
/*
 * Init-time only data (__initdata): the idle_cpu record in use and the
 * state table being worked on.
 * NOTE(review): how these are assigned is not visible in this part of the file.
 */
static const struct idle_cpu *icpu __initdata;
static struct cpuidle_state *cpuidle_state_table __initdata;

/* Init-time only; presumably holds the MWAIT sub-state information - confirm. */
static unsigned int mwait_substates __initdata;
90
/*
 * Enable this state by default even if the ACPI _CST does not list it.
 *
 * Occupies bit 15 of a state's "flags" word; the state tables in this file
 * OR it into their "C1E" entries.
 */
#define CPUIDLE_FLAG_ALWAYS_ENABLE      BIT(15)
95
/*
 * MWAIT takes an 8-bit "hint" in EAX "suggesting"
 * the C-state (top nibble) and sub-state (bottom nibble)
 * 0x00 means "MWAIT(C1)", 0x10 means "MWAIT(C2)" etc.
 *
 * We store the hint in the top byte (bits 31:24) of our "flags" for each
 * state.
 *
 * flg2MWAIT(flags): extract the 8-bit hint from a flags word.
 * MWAIT2flg(eax):   move the low 8 bits of @eax into bits 31:24.
 *
 * Both macros parenthesize their argument, so an expression argument
 * (e.g. "a | b") is shifted/masked as a whole rather than being split
 * apart by operator precedence.
 */
#define flg2MWAIT(flags) (((flags) >> 24) & 0xFF)
#define MWAIT2flg(eax) (((eax) & 0xFF) << 24)
105
106 /**
107  * intel_idle - Ask the processor to enter the given idle state.
108  * @dev: cpuidle device of the target CPU.
109  * @drv: cpuidle driver (assumed to point to intel_idle_driver).
110  * @index: Target idle state index.
111  *
112  * Use the MWAIT instruction to notify the processor that the CPU represented by
113  * @dev is idle and it can try to enter the idle state corresponding to @index.
114  *
115  * If the local APIC timer is not known to be reliable in the target idle state,
116  * enable one-shot tick broadcasting for the target CPU before executing MWAIT.
117  *
118  * Optionally call leave_mm() for the target CPU upfront to avoid wakeups due to
119  * flushing user TLBs.
120  *
121  * Must be called under local_irq_disable().
122  */
123 static __cpuidle int intel_idle(struct cpuidle_device *dev,
124                                 struct cpuidle_driver *drv, int index)
125 {
126         struct cpuidle_state *state = &drv->states[index];
127         unsigned long eax = flg2MWAIT(state->flags);
128         unsigned long ecx = 1; /* break on interrupt flag */
129
130         mwait_idle_with_hints(eax, ecx);
131
132         return index;
133 }
134
135 /**
136  * intel_idle_s2idle - Ask the processor to enter the given idle state.
137  * @dev: cpuidle device of the target CPU.
138  * @drv: cpuidle driver (assumed to point to intel_idle_driver).
139  * @index: Target idle state index.
140  *
141  * Use the MWAIT instruction to notify the processor that the CPU represented by
142  * @dev is idle and it can try to enter the idle state corresponding to @index.
143  *
144  * Invoked as a suspend-to-idle callback routine with frozen user space, frozen
145  * scheduler tick and suspended scheduler clock on the target CPU.
146  */
147 static __cpuidle int intel_idle_s2idle(struct cpuidle_device *dev,
148                                        struct cpuidle_driver *drv, int index)
149 {
150         unsigned long eax = flg2MWAIT(drv->states[index].flags);
151         unsigned long ecx = 1; /* break on interrupt flag */
152
153         mwait_idle_with_hints(eax, ecx);
154
155         return 0;
156 }
157
158 /*
159  * States are indexed by the cstate number,
160  * which is also the index into the MWAIT hint array.
161  * Thus C0 is a dummy.
162  */
163 static struct cpuidle_state nehalem_cstates[] __initdata = {
164         {
165                 .name = "C1",
166                 .desc = "MWAIT 0x00",
167                 .flags = MWAIT2flg(0x00),
168                 .exit_latency = 3,
169                 .target_residency = 6,
170                 .enter = &intel_idle,
171                 .enter_s2idle = intel_idle_s2idle, },
172         {
173                 .name = "C1E",
174                 .desc = "MWAIT 0x01",
175                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
176                 .exit_latency = 10,
177                 .target_residency = 20,
178                 .enter = &intel_idle,
179                 .enter_s2idle = intel_idle_s2idle, },
180         {
181                 .name = "C3",
182                 .desc = "MWAIT 0x10",
183                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
184                 .exit_latency = 20,
185                 .target_residency = 80,
186                 .enter = &intel_idle,
187                 .enter_s2idle = intel_idle_s2idle, },
188         {
189                 .name = "C6",
190                 .desc = "MWAIT 0x20",
191                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
192                 .exit_latency = 200,
193                 .target_residency = 800,
194                 .enter = &intel_idle,
195                 .enter_s2idle = intel_idle_s2idle, },
196         {
197                 .enter = NULL }
198 };
199
200 static struct cpuidle_state snb_cstates[] __initdata = {
201         {
202                 .name = "C1",
203                 .desc = "MWAIT 0x00",
204                 .flags = MWAIT2flg(0x00),
205                 .exit_latency = 2,
206                 .target_residency = 2,
207                 .enter = &intel_idle,
208                 .enter_s2idle = intel_idle_s2idle, },
209         {
210                 .name = "C1E",
211                 .desc = "MWAIT 0x01",
212                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
213                 .exit_latency = 10,
214                 .target_residency = 20,
215                 .enter = &intel_idle,
216                 .enter_s2idle = intel_idle_s2idle, },
217         {
218                 .name = "C3",
219                 .desc = "MWAIT 0x10",
220                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
221                 .exit_latency = 80,
222                 .target_residency = 211,
223                 .enter = &intel_idle,
224                 .enter_s2idle = intel_idle_s2idle, },
225         {
226                 .name = "C6",
227                 .desc = "MWAIT 0x20",
228                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
229                 .exit_latency = 104,
230                 .target_residency = 345,
231                 .enter = &intel_idle,
232                 .enter_s2idle = intel_idle_s2idle, },
233         {
234                 .name = "C7",
235                 .desc = "MWAIT 0x30",
236                 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
237                 .exit_latency = 109,
238                 .target_residency = 345,
239                 .enter = &intel_idle,
240                 .enter_s2idle = intel_idle_s2idle, },
241         {
242                 .enter = NULL }
243 };
244
245 static struct cpuidle_state byt_cstates[] __initdata = {
246         {
247                 .name = "C1",
248                 .desc = "MWAIT 0x00",
249                 .flags = MWAIT2flg(0x00),
250                 .exit_latency = 1,
251                 .target_residency = 1,
252                 .enter = &intel_idle,
253                 .enter_s2idle = intel_idle_s2idle, },
254         {
255                 .name = "C6N",
256                 .desc = "MWAIT 0x58",
257                 .flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED,
258                 .exit_latency = 300,
259                 .target_residency = 275,
260                 .enter = &intel_idle,
261                 .enter_s2idle = intel_idle_s2idle, },
262         {
263                 .name = "C6S",
264                 .desc = "MWAIT 0x52",
265                 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
266                 .exit_latency = 500,
267                 .target_residency = 560,
268                 .enter = &intel_idle,
269                 .enter_s2idle = intel_idle_s2idle, },
270         {
271                 .name = "C7",
272                 .desc = "MWAIT 0x60",
273                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
274                 .exit_latency = 1200,
275                 .target_residency = 4000,
276                 .enter = &intel_idle,
277                 .enter_s2idle = intel_idle_s2idle, },
278         {
279                 .name = "C7S",
280                 .desc = "MWAIT 0x64",
281                 .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
282                 .exit_latency = 10000,
283                 .target_residency = 20000,
284                 .enter = &intel_idle,
285                 .enter_s2idle = intel_idle_s2idle, },
286         {
287                 .enter = NULL }
288 };
289
290 static struct cpuidle_state cht_cstates[] __initdata = {
291         {
292                 .name = "C1",
293                 .desc = "MWAIT 0x00",
294                 .flags = MWAIT2flg(0x00),
295                 .exit_latency = 1,
296                 .target_residency = 1,
297                 .enter = &intel_idle,
298                 .enter_s2idle = intel_idle_s2idle, },
299         {
300                 .name = "C6N",
301                 .desc = "MWAIT 0x58",
302                 .flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED,
303                 .exit_latency = 80,
304                 .target_residency = 275,
305                 .enter = &intel_idle,
306                 .enter_s2idle = intel_idle_s2idle, },
307         {
308                 .name = "C6S",
309                 .desc = "MWAIT 0x52",
310                 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
311                 .exit_latency = 200,
312                 .target_residency = 560,
313                 .enter = &intel_idle,
314                 .enter_s2idle = intel_idle_s2idle, },
315         {
316                 .name = "C7",
317                 .desc = "MWAIT 0x60",
318                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
319                 .exit_latency = 1200,
320                 .target_residency = 4000,
321                 .enter = &intel_idle,
322                 .enter_s2idle = intel_idle_s2idle, },
323         {
324                 .name = "C7S",
325                 .desc = "MWAIT 0x64",
326                 .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
327                 .exit_latency = 10000,
328                 .target_residency = 20000,
329                 .enter = &intel_idle,
330                 .enter_s2idle = intel_idle_s2idle, },
331         {
332                 .enter = NULL }
333 };
334
335 static struct cpuidle_state ivb_cstates[] __initdata = {
336         {
337                 .name = "C1",
338                 .desc = "MWAIT 0x00",
339                 .flags = MWAIT2flg(0x00),
340                 .exit_latency = 1,
341                 .target_residency = 1,
342                 .enter = &intel_idle,
343                 .enter_s2idle = intel_idle_s2idle, },
344         {
345                 .name = "C1E",
346                 .desc = "MWAIT 0x01",
347                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
348                 .exit_latency = 10,
349                 .target_residency = 20,
350                 .enter = &intel_idle,
351                 .enter_s2idle = intel_idle_s2idle, },
352         {
353                 .name = "C3",
354                 .desc = "MWAIT 0x10",
355                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
356                 .exit_latency = 59,
357                 .target_residency = 156,
358                 .enter = &intel_idle,
359                 .enter_s2idle = intel_idle_s2idle, },
360         {
361                 .name = "C6",
362                 .desc = "MWAIT 0x20",
363                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
364                 .exit_latency = 80,
365                 .target_residency = 300,
366                 .enter = &intel_idle,
367                 .enter_s2idle = intel_idle_s2idle, },
368         {
369                 .name = "C7",
370                 .desc = "MWAIT 0x30",
371                 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
372                 .exit_latency = 87,
373                 .target_residency = 300,
374                 .enter = &intel_idle,
375                 .enter_s2idle = intel_idle_s2idle, },
376         {
377                 .enter = NULL }
378 };
379
380 static struct cpuidle_state ivt_cstates[] __initdata = {
381         {
382                 .name = "C1",
383                 .desc = "MWAIT 0x00",
384                 .flags = MWAIT2flg(0x00),
385                 .exit_latency = 1,
386                 .target_residency = 1,
387                 .enter = &intel_idle,
388                 .enter_s2idle = intel_idle_s2idle, },
389         {
390                 .name = "C1E",
391                 .desc = "MWAIT 0x01",
392                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
393                 .exit_latency = 10,
394                 .target_residency = 80,
395                 .enter = &intel_idle,
396                 .enter_s2idle = intel_idle_s2idle, },
397         {
398                 .name = "C3",
399                 .desc = "MWAIT 0x10",
400                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
401                 .exit_latency = 59,
402                 .target_residency = 156,
403                 .enter = &intel_idle,
404                 .enter_s2idle = intel_idle_s2idle, },
405         {
406                 .name = "C6",
407                 .desc = "MWAIT 0x20",
408                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
409                 .exit_latency = 82,
410                 .target_residency = 300,
411                 .enter = &intel_idle,
412                 .enter_s2idle = intel_idle_s2idle, },
413         {
414                 .enter = NULL }
415 };
416
417 static struct cpuidle_state ivt_cstates_4s[] __initdata = {
418         {
419                 .name = "C1",
420                 .desc = "MWAIT 0x00",
421                 .flags = MWAIT2flg(0x00),
422                 .exit_latency = 1,
423                 .target_residency = 1,
424                 .enter = &intel_idle,
425                 .enter_s2idle = intel_idle_s2idle, },
426         {
427                 .name = "C1E",
428                 .desc = "MWAIT 0x01",
429                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
430                 .exit_latency = 10,
431                 .target_residency = 250,
432                 .enter = &intel_idle,
433                 .enter_s2idle = intel_idle_s2idle, },
434         {
435                 .name = "C3",
436                 .desc = "MWAIT 0x10",
437                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
438                 .exit_latency = 59,
439                 .target_residency = 300,
440                 .enter = &intel_idle,
441                 .enter_s2idle = intel_idle_s2idle, },
442         {
443                 .name = "C6",
444                 .desc = "MWAIT 0x20",
445                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
446                 .exit_latency = 84,
447                 .target_residency = 400,
448                 .enter = &intel_idle,
449                 .enter_s2idle = intel_idle_s2idle, },
450         {
451                 .enter = NULL }
452 };
453
454 static struct cpuidle_state ivt_cstates_8s[] __initdata = {
455         {
456                 .name = "C1",
457                 .desc = "MWAIT 0x00",
458                 .flags = MWAIT2flg(0x00),
459                 .exit_latency = 1,
460                 .target_residency = 1,
461                 .enter = &intel_idle,
462                 .enter_s2idle = intel_idle_s2idle, },
463         {
464                 .name = "C1E",
465                 .desc = "MWAIT 0x01",
466                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
467                 .exit_latency = 10,
468                 .target_residency = 500,
469                 .enter = &intel_idle,
470                 .enter_s2idle = intel_idle_s2idle, },
471         {
472                 .name = "C3",
473                 .desc = "MWAIT 0x10",
474                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
475                 .exit_latency = 59,
476                 .target_residency = 600,
477                 .enter = &intel_idle,
478                 .enter_s2idle = intel_idle_s2idle, },
479         {
480                 .name = "C6",
481                 .desc = "MWAIT 0x20",
482                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
483                 .exit_latency = 88,
484                 .target_residency = 700,
485                 .enter = &intel_idle,
486                 .enter_s2idle = intel_idle_s2idle, },
487         {
488                 .enter = NULL }
489 };
490
491 static struct cpuidle_state hsw_cstates[] __initdata = {
492         {
493                 .name = "C1",
494                 .desc = "MWAIT 0x00",
495                 .flags = MWAIT2flg(0x00),
496                 .exit_latency = 2,
497                 .target_residency = 2,
498                 .enter = &intel_idle,
499                 .enter_s2idle = intel_idle_s2idle, },
500         {
501                 .name = "C1E",
502                 .desc = "MWAIT 0x01",
503                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
504                 .exit_latency = 10,
505                 .target_residency = 20,
506                 .enter = &intel_idle,
507                 .enter_s2idle = intel_idle_s2idle, },
508         {
509                 .name = "C3",
510                 .desc = "MWAIT 0x10",
511                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
512                 .exit_latency = 33,
513                 .target_residency = 100,
514                 .enter = &intel_idle,
515                 .enter_s2idle = intel_idle_s2idle, },
516         {
517                 .name = "C6",
518                 .desc = "MWAIT 0x20",
519                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
520                 .exit_latency = 133,
521                 .target_residency = 400,
522                 .enter = &intel_idle,
523                 .enter_s2idle = intel_idle_s2idle, },
524         {
525                 .name = "C7s",
526                 .desc = "MWAIT 0x32",
527                 .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED,
528                 .exit_latency = 166,
529                 .target_residency = 500,
530                 .enter = &intel_idle,
531                 .enter_s2idle = intel_idle_s2idle, },
532         {
533                 .name = "C8",
534                 .desc = "MWAIT 0x40",
535                 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
536                 .exit_latency = 300,
537                 .target_residency = 900,
538                 .enter = &intel_idle,
539                 .enter_s2idle = intel_idle_s2idle, },
540         {
541                 .name = "C9",
542                 .desc = "MWAIT 0x50",
543                 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
544                 .exit_latency = 600,
545                 .target_residency = 1800,
546                 .enter = &intel_idle,
547                 .enter_s2idle = intel_idle_s2idle, },
548         {
549                 .name = "C10",
550                 .desc = "MWAIT 0x60",
551                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
552                 .exit_latency = 2600,
553                 .target_residency = 7700,
554                 .enter = &intel_idle,
555                 .enter_s2idle = intel_idle_s2idle, },
556         {
557                 .enter = NULL }
558 };
559 static struct cpuidle_state bdw_cstates[] __initdata = {
560         {
561                 .name = "C1",
562                 .desc = "MWAIT 0x00",
563                 .flags = MWAIT2flg(0x00),
564                 .exit_latency = 2,
565                 .target_residency = 2,
566                 .enter = &intel_idle,
567                 .enter_s2idle = intel_idle_s2idle, },
568         {
569                 .name = "C1E",
570                 .desc = "MWAIT 0x01",
571                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
572                 .exit_latency = 10,
573                 .target_residency = 20,
574                 .enter = &intel_idle,
575                 .enter_s2idle = intel_idle_s2idle, },
576         {
577                 .name = "C3",
578                 .desc = "MWAIT 0x10",
579                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
580                 .exit_latency = 40,
581                 .target_residency = 100,
582                 .enter = &intel_idle,
583                 .enter_s2idle = intel_idle_s2idle, },
584         {
585                 .name = "C6",
586                 .desc = "MWAIT 0x20",
587                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
588                 .exit_latency = 133,
589                 .target_residency = 400,
590                 .enter = &intel_idle,
591                 .enter_s2idle = intel_idle_s2idle, },
592         {
593                 .name = "C7s",
594                 .desc = "MWAIT 0x32",
595                 .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED,
596                 .exit_latency = 166,
597                 .target_residency = 500,
598                 .enter = &intel_idle,
599                 .enter_s2idle = intel_idle_s2idle, },
600         {
601                 .name = "C8",
602                 .desc = "MWAIT 0x40",
603                 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
604                 .exit_latency = 300,
605                 .target_residency = 900,
606                 .enter = &intel_idle,
607                 .enter_s2idle = intel_idle_s2idle, },
608         {
609                 .name = "C9",
610                 .desc = "MWAIT 0x50",
611                 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
612                 .exit_latency = 600,
613                 .target_residency = 1800,
614                 .enter = &intel_idle,
615                 .enter_s2idle = intel_idle_s2idle, },
616         {
617                 .name = "C10",
618                 .desc = "MWAIT 0x60",
619                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
620                 .exit_latency = 2600,
621                 .target_residency = 7700,
622                 .enter = &intel_idle,
623                 .enter_s2idle = intel_idle_s2idle, },
624         {
625                 .enter = NULL }
626 };
627
628 static struct cpuidle_state skl_cstates[] __initdata = {
629         {
630                 .name = "C1",
631                 .desc = "MWAIT 0x00",
632                 .flags = MWAIT2flg(0x00),
633                 .exit_latency = 2,
634                 .target_residency = 2,
635                 .enter = &intel_idle,
636                 .enter_s2idle = intel_idle_s2idle, },
637         {
638                 .name = "C1E",
639                 .desc = "MWAIT 0x01",
640                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
641                 .exit_latency = 10,
642                 .target_residency = 20,
643                 .enter = &intel_idle,
644                 .enter_s2idle = intel_idle_s2idle, },
645         {
646                 .name = "C3",
647                 .desc = "MWAIT 0x10",
648                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
649                 .exit_latency = 70,
650                 .target_residency = 100,
651                 .enter = &intel_idle,
652                 .enter_s2idle = intel_idle_s2idle, },
653         {
654                 .name = "C6",
655                 .desc = "MWAIT 0x20",
656                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
657                 .exit_latency = 85,
658                 .target_residency = 200,
659                 .enter = &intel_idle,
660                 .enter_s2idle = intel_idle_s2idle, },
661         {
662                 .name = "C7s",
663                 .desc = "MWAIT 0x33",
664                 .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED,
665                 .exit_latency = 124,
666                 .target_residency = 800,
667                 .enter = &intel_idle,
668                 .enter_s2idle = intel_idle_s2idle, },
669         {
670                 .name = "C8",
671                 .desc = "MWAIT 0x40",
672                 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
673                 .exit_latency = 200,
674                 .target_residency = 800,
675                 .enter = &intel_idle,
676                 .enter_s2idle = intel_idle_s2idle, },
677         {
678                 .name = "C9",
679                 .desc = "MWAIT 0x50",
680                 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
681                 .exit_latency = 480,
682                 .target_residency = 5000,
683                 .enter = &intel_idle,
684                 .enter_s2idle = intel_idle_s2idle, },
685         {
686                 .name = "C10",
687                 .desc = "MWAIT 0x60",
688                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
689                 .exit_latency = 890,
690                 .target_residency = 5000,
691                 .enter = &intel_idle,
692                 .enter_s2idle = intel_idle_s2idle, },
693         {
694                 .enter = NULL }
695 };
696
697 static struct cpuidle_state skx_cstates[] __initdata = {
698         {
699                 .name = "C1",
700                 .desc = "MWAIT 0x00",
701                 .flags = MWAIT2flg(0x00),
702                 .exit_latency = 2,
703                 .target_residency = 2,
704                 .enter = &intel_idle,
705                 .enter_s2idle = intel_idle_s2idle, },
706         {
707                 .name = "C1E",
708                 .desc = "MWAIT 0x01",
709                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
710                 .exit_latency = 10,
711                 .target_residency = 20,
712                 .enter = &intel_idle,
713                 .enter_s2idle = intel_idle_s2idle, },
714         {
715                 .name = "C6",
716                 .desc = "MWAIT 0x20",
717                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
718                 .exit_latency = 133,
719                 .target_residency = 600,
720                 .enter = &intel_idle,
721                 .enter_s2idle = intel_idle_s2idle, },
722         {
723                 .enter = NULL }
724 };
725
726 static struct cpuidle_state icx_cstates[] __initdata = {
727         {
728                 .name = "C1",
729                 .desc = "MWAIT 0x00",
730                 .flags = MWAIT2flg(0x00),
731                 .exit_latency = 1,
732                 .target_residency = 1,
733                 .enter = &intel_idle,
734                 .enter_s2idle = intel_idle_s2idle, },
735         {
736                 .name = "C1E",
737                 .desc = "MWAIT 0x01",
738                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
739                 .exit_latency = 4,
740                 .target_residency = 4,
741                 .enter = &intel_idle,
742                 .enter_s2idle = intel_idle_s2idle, },
743         {
744                 .name = "C6",
745                 .desc = "MWAIT 0x20",
746                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
747                 .exit_latency = 128,
748                 .target_residency = 384,
749                 .enter = &intel_idle,
750                 .enter_s2idle = intel_idle_s2idle, },
751         {
752                 .enter = NULL }
753 };
754
755 static struct cpuidle_state atom_cstates[] __initdata = {
756         {
757                 .name = "C1E",
758                 .desc = "MWAIT 0x00",
759                 .flags = MWAIT2flg(0x00),
760                 .exit_latency = 10,
761                 .target_residency = 20,
762                 .enter = &intel_idle,
763                 .enter_s2idle = intel_idle_s2idle, },
764         {
765                 .name = "C2",
766                 .desc = "MWAIT 0x10",
767                 .flags = MWAIT2flg(0x10),
768                 .exit_latency = 20,
769                 .target_residency = 80,
770                 .enter = &intel_idle,
771                 .enter_s2idle = intel_idle_s2idle, },
772         {
773                 .name = "C4",
774                 .desc = "MWAIT 0x30",
775                 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
776                 .exit_latency = 100,
777                 .target_residency = 400,
778                 .enter = &intel_idle,
779                 .enter_s2idle = intel_idle_s2idle, },
780         {
781                 .name = "C6",
782                 .desc = "MWAIT 0x52",
783                 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
784                 .exit_latency = 140,
785                 .target_residency = 560,
786                 .enter = &intel_idle,
787                 .enter_s2idle = intel_idle_s2idle, },
788         {
789                 .enter = NULL }
790 };
/*
 * Idle states table referenced by idle_cpu_tangier (matched for
 * ATOM_SILVERMONT_MID in intel_idle_ids).
 *
 * Each entry carries its MWAIT hint in .flags; the trailing entry with a
 * NULL .enter terminates the table.
 */
static struct cpuidle_state tangier_cstates[] __initdata = {
        {
                .name = "C1",
                .desc = "MWAIT 0x00",
                .flags = MWAIT2flg(0x00),
                .exit_latency = 1,
                .target_residency = 4,
                .enter = &intel_idle,
                .enter_s2idle = intel_idle_s2idle, },
        {
                .name = "C4",
                .desc = "MWAIT 0x30",
                .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
                .exit_latency = 100,
                .target_residency = 400,
                .enter = &intel_idle,
                .enter_s2idle = intel_idle_s2idle, },
        {
                .name = "C6",
                .desc = "MWAIT 0x52",
                .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
                .exit_latency = 140,
                .target_residency = 560,
                .enter = &intel_idle,
                .enter_s2idle = intel_idle_s2idle, },
        {
                .name = "C7",
                .desc = "MWAIT 0x60",
                .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
                .exit_latency = 1200,
                .target_residency = 4000,
                .enter = &intel_idle,
                .enter_s2idle = intel_idle_s2idle, },
        {
                .name = "C9",
                .desc = "MWAIT 0x64",
                .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
                .exit_latency = 10000,
                .target_residency = 20000,
                .enter = &intel_idle,
                .enter_s2idle = intel_idle_s2idle, },
        {
                /* Table terminator. */
                .enter = NULL }
};
/*
 * Idle states table referenced by idle_cpu_avn (matched for
 * ATOM_SILVERMONT_D in intel_idle_ids).
 *
 * Each entry carries its MWAIT hint in .flags; the trailing entry with a
 * NULL .enter terminates the table.
 */
static struct cpuidle_state avn_cstates[] __initdata = {
        {
                .name = "C1",
                .desc = "MWAIT 0x00",
                .flags = MWAIT2flg(0x00),
                .exit_latency = 2,
                .target_residency = 2,
                .enter = &intel_idle,
                .enter_s2idle = intel_idle_s2idle, },
        {
                .name = "C6",
                .desc = "MWAIT 0x51",
                .flags = MWAIT2flg(0x51) | CPUIDLE_FLAG_TLB_FLUSHED,
                .exit_latency = 15,
                .target_residency = 45,
                .enter = &intel_idle,
                .enter_s2idle = intel_idle_s2idle, },
        {
                /* Table terminator. */
                .enter = NULL }
};
/*
 * Idle states table referenced by idle_cpu_knl (matched for XEON_PHI_KNL
 * and XEON_PHI_KNM in intel_idle_ids).
 *
 * Each entry carries its MWAIT hint in .flags; the trailing entry with a
 * NULL .enter terminates the table.
 */
static struct cpuidle_state knl_cstates[] __initdata = {
        {
                .name = "C1",
                .desc = "MWAIT 0x00",
                .flags = MWAIT2flg(0x00),
                .exit_latency = 1,
                .target_residency = 2,
                .enter = &intel_idle,
                .enter_s2idle = intel_idle_s2idle },
        {
                .name = "C6",
                .desc = "MWAIT 0x10",
                .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
                .exit_latency = 120,
                .target_residency = 500,
                .enter = &intel_idle,
                .enter_s2idle = intel_idle_s2idle },
        {
                /* Table terminator. */
                .enter = NULL }
};
875
/*
 * Idle states table referenced by idle_cpu_bxt (matched for ATOM_GOLDMONT
 * and ATOM_GOLDMONT_PLUS in intel_idle_ids).
 *
 * The exit_latency and target_residency of entries [2]..[6] (C6..C10) are
 * defaults only: bxt_idle_state_table_update() overwrites them with the
 * microsecond value decoded from the matching PKGCn_IRTL MSR whenever that
 * value is non-zero, so the order of the entries must stay in sync with
 * the indices used there.
 *
 * The trailing entry with a NULL .enter terminates the table.
 */
static struct cpuidle_state bxt_cstates[] __initdata = {
        {
                .name = "C1",
                .desc = "MWAIT 0x00",
                .flags = MWAIT2flg(0x00),
                .exit_latency = 2,
                .target_residency = 2,
                .enter = &intel_idle,
                .enter_s2idle = intel_idle_s2idle, },
        {
                .name = "C1E",
                .desc = "MWAIT 0x01",
                /* ALWAYS_ENABLE: never turned off by the ACPI _CST cross-check. */
                .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
                .exit_latency = 10,
                .target_residency = 20,
                .enter = &intel_idle,
                .enter_s2idle = intel_idle_s2idle, },
        {
                .name = "C6",
                .desc = "MWAIT 0x20",
                .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
                .exit_latency = 133,
                .target_residency = 133,
                .enter = &intel_idle,
                .enter_s2idle = intel_idle_s2idle, },
        {
                .name = "C7s",
                .desc = "MWAIT 0x31",
                .flags = MWAIT2flg(0x31) | CPUIDLE_FLAG_TLB_FLUSHED,
                .exit_latency = 155,
                .target_residency = 155,
                .enter = &intel_idle,
                .enter_s2idle = intel_idle_s2idle, },
        {
                .name = "C8",
                .desc = "MWAIT 0x40",
                .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
                .exit_latency = 1000,
                .target_residency = 1000,
                .enter = &intel_idle,
                .enter_s2idle = intel_idle_s2idle, },
        {
                .name = "C9",
                .desc = "MWAIT 0x50",
                .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
                .exit_latency = 2000,
                .target_residency = 2000,
                .enter = &intel_idle,
                .enter_s2idle = intel_idle_s2idle, },
        {
                .name = "C10",
                .desc = "MWAIT 0x60",
                .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
                .exit_latency = 10000,
                .target_residency = 10000,
                .enter = &intel_idle,
                .enter_s2idle = intel_idle_s2idle, },
        {
                /* Table terminator. */
                .enter = NULL }
};
936
/*
 * Idle states table referenced by idle_cpu_dnv (matched for
 * ATOM_GOLDMONT_D and ATOM_TREMONT_D in intel_idle_ids).
 *
 * Each entry carries its MWAIT hint in .flags; the trailing entry with a
 * NULL .enter terminates the table.
 */
static struct cpuidle_state dnv_cstates[] __initdata = {
        {
                .name = "C1",
                .desc = "MWAIT 0x00",
                .flags = MWAIT2flg(0x00),
                .exit_latency = 2,
                .target_residency = 2,
                .enter = &intel_idle,
                .enter_s2idle = intel_idle_s2idle, },
        {
                .name = "C1E",
                .desc = "MWAIT 0x01",
                /* ALWAYS_ENABLE: never turned off by the ACPI _CST cross-check. */
                .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
                .exit_latency = 10,
                .target_residency = 20,
                .enter = &intel_idle,
                .enter_s2idle = intel_idle_s2idle, },
        {
                .name = "C6",
                .desc = "MWAIT 0x20",
                .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
                .exit_latency = 50,
                .target_residency = 500,
                .enter = &intel_idle,
                .enter_s2idle = intel_idle_s2idle, },
        {
                /* Table terminator. */
                .enter = NULL }
};
965
/*
 * NEHALEM, NEHALEM_G and WESTMERE (see intel_idle_ids): Nehalem table, with
 * the C1/C3 auto-demotion bits cleared and C1E promotion disabled per CPU.
 */
static const struct idle_cpu idle_cpu_nehalem __initconst = {
        .state_table = nehalem_cstates,
        .auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE,
        .disable_promotion_to_c1e = true,
};
971
/*
 * NEHALEM_EP, NEHALEM_EX, WESTMERE_EP and WESTMERE_EX (see intel_idle_ids):
 * same settings as idle_cpu_nehalem, plus ACPI _CST is consulted to decide
 * which states are off by default.
 */
static const struct idle_cpu idle_cpu_nhx __initconst = {
        .state_table = nehalem_cstates,
        .auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE,
        .disable_promotion_to_c1e = true,
        .use_acpi = true,
};
978
/* ATOM_BONNELL and ATOM_SALTWELL (see intel_idle_ids): Atom table, no MSR tweaks. */
static const struct idle_cpu idle_cpu_atom __initconst = {
        .state_table = atom_cstates,
};
982
/* ATOM_SILVERMONT_MID (see intel_idle_ids): Tangier table, no MSR tweaks. */
static const struct idle_cpu idle_cpu_tangier __initconst = {
        .state_table = tangier_cstates,
};
986
/*
 * ATOM_BONNELL_MID (see intel_idle_ids): shares the Atom table, with the
 * ATM_LNC_C6_AUTO_DEMOTE bit cleared per CPU by auto_demotion_disable().
 */
static const struct idle_cpu idle_cpu_lincroft __initconst = {
        .state_table = atom_cstates,
        .auto_demotion_disable_flags = ATM_LNC_C6_AUTO_DEMOTE,
};
991
/* SANDYBRIDGE (see intel_idle_ids): SNB table, C1E promotion disabled per CPU. */
static const struct idle_cpu idle_cpu_snb __initconst = {
        .state_table = snb_cstates,
        .disable_promotion_to_c1e = true,
};
996
/*
 * SANDYBRIDGE_X (see intel_idle_ids): same as idle_cpu_snb, plus ACPI _CST
 * is consulted to decide which states are off by default.
 */
static const struct idle_cpu idle_cpu_snx __initconst = {
        .state_table = snb_cstates,
        .disable_promotion_to_c1e = true,
        .use_acpi = true,
};
1002
/*
 * ATOM_SILVERMONT (see intel_idle_ids): BYT table, C1E promotion disabled;
 * the flag below makes intel_idle_init_cstates_icpu() write 0 to the CC6
 * and MC6 demotion policy MSRs.
 */
static const struct idle_cpu idle_cpu_byt __initconst = {
        .state_table = byt_cstates,
        .disable_promotion_to_c1e = true,
        .byt_auto_demotion_disable_flag = true,
};
1008
/*
 * ATOM_AIRMONT (see intel_idle_ids): CHT table, C1E promotion disabled;
 * the flag below makes intel_idle_init_cstates_icpu() write 0 to the CC6
 * and MC6 demotion policy MSRs.
 */
static const struct idle_cpu idle_cpu_cht __initconst = {
        .state_table = cht_cstates,
        .disable_promotion_to_c1e = true,
        .byt_auto_demotion_disable_flag = true,
};
1014
/* IVYBRIDGE (see intel_idle_ids): IVB table, C1E promotion disabled per CPU. */
static const struct idle_cpu idle_cpu_ivb __initconst = {
        .state_table = ivb_cstates,
        .disable_promotion_to_c1e = true,
};
1019
/*
 * IVYBRIDGE_X (see intel_idle_ids): starts from ivt_cstates, which
 * ivt_idle_state_table_update() may replace depending on the socket count.
 * C1E promotion is disabled and ACPI _CST is consulted.
 */
static const struct idle_cpu idle_cpu_ivt __initconst = {
        .state_table = ivt_cstates,
        .disable_promotion_to_c1e = true,
        .use_acpi = true,
};
1025
/*
 * HASWELL, HASWELL_L and HASWELL_G (see intel_idle_ids): HSW table,
 * C1E promotion disabled per CPU.
 */
static const struct idle_cpu idle_cpu_hsw __initconst = {
        .state_table = hsw_cstates,
        .disable_promotion_to_c1e = true,
};
1030
/*
 * HASWELL_X (see intel_idle_ids): same as idle_cpu_hsw, plus ACPI _CST is
 * consulted to decide which states are off by default.
 */
static const struct idle_cpu idle_cpu_hsx __initconst = {
        .state_table = hsw_cstates,
        .disable_promotion_to_c1e = true,
        .use_acpi = true,
};
1036
/*
 * BROADWELL and BROADWELL_G (see intel_idle_ids): BDW table, C1E promotion
 * disabled per CPU.
 */
static const struct idle_cpu idle_cpu_bdw __initconst = {
        .state_table = bdw_cstates,
        .disable_promotion_to_c1e = true,
};
1041
/*
 * BROADWELL_X and BROADWELL_D (see intel_idle_ids): same as idle_cpu_bdw,
 * plus ACPI _CST is consulted to decide which states are off by default.
 */
static const struct idle_cpu idle_cpu_bdx __initconst = {
        .state_table = bdw_cstates,
        .disable_promotion_to_c1e = true,
        .use_acpi = true,
};
1047
/*
 * SKYLAKE, SKYLAKE_L, KABYLAKE and KABYLAKE_L (see intel_idle_ids): SKL
 * table (patched by sklh_idle_state_table_update() on INTEL_FAM6_SKYLAKE),
 * C1E promotion disabled per CPU.
 */
static const struct idle_cpu idle_cpu_skl __initconst = {
        .state_table = skl_cstates,
        .disable_promotion_to_c1e = true,
};
1052
/*
 * SKYLAKE_X (see intel_idle_ids): SKX table, C1E promotion disabled and
 * ACPI _CST consulted to decide which states are off by default.
 */
static const struct idle_cpu idle_cpu_skx __initconst = {
        .state_table = skx_cstates,
        .disable_promotion_to_c1e = true,
        .use_acpi = true,
};
1058
/*
 * ICELAKE_X (see intel_idle_ids): ICX table, C1E promotion disabled and
 * ACPI _CST consulted to decide which states are off by default.
 */
static const struct idle_cpu idle_cpu_icx __initconst = {
        .state_table = icx_cstates,
        .disable_promotion_to_c1e = true,
        .use_acpi = true,
};
1064
/*
 * ATOM_SILVERMONT_D (see intel_idle_ids): AVN table, C1E promotion disabled
 * and ACPI _CST consulted to decide which states are off by default.
 */
static const struct idle_cpu idle_cpu_avn __initconst = {
        .state_table = avn_cstates,
        .disable_promotion_to_c1e = true,
        .use_acpi = true,
};
1070
/*
 * XEON_PHI_KNL and XEON_PHI_KNM (see intel_idle_ids): KNL table, ACPI _CST
 * consulted; C1E promotion is left untouched.
 */
static const struct idle_cpu idle_cpu_knl __initconst = {
        .state_table = knl_cstates,
        .use_acpi = true,
};
1075
/*
 * ATOM_GOLDMONT and ATOM_GOLDMONT_PLUS (see intel_idle_ids): BXT table
 * (latencies refreshed by bxt_idle_state_table_update()), C1E promotion
 * disabled per CPU.
 */
static const struct idle_cpu idle_cpu_bxt __initconst = {
        .state_table = bxt_cstates,
        .disable_promotion_to_c1e = true,
};
1080
/*
 * ATOM_GOLDMONT_D and ATOM_TREMONT_D (see intel_idle_ids): DNV table,
 * C1E promotion disabled and ACPI _CST consulted to decide which states
 * are off by default.
 */
static const struct idle_cpu idle_cpu_dnv __initconst = {
        .state_table = dnv_cstates,
        .disable_promotion_to_c1e = true,
        .use_acpi = true,
};
1086
/*
 * CPU models with a dedicated idle_cpu descriptor.
 *
 * intel_idle_init() matches the boot CPU against this table first; the
 * driver_data of the matching entry becomes icpu and selects the static
 * idle states table and the per-model MSR tweaks.  The empty entry
 * terminates the table.
 */
static const struct x86_cpu_id intel_idle_ids[] __initconst = {
        X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EP,          &idle_cpu_nhx),
        X86_MATCH_INTEL_FAM6_MODEL(NEHALEM,             &idle_cpu_nehalem),
        X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_G,           &idle_cpu_nehalem),
        X86_MATCH_INTEL_FAM6_MODEL(WESTMERE,            &idle_cpu_nehalem),
        X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EP,         &idle_cpu_nhx),
        X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EX,          &idle_cpu_nhx),
        X86_MATCH_INTEL_FAM6_MODEL(ATOM_BONNELL,        &idle_cpu_atom),
        X86_MATCH_INTEL_FAM6_MODEL(ATOM_BONNELL_MID,    &idle_cpu_lincroft),
        X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EX,         &idle_cpu_nhx),
        X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE,         &idle_cpu_snb),
        X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE_X,       &idle_cpu_snx),
        X86_MATCH_INTEL_FAM6_MODEL(ATOM_SALTWELL,       &idle_cpu_atom),
        X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT,     &idle_cpu_byt),
        X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT_MID, &idle_cpu_tangier),
        X86_MATCH_INTEL_FAM6_MODEL(ATOM_AIRMONT,        &idle_cpu_cht),
        X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE,           &idle_cpu_ivb),
        X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE_X,         &idle_cpu_ivt),
        X86_MATCH_INTEL_FAM6_MODEL(HASWELL,             &idle_cpu_hsw),
        X86_MATCH_INTEL_FAM6_MODEL(HASWELL_X,           &idle_cpu_hsx),
        X86_MATCH_INTEL_FAM6_MODEL(HASWELL_L,           &idle_cpu_hsw),
        X86_MATCH_INTEL_FAM6_MODEL(HASWELL_G,           &idle_cpu_hsw),
        X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT_D,   &idle_cpu_avn),
        X86_MATCH_INTEL_FAM6_MODEL(BROADWELL,           &idle_cpu_bdw),
        X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_G,         &idle_cpu_bdw),
        X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X,         &idle_cpu_bdx),
        X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_D,         &idle_cpu_bdx),
        X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_L,           &idle_cpu_skl),
        X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE,             &idle_cpu_skl),
        X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE_L,          &idle_cpu_skl),
        X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE,            &idle_cpu_skl),
        X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X,           &idle_cpu_skx),
        X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X,           &idle_cpu_icx),
        X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL,        &idle_cpu_knl),
        X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM,        &idle_cpu_knl),
        X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT,       &idle_cpu_bxt),
        X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_PLUS,  &idle_cpu_bxt),
        X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_D,     &idle_cpu_dnv),
        X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D,      &idle_cpu_dnv),
        {}
};
1128
/*
 * Fallback match used by intel_idle_init() when the boot CPU is not listed
 * in intel_idle_ids: any Intel family 6 CPU with the MWAIT feature.  The
 * driver_data is NULL, so icpu stays NULL and the idle states have to come
 * from ACPI _CST.
 */
static const struct x86_cpu_id intel_mwait_ids[] __initconst = {
        X86_MATCH_VENDOR_FAM_FEATURE(INTEL, 6, X86_FEATURE_MWAIT, NULL),
        {}
};
1133
1134 static bool __init intel_idle_max_cstate_reached(int cstate)
1135 {
1136         if (cstate + 1 > max_cstate) {
1137                 pr_info("max_cstate %d reached\n", max_cstate);
1138                 return true;
1139         }
1140         return false;
1141 }
1142
1143 static bool __init intel_idle_state_needs_timer_stop(struct cpuidle_state *state)
1144 {
1145         unsigned long eax = flg2MWAIT(state->flags);
1146
1147         if (boot_cpu_has(X86_FEATURE_ARAT))
1148                 return false;
1149
1150         /*
1151          * Switch over to one-shot tick broadcast if the target C-state
1152          * is deeper than C1.
1153          */
1154         return !!((eax >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK);
1155 }
1156
1157 #ifdef CONFIG_ACPI_PROCESSOR_CSTATE
1158 #include <acpi/processor.h>
1159
/*
 * "no_acpi" module parameter (mode 0444): when set,
 * intel_idle_acpi_cst_extract() refuses to look at ACPI _CST at all.
 */
static bool no_acpi __read_mostly;
module_param(no_acpi, bool, 0444);
MODULE_PARM_DESC(no_acpi, "Do not use ACPI _CST for building the idle states list");

/*
 * "use_acpi" module parameter (mode 0444): treated like the per-model
 * use_acpi flag for every CPU model (it is OR-ed with icpu->use_acpi).
 */
static bool force_use_acpi __read_mostly; /* No effect if no_acpi is set. */
module_param_named(use_acpi, force_use_acpi, bool, 0444);
MODULE_PARM_DESC(use_acpi, "Use ACPI _CST for building the idle states list");

/*
 * C-states obtained from ACPI _CST by intel_idle_acpi_cst_extract().
 * A count of 0 means that no usable _CST data is available.
 */
static struct acpi_processor_power acpi_state_table __initdata;
1169
1170 /**
1171  * intel_idle_cst_usable - Check if the _CST information can be used.
1172  *
1173  * Check if all of the C-states listed by _CST in the max_cstate range are
1174  * ACPI_CSTATE_FFH, which means that they should be entered via MWAIT.
1175  */
1176 static bool __init intel_idle_cst_usable(void)
1177 {
1178         int cstate, limit;
1179
1180         limit = min_t(int, min_t(int, CPUIDLE_STATE_MAX, max_cstate + 1),
1181                       acpi_state_table.count);
1182
1183         for (cstate = 1; cstate < limit; cstate++) {
1184                 struct acpi_processor_cx *cx = &acpi_state_table.states[cstate];
1185
1186                 if (cx->entry_method != ACPI_CSTATE_FFH)
1187                         return false;
1188         }
1189
1190         return true;
1191 }
1192
1193 static bool __init intel_idle_acpi_cst_extract(void)
1194 {
1195         unsigned int cpu;
1196
1197         if (no_acpi) {
1198                 pr_debug("Not allowed to use ACPI _CST\n");
1199                 return false;
1200         }
1201
1202         for_each_possible_cpu(cpu) {
1203                 struct acpi_processor *pr = per_cpu(processors, cpu);
1204
1205                 if (!pr)
1206                         continue;
1207
1208                 if (acpi_processor_evaluate_cst(pr->handle, cpu, &acpi_state_table))
1209                         continue;
1210
1211                 acpi_state_table.count++;
1212
1213                 if (!intel_idle_cst_usable())
1214                         continue;
1215
1216                 if (!acpi_processor_claim_cst_control())
1217                         break;
1218
1219                 return true;
1220         }
1221
1222         acpi_state_table.count = 0;
1223         pr_debug("ACPI _CST not found or not usable\n");
1224         return false;
1225 }
1226
1227 static void __init intel_idle_init_cstates_acpi(struct cpuidle_driver *drv)
1228 {
1229         int cstate, limit = min_t(int, CPUIDLE_STATE_MAX, acpi_state_table.count);
1230
1231         /*
1232          * If limit > 0, intel_idle_cst_usable() has returned 'true', so all of
1233          * the interesting states are ACPI_CSTATE_FFH.
1234          */
1235         for (cstate = 1; cstate < limit; cstate++) {
1236                 struct acpi_processor_cx *cx;
1237                 struct cpuidle_state *state;
1238
1239                 if (intel_idle_max_cstate_reached(cstate - 1))
1240                         break;
1241
1242                 cx = &acpi_state_table.states[cstate];
1243
1244                 state = &drv->states[drv->state_count++];
1245
1246                 snprintf(state->name, CPUIDLE_NAME_LEN, "C%d_ACPI", cstate);
1247                 strlcpy(state->desc, cx->desc, CPUIDLE_DESC_LEN);
1248                 state->exit_latency = cx->latency;
1249                 /*
1250                  * For C1-type C-states use the same number for both the exit
1251                  * latency and target residency, because that is the case for
1252                  * C1 in the majority of the static C-states tables above.
1253                  * For the other types of C-states, however, set the target
1254                  * residency to 3 times the exit latency which should lead to
1255                  * a reasonable balance between energy-efficiency and
1256                  * performance in the majority of interesting cases.
1257                  */
1258                 state->target_residency = cx->latency;
1259                 if (cx->type > ACPI_STATE_C1)
1260                         state->target_residency *= 3;
1261
1262                 state->flags = MWAIT2flg(cx->address);
1263                 if (cx->type > ACPI_STATE_C2)
1264                         state->flags |= CPUIDLE_FLAG_TLB_FLUSHED;
1265
1266                 if (disabled_states_mask & BIT(cstate))
1267                         state->flags |= CPUIDLE_FLAG_OFF;
1268
1269                 if (intel_idle_state_needs_timer_stop(state))
1270                         state->flags |= CPUIDLE_FLAG_TIMER_STOP;
1271
1272                 state->enter = intel_idle;
1273                 state->enter_s2idle = intel_idle_s2idle;
1274         }
1275 }
1276
1277 static bool __init intel_idle_off_by_default(u32 mwait_hint)
1278 {
1279         int cstate, limit;
1280
1281         /*
1282          * If there are no _CST C-states, do not disable any C-states by
1283          * default.
1284          */
1285         if (!acpi_state_table.count)
1286                 return false;
1287
1288         limit = min_t(int, CPUIDLE_STATE_MAX, acpi_state_table.count);
1289         /*
1290          * If limit > 0, intel_idle_cst_usable() has returned 'true', so all of
1291          * the interesting states are ACPI_CSTATE_FFH.
1292          */
1293         for (cstate = 1; cstate < limit; cstate++) {
1294                 if (acpi_state_table.states[cstate].address == mwait_hint)
1295                         return false;
1296         }
1297         return true;
1298 }
1299 #else /* !CONFIG_ACPI_PROCESSOR_CSTATE */
/*
 * Stubs used when CONFIG_ACPI_PROCESSOR_CSTATE is not set: ACPI _CST is
 * never used, no _CST based states are created and no state is disabled
 * by default.
 */
#define force_use_acpi  (false)

static inline bool intel_idle_acpi_cst_extract(void) { return false; }
static inline void intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) { }
static inline bool intel_idle_off_by_default(u32 mwait_hint) { return false; }
1305 #endif /* !CONFIG_ACPI_PROCESSOR_CSTATE */
1306
1307 /**
1308  * ivt_idle_state_table_update - Tune the idle states table for Ivy Town.
1309  *
1310  * Tune IVT multi-socket targets.
1311  * Assumption: num_sockets == (max_package_num + 1).
1312  */
1313 static void __init ivt_idle_state_table_update(void)
1314 {
1315         /* IVT uses a different table for 1-2, 3-4, and > 4 sockets */
1316         int cpu, package_num, num_sockets = 1;
1317
1318         for_each_online_cpu(cpu) {
1319                 package_num = topology_physical_package_id(cpu);
1320                 if (package_num + 1 > num_sockets) {
1321                         num_sockets = package_num + 1;
1322
1323                         if (num_sockets > 4) {
1324                                 cpuidle_state_table = ivt_cstates_8s;
1325                                 return;
1326                         }
1327                 }
1328         }
1329
1330         if (num_sockets > 2)
1331                 cpuidle_state_table = ivt_cstates_4s;
1332
1333         /* else, 1 and 2 socket systems use default ivt_cstates */
1334 }
1335
1336 /**
1337  * irtl_2_usec - IRTL to microseconds conversion.
1338  * @irtl: IRTL MSR value.
1339  *
1340  * Translate the IRTL (Interrupt Response Time Limit) MSR value to microseconds.
1341  */
1342 static unsigned long long __init irtl_2_usec(unsigned long long irtl)
1343 {
1344         static const unsigned int irtl_ns_units[] __initconst = {
1345                 1, 32, 1024, 32768, 1048576, 33554432, 0, 0
1346         };
1347         unsigned long long ns;
1348
1349         if (!irtl)
1350                 return 0;
1351
1352         ns = irtl_ns_units[(irtl >> 10) & 0x7];
1353
1354         return div_u64((irtl & 0x3FF) * ns, NSEC_PER_USEC);
1355 }
1356
1357 /**
1358  * bxt_idle_state_table_update - Fix up the Broxton idle states table.
1359  *
1360  * On BXT, trust the IRTL (Interrupt Response Time Limit) MSR to show the
1361  * definitive maximum latency and use the same value for target_residency.
1362  */
1363 static void __init bxt_idle_state_table_update(void)
1364 {
1365         unsigned long long msr;
1366         unsigned int usec;
1367
1368         rdmsrl(MSR_PKGC6_IRTL, msr);
1369         usec = irtl_2_usec(msr);
1370         if (usec) {
1371                 bxt_cstates[2].exit_latency = usec;
1372                 bxt_cstates[2].target_residency = usec;
1373         }
1374
1375         rdmsrl(MSR_PKGC7_IRTL, msr);
1376         usec = irtl_2_usec(msr);
1377         if (usec) {
1378                 bxt_cstates[3].exit_latency = usec;
1379                 bxt_cstates[3].target_residency = usec;
1380         }
1381
1382         rdmsrl(MSR_PKGC8_IRTL, msr);
1383         usec = irtl_2_usec(msr);
1384         if (usec) {
1385                 bxt_cstates[4].exit_latency = usec;
1386                 bxt_cstates[4].target_residency = usec;
1387         }
1388
1389         rdmsrl(MSR_PKGC9_IRTL, msr);
1390         usec = irtl_2_usec(msr);
1391         if (usec) {
1392                 bxt_cstates[5].exit_latency = usec;
1393                 bxt_cstates[5].target_residency = usec;
1394         }
1395
1396         rdmsrl(MSR_PKGC10_IRTL, msr);
1397         usec = irtl_2_usec(msr);
1398         if (usec) {
1399                 bxt_cstates[6].exit_latency = usec;
1400                 bxt_cstates[6].target_residency = usec;
1401         }
1402
1403 }
1404
1405 /**
1406  * sklh_idle_state_table_update - Fix up the Sky Lake idle states table.
1407  *
1408  * On SKL-H (model 0x5e) skip C8 and C9 if C10 is enabled and SGX disabled.
1409  */
1410 static void __init sklh_idle_state_table_update(void)
1411 {
1412         unsigned long long msr;
1413         unsigned int eax, ebx, ecx, edx;
1414
1415
1416         /* if PC10 disabled via cmdline intel_idle.max_cstate=7 or shallower */
1417         if (max_cstate <= 7)
1418                 return;
1419
1420         /* if PC10 not present in CPUID.MWAIT.EDX */
1421         if ((mwait_substates & (0xF << 28)) == 0)
1422                 return;
1423
1424         rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr);
1425
1426         /* PC10 is not enabled in PKG C-state limit */
1427         if ((msr & 0xF) != 8)
1428                 return;
1429
1430         ecx = 0;
1431         cpuid(7, &eax, &ebx, &ecx, &edx);
1432
1433         /* if SGX is present */
1434         if (ebx & (1 << 2)) {
1435
1436                 rdmsrl(MSR_IA32_FEAT_CTL, msr);
1437
1438                 /* if SGX is enabled */
1439                 if (msr & (1 << 18))
1440                         return;
1441         }
1442
1443         skl_cstates[5].flags |= CPUIDLE_FLAG_UNUSABLE;  /* C8-SKL */
1444         skl_cstates[6].flags |= CPUIDLE_FLAG_UNUSABLE;  /* C9-SKL */
1445 }
1446
1447 static bool __init intel_idle_verify_cstate(unsigned int mwait_hint)
1448 {
1449         unsigned int mwait_cstate = MWAIT_HINT2CSTATE(mwait_hint) + 1;
1450         unsigned int num_substates = (mwait_substates >> mwait_cstate * 4) &
1451                                         MWAIT_SUBSTATE_MASK;
1452
1453         /* Ignore the C-state if there are NO sub-states in CPUID for it. */
1454         if (num_substates == 0)
1455                 return false;
1456
1457         if (mwait_cstate > 2 && !boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
1458                 mark_tsc_unstable("TSC halts in idle states deeper than C2");
1459
1460         return true;
1461 }
1462
1463 static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv)
1464 {
1465         int cstate;
1466
1467         switch (boot_cpu_data.x86_model) {
1468         case INTEL_FAM6_IVYBRIDGE_X:
1469                 ivt_idle_state_table_update();
1470                 break;
1471         case INTEL_FAM6_ATOM_GOLDMONT:
1472         case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
1473                 bxt_idle_state_table_update();
1474                 break;
1475         case INTEL_FAM6_SKYLAKE:
1476                 sklh_idle_state_table_update();
1477                 break;
1478         }
1479
1480         for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) {
1481                 unsigned int mwait_hint;
1482
1483                 if (intel_idle_max_cstate_reached(cstate))
1484                         break;
1485
1486                 if (!cpuidle_state_table[cstate].enter &&
1487                     !cpuidle_state_table[cstate].enter_s2idle)
1488                         break;
1489
1490                 /* If marked as unusable, skip this state. */
1491                 if (cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_UNUSABLE) {
1492                         pr_debug("state %s is disabled\n",
1493                                  cpuidle_state_table[cstate].name);
1494                         continue;
1495                 }
1496
1497                 mwait_hint = flg2MWAIT(cpuidle_state_table[cstate].flags);
1498                 if (!intel_idle_verify_cstate(mwait_hint))
1499                         continue;
1500
1501                 /* Structure copy. */
1502                 drv->states[drv->state_count] = cpuidle_state_table[cstate];
1503
1504                 if ((disabled_states_mask & BIT(drv->state_count)) ||
1505                     ((icpu->use_acpi || force_use_acpi) &&
1506                      intel_idle_off_by_default(mwait_hint) &&
1507                      !(cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_ALWAYS_ENABLE)))
1508                         drv->states[drv->state_count].flags |= CPUIDLE_FLAG_OFF;
1509
1510                 if (intel_idle_state_needs_timer_stop(&drv->states[drv->state_count]))
1511                         drv->states[drv->state_count].flags |= CPUIDLE_FLAG_TIMER_STOP;
1512
1513                 drv->state_count++;
1514         }
1515
1516         if (icpu->byt_auto_demotion_disable_flag) {
1517                 wrmsrl(MSR_CC6_DEMOTION_POLICY_CONFIG, 0);
1518                 wrmsrl(MSR_MC6_DEMOTION_POLICY_CONFIG, 0);
1519         }
1520 }
1521
1522 /**
1523  * intel_idle_cpuidle_driver_init - Create the list of available idle states.
1524  * @drv: cpuidle driver structure to initialize.
1525  */
1526 static void __init intel_idle_cpuidle_driver_init(struct cpuidle_driver *drv)
1527 {
1528         cpuidle_poll_state_init(drv);
1529
1530         if (disabled_states_mask & BIT(0))
1531                 drv->states[0].flags |= CPUIDLE_FLAG_OFF;
1532
1533         drv->state_count = 1;
1534
1535         if (icpu)
1536                 intel_idle_init_cstates_icpu(drv);
1537         else
1538                 intel_idle_init_cstates_acpi(drv);
1539 }
1540
1541 static void auto_demotion_disable(void)
1542 {
1543         unsigned long long msr_bits;
1544
1545         rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits);
1546         msr_bits &= ~auto_demotion_disable_flags;
1547         wrmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits);
1548 }
1549
1550 static void c1e_promotion_disable(void)
1551 {
1552         unsigned long long msr_bits;
1553
1554         rdmsrl(MSR_IA32_POWER_CTL, msr_bits);
1555         msr_bits &= ~0x2;
1556         wrmsrl(MSR_IA32_POWER_CTL, msr_bits);
1557 }
1558
1559 /**
1560  * intel_idle_cpu_init - Register the target CPU with the cpuidle core.
1561  * @cpu: CPU to initialize.
1562  *
1563  * Register a cpuidle device object for @cpu and update its MSRs in accordance
1564  * with the processor model flags.
1565  */
1566 static int intel_idle_cpu_init(unsigned int cpu)
1567 {
1568         struct cpuidle_device *dev;
1569
1570         dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu);
1571         dev->cpu = cpu;
1572
1573         if (cpuidle_register_device(dev)) {
1574                 pr_debug("cpuidle_register_device %d failed!\n", cpu);
1575                 return -EIO;
1576         }
1577
1578         if (auto_demotion_disable_flags)
1579                 auto_demotion_disable();
1580
1581         if (disable_promotion_to_c1e)
1582                 c1e_promotion_disable();
1583
1584         return 0;
1585 }
1586
1587 static int intel_idle_cpu_online(unsigned int cpu)
1588 {
1589         struct cpuidle_device *dev;
1590
1591         if (!boot_cpu_has(X86_FEATURE_ARAT))
1592                 tick_broadcast_enable();
1593
1594         /*
1595          * Some systems can hotplug a cpu at runtime after
1596          * the kernel has booted, we have to initialize the
1597          * driver in this case
1598          */
1599         dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu);
1600         if (!dev->registered)
1601                 return intel_idle_cpu_init(cpu);
1602
1603         return 0;
1604 }
1605
1606 /**
1607  * intel_idle_cpuidle_devices_uninit - Unregister all cpuidle devices.
1608  */
1609 static void __init intel_idle_cpuidle_devices_uninit(void)
1610 {
1611         int i;
1612
1613         for_each_online_cpu(i)
1614                 cpuidle_unregister_device(per_cpu_ptr(intel_idle_cpuidle_devices, i));
1615 }
1616
1617 static int __init intel_idle_init(void)
1618 {
1619         const struct x86_cpu_id *id;
1620         unsigned int eax, ebx, ecx;
1621         int retval;
1622
1623         /* Do not load intel_idle at all for now if idle= is passed */
1624         if (boot_option_idle_override != IDLE_NO_OVERRIDE)
1625                 return -ENODEV;
1626
1627         if (max_cstate == 0) {
1628                 pr_debug("disabled\n");
1629                 return -EPERM;
1630         }
1631
1632         id = x86_match_cpu(intel_idle_ids);
1633         if (id) {
1634                 if (!boot_cpu_has(X86_FEATURE_MWAIT)) {
1635                         pr_debug("Please enable MWAIT in BIOS SETUP\n");
1636                         return -ENODEV;
1637                 }
1638         } else {
1639                 id = x86_match_cpu(intel_mwait_ids);
1640                 if (!id)
1641                         return -ENODEV;
1642         }
1643
1644         if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF)
1645                 return -ENODEV;
1646
1647         cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &mwait_substates);
1648
1649         if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) ||
1650             !(ecx & CPUID5_ECX_INTERRUPT_BREAK) ||
1651             !mwait_substates)
1652                         return -ENODEV;
1653
1654         pr_debug("MWAIT substates: 0x%x\n", mwait_substates);
1655
1656         icpu = (const struct idle_cpu *)id->driver_data;
1657         if (icpu) {
1658                 cpuidle_state_table = icpu->state_table;
1659                 auto_demotion_disable_flags = icpu->auto_demotion_disable_flags;
1660                 disable_promotion_to_c1e = icpu->disable_promotion_to_c1e;
1661                 if (icpu->use_acpi || force_use_acpi)
1662                         intel_idle_acpi_cst_extract();
1663         } else if (!intel_idle_acpi_cst_extract()) {
1664                 return -ENODEV;
1665         }
1666
1667         pr_debug("v" INTEL_IDLE_VERSION " model 0x%X\n",
1668                  boot_cpu_data.x86_model);
1669
1670         intel_idle_cpuidle_devices = alloc_percpu(struct cpuidle_device);
1671         if (!intel_idle_cpuidle_devices)
1672                 return -ENOMEM;
1673
1674         intel_idle_cpuidle_driver_init(&intel_idle_driver);
1675
1676         retval = cpuidle_register_driver(&intel_idle_driver);
1677         if (retval) {
1678                 struct cpuidle_driver *drv = cpuidle_get_driver();
1679                 printk(KERN_DEBUG pr_fmt("intel_idle yielding to %s\n"),
1680                        drv ? drv->name : "none");
1681                 goto init_driver_fail;
1682         }
1683
1684         retval = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "idle/intel:online",
1685                                    intel_idle_cpu_online, NULL);
1686         if (retval < 0)
1687                 goto hp_setup_fail;
1688
1689         pr_debug("Local APIC timer is reliable in %s\n",
1690                  boot_cpu_has(X86_FEATURE_ARAT) ? "all C-states" : "C1");
1691
1692         return 0;
1693
1694 hp_setup_fail:
1695         intel_idle_cpuidle_devices_uninit();
1696         cpuidle_unregister_driver(&intel_idle_driver);
1697 init_driver_fail:
1698         free_percpu(intel_idle_cpuidle_devices);
1699         return retval;
1700
1701 }
/* Have intel_idle_init() invoked as a device-level initcall. */
device_initcall(intel_idle_init);

/*
 * We are not really modular, but we used to support that.  Meaning we also
 * support "intel_idle.max_cstate=..." at boot and also a read-only export of
 * it at /sys/module/intel_idle/parameters/max_cstate -- so using module_param
 * is the easiest way (currently) to continue doing that.
 */
module_param(max_cstate, int, 0444);
/*
 * The positions of the bits that are set in this number are the indices of the
 * idle states to be disabled by default (as reflected by the names of the
 * corresponding idle state directories in sysfs, "state0", "state1" ...
 * "state<i>" ..., where <i> is the index of the given state).
 *
 * For example, the value 0x5 has bits 0 and 2 set and so refers to "state0"
 * and "state2".
 */
module_param_named(states_off, disabled_states_mask, uint, 0444);
MODULE_PARM_DESC(states_off, "Mask of disabled idle states");