Merge tag 'for-linus-5.6-ofs1' of git://git.kernel.org/pub/scm/linux/kernel/git/hubca...
[linux-2.6-microblaze.git] / drivers / idle / intel_idle.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * intel_idle.c - native hardware idle loop for modern Intel processors
4  *
5  * Copyright (c) 2013, Intel Corporation.
6  * Len Brown <len.brown@intel.com>
7  */
8
9 /*
10  * intel_idle is a cpuidle driver that loads on specific Intel processors
11  * in lieu of the legacy ACPI processor_idle driver.  The intent is to
12  * make Linux more efficient on these processors, as intel_idle knows
13  * more than ACPI, as well as make Linux more immune to ACPI BIOS bugs.
14  */
15
16 /*
17  * Design Assumptions
18  *
19  * All CPUs have same idle states as boot CPU
20  *
21  * Chipset BM_STS (bus master status) bit is a NOP
22  *      for preventing entry into deep C-states
23  */
24
25 /*
26  * Known limitations
27  *
28  * The driver currently initializes for_each_online_cpu() upon modprobe.
29  * It is unaware of subsequent processors hot-added to the system.
30  * This means that if you boot with maxcpus=n and later online
31  * processors above n, those processors will use C1 only.
32  *
33  * ACPI has a .suspend hack to turn off deep C-states during suspend
34  * to avoid complications with the lapic timer workaround.
35  * Have not seen issues with suspend, but may need same workaround here.
36  *
37  */
38
39 /* DEBUG is defined below to enable pr_debug() statements; comment it out to disable them */
40 #define DEBUG
41
42 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
43
44 #include <linux/acpi.h>
45 #include <linux/kernel.h>
46 #include <linux/cpuidle.h>
47 #include <linux/tick.h>
48 #include <trace/events/power.h>
49 #include <linux/sched.h>
50 #include <linux/notifier.h>
51 #include <linux/cpu.h>
52 #include <linux/moduleparam.h>
53 #include <asm/cpu_device_id.h>
54 #include <asm/intel-family.h>
55 #include <asm/mwait.h>
56 #include <asm/msr.h>
57
58 #define INTEL_IDLE_VERSION "0.4.1"
59
60 static struct cpuidle_driver intel_idle_driver = {
61         .name = "intel_idle",
62         .owner = THIS_MODULE,
63 };
64 /* intel_idle.max_cstate=0 disables driver */
65 static int max_cstate = CPUIDLE_STATE_MAX - 1;
66 static unsigned int disabled_states_mask;
67
68 static unsigned int mwait_substates;
69
70 #define LAPIC_TIMER_ALWAYS_RELIABLE 0xFFFFFFFF
71 /* Reliable LAPIC Timer States, bit 1 for C1 etc.  */
72 static unsigned int lapic_timer_reliable_states = (1 << 1);      /* Default to only C1 */
73
/*
 * Per-CPU-model description used by the driver: which C-state table to
 * use plus a few model-specific switches.
 */
struct idle_cpu {
	/* C-state table for this model (one of the *_cstates[] arrays). */
	struct cpuidle_state *state_table;

	/*
	 * Hardware C-state auto-demotion may not always be optimal.
	 * The bits listed here are the enable bits that should be cleared.
	 */
	unsigned long auto_demotion_disable_flags;
	/* NOTE(review): presumably the Bay Trail flavour of the above - confirm. */
	bool byt_auto_demotion_disable_flag;
	/* When true, promotion to C1E is to be disabled for this model. */
	bool disable_promotion_to_c1e;
	/* NOTE(review): presumably "consult ACPI for C-state data" - confirm against users. */
	bool use_acpi;
};
86
87 static const struct idle_cpu *icpu;
88 static struct cpuidle_device __percpu *intel_idle_cpuidle_devices;
89 static int intel_idle(struct cpuidle_device *dev,
90                         struct cpuidle_driver *drv, int index);
91 static void intel_idle_s2idle(struct cpuidle_device *dev,
92                               struct cpuidle_driver *drv, int index);
93 static struct cpuidle_state *cpuidle_state_table;
94
/*
 * State flag (bit 15): enable the state by default even when the
 * ACPI _CST does not list it.
 */
#define CPUIDLE_FLAG_ALWAYS_ENABLE	BIT(15)

/*
 * State flag (bit 16): use it for states in which the hardware flushes
 * the TLB, so that no cross-calls are needed to keep the TLB consistent.
 * Software flushes the TLB whenever the flag is set, which makes the flag
 * safe to use even if the hardware does not actually do the flushing.
 */
#define CPUIDLE_FLAG_TLB_FLUSHED	0x00010000
107
/*
 * MWAIT takes an 8-bit "hint" in EAX "suggesting"
 * the C-state (top nibble) and sub-state (bottom nibble)
 * 0x00 means "MWAIT(C1)", 0x10 means "MWAIT(C2)" etc.
 *
 * We store the hint at the top of our "flags" for each state:
 *   flg2MWAIT(flags) extracts the hint from bits 31:24 of a flags word,
 *   MWAIT2flg(eax)   places the low 8 bits of a hint into bits 31:24.
 *
 * Both macro arguments are fully parenthesized so that an expression
 * argument (e.g. "a ^ b" or "cond ? x : y") is masked as a whole rather
 * than being split by operator precedence.
 */
#define flg2MWAIT(flags) (((flags) >> 24) & 0xFF)
#define MWAIT2flg(eax) (((eax) & 0xFF) << 24)
117
118 /*
119  * States are indexed by the cstate number,
120  * which is also the index into the MWAIT hint array.
121  * Thus C0 is a dummy.
122  */
123 static struct cpuidle_state nehalem_cstates[] = {
124         {
125                 .name = "C1",
126                 .desc = "MWAIT 0x00",
127                 .flags = MWAIT2flg(0x00),
128                 .exit_latency = 3,
129                 .target_residency = 6,
130                 .enter = &intel_idle,
131                 .enter_s2idle = intel_idle_s2idle, },
132         {
133                 .name = "C1E",
134                 .desc = "MWAIT 0x01",
135                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
136                 .exit_latency = 10,
137                 .target_residency = 20,
138                 .enter = &intel_idle,
139                 .enter_s2idle = intel_idle_s2idle, },
140         {
141                 .name = "C3",
142                 .desc = "MWAIT 0x10",
143                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
144                 .exit_latency = 20,
145                 .target_residency = 80,
146                 .enter = &intel_idle,
147                 .enter_s2idle = intel_idle_s2idle, },
148         {
149                 .name = "C6",
150                 .desc = "MWAIT 0x20",
151                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
152                 .exit_latency = 200,
153                 .target_residency = 800,
154                 .enter = &intel_idle,
155                 .enter_s2idle = intel_idle_s2idle, },
156         {
157                 .enter = NULL }
158 };
159
160 static struct cpuidle_state snb_cstates[] = {
161         {
162                 .name = "C1",
163                 .desc = "MWAIT 0x00",
164                 .flags = MWAIT2flg(0x00),
165                 .exit_latency = 2,
166                 .target_residency = 2,
167                 .enter = &intel_idle,
168                 .enter_s2idle = intel_idle_s2idle, },
169         {
170                 .name = "C1E",
171                 .desc = "MWAIT 0x01",
172                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
173                 .exit_latency = 10,
174                 .target_residency = 20,
175                 .enter = &intel_idle,
176                 .enter_s2idle = intel_idle_s2idle, },
177         {
178                 .name = "C3",
179                 .desc = "MWAIT 0x10",
180                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
181                 .exit_latency = 80,
182                 .target_residency = 211,
183                 .enter = &intel_idle,
184                 .enter_s2idle = intel_idle_s2idle, },
185         {
186                 .name = "C6",
187                 .desc = "MWAIT 0x20",
188                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
189                 .exit_latency = 104,
190                 .target_residency = 345,
191                 .enter = &intel_idle,
192                 .enter_s2idle = intel_idle_s2idle, },
193         {
194                 .name = "C7",
195                 .desc = "MWAIT 0x30",
196                 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
197                 .exit_latency = 109,
198                 .target_residency = 345,
199                 .enter = &intel_idle,
200                 .enter_s2idle = intel_idle_s2idle, },
201         {
202                 .enter = NULL }
203 };
204
205 static struct cpuidle_state byt_cstates[] = {
206         {
207                 .name = "C1",
208                 .desc = "MWAIT 0x00",
209                 .flags = MWAIT2flg(0x00),
210                 .exit_latency = 1,
211                 .target_residency = 1,
212                 .enter = &intel_idle,
213                 .enter_s2idle = intel_idle_s2idle, },
214         {
215                 .name = "C6N",
216                 .desc = "MWAIT 0x58",
217                 .flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED,
218                 .exit_latency = 300,
219                 .target_residency = 275,
220                 .enter = &intel_idle,
221                 .enter_s2idle = intel_idle_s2idle, },
222         {
223                 .name = "C6S",
224                 .desc = "MWAIT 0x52",
225                 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
226                 .exit_latency = 500,
227                 .target_residency = 560,
228                 .enter = &intel_idle,
229                 .enter_s2idle = intel_idle_s2idle, },
230         {
231                 .name = "C7",
232                 .desc = "MWAIT 0x60",
233                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
234                 .exit_latency = 1200,
235                 .target_residency = 4000,
236                 .enter = &intel_idle,
237                 .enter_s2idle = intel_idle_s2idle, },
238         {
239                 .name = "C7S",
240                 .desc = "MWAIT 0x64",
241                 .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
242                 .exit_latency = 10000,
243                 .target_residency = 20000,
244                 .enter = &intel_idle,
245                 .enter_s2idle = intel_idle_s2idle, },
246         {
247                 .enter = NULL }
248 };
249
250 static struct cpuidle_state cht_cstates[] = {
251         {
252                 .name = "C1",
253                 .desc = "MWAIT 0x00",
254                 .flags = MWAIT2flg(0x00),
255                 .exit_latency = 1,
256                 .target_residency = 1,
257                 .enter = &intel_idle,
258                 .enter_s2idle = intel_idle_s2idle, },
259         {
260                 .name = "C6N",
261                 .desc = "MWAIT 0x58",
262                 .flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED,
263                 .exit_latency = 80,
264                 .target_residency = 275,
265                 .enter = &intel_idle,
266                 .enter_s2idle = intel_idle_s2idle, },
267         {
268                 .name = "C6S",
269                 .desc = "MWAIT 0x52",
270                 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
271                 .exit_latency = 200,
272                 .target_residency = 560,
273                 .enter = &intel_idle,
274                 .enter_s2idle = intel_idle_s2idle, },
275         {
276                 .name = "C7",
277                 .desc = "MWAIT 0x60",
278                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
279                 .exit_latency = 1200,
280                 .target_residency = 4000,
281                 .enter = &intel_idle,
282                 .enter_s2idle = intel_idle_s2idle, },
283         {
284                 .name = "C7S",
285                 .desc = "MWAIT 0x64",
286                 .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
287                 .exit_latency = 10000,
288                 .target_residency = 20000,
289                 .enter = &intel_idle,
290                 .enter_s2idle = intel_idle_s2idle, },
291         {
292                 .enter = NULL }
293 };
294
295 static struct cpuidle_state ivb_cstates[] = {
296         {
297                 .name = "C1",
298                 .desc = "MWAIT 0x00",
299                 .flags = MWAIT2flg(0x00),
300                 .exit_latency = 1,
301                 .target_residency = 1,
302                 .enter = &intel_idle,
303                 .enter_s2idle = intel_idle_s2idle, },
304         {
305                 .name = "C1E",
306                 .desc = "MWAIT 0x01",
307                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
308                 .exit_latency = 10,
309                 .target_residency = 20,
310                 .enter = &intel_idle,
311                 .enter_s2idle = intel_idle_s2idle, },
312         {
313                 .name = "C3",
314                 .desc = "MWAIT 0x10",
315                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
316                 .exit_latency = 59,
317                 .target_residency = 156,
318                 .enter = &intel_idle,
319                 .enter_s2idle = intel_idle_s2idle, },
320         {
321                 .name = "C6",
322                 .desc = "MWAIT 0x20",
323                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
324                 .exit_latency = 80,
325                 .target_residency = 300,
326                 .enter = &intel_idle,
327                 .enter_s2idle = intel_idle_s2idle, },
328         {
329                 .name = "C7",
330                 .desc = "MWAIT 0x30",
331                 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
332                 .exit_latency = 87,
333                 .target_residency = 300,
334                 .enter = &intel_idle,
335                 .enter_s2idle = intel_idle_s2idle, },
336         {
337                 .enter = NULL }
338 };
339
340 static struct cpuidle_state ivt_cstates[] = {
341         {
342                 .name = "C1",
343                 .desc = "MWAIT 0x00",
344                 .flags = MWAIT2flg(0x00),
345                 .exit_latency = 1,
346                 .target_residency = 1,
347                 .enter = &intel_idle,
348                 .enter_s2idle = intel_idle_s2idle, },
349         {
350                 .name = "C1E",
351                 .desc = "MWAIT 0x01",
352                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
353                 .exit_latency = 10,
354                 .target_residency = 80,
355                 .enter = &intel_idle,
356                 .enter_s2idle = intel_idle_s2idle, },
357         {
358                 .name = "C3",
359                 .desc = "MWAIT 0x10",
360                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
361                 .exit_latency = 59,
362                 .target_residency = 156,
363                 .enter = &intel_idle,
364                 .enter_s2idle = intel_idle_s2idle, },
365         {
366                 .name = "C6",
367                 .desc = "MWAIT 0x20",
368                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
369                 .exit_latency = 82,
370                 .target_residency = 300,
371                 .enter = &intel_idle,
372                 .enter_s2idle = intel_idle_s2idle, },
373         {
374                 .enter = NULL }
375 };
376
377 static struct cpuidle_state ivt_cstates_4s[] = {
378         {
379                 .name = "C1",
380                 .desc = "MWAIT 0x00",
381                 .flags = MWAIT2flg(0x00),
382                 .exit_latency = 1,
383                 .target_residency = 1,
384                 .enter = &intel_idle,
385                 .enter_s2idle = intel_idle_s2idle, },
386         {
387                 .name = "C1E",
388                 .desc = "MWAIT 0x01",
389                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
390                 .exit_latency = 10,
391                 .target_residency = 250,
392                 .enter = &intel_idle,
393                 .enter_s2idle = intel_idle_s2idle, },
394         {
395                 .name = "C3",
396                 .desc = "MWAIT 0x10",
397                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
398                 .exit_latency = 59,
399                 .target_residency = 300,
400                 .enter = &intel_idle,
401                 .enter_s2idle = intel_idle_s2idle, },
402         {
403                 .name = "C6",
404                 .desc = "MWAIT 0x20",
405                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
406                 .exit_latency = 84,
407                 .target_residency = 400,
408                 .enter = &intel_idle,
409                 .enter_s2idle = intel_idle_s2idle, },
410         {
411                 .enter = NULL }
412 };
413
414 static struct cpuidle_state ivt_cstates_8s[] = {
415         {
416                 .name = "C1",
417                 .desc = "MWAIT 0x00",
418                 .flags = MWAIT2flg(0x00),
419                 .exit_latency = 1,
420                 .target_residency = 1,
421                 .enter = &intel_idle,
422                 .enter_s2idle = intel_idle_s2idle, },
423         {
424                 .name = "C1E",
425                 .desc = "MWAIT 0x01",
426                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
427                 .exit_latency = 10,
428                 .target_residency = 500,
429                 .enter = &intel_idle,
430                 .enter_s2idle = intel_idle_s2idle, },
431         {
432                 .name = "C3",
433                 .desc = "MWAIT 0x10",
434                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
435                 .exit_latency = 59,
436                 .target_residency = 600,
437                 .enter = &intel_idle,
438                 .enter_s2idle = intel_idle_s2idle, },
439         {
440                 .name = "C6",
441                 .desc = "MWAIT 0x20",
442                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
443                 .exit_latency = 88,
444                 .target_residency = 700,
445                 .enter = &intel_idle,
446                 .enter_s2idle = intel_idle_s2idle, },
447         {
448                 .enter = NULL }
449 };
450
451 static struct cpuidle_state hsw_cstates[] = {
452         {
453                 .name = "C1",
454                 .desc = "MWAIT 0x00",
455                 .flags = MWAIT2flg(0x00),
456                 .exit_latency = 2,
457                 .target_residency = 2,
458                 .enter = &intel_idle,
459                 .enter_s2idle = intel_idle_s2idle, },
460         {
461                 .name = "C1E",
462                 .desc = "MWAIT 0x01",
463                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
464                 .exit_latency = 10,
465                 .target_residency = 20,
466                 .enter = &intel_idle,
467                 .enter_s2idle = intel_idle_s2idle, },
468         {
469                 .name = "C3",
470                 .desc = "MWAIT 0x10",
471                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
472                 .exit_latency = 33,
473                 .target_residency = 100,
474                 .enter = &intel_idle,
475                 .enter_s2idle = intel_idle_s2idle, },
476         {
477                 .name = "C6",
478                 .desc = "MWAIT 0x20",
479                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
480                 .exit_latency = 133,
481                 .target_residency = 400,
482                 .enter = &intel_idle,
483                 .enter_s2idle = intel_idle_s2idle, },
484         {
485                 .name = "C7s",
486                 .desc = "MWAIT 0x32",
487                 .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED,
488                 .exit_latency = 166,
489                 .target_residency = 500,
490                 .enter = &intel_idle,
491                 .enter_s2idle = intel_idle_s2idle, },
492         {
493                 .name = "C8",
494                 .desc = "MWAIT 0x40",
495                 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
496                 .exit_latency = 300,
497                 .target_residency = 900,
498                 .enter = &intel_idle,
499                 .enter_s2idle = intel_idle_s2idle, },
500         {
501                 .name = "C9",
502                 .desc = "MWAIT 0x50",
503                 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
504                 .exit_latency = 600,
505                 .target_residency = 1800,
506                 .enter = &intel_idle,
507                 .enter_s2idle = intel_idle_s2idle, },
508         {
509                 .name = "C10",
510                 .desc = "MWAIT 0x60",
511                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
512                 .exit_latency = 2600,
513                 .target_residency = 7700,
514                 .enter = &intel_idle,
515                 .enter_s2idle = intel_idle_s2idle, },
516         {
517                 .enter = NULL }
518 };
519 static struct cpuidle_state bdw_cstates[] = {
520         {
521                 .name = "C1",
522                 .desc = "MWAIT 0x00",
523                 .flags = MWAIT2flg(0x00),
524                 .exit_latency = 2,
525                 .target_residency = 2,
526                 .enter = &intel_idle,
527                 .enter_s2idle = intel_idle_s2idle, },
528         {
529                 .name = "C1E",
530                 .desc = "MWAIT 0x01",
531                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
532                 .exit_latency = 10,
533                 .target_residency = 20,
534                 .enter = &intel_idle,
535                 .enter_s2idle = intel_idle_s2idle, },
536         {
537                 .name = "C3",
538                 .desc = "MWAIT 0x10",
539                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
540                 .exit_latency = 40,
541                 .target_residency = 100,
542                 .enter = &intel_idle,
543                 .enter_s2idle = intel_idle_s2idle, },
544         {
545                 .name = "C6",
546                 .desc = "MWAIT 0x20",
547                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
548                 .exit_latency = 133,
549                 .target_residency = 400,
550                 .enter = &intel_idle,
551                 .enter_s2idle = intel_idle_s2idle, },
552         {
553                 .name = "C7s",
554                 .desc = "MWAIT 0x32",
555                 .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED,
556                 .exit_latency = 166,
557                 .target_residency = 500,
558                 .enter = &intel_idle,
559                 .enter_s2idle = intel_idle_s2idle, },
560         {
561                 .name = "C8",
562                 .desc = "MWAIT 0x40",
563                 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
564                 .exit_latency = 300,
565                 .target_residency = 900,
566                 .enter = &intel_idle,
567                 .enter_s2idle = intel_idle_s2idle, },
568         {
569                 .name = "C9",
570                 .desc = "MWAIT 0x50",
571                 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
572                 .exit_latency = 600,
573                 .target_residency = 1800,
574                 .enter = &intel_idle,
575                 .enter_s2idle = intel_idle_s2idle, },
576         {
577                 .name = "C10",
578                 .desc = "MWAIT 0x60",
579                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
580                 .exit_latency = 2600,
581                 .target_residency = 7700,
582                 .enter = &intel_idle,
583                 .enter_s2idle = intel_idle_s2idle, },
584         {
585                 .enter = NULL }
586 };
587
588 static struct cpuidle_state skl_cstates[] = {
589         {
590                 .name = "C1",
591                 .desc = "MWAIT 0x00",
592                 .flags = MWAIT2flg(0x00),
593                 .exit_latency = 2,
594                 .target_residency = 2,
595                 .enter = &intel_idle,
596                 .enter_s2idle = intel_idle_s2idle, },
597         {
598                 .name = "C1E",
599                 .desc = "MWAIT 0x01",
600                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
601                 .exit_latency = 10,
602                 .target_residency = 20,
603                 .enter = &intel_idle,
604                 .enter_s2idle = intel_idle_s2idle, },
605         {
606                 .name = "C3",
607                 .desc = "MWAIT 0x10",
608                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
609                 .exit_latency = 70,
610                 .target_residency = 100,
611                 .enter = &intel_idle,
612                 .enter_s2idle = intel_idle_s2idle, },
613         {
614                 .name = "C6",
615                 .desc = "MWAIT 0x20",
616                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
617                 .exit_latency = 85,
618                 .target_residency = 200,
619                 .enter = &intel_idle,
620                 .enter_s2idle = intel_idle_s2idle, },
621         {
622                 .name = "C7s",
623                 .desc = "MWAIT 0x33",
624                 .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED,
625                 .exit_latency = 124,
626                 .target_residency = 800,
627                 .enter = &intel_idle,
628                 .enter_s2idle = intel_idle_s2idle, },
629         {
630                 .name = "C8",
631                 .desc = "MWAIT 0x40",
632                 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
633                 .exit_latency = 200,
634                 .target_residency = 800,
635                 .enter = &intel_idle,
636                 .enter_s2idle = intel_idle_s2idle, },
637         {
638                 .name = "C9",
639                 .desc = "MWAIT 0x50",
640                 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
641                 .exit_latency = 480,
642                 .target_residency = 5000,
643                 .enter = &intel_idle,
644                 .enter_s2idle = intel_idle_s2idle, },
645         {
646                 .name = "C10",
647                 .desc = "MWAIT 0x60",
648                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
649                 .exit_latency = 890,
650                 .target_residency = 5000,
651                 .enter = &intel_idle,
652                 .enter_s2idle = intel_idle_s2idle, },
653         {
654                 .enter = NULL }
655 };
656
657 static struct cpuidle_state skx_cstates[] = {
658         {
659                 .name = "C1",
660                 .desc = "MWAIT 0x00",
661                 .flags = MWAIT2flg(0x00),
662                 .exit_latency = 2,
663                 .target_residency = 2,
664                 .enter = &intel_idle,
665                 .enter_s2idle = intel_idle_s2idle, },
666         {
667                 .name = "C1E",
668                 .desc = "MWAIT 0x01",
669                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
670                 .exit_latency = 10,
671                 .target_residency = 20,
672                 .enter = &intel_idle,
673                 .enter_s2idle = intel_idle_s2idle, },
674         {
675                 .name = "C6",
676                 .desc = "MWAIT 0x20",
677                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
678                 .exit_latency = 133,
679                 .target_residency = 600,
680                 .enter = &intel_idle,
681                 .enter_s2idle = intel_idle_s2idle, },
682         {
683                 .enter = NULL }
684 };
685
686 static struct cpuidle_state atom_cstates[] = {
687         {
688                 .name = "C1E",
689                 .desc = "MWAIT 0x00",
690                 .flags = MWAIT2flg(0x00),
691                 .exit_latency = 10,
692                 .target_residency = 20,
693                 .enter = &intel_idle,
694                 .enter_s2idle = intel_idle_s2idle, },
695         {
696                 .name = "C2",
697                 .desc = "MWAIT 0x10",
698                 .flags = MWAIT2flg(0x10),
699                 .exit_latency = 20,
700                 .target_residency = 80,
701                 .enter = &intel_idle,
702                 .enter_s2idle = intel_idle_s2idle, },
703         {
704                 .name = "C4",
705                 .desc = "MWAIT 0x30",
706                 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
707                 .exit_latency = 100,
708                 .target_residency = 400,
709                 .enter = &intel_idle,
710                 .enter_s2idle = intel_idle_s2idle, },
711         {
712                 .name = "C6",
713                 .desc = "MWAIT 0x52",
714                 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
715                 .exit_latency = 140,
716                 .target_residency = 560,
717                 .enter = &intel_idle,
718                 .enter_s2idle = intel_idle_s2idle, },
719         {
720                 .enter = NULL }
721 };
722 static struct cpuidle_state tangier_cstates[] = {
723         {
724                 .name = "C1",
725                 .desc = "MWAIT 0x00",
726                 .flags = MWAIT2flg(0x00),
727                 .exit_latency = 1,
728                 .target_residency = 4,
729                 .enter = &intel_idle,
730                 .enter_s2idle = intel_idle_s2idle, },
731         {
732                 .name = "C4",
733                 .desc = "MWAIT 0x30",
734                 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
735                 .exit_latency = 100,
736                 .target_residency = 400,
737                 .enter = &intel_idle,
738                 .enter_s2idle = intel_idle_s2idle, },
739         {
740                 .name = "C6",
741                 .desc = "MWAIT 0x52",
742                 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
743                 .exit_latency = 140,
744                 .target_residency = 560,
745                 .enter = &intel_idle,
746                 .enter_s2idle = intel_idle_s2idle, },
747         {
748                 .name = "C7",
749                 .desc = "MWAIT 0x60",
750                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
751                 .exit_latency = 1200,
752                 .target_residency = 4000,
753                 .enter = &intel_idle,
754                 .enter_s2idle = intel_idle_s2idle, },
755         {
756                 .name = "C9",
757                 .desc = "MWAIT 0x64",
758                 .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
759                 .exit_latency = 10000,
760                 .target_residency = 20000,
761                 .enter = &intel_idle,
762                 .enter_s2idle = intel_idle_s2idle, },
763         {
764                 .enter = NULL }
765 };
766 static struct cpuidle_state avn_cstates[] = {
767         {
768                 .name = "C1",
769                 .desc = "MWAIT 0x00",
770                 .flags = MWAIT2flg(0x00),
771                 .exit_latency = 2,
772                 .target_residency = 2,
773                 .enter = &intel_idle,
774                 .enter_s2idle = intel_idle_s2idle, },
775         {
776                 .name = "C6",
777                 .desc = "MWAIT 0x51",
778                 .flags = MWAIT2flg(0x51) | CPUIDLE_FLAG_TLB_FLUSHED,
779                 .exit_latency = 15,
780                 .target_residency = 45,
781                 .enter = &intel_idle,
782                 .enter_s2idle = intel_idle_s2idle, },
783         {
784                 .enter = NULL }
785 };
786 static struct cpuidle_state knl_cstates[] = {
787         {
788                 .name = "C1",
789                 .desc = "MWAIT 0x00",
790                 .flags = MWAIT2flg(0x00),
791                 .exit_latency = 1,
792                 .target_residency = 2,
793                 .enter = &intel_idle,
794                 .enter_s2idle = intel_idle_s2idle },
795         {
796                 .name = "C6",
797                 .desc = "MWAIT 0x10",
798                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
799                 .exit_latency = 120,
800                 .target_residency = 500,
801                 .enter = &intel_idle,
802                 .enter_s2idle = intel_idle_s2idle },
803         {
804                 .enter = NULL }
805 };
806
807 static struct cpuidle_state bxt_cstates[] = {
808         {
809                 .name = "C1",
810                 .desc = "MWAIT 0x00",
811                 .flags = MWAIT2flg(0x00),
812                 .exit_latency = 2,
813                 .target_residency = 2,
814                 .enter = &intel_idle,
815                 .enter_s2idle = intel_idle_s2idle, },
816         {
817                 .name = "C1E",
818                 .desc = "MWAIT 0x01",
819                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
820                 .exit_latency = 10,
821                 .target_residency = 20,
822                 .enter = &intel_idle,
823                 .enter_s2idle = intel_idle_s2idle, },
824         {
825                 .name = "C6",
826                 .desc = "MWAIT 0x20",
827                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
828                 .exit_latency = 133,
829                 .target_residency = 133,
830                 .enter = &intel_idle,
831                 .enter_s2idle = intel_idle_s2idle, },
832         {
833                 .name = "C7s",
834                 .desc = "MWAIT 0x31",
835                 .flags = MWAIT2flg(0x31) | CPUIDLE_FLAG_TLB_FLUSHED,
836                 .exit_latency = 155,
837                 .target_residency = 155,
838                 .enter = &intel_idle,
839                 .enter_s2idle = intel_idle_s2idle, },
840         {
841                 .name = "C8",
842                 .desc = "MWAIT 0x40",
843                 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
844                 .exit_latency = 1000,
845                 .target_residency = 1000,
846                 .enter = &intel_idle,
847                 .enter_s2idle = intel_idle_s2idle, },
848         {
849                 .name = "C9",
850                 .desc = "MWAIT 0x50",
851                 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
852                 .exit_latency = 2000,
853                 .target_residency = 2000,
854                 .enter = &intel_idle,
855                 .enter_s2idle = intel_idle_s2idle, },
856         {
857                 .name = "C10",
858                 .desc = "MWAIT 0x60",
859                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
860                 .exit_latency = 10000,
861                 .target_residency = 10000,
862                 .enter = &intel_idle,
863                 .enter_s2idle = intel_idle_s2idle, },
864         {
865                 .enter = NULL }
866 };
867
868 static struct cpuidle_state dnv_cstates[] = {
869         {
870                 .name = "C1",
871                 .desc = "MWAIT 0x00",
872                 .flags = MWAIT2flg(0x00),
873                 .exit_latency = 2,
874                 .target_residency = 2,
875                 .enter = &intel_idle,
876                 .enter_s2idle = intel_idle_s2idle, },
877         {
878                 .name = "C1E",
879                 .desc = "MWAIT 0x01",
880                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
881                 .exit_latency = 10,
882                 .target_residency = 20,
883                 .enter = &intel_idle,
884                 .enter_s2idle = intel_idle_s2idle, },
885         {
886                 .name = "C6",
887                 .desc = "MWAIT 0x20",
888                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
889                 .exit_latency = 50,
890                 .target_residency = 500,
891                 .enter = &intel_idle,
892                 .enter_s2idle = intel_idle_s2idle, },
893         {
894                 .enter = NULL }
895 };
896
897 /**
898  * intel_idle
899  * @dev: cpuidle_device
900  * @drv: cpuidle driver
901  * @index: index of cpuidle state
902  *
903  * Must be called under local_irq_disable().
904  */
905 static __cpuidle int intel_idle(struct cpuidle_device *dev,
906                                 struct cpuidle_driver *drv, int index)
907 {
908         unsigned long ecx = 1; /* break on interrupt flag */
909         struct cpuidle_state *state = &drv->states[index];
910         unsigned long eax = flg2MWAIT(state->flags);
911         unsigned int cstate;
912         bool uninitialized_var(tick);
913         int cpu = smp_processor_id();
914
915         /*
916          * leave_mm() to avoid costly and often unnecessary wakeups
917          * for flushing the user TLB's associated with the active mm.
918          */
919         if (state->flags & CPUIDLE_FLAG_TLB_FLUSHED)
920                 leave_mm(cpu);
921
922         if (!static_cpu_has(X86_FEATURE_ARAT)) {
923                 cstate = (((eax) >> MWAIT_SUBSTATE_SIZE) &
924                                 MWAIT_CSTATE_MASK) + 1;
925                 tick = false;
926                 if (!(lapic_timer_reliable_states & (1 << (cstate)))) {
927                         tick = true;
928                         tick_broadcast_enter();
929                 }
930         }
931
932         mwait_idle_with_hints(eax, ecx);
933
934         if (!static_cpu_has(X86_FEATURE_ARAT) && tick)
935                 tick_broadcast_exit();
936
937         return index;
938 }
939
940 /**
941  * intel_idle_s2idle - simplified "enter" callback routine for suspend-to-idle
942  * @dev: cpuidle_device
943  * @drv: cpuidle driver
944  * @index: state index
945  */
946 static void intel_idle_s2idle(struct cpuidle_device *dev,
947                              struct cpuidle_driver *drv, int index)
948 {
949         unsigned long ecx = 1; /* break on interrupt flag */
950         unsigned long eax = flg2MWAIT(drv->states[index].flags);
951
952         mwait_idle_with_hints(eax, ecx);
953 }
954
955 static const struct idle_cpu idle_cpu_nehalem = {
956         .state_table = nehalem_cstates,
957         .auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE,
958         .disable_promotion_to_c1e = true,
959 };
960
961 static const struct idle_cpu idle_cpu_nhx = {
962         .state_table = nehalem_cstates,
963         .auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE,
964         .disable_promotion_to_c1e = true,
965         .use_acpi = true,
966 };
967
968 static const struct idle_cpu idle_cpu_atom = {
969         .state_table = atom_cstates,
970 };
971
972 static const struct idle_cpu idle_cpu_tangier = {
973         .state_table = tangier_cstates,
974 };
975
976 static const struct idle_cpu idle_cpu_lincroft = {
977         .state_table = atom_cstates,
978         .auto_demotion_disable_flags = ATM_LNC_C6_AUTO_DEMOTE,
979 };
980
981 static const struct idle_cpu idle_cpu_snb = {
982         .state_table = snb_cstates,
983         .disable_promotion_to_c1e = true,
984 };
985
986 static const struct idle_cpu idle_cpu_snx = {
987         .state_table = snb_cstates,
988         .disable_promotion_to_c1e = true,
989         .use_acpi = true,
990 };
991
992 static const struct idle_cpu idle_cpu_byt = {
993         .state_table = byt_cstates,
994         .disable_promotion_to_c1e = true,
995         .byt_auto_demotion_disable_flag = true,
996 };
997
998 static const struct idle_cpu idle_cpu_cht = {
999         .state_table = cht_cstates,
1000         .disable_promotion_to_c1e = true,
1001         .byt_auto_demotion_disable_flag = true,
1002 };
1003
1004 static const struct idle_cpu idle_cpu_ivb = {
1005         .state_table = ivb_cstates,
1006         .disable_promotion_to_c1e = true,
1007 };
1008
1009 static const struct idle_cpu idle_cpu_ivt = {
1010         .state_table = ivt_cstates,
1011         .disable_promotion_to_c1e = true,
1012         .use_acpi = true,
1013 };
1014
1015 static const struct idle_cpu idle_cpu_hsw = {
1016         .state_table = hsw_cstates,
1017         .disable_promotion_to_c1e = true,
1018 };
1019
1020 static const struct idle_cpu idle_cpu_hsx = {
1021         .state_table = hsw_cstates,
1022         .disable_promotion_to_c1e = true,
1023         .use_acpi = true,
1024 };
1025
1026 static const struct idle_cpu idle_cpu_bdw = {
1027         .state_table = bdw_cstates,
1028         .disable_promotion_to_c1e = true,
1029 };
1030
1031 static const struct idle_cpu idle_cpu_bdx = {
1032         .state_table = bdw_cstates,
1033         .disable_promotion_to_c1e = true,
1034         .use_acpi = true,
1035 };
1036
1037 static const struct idle_cpu idle_cpu_skl = {
1038         .state_table = skl_cstates,
1039         .disable_promotion_to_c1e = true,
1040 };
1041
1042 static const struct idle_cpu idle_cpu_skx = {
1043         .state_table = skx_cstates,
1044         .disable_promotion_to_c1e = true,
1045         .use_acpi = true,
1046 };
1047
1048 static const struct idle_cpu idle_cpu_avn = {
1049         .state_table = avn_cstates,
1050         .disable_promotion_to_c1e = true,
1051         .use_acpi = true,
1052 };
1053
1054 static const struct idle_cpu idle_cpu_knl = {
1055         .state_table = knl_cstates,
1056         .use_acpi = true,
1057 };
1058
1059 static const struct idle_cpu idle_cpu_bxt = {
1060         .state_table = bxt_cstates,
1061         .disable_promotion_to_c1e = true,
1062 };
1063
1064 static const struct idle_cpu idle_cpu_dnv = {
1065         .state_table = dnv_cstates,
1066         .disable_promotion_to_c1e = true,
1067         .use_acpi = true,
1068 };
1069
1070 static const struct x86_cpu_id intel_idle_ids[] __initconst = {
1071         INTEL_CPU_FAM6(NEHALEM_EP,              idle_cpu_nhx),
1072         INTEL_CPU_FAM6(NEHALEM,                 idle_cpu_nehalem),
1073         INTEL_CPU_FAM6(NEHALEM_G,               idle_cpu_nehalem),
1074         INTEL_CPU_FAM6(WESTMERE,                idle_cpu_nehalem),
1075         INTEL_CPU_FAM6(WESTMERE_EP,             idle_cpu_nhx),
1076         INTEL_CPU_FAM6(NEHALEM_EX,              idle_cpu_nhx),
1077         INTEL_CPU_FAM6(ATOM_BONNELL,            idle_cpu_atom),
1078         INTEL_CPU_FAM6(ATOM_BONNELL_MID,        idle_cpu_lincroft),
1079         INTEL_CPU_FAM6(WESTMERE_EX,             idle_cpu_nhx),
1080         INTEL_CPU_FAM6(SANDYBRIDGE,             idle_cpu_snb),
1081         INTEL_CPU_FAM6(SANDYBRIDGE_X,           idle_cpu_snx),
1082         INTEL_CPU_FAM6(ATOM_SALTWELL,           idle_cpu_atom),
1083         INTEL_CPU_FAM6(ATOM_SILVERMONT,         idle_cpu_byt),
1084         INTEL_CPU_FAM6(ATOM_SILVERMONT_MID,     idle_cpu_tangier),
1085         INTEL_CPU_FAM6(ATOM_AIRMONT,            idle_cpu_cht),
1086         INTEL_CPU_FAM6(IVYBRIDGE,               idle_cpu_ivb),
1087         INTEL_CPU_FAM6(IVYBRIDGE_X,             idle_cpu_ivt),
1088         INTEL_CPU_FAM6(HASWELL,                 idle_cpu_hsw),
1089         INTEL_CPU_FAM6(HASWELL_X,               idle_cpu_hsx),
1090         INTEL_CPU_FAM6(HASWELL_L,               idle_cpu_hsw),
1091         INTEL_CPU_FAM6(HASWELL_G,               idle_cpu_hsw),
1092         INTEL_CPU_FAM6(ATOM_SILVERMONT_D,       idle_cpu_avn),
1093         INTEL_CPU_FAM6(BROADWELL,               idle_cpu_bdw),
1094         INTEL_CPU_FAM6(BROADWELL_G,             idle_cpu_bdw),
1095         INTEL_CPU_FAM6(BROADWELL_X,             idle_cpu_bdx),
1096         INTEL_CPU_FAM6(BROADWELL_D,             idle_cpu_bdx),
1097         INTEL_CPU_FAM6(SKYLAKE_L,               idle_cpu_skl),
1098         INTEL_CPU_FAM6(SKYLAKE,                 idle_cpu_skl),
1099         INTEL_CPU_FAM6(KABYLAKE_L,              idle_cpu_skl),
1100         INTEL_CPU_FAM6(KABYLAKE,                idle_cpu_skl),
1101         INTEL_CPU_FAM6(SKYLAKE_X,               idle_cpu_skx),
1102         INTEL_CPU_FAM6(XEON_PHI_KNL,            idle_cpu_knl),
1103         INTEL_CPU_FAM6(XEON_PHI_KNM,            idle_cpu_knl),
1104         INTEL_CPU_FAM6(ATOM_GOLDMONT,           idle_cpu_bxt),
1105         INTEL_CPU_FAM6(ATOM_GOLDMONT_PLUS,      idle_cpu_bxt),
1106         INTEL_CPU_FAM6(ATOM_GOLDMONT_D,         idle_cpu_dnv),
1107         INTEL_CPU_FAM6(ATOM_TREMONT_D,          idle_cpu_dnv),
1108         {}
1109 };
1110
1111 #define INTEL_CPU_FAM6_MWAIT \
1112         { X86_VENDOR_INTEL, 6, X86_MODEL_ANY, X86_FEATURE_MWAIT, 0 }
1113
1114 static const struct x86_cpu_id intel_mwait_ids[] __initconst = {
1115         INTEL_CPU_FAM6_MWAIT,
1116         {}
1117 };
1118
1119 static bool __init intel_idle_max_cstate_reached(int cstate)
1120 {
1121         if (cstate + 1 > max_cstate) {
1122                 pr_info("max_cstate %d reached\n", max_cstate);
1123                 return true;
1124         }
1125         return false;
1126 }
1127
1128 #ifdef CONFIG_ACPI_PROCESSOR_CSTATE
1129 #include <acpi/processor.h>
1130
1131 static bool no_acpi __read_mostly;
1132 module_param(no_acpi, bool, 0444);
1133 MODULE_PARM_DESC(no_acpi, "Do not use ACPI _CST for building the idle states list");
1134
1135 static bool force_use_acpi __read_mostly; /* No effect if no_acpi is set. */
1136 module_param_named(use_acpi, force_use_acpi, bool, 0444);
1137 MODULE_PARM_DESC(use_acpi, "Use ACPI _CST for building the idle states list");
1138
1139 static struct acpi_processor_power acpi_state_table __initdata;
1140
1141 /**
1142  * intel_idle_cst_usable - Check if the _CST information can be used.
1143  *
1144  * Check if all of the C-states listed by _CST in the max_cstate range are
1145  * ACPI_CSTATE_FFH, which means that they should be entered via MWAIT.
1146  */
1147 static bool __init intel_idle_cst_usable(void)
1148 {
1149         int cstate, limit;
1150
1151         limit = min_t(int, min_t(int, CPUIDLE_STATE_MAX, max_cstate + 1),
1152                       acpi_state_table.count);
1153
1154         for (cstate = 1; cstate < limit; cstate++) {
1155                 struct acpi_processor_cx *cx = &acpi_state_table.states[cstate];
1156
1157                 if (cx->entry_method != ACPI_CSTATE_FFH)
1158                         return false;
1159         }
1160
1161         return true;
1162 }
1163
1164 static bool __init intel_idle_acpi_cst_extract(void)
1165 {
1166         unsigned int cpu;
1167
1168         if (no_acpi) {
1169                 pr_debug("Not allowed to use ACPI _CST\n");
1170                 return false;
1171         }
1172
1173         for_each_possible_cpu(cpu) {
1174                 struct acpi_processor *pr = per_cpu(processors, cpu);
1175
1176                 if (!pr)
1177                         continue;
1178
1179                 if (acpi_processor_evaluate_cst(pr->handle, cpu, &acpi_state_table))
1180                         continue;
1181
1182                 acpi_state_table.count++;
1183
1184                 if (!intel_idle_cst_usable())
1185                         continue;
1186
1187                 if (!acpi_processor_claim_cst_control()) {
1188                         acpi_state_table.count = 0;
1189                         return false;
1190                 }
1191
1192                 return true;
1193         }
1194
1195         pr_debug("ACPI _CST not found or not usable\n");
1196         return false;
1197 }
1198
1199 static void __init intel_idle_init_cstates_acpi(struct cpuidle_driver *drv)
1200 {
1201         int cstate, limit = min_t(int, CPUIDLE_STATE_MAX, acpi_state_table.count);
1202
1203         /*
1204          * If limit > 0, intel_idle_cst_usable() has returned 'true', so all of
1205          * the interesting states are ACPI_CSTATE_FFH.
1206          */
1207         for (cstate = 1; cstate < limit; cstate++) {
1208                 struct acpi_processor_cx *cx;
1209                 struct cpuidle_state *state;
1210
1211                 if (intel_idle_max_cstate_reached(cstate))
1212                         break;
1213
1214                 cx = &acpi_state_table.states[cstate];
1215
1216                 state = &drv->states[drv->state_count++];
1217
1218                 snprintf(state->name, CPUIDLE_NAME_LEN, "C%d_ACPI", cstate);
1219                 strlcpy(state->desc, cx->desc, CPUIDLE_DESC_LEN);
1220                 state->exit_latency = cx->latency;
1221                 /*
1222                  * For C1-type C-states use the same number for both the exit
1223                  * latency and target residency, because that is the case for
1224                  * C1 in the majority of the static C-states tables above.
1225                  * For the other types of C-states, however, set the target
1226                  * residency to 3 times the exit latency which should lead to
1227                  * a reasonable balance between energy-efficiency and
1228                  * performance in the majority of interesting cases.
1229                  */
1230                 state->target_residency = cx->latency;
1231                 if (cx->type > ACPI_STATE_C1)
1232                         state->target_residency *= 3;
1233
1234                 state->flags = MWAIT2flg(cx->address);
1235                 if (cx->type > ACPI_STATE_C2)
1236                         state->flags |= CPUIDLE_FLAG_TLB_FLUSHED;
1237
1238                 if (disabled_states_mask & BIT(cstate))
1239                         state->flags |= CPUIDLE_FLAG_OFF;
1240
1241                 state->enter = intel_idle;
1242                 state->enter_s2idle = intel_idle_s2idle;
1243         }
1244 }
1245
1246 static bool __init intel_idle_off_by_default(u32 mwait_hint)
1247 {
1248         int cstate, limit;
1249
1250         /*
1251          * If there are no _CST C-states, do not disable any C-states by
1252          * default.
1253          */
1254         if (!acpi_state_table.count)
1255                 return false;
1256
1257         limit = min_t(int, CPUIDLE_STATE_MAX, acpi_state_table.count);
1258         /*
1259          * If limit > 0, intel_idle_cst_usable() has returned 'true', so all of
1260          * the interesting states are ACPI_CSTATE_FFH.
1261          */
1262         for (cstate = 1; cstate < limit; cstate++) {
1263                 if (acpi_state_table.states[cstate].address == mwait_hint)
1264                         return false;
1265         }
1266         return true;
1267 }
1268 #else /* !CONFIG_ACPI_PROCESSOR_CSTATE */
1269 #define force_use_acpi  (false)
1270
1271 static inline bool intel_idle_acpi_cst_extract(void) { return false; }
1272 static inline void intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) { }
1273 static inline bool intel_idle_off_by_default(u32 mwait_hint) { return false; }
1274 #endif /* !CONFIG_ACPI_PROCESSOR_CSTATE */
1275
1276 /*
1277  * ivt_idle_state_table_update(void)
1278  *
1279  * Tune IVT multi-socket targets
1280  * Assumption: num_sockets == (max_package_num + 1)
1281  */
1282 static void __init ivt_idle_state_table_update(void)
1283 {
1284         /* IVT uses a different table for 1-2, 3-4, and > 4 sockets */
1285         int cpu, package_num, num_sockets = 1;
1286
1287         for_each_online_cpu(cpu) {
1288                 package_num = topology_physical_package_id(cpu);
1289                 if (package_num + 1 > num_sockets) {
1290                         num_sockets = package_num + 1;
1291
1292                         if (num_sockets > 4) {
1293                                 cpuidle_state_table = ivt_cstates_8s;
1294                                 return;
1295                         }
1296                 }
1297         }
1298
1299         if (num_sockets > 2)
1300                 cpuidle_state_table = ivt_cstates_4s;
1301
1302         /* else, 1 and 2 socket systems use default ivt_cstates */
1303 }
1304
1305 /**
1306  * irtl_2_usec - IRTL to microseconds conversion.
1307  * @irtl: IRTL MSR value.
1308  *
1309  * Translate the IRTL (Interrupt Response Time Limit) MSR value to microseconds.
1310  */
1311 static unsigned long long __init irtl_2_usec(unsigned long long irtl)
1312 {
1313         static const unsigned int irtl_ns_units[] __initconst = {
1314                 1, 32, 1024, 32768, 1048576, 33554432, 0, 0
1315         };
1316         unsigned long long ns;
1317
1318         if (!irtl)
1319                 return 0;
1320
1321         ns = irtl_ns_units[(irtl >> 10) & 0x7];
1322
1323         return div_u64((irtl & 0x3FF) * ns, NSEC_PER_USEC);
1324 }
1325
1326 /*
1327  * bxt_idle_state_table_update(void)
1328  *
1329  * On BXT, we trust the IRTL to show the definitive maximum latency
1330  * We use the same value for target_residency.
1331  */
1332 static void __init bxt_idle_state_table_update(void)
1333 {
1334         unsigned long long msr;
1335         unsigned int usec;
1336
1337         rdmsrl(MSR_PKGC6_IRTL, msr);
1338         usec = irtl_2_usec(msr);
1339         if (usec) {
1340                 bxt_cstates[2].exit_latency = usec;
1341                 bxt_cstates[2].target_residency = usec;
1342         }
1343
1344         rdmsrl(MSR_PKGC7_IRTL, msr);
1345         usec = irtl_2_usec(msr);
1346         if (usec) {
1347                 bxt_cstates[3].exit_latency = usec;
1348                 bxt_cstates[3].target_residency = usec;
1349         }
1350
1351         rdmsrl(MSR_PKGC8_IRTL, msr);
1352         usec = irtl_2_usec(msr);
1353         if (usec) {
1354                 bxt_cstates[4].exit_latency = usec;
1355                 bxt_cstates[4].target_residency = usec;
1356         }
1357
1358         rdmsrl(MSR_PKGC9_IRTL, msr);
1359         usec = irtl_2_usec(msr);
1360         if (usec) {
1361                 bxt_cstates[5].exit_latency = usec;
1362                 bxt_cstates[5].target_residency = usec;
1363         }
1364
1365         rdmsrl(MSR_PKGC10_IRTL, msr);
1366         usec = irtl_2_usec(msr);
1367         if (usec) {
1368                 bxt_cstates[6].exit_latency = usec;
1369                 bxt_cstates[6].target_residency = usec;
1370         }
1371
1372 }
1373 /*
1374  * sklh_idle_state_table_update(void)
1375  *
1376  * On SKL-H (model 0x5e) disable C8 and C9 if:
1377  * C10 is enabled and SGX disabled
1378  */
1379 static void __init sklh_idle_state_table_update(void)
1380 {
1381         unsigned long long msr;
1382         unsigned int eax, ebx, ecx, edx;
1383
1384
1385         /* if PC10 disabled via cmdline intel_idle.max_cstate=7 or shallower */
1386         if (max_cstate <= 7)
1387                 return;
1388
1389         /* if PC10 not present in CPUID.MWAIT.EDX */
1390         if ((mwait_substates & (0xF << 28)) == 0)
1391                 return;
1392
1393         rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr);
1394
1395         /* PC10 is not enabled in PKG C-state limit */
1396         if ((msr & 0xF) != 8)
1397                 return;
1398
1399         ecx = 0;
1400         cpuid(7, &eax, &ebx, &ecx, &edx);
1401
1402         /* if SGX is present */
1403         if (ebx & (1 << 2)) {
1404
1405                 rdmsrl(MSR_IA32_FEAT_CTL, msr);
1406
1407                 /* if SGX is enabled */
1408                 if (msr & (1 << 18))
1409                         return;
1410         }
1411
1412         skl_cstates[5].flags |= CPUIDLE_FLAG_UNUSABLE;  /* C8-SKL */
1413         skl_cstates[6].flags |= CPUIDLE_FLAG_UNUSABLE;  /* C9-SKL */
1414 }
1415
1416 static bool __init intel_idle_verify_cstate(unsigned int mwait_hint)
1417 {
1418         unsigned int mwait_cstate = MWAIT_HINT2CSTATE(mwait_hint) + 1;
1419         unsigned int num_substates = (mwait_substates >> mwait_cstate * 4) &
1420                                         MWAIT_SUBSTATE_MASK;
1421
1422         /* Ignore the C-state if there are NO sub-states in CPUID for it. */
1423         if (num_substates == 0)
1424                 return false;
1425
1426         if (mwait_cstate > 2 && !boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
1427                 mark_tsc_unstable("TSC halts in idle states deeper than C2");
1428
1429         return true;
1430 }
1431
1432 static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv)
1433 {
1434         int cstate;
1435
1436         switch (boot_cpu_data.x86_model) {
1437         case INTEL_FAM6_IVYBRIDGE_X:
1438                 ivt_idle_state_table_update();
1439                 break;
1440         case INTEL_FAM6_ATOM_GOLDMONT:
1441         case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
1442                 bxt_idle_state_table_update();
1443                 break;
1444         case INTEL_FAM6_SKYLAKE:
1445                 sklh_idle_state_table_update();
1446                 break;
1447         }
1448
1449         for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) {
1450                 unsigned int mwait_hint;
1451
1452                 if (intel_idle_max_cstate_reached(cstate))
1453                         break;
1454
1455                 if (!cpuidle_state_table[cstate].enter &&
1456                     !cpuidle_state_table[cstate].enter_s2idle)
1457                         break;
1458
1459                 /* If marked as unusable, skip this state. */
1460                 if (cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_UNUSABLE) {
1461                         pr_debug("state %s is disabled\n",
1462                                  cpuidle_state_table[cstate].name);
1463                         continue;
1464                 }
1465
1466                 mwait_hint = flg2MWAIT(cpuidle_state_table[cstate].flags);
1467                 if (!intel_idle_verify_cstate(mwait_hint))
1468                         continue;
1469
1470                 /* Structure copy. */
1471                 drv->states[drv->state_count] = cpuidle_state_table[cstate];
1472
1473                 if ((disabled_states_mask & BIT(drv->state_count)) ||
1474                     ((icpu->use_acpi || force_use_acpi) &&
1475                      intel_idle_off_by_default(mwait_hint) &&
1476                      !(cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_ALWAYS_ENABLE)))
1477                         drv->states[drv->state_count].flags |= CPUIDLE_FLAG_OFF;
1478
1479                 drv->state_count++;
1480         }
1481
1482         if (icpu->byt_auto_demotion_disable_flag) {
1483                 wrmsrl(MSR_CC6_DEMOTION_POLICY_CONFIG, 0);
1484                 wrmsrl(MSR_MC6_DEMOTION_POLICY_CONFIG, 0);
1485         }
1486 }
1487
1488 /*
1489  * intel_idle_cpuidle_driver_init()
1490  * allocate, initialize cpuidle_states
1491  */
1492 static void __init intel_idle_cpuidle_driver_init(struct cpuidle_driver *drv)
1493 {
1494         cpuidle_poll_state_init(drv);
1495
1496         if (disabled_states_mask & BIT(0))
1497                 drv->states[0].flags |= CPUIDLE_FLAG_OFF;
1498
1499         drv->state_count = 1;
1500
1501         if (icpu)
1502                 intel_idle_init_cstates_icpu(drv);
1503         else
1504                 intel_idle_init_cstates_acpi(drv);
1505 }
1506
1507 static void auto_demotion_disable(void)
1508 {
1509         unsigned long long msr_bits;
1510
1511         rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits);
1512         msr_bits &= ~(icpu->auto_demotion_disable_flags);
1513         wrmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits);
1514 }
1515
1516 static void c1e_promotion_disable(void)
1517 {
1518         unsigned long long msr_bits;
1519
1520         rdmsrl(MSR_IA32_POWER_CTL, msr_bits);
1521         msr_bits &= ~0x2;
1522         wrmsrl(MSR_IA32_POWER_CTL, msr_bits);
1523 }
1524
1525 /*
1526  * intel_idle_cpu_init()
1527  * allocate, initialize, register cpuidle_devices
1528  * @cpu: cpu/core to initialize
1529  */
1530 static int intel_idle_cpu_init(unsigned int cpu)
1531 {
1532         struct cpuidle_device *dev;
1533
1534         dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu);
1535         dev->cpu = cpu;
1536
1537         if (cpuidle_register_device(dev)) {
1538                 pr_debug("cpuidle_register_device %d failed!\n", cpu);
1539                 return -EIO;
1540         }
1541
1542         if (!icpu)
1543                 return 0;
1544
1545         if (icpu->auto_demotion_disable_flags)
1546                 auto_demotion_disable();
1547
1548         if (icpu->disable_promotion_to_c1e)
1549                 c1e_promotion_disable();
1550
1551         return 0;
1552 }
1553
1554 static int intel_idle_cpu_online(unsigned int cpu)
1555 {
1556         struct cpuidle_device *dev;
1557
1558         if (lapic_timer_reliable_states != LAPIC_TIMER_ALWAYS_RELIABLE)
1559                 tick_broadcast_enable();
1560
1561         /*
1562          * Some systems can hotplug a cpu at runtime after
1563          * the kernel has booted, we have to initialize the
1564          * driver in this case
1565          */
1566         dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu);
1567         if (!dev->registered)
1568                 return intel_idle_cpu_init(cpu);
1569
1570         return 0;
1571 }
1572
1573 /**
1574  * intel_idle_cpuidle_devices_uninit - Unregister all cpuidle devices.
1575  */
1576 static void __init intel_idle_cpuidle_devices_uninit(void)
1577 {
1578         int i;
1579
1580         for_each_online_cpu(i)
1581                 cpuidle_unregister_device(per_cpu_ptr(intel_idle_cpuidle_devices, i));
1582 }
1583
1584 static int __init intel_idle_init(void)
1585 {
1586         const struct x86_cpu_id *id;
1587         unsigned int eax, ebx, ecx;
1588         int retval;
1589
1590         /* Do not load intel_idle at all for now if idle= is passed */
1591         if (boot_option_idle_override != IDLE_NO_OVERRIDE)
1592                 return -ENODEV;
1593
1594         if (max_cstate == 0) {
1595                 pr_debug("disabled\n");
1596                 return -EPERM;
1597         }
1598
1599         id = x86_match_cpu(intel_idle_ids);
1600         if (id) {
1601                 if (!boot_cpu_has(X86_FEATURE_MWAIT)) {
1602                         pr_debug("Please enable MWAIT in BIOS SETUP\n");
1603                         return -ENODEV;
1604                 }
1605         } else {
1606                 id = x86_match_cpu(intel_mwait_ids);
1607                 if (!id)
1608                         return -ENODEV;
1609         }
1610
1611         if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF)
1612                 return -ENODEV;
1613
1614         cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &mwait_substates);
1615
1616         if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) ||
1617             !(ecx & CPUID5_ECX_INTERRUPT_BREAK) ||
1618             !mwait_substates)
1619                         return -ENODEV;
1620
1621         pr_debug("MWAIT substates: 0x%x\n", mwait_substates);
1622
1623         icpu = (const struct idle_cpu *)id->driver_data;
1624         if (icpu) {
1625                 cpuidle_state_table = icpu->state_table;
1626                 if (icpu->use_acpi || force_use_acpi)
1627                         intel_idle_acpi_cst_extract();
1628         } else if (!intel_idle_acpi_cst_extract()) {
1629                 return -ENODEV;
1630         }
1631
1632         pr_debug("v" INTEL_IDLE_VERSION " model 0x%X\n",
1633                  boot_cpu_data.x86_model);
1634
1635         intel_idle_cpuidle_devices = alloc_percpu(struct cpuidle_device);
1636         if (!intel_idle_cpuidle_devices)
1637                 return -ENOMEM;
1638
1639         intel_idle_cpuidle_driver_init(&intel_idle_driver);
1640
1641         retval = cpuidle_register_driver(&intel_idle_driver);
1642         if (retval) {
1643                 struct cpuidle_driver *drv = cpuidle_get_driver();
1644                 printk(KERN_DEBUG pr_fmt("intel_idle yielding to %s\n"),
1645                        drv ? drv->name : "none");
1646                 goto init_driver_fail;
1647         }
1648
1649         if (boot_cpu_has(X86_FEATURE_ARAT))     /* Always Reliable APIC Timer */
1650                 lapic_timer_reliable_states = LAPIC_TIMER_ALWAYS_RELIABLE;
1651
1652         retval = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "idle/intel:online",
1653                                    intel_idle_cpu_online, NULL);
1654         if (retval < 0)
1655                 goto hp_setup_fail;
1656
1657         pr_debug("lapic_timer_reliable_states 0x%x\n",
1658                  lapic_timer_reliable_states);
1659
1660         return 0;
1661
1662 hp_setup_fail:
1663         intel_idle_cpuidle_devices_uninit();
1664         cpuidle_unregister_driver(&intel_idle_driver);
1665 init_driver_fail:
1666         free_percpu(intel_idle_cpuidle_devices);
1667         return retval;
1668
1669 }
1670 device_initcall(intel_idle_init);
1671
1672 /*
1673  * We are not really modular, but we used to support that.  Meaning we also
1674  * support "intel_idle.max_cstate=..." at boot and also a read-only export of
1675  * it at /sys/module/intel_idle/parameters/max_cstate -- so using module_param
1676  * is the easiest way (currently) to continue doing that.
1677  */
1678 module_param(max_cstate, int, 0444);
1679 /*
1680  * The positions of the bits that are set in this number are the indices of the
1681  * idle states to be disabled by default (as reflected by the names of the
1682  * corresponding idle state directories in sysfs, "state0", "state1" ...
1683  * "state<i>" ..., where <i> is the index of the given state).
1684  */
1685 module_param_named(states_off, disabled_states_mask, uint, 0444);
1686 MODULE_PARM_DESC(states_off, "Mask of disabled idle states");