6326bee97c0bac28567b4abf759aa99e7934a676
[linux-2.6-microblaze.git] / tools / power / x86 / turbostat / turbostat.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * turbostat -- show CPU frequency and C-state residency
4  * on modern Intel and AMD processors.
5  *
6  * Copyright (c) 2013 Intel Corporation.
7  * Len Brown <len.brown@intel.com>
8  */
9
10 #define _GNU_SOURCE
11 #include MSRHEADER
12 #include INTEL_FAMILY_HEADER
13 #include <stdarg.h>
14 #include <stdio.h>
15 #include <err.h>
16 #include <unistd.h>
17 #include <sys/types.h>
18 #include <sys/wait.h>
19 #include <sys/stat.h>
20 #include <sys/select.h>
21 #include <sys/resource.h>
22 #include <fcntl.h>
23 #include <signal.h>
24 #include <sys/time.h>
25 #include <stdlib.h>
26 #include <getopt.h>
27 #include <dirent.h>
28 #include <string.h>
29 #include <ctype.h>
30 #include <sched.h>
31 #include <time.h>
32 #include <cpuid.h>
33 #include <sys/capability.h>
34 #include <errno.h>
35 #include <math.h>
36 #include <linux/perf_event.h>
37 #include <asm/unistd.h>
38 #include <stdbool.h>
39
/*
 * File-scope state shared by the functions in this file.
 */
char *proc_stat = "/proc/stat";
FILE *outf;                     /* stream that help() writes to */
int *fd_percpu;                 /* per-cpu cache of open /dev/cpu/N/msr descriptors; 0 == not opened yet (see get_msr_fd()) */
int *fd_instr_count_percpu;     /* per-cpu cache of perf instruction-counter descriptors; 0 == not opened yet */
/* default measurement interval of 5 seconds, kept in both timeval and timespec form */
struct timeval interval_tv = {5, 0};
struct timespec interval_ts = {5, 0};

/* Save original CPU model */
unsigned int model_orig;

unsigned int num_iterations;
unsigned int debug;
unsigned int quiet;
unsigned int shown;
unsigned int sums_need_wide_columns;
unsigned int rapl_joules;
unsigned int summary_only;
unsigned int list_header_only;
unsigned int dump_only;
unsigned int do_snb_cstates;
unsigned int do_knl_cstates;
unsigned int do_slm_cstates;
unsigned int use_c1_residency_msr;
unsigned int has_aperf;
unsigned int has_epb;
unsigned int do_irtl_snb;
unsigned int do_irtl_hsw;
unsigned int units = 1000000;   /* MHz etc */
unsigned int genuine_intel;
unsigned int authentic_amd;
unsigned int hygon_genuine;
unsigned int max_level, max_extended_level;
unsigned int has_invariant_tsc;
unsigned int do_nhm_platform_info;
unsigned int no_MSR_MISC_PWR_MGMT;
unsigned int aperf_mperf_multiplier = 1;
double bclk;
double base_hz;
unsigned int has_base_hz;
double tsc_tweak = 1.0;
unsigned int show_pkg_only;
unsigned int show_core_only;
char *output_buffer, *outp;     /* outp is the sprintf() write cursor used by print_header() */
unsigned int do_rapl;           /* bitmask of RAPL_* feature bits (tested by idx_valid()) */
unsigned int do_dts;
unsigned int do_ptm;
unsigned int do_ipc;
unsigned long long  gfx_cur_rc6_ms;
unsigned long long cpuidle_cur_cpu_lpi_us;
unsigned long long cpuidle_cur_sys_lpi_us;
unsigned int gfx_cur_mhz;
unsigned int gfx_act_mhz;
unsigned int tcc_activation_temp;
unsigned int tcc_activation_temp_override;
int tcc_offset_bits;
double rapl_power_units, rapl_time_units;
double rapl_dram_energy_units, rapl_energy_units;
double rapl_joule_counter_range;
unsigned int do_core_perf_limit_reasons;
unsigned int has_automatic_cstate_conversion;
unsigned int dis_cstate_prewake;
unsigned int do_gfx_perf_limit_reasons;
unsigned int do_ring_perf_limit_reasons;
unsigned int crystal_hz;
unsigned long long tsc_hz;
int base_cpu;
double discover_bclk(unsigned int family, unsigned int model);
unsigned int has_hwp;   /* IA32_PM_ENABLE, IA32_HWP_CAPABILITIES */
                        /* IA32_HWP_REQUEST, IA32_HWP_STATUS */
unsigned int has_hwp_notify;            /* IA32_HWP_INTERRUPT */
unsigned int has_hwp_activity_window;   /* IA32_HWP_REQUEST[bits 41:32] */
unsigned int has_hwp_epp;               /* IA32_HWP_REQUEST[bits 31:24] */
unsigned int has_hwp_pkg;               /* IA32_HWP_REQUEST_PKG */
unsigned int has_misc_feature_control;
unsigned int first_counter_read = 1;
int ignore_stdin;
116
/*
 * RAPL feature bits.  They are OR-ed into do_rapl and tested there
 * (see idx_valid() and idx_to_offset()).  The trailing comment on each
 * bit names the MSR(s) the feature relates to.
 */
#define RAPL_PKG                (1 << 0)
                                        /* 0x610 MSR_PKG_POWER_LIMIT */
                                        /* 0x611 MSR_PKG_ENERGY_STATUS */
#define RAPL_PKG_PERF_STATUS    (1 << 1)
                                        /* 0x613 MSR_PKG_PERF_STATUS */
#define RAPL_PKG_POWER_INFO     (1 << 2)
                                        /* 0x614 MSR_PKG_POWER_INFO */

#define RAPL_DRAM               (1 << 3)
                                        /* 0x618 MSR_DRAM_POWER_LIMIT */
                                        /* 0x619 MSR_DRAM_ENERGY_STATUS */
#define RAPL_DRAM_PERF_STATUS   (1 << 4)
                                        /* 0x61b MSR_DRAM_PERF_STATUS */
#define RAPL_DRAM_POWER_INFO    (1 << 5)
                                        /* 0x61c MSR_DRAM_POWER_INFO */

#define RAPL_CORES_POWER_LIMIT  (1 << 6)
                                        /* 0x638 MSR_PP0_POWER_LIMIT */
#define RAPL_CORE_POLICY        (1 << 7)
                                        /* 0x63a MSR_PP0_POLICY */

#define RAPL_GFX                (1 << 8)
                                        /* 0x640 MSR_PP1_POWER_LIMIT */
                                        /* 0x641 MSR_PP1_ENERGY_STATUS */
                                        /* 0x642 MSR_PP1_POLICY */

#define RAPL_CORES_ENERGY_STATUS        (1 << 9)
                                        /* 0x639 MSR_PP0_ENERGY_STATUS */
#define RAPL_PER_CORE_ENERGY    (1 << 10)
                                        /* Indicates cores energy collection is per-core,
                                         * not per-package. */
#define RAPL_AMD_F17H           (1 << 11)
                                        /* 0xc0010299 MSR_RAPL_PWR_UNIT */
                                        /* 0xc001029a MSR_CORE_ENERGY_STAT */
                                        /* 0xc001029b MSR_PKG_ENERGY_STAT */
/* Either of the two "cores" (PP0) feature bits. */
#define RAPL_CORES (RAPL_CORES_ENERGY_STATUS | RAPL_CORES_POWER_LIMIT)
#define TJMAX_DEFAULT   100
154
/* MSRs that are not yet in the kernel-provided header. */
#define MSR_RAPL_PWR_UNIT       0xc0010299
#define MSR_CORE_ENERGY_STAT    0xc001029a
#define MSR_PKG_ENERGY_STAT     0xc001029b

/* NOTE: each argument is evaluated twice; do not pass expressions with side effects. */
#define MAX(a, b) ((a) > (b) ? (a) : (b))

/*
 * buffer size used by sscanf() for added column names
 * Usually truncated to 7 characters, but also handles 18 columns for raw 64-bit counters
 */
#define NAME_BYTES 20
#define PATH_BYTES 128

int backwards_count;
char *progname;

#define CPU_SUBSET_MAXCPUS      1024    /* need to use before probe... */
/*
 * Dynamically sized CPU masks and their byte sizes (for the CPU_*_S macros):
 *   cpu_present_set  - tested by cpu_is_not_present()
 *   cpu_affinity_set - scratch mask that cpu_migrate() hands to sched_setaffinity()
 *   cpu_subset       - presumably the user-selected cpu-set; confirm against its users
 */
cpu_set_t *cpu_present_set, *cpu_affinity_set, *cpu_subset;
size_t cpu_present_setsize, cpu_affinity_setsize, cpu_subset_size;
/* Array sizes of the counter[] members in the core/package and thread data structs. */
#define MAX_ADDED_COUNTERS 8
#define MAX_ADDED_THREAD_COUNTERS 24
#define BITMASK_SIZE 32
178
/*
 * Per-thread (logical CPU) counter record.
 *
 * Two pointers of this type are declared, thread_even and thread_odd;
 * they travel together with the matching core and package pointers
 * through the EVEN_COUNTERS / ODD_COUNTERS macros.  Individual records
 * are located with GET_THREAD().
 */
struct thread_data {
        struct timeval tv_begin;
        struct timeval tv_end;
        struct timeval tv_delta;
        unsigned long long tsc;
        unsigned long long aperf;
        unsigned long long mperf;
        unsigned long long c1;
        unsigned long long instr_count;
        unsigned long long  irq_count;
        unsigned int smi_count;
        unsigned int cpu_id;    /* logical CPU number; for_all_cpus() skips the record if this CPU is not present */
        unsigned int apic_id;
        unsigned int x2apic_id;
        unsigned int flags;     /* presumably holds the CPU_IS_FIRST_* bits defined below; confirm */
        bool is_atom;
#define CPU_IS_FIRST_THREAD_IN_CORE     0x2
#define CPU_IS_FIRST_CORE_IN_PACKAGE    0x4
        unsigned long long counter[MAX_ADDED_THREAD_COUNTERS]; /* slots for added per-thread counters */
} *thread_even, *thread_odd;
199
/*
 * Per-core counter record.
 *
 * Two pointers of this type are declared, core_even and core_odd;
 * individual records are located with GET_CORE().
 */
struct core_data {
        unsigned long long c3;
        unsigned long long c6;
        unsigned long long c7;
        unsigned long long mc6_us;      /* duplicate as per-core for now, even though per module */
        unsigned int core_temp_c;
        unsigned int core_energy;       /* MSR_CORE_ENERGY_STAT */
        unsigned int core_id;
        unsigned long long counter[MAX_ADDED_COUNTERS]; /* slots for added per-core counters */
} *core_even, *core_odd;
210
/*
 * Per-package counter record.
 *
 * Two pointers of this type are declared, package_even and package_odd;
 * individual records are located with GET_PKG().
 */
struct pkg_data {
        unsigned long long pc2;
        unsigned long long pc3;
        unsigned long long pc6;
        unsigned long long pc7;
        unsigned long long pc8;
        unsigned long long pc9;
        unsigned long long pc10;
        unsigned long long cpu_lpi;
        unsigned long long sys_lpi;
        unsigned long long pkg_wtd_core_c0;
        unsigned long long pkg_any_core_c0;
        unsigned long long pkg_any_gfxe_c0;
        unsigned long long pkg_both_core_gfxe_c0;
        long long gfx_rc6_ms;           /* note: signed, unlike the other counters in this struct */
        unsigned int gfx_mhz;
        unsigned int gfx_act_mhz;
        unsigned int package_id;
        unsigned long long energy_pkg;  /* MSR_PKG_ENERGY_STATUS */
        unsigned long long energy_dram; /* MSR_DRAM_ENERGY_STATUS */
        unsigned long long energy_cores;        /* MSR_PP0_ENERGY_STATUS */
        unsigned long long energy_gfx;  /* MSR_PP1_ENERGY_STATUS */
        unsigned long long rapl_pkg_perf_status;        /* MSR_PKG_PERF_STATUS */
        unsigned long long rapl_dram_perf_status;       /* MSR_DRAM_PERF_STATUS */
        unsigned int pkg_temp_c;
        unsigned long long counter[MAX_ADDED_COUNTERS]; /* slots for added per-package counters */
} *package_even, *package_odd;
238
/*
 * Argument triples (thread, core, package base pointers) for functions
 * that take all three, such as for_all_cpus().
 */
#define ODD_COUNTERS thread_odd, core_odd, package_odd
#define EVEN_COUNTERS thread_even, core_even, package_even

/*
 * Address of one thread record inside a flat array laid out as
 * [package][node][core][thread], using the dimensions stored in topo.
 */
#define GET_THREAD(thread_base, thread_no, core_no, node_no, pkg_no)          \
        ((thread_base) +                                                      \
         ((pkg_no) *                                                          \
          topo.nodes_per_pkg * topo.cores_per_node * topo.threads_per_core) + \
         ((node_no) * topo.cores_per_node * topo.threads_per_core) +          \
         ((core_no) * topo.threads_per_core) +                                \
         (thread_no))

/*
 * Address of one core record inside a flat array laid out as
 * [package][node][core], using the dimensions stored in topo.
 */
#define GET_CORE(core_base, core_no, node_no, pkg_no)                   \
        ((core_base) +                                                  \
         ((pkg_no) *  topo.nodes_per_pkg * topo.cores_per_node) +       \
         ((node_no) * topo.cores_per_node) +                            \
         (core_no))


/*
 * Address of one package record.  Both arguments are parenthesized so
 * that compound expressions (e.g. "a ? b : c" or "x & y") bind as the
 * caller intends, matching GET_THREAD() and GET_CORE().
 */
#define GET_PKG(pkg_base, pkg_no) ((pkg_base) + (pkg_no))
258
259 enum counter_scope {SCOPE_CPU, SCOPE_CORE, SCOPE_PACKAGE};
260 enum counter_type {COUNTER_ITEMS, COUNTER_CYCLES, COUNTER_SECONDS, COUNTER_USEC};
261 enum counter_format {FORMAT_RAW, FORMAT_DELTA, FORMAT_PERCENT};
262
263 struct msr_counter {
264         unsigned int msr_num;
265         char name[NAME_BYTES];
266         char path[PATH_BYTES];
267         unsigned int width;
268         enum counter_type type;
269         enum counter_format format;
270         struct msr_counter *next;
271         unsigned int flags;
272 #define FLAGS_HIDE      (1 << 0)
273 #define FLAGS_SHOW      (1 << 1)
274 #define SYSFS_PERCPU    (1 << 1)
275 };
276
/*
 * An accumulated MSR sum tracks a monotonically increasing MSR.
 * The MSR is sampled periodically and the deltas are accumulated,
 * so the sum is not limited by the register's bit width.
 */
/*
 * Indices into msr_sum_array.entries[].  idx_to_offset() and
 * offset_to_idx() translate between these and MSR offsets;
 * IDX_COUNT is the number of entries.
 */
enum {
        IDX_PKG_ENERGY,
        IDX_DRAM_ENERGY,
        IDX_PP0_ENERGY,
        IDX_PP1_ENERGY,
        IDX_PKG_PERF,
        IDX_DRAM_PERF,
        IDX_COUNT,
};

int get_msr_sum(int cpu, off_t offset, unsigned long long *msr);

/* Accumulation state for every tracked MSR of one CPU. */
struct msr_sum_array {
        /* get_msr_sum() = sum + (get_msr() - last) */
        struct {
                /* The accumulated MSR value, updated by the timer */
                unsigned long long sum;
                /* The raw MSR value recorded at the last timer tick */
                unsigned long long last;
        } entries[IDX_COUNT];
};

/* The per-cpu MSR sum array. */
struct msr_sum_array *per_cpu_msr_sum;
306
307 off_t idx_to_offset(int idx)
308 {
309         off_t offset;
310
311         switch (idx) {
312         case IDX_PKG_ENERGY:
313                 if (do_rapl & RAPL_AMD_F17H)
314                         offset = MSR_PKG_ENERGY_STAT;
315                 else
316                         offset = MSR_PKG_ENERGY_STATUS;
317                 break;
318         case IDX_DRAM_ENERGY:
319                 offset = MSR_DRAM_ENERGY_STATUS;
320                 break;
321         case IDX_PP0_ENERGY:
322                 offset = MSR_PP0_ENERGY_STATUS;
323                 break;
324         case IDX_PP1_ENERGY:
325                 offset = MSR_PP1_ENERGY_STATUS;
326                 break;
327         case IDX_PKG_PERF:
328                 offset = MSR_PKG_PERF_STATUS;
329                 break;
330         case IDX_DRAM_PERF:
331                 offset = MSR_DRAM_PERF_STATUS;
332                 break;
333         default:
334                 offset = -1;
335         }
336         return offset;
337 }
338
339 int offset_to_idx(off_t offset)
340 {
341         int idx;
342
343         switch (offset) {
344         case MSR_PKG_ENERGY_STATUS:
345         case MSR_PKG_ENERGY_STAT:
346                 idx = IDX_PKG_ENERGY;
347                 break;
348         case MSR_DRAM_ENERGY_STATUS:
349                 idx = IDX_DRAM_ENERGY;
350                 break;
351         case MSR_PP0_ENERGY_STATUS:
352                 idx = IDX_PP0_ENERGY;
353                 break;
354         case MSR_PP1_ENERGY_STATUS:
355                 idx = IDX_PP1_ENERGY;
356                 break;
357         case MSR_PKG_PERF_STATUS:
358                 idx = IDX_PKG_PERF;
359                 break;
360         case MSR_DRAM_PERF_STATUS:
361                 idx = IDX_DRAM_PERF;
362                 break;
363         default:
364                 idx = -1;
365         }
366         return idx;
367 }
368
369 int idx_valid(int idx)
370 {
371         switch (idx) {
372         case IDX_PKG_ENERGY:
373                 return do_rapl & (RAPL_PKG | RAPL_AMD_F17H);
374         case IDX_DRAM_ENERGY:
375                 return do_rapl & RAPL_DRAM;
376         case IDX_PP0_ENERGY:
377                 return do_rapl & RAPL_CORES_ENERGY_STATUS;
378         case IDX_PP1_ENERGY:
379                 return do_rapl & RAPL_GFX;
380         case IDX_PKG_PERF:
381                 return do_rapl & RAPL_PKG_PERF_STATUS;
382         case IDX_DRAM_PERF:
383                 return do_rapl & RAPL_DRAM_PERF_STATUS;
384         default:
385                 return 0;
386         }
387 }
/*
 * Bookkeeping for counters that are not in the built-in bic[] table.
 * tp, cp and pp are heads of msr_counter lists linked through ->next;
 * print_header() walks sys.tp and sys.cp to emit their column names.
 */
struct sys_counters {
        unsigned int added_thread_counters;
        unsigned int added_core_counters;
        unsigned int added_package_counters;
        struct msr_counter *tp; /* thread-scope list */
        struct msr_counter *cp; /* core-scope list */
        struct msr_counter *pp; /* presumably the package-scope list; confirm */
} sys;
396
/*
 * One thread, one core and one package record bundled together.
 * The single instance is named 'average'; presumably it holds the
 * system-wide summary row -- confirm against the code that fills it.
 */
struct system_summary {
        struct thread_data threads;
        struct core_data cores;
        struct pkg_data packages;
} average;
402
/*
 * Topology identifiers of one CPU.  'cpus' points to an array of these;
 * presumably indexed by logical CPU number -- confirm against the
 * allocation code.
 */
struct cpu_topology {
        int physical_package_id;
        int die_id;
        int logical_cpu_id;
        int physical_node_id;
        int logical_node_id;    /* 0-based count within the package */
        int physical_core_id;
        int thread_id;
        cpu_set_t *put_ids; /* Processing Unit/Thread IDs */
} *cpus;
413
/*
 * System-wide topology dimensions.
 *
 * num_packages, nodes_per_pkg, cores_per_node and threads_per_core are
 * the loop bounds of for_all_cpus(); the last three are also the array
 * strides used by GET_THREAD() and GET_CORE().
 */
struct topo_params {
        int num_packages;
        int num_die;
        int num_cpus;
        int num_cores;
        int max_cpu_num;
        int max_node_num;
        int nodes_per_pkg;
        int cores_per_node;
        int threads_per_core;
} topo;
425
struct timeval tv_even, tv_odd, tv_delta;

int *irq_column_2_cpu;  /* /proc/interrupts column numbers */
int *irqs_per_cpu;              /* indexed by cpu_num */

void setup_all_buffers(void);

/*
 * Two candidate locations (sysfs and debugfs) for a system low-power-idle
 * residency file.  sys_lpi_file has no initializer here; presumably it is
 * pointed at whichever candidate is usable -- confirm against the probe code.
 */
char *sys_lpi_file;
char *sys_lpi_file_sysfs = "/sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us";
char *sys_lpi_file_debugfs = "/sys/kernel/debug/pmc_core/slp_s0_residency_usec";
436
437 int cpu_is_not_present(int cpu)
438 {
439         return !CPU_ISSET_S(cpu, cpu_present_setsize, cpu_present_set);
440 }
441 /*
442  * run func(thread, core, package) in topology order
443  * skip non-present cpus
444  */
445
446 int for_all_cpus(int (func)(struct thread_data *, struct core_data *, struct pkg_data *),
447         struct thread_data *thread_base, struct core_data *core_base, struct pkg_data *pkg_base)
448 {
449         int retval, pkg_no, core_no, thread_no, node_no;
450
451         for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) {
452                 for (node_no = 0; node_no < topo.nodes_per_pkg; node_no++) {
453                         for (core_no = 0; core_no < topo.cores_per_node; ++core_no) {
454                                 for (thread_no = 0; thread_no <
455                                         topo.threads_per_core; ++thread_no) {
456                                         struct thread_data *t;
457                                         struct core_data *c;
458                                         struct pkg_data *p;
459
460                                         t = GET_THREAD(thread_base, thread_no,
461                                                        core_no, node_no,
462                                                        pkg_no);
463
464                                         if (cpu_is_not_present(t->cpu_id))
465                                                 continue;
466
467                                         c = GET_CORE(core_base, core_no,
468                                                      node_no, pkg_no);
469                                         p = GET_PKG(pkg_base, pkg_no);
470
471                                         retval = func(t, c, p);
472                                         if (retval)
473                                                 return retval;
474                                 }
475                         }
476                 }
477         }
478         return 0;
479 }
480
481 int cpu_migrate(int cpu)
482 {
483         CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set);
484         CPU_SET_S(cpu, cpu_affinity_setsize, cpu_affinity_set);
485         if (sched_setaffinity(0, cpu_affinity_setsize, cpu_affinity_set) == -1)
486                 return -1;
487         else
488                 return 0;
489 }
490 int get_msr_fd(int cpu)
491 {
492         char pathname[32];
493         int fd;
494
495         fd = fd_percpu[cpu];
496
497         if (fd)
498                 return fd;
499
500         sprintf(pathname, "/dev/cpu/%d/msr", cpu);
501         fd = open(pathname, O_RDONLY);
502         if (fd < 0)
503                 err(-1, "%s open failed, try chown or chmod +r /dev/cpu/*/msr, or run as root", pathname);
504
505         fd_percpu[cpu] = fd;
506
507         return fd;
508 }
509
/*
 * Thin wrapper that issues the perf_event_open system call through
 * syscall(); the arguments are forwarded unchanged and the syscall()
 * result is returned as-is.
 */
static long perf_event_open(struct perf_event_attr *hw_event, pid_t pid, int cpu, int group_fd, unsigned long flags)
{
        long ret;

        ret = syscall(__NR_perf_event_open, hw_event, pid, cpu, group_fd, flags);

        return ret;
}
514
/*
 * Open a perf hardware-instructions counter bound to CPU 'cpu_num'.
 *
 * The event is opened with pid == -1 and group_fd == -1, i.e. it counts
 * on the given CPU for all processes and is not part of a group.
 *
 * Returns the perf event descriptor.  If the counter cannot be opened
 * the program exits through err().
 */
static int perf_instr_count_open(int cpu_num)
{
        struct perf_event_attr pea;
        int fd;

        memset(&pea, 0, sizeof(struct perf_event_attr));
        pea.type = PERF_TYPE_HARDWARE;
        pea.size = sizeof(struct perf_event_attr);
        pea.config = PERF_COUNT_HW_INSTRUCTIONS;

        /* counter for cpu_num, including user + kernel and all processes */
        fd = perf_event_open(&pea, -1, cpu_num, -1, 0);
        if (fd == -1)
                /*
                 * No trailing newline in the format: err() itself appends
                 * ": <strerror(errno)>\n", so a "\n" here would push the
                 * errno text onto a separate line.
                 */
                err(-1, "cpu%d: perf instruction counter", cpu_num);

        return fd;
}
532
533 int get_instr_count_fd(int cpu)
534 {
535         if (fd_instr_count_percpu[cpu])
536                 return fd_instr_count_percpu[cpu];
537
538         fd_instr_count_percpu[cpu] = perf_instr_count_open(cpu);
539
540         return fd_instr_count_percpu[cpu];
541 }
542
/*
 * Read the 64-bit MSR at 'offset' on 'cpu' into *msr.
 *
 * The value is read with pread() from the descriptor supplied by
 * get_msr_fd().  Anything other than a full-size read terminates the
 * program through err(), so the function only ever returns 0.
 */
int get_msr(int cpu, off_t offset, unsigned long long *msr)
{
        int fd = get_msr_fd(cpu);
        ssize_t nread = pread(fd, msr, sizeof(*msr), offset);

        if (nread != sizeof(*msr))
                err(-1, "cpu%d: msr offset 0x%llx read failed", cpu, (unsigned long long)offset);

        return 0;
}
554
555 /*
556  * This list matches the column headers, except
557  * 1. built-in only, the sysfs counters are not here -- we learn of those at run-time
558  * 2. Core and CPU are moved to the end, we can't have strings that contain them
559  *    matching on them for --show and --hide.
560  */
/*
 * ORDER MATTERS: bic_lookup() maps the name at index i to bit (1ULL << i),
 * so entry i here must correspond to the BIC_* macro defined as (1ULL << i).
 * Only msr_num and name are initialized for most entries; the remaining
 * members are zero-initialized.
 */
struct msr_counter bic[] = {
        { 0x0, "usec" },
        { 0x0, "Time_Of_Day_Seconds" },
        { 0x0, "Package" },
        { 0x0, "Node" },
        { 0x0, "Avg_MHz" },
        { 0x0, "Busy%" },
        { 0x0, "Bzy_MHz" },
        { 0x0, "TSC_MHz" },
        { 0x0, "IRQ" },
        { 0x0, "SMI", "", 32, 0, FORMAT_DELTA, NULL},
        { 0x0, "sysfs" },
        { 0x0, "CPU%c1" },
        { 0x0, "CPU%c3" },
        { 0x0, "CPU%c6" },
        { 0x0, "CPU%c7" },
        { 0x0, "ThreadC" },
        { 0x0, "CoreTmp" },
        { 0x0, "CoreCnt" },
        { 0x0, "PkgTmp" },
        { 0x0, "GFX%rc6" },
        { 0x0, "GFXMHz" },
        { 0x0, "Pkg%pc2" },
        { 0x0, "Pkg%pc3" },
        { 0x0, "Pkg%pc6" },
        { 0x0, "Pkg%pc7" },
        { 0x0, "Pkg%pc8" },
        { 0x0, "Pkg%pc9" },
        { 0x0, "Pk%pc10" },
        { 0x0, "CPU%LPI" },
        { 0x0, "SYS%LPI" },
        { 0x0, "PkgWatt" },
        { 0x0, "CorWatt" },
        { 0x0, "GFXWatt" },
        { 0x0, "PkgCnt" },
        { 0x0, "RAMWatt" },
        { 0x0, "PKG_%" },
        { 0x0, "RAM_%" },
        { 0x0, "Pkg_J" },
        { 0x0, "Cor_J" },
        { 0x0, "GFX_J" },
        { 0x0, "RAM_J" },
        { 0x0, "Mod%c6" },
        { 0x0, "Totl%C0" },
        { 0x0, "Any%C0" },
        { 0x0, "GFX%C0" },
        { 0x0, "CPUGFX%" },
        { 0x0, "Core" },
        { 0x0, "CPU" },
        { 0x0, "APIC" },
        { 0x0, "X2APIC" },
        { 0x0, "Die" },
        { 0x0, "GFXAMHz" },
        { 0x0, "IPC" },
};
616
/* Number of entries in the built-in counter table bic[]. */
#define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter))

/*
 * One bit per built-in counter.  Bit N corresponds to bic[N]
 * (bic_lookup() relies on this), so keep both lists in the same order.
 */
#define BIC_USEC        (1ULL << 0)
#define BIC_TOD         (1ULL << 1)
#define BIC_Package     (1ULL << 2)
#define BIC_Node        (1ULL << 3)
#define BIC_Avg_MHz     (1ULL << 4)
#define BIC_Busy        (1ULL << 5)
#define BIC_Bzy_MHz     (1ULL << 6)
#define BIC_TSC_MHz     (1ULL << 7)
#define BIC_IRQ         (1ULL << 8)
#define BIC_SMI         (1ULL << 9)
#define BIC_sysfs       (1ULL << 10)
#define BIC_CPU_c1      (1ULL << 11)
#define BIC_CPU_c3      (1ULL << 12)
#define BIC_CPU_c6      (1ULL << 13)
#define BIC_CPU_c7      (1ULL << 14)
#define BIC_ThreadC     (1ULL << 15)
#define BIC_CoreTmp     (1ULL << 16)
#define BIC_CoreCnt     (1ULL << 17)
#define BIC_PkgTmp      (1ULL << 18)
#define BIC_GFX_rc6     (1ULL << 19)
#define BIC_GFXMHz      (1ULL << 20)
#define BIC_Pkgpc2      (1ULL << 21)
#define BIC_Pkgpc3      (1ULL << 22)
#define BIC_Pkgpc6      (1ULL << 23)
#define BIC_Pkgpc7      (1ULL << 24)
#define BIC_Pkgpc8      (1ULL << 25)
#define BIC_Pkgpc9      (1ULL << 26)
#define BIC_Pkgpc10     (1ULL << 27)
#define BIC_CPU_LPI     (1ULL << 28)
#define BIC_SYS_LPI     (1ULL << 29)
#define BIC_PkgWatt     (1ULL << 30)
#define BIC_CorWatt     (1ULL << 31)
#define BIC_GFXWatt     (1ULL << 32)
#define BIC_PkgCnt      (1ULL << 33)
#define BIC_RAMWatt     (1ULL << 34)
#define BIC_PKG__       (1ULL << 35)
#define BIC_RAM__       (1ULL << 36)
#define BIC_Pkg_J       (1ULL << 37)
#define BIC_Cor_J       (1ULL << 38)
#define BIC_GFX_J       (1ULL << 39)
#define BIC_RAM_J       (1ULL << 40)
#define BIC_Mod_c6      (1ULL << 41)
#define BIC_Totl_c0     (1ULL << 42)
#define BIC_Any_c0      (1ULL << 43)
#define BIC_GFX_c0      (1ULL << 44)
#define BIC_CPUGFX      (1ULL << 45)
#define BIC_Core        (1ULL << 46)
#define BIC_CPU         (1ULL << 47)
#define BIC_APIC        (1ULL << 48)
#define BIC_X2APIC      (1ULL << 49)
#define BIC_Die         (1ULL << 50)
#define BIC_GFXACTMHz   (1ULL << 51)
#define BIC_IPC         (1ULL << 52)

#define BIC_DISABLED_BY_DEFAULT (BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC)

/*
 * bic_enabled: columns the user wants (everything except the
 *              disabled-by-default set, initially).
 * bic_present: columns that are available; a column is printed only
 *              when its bit is set in both masks (see DO_BIC()).
 */
unsigned long long bic_enabled = (0xFFFFFFFFFFFFFFFFULL & ~BIC_DISABLED_BY_DEFAULT);
unsigned long long bic_present = BIC_USEC | BIC_TOD | BIC_sysfs | BIC_APIC | BIC_X2APIC;

/*
 * Mask helpers.  The argument is parenthesized in every expansion so
 * that a compound mask such as (BIC_A | BIC_B) is applied as a unit;
 * without the parentheses "~A | B" and "x & A | B" bind incorrectly.
 */
#define DO_BIC(COUNTER_NAME) (bic_enabled & bic_present & (COUNTER_NAME))
#define DO_BIC_READ(COUNTER_NAME) (bic_present & (COUNTER_NAME))
#define ENABLE_BIC(COUNTER_NAME) (bic_enabled |= (COUNTER_NAME))
#define BIC_PRESENT(COUNTER_BIT) (bic_present |= (COUNTER_BIT))
#define BIC_NOT_PRESENT(COUNTER_BIT) (bic_present &= ~(COUNTER_BIT))
#define BIC_IS_ENABLED(COUNTER_BIT) (bic_enabled & (COUNTER_BIT))
683
684
/*
 * Names passed to bic_lookup() that matched no built-in counter are
 * remembered here.  The stored pointers point into the caller's name
 * list buffer (bic_lookup() does not copy the strings), so that buffer
 * must stay valid for as long as these entries are used.
 */
#define MAX_DEFERRED 16
char *deferred_skip_names[MAX_DEFERRED];
int deferred_skip_index;        /* number of entries stored so far */

/*
 * HIDE_LIST - hide this list of counters, show the rest [default]
 * SHOW_LIST - show this list of counters, hide the rest
 */
enum show_hide_mode { SHOW_LIST, HIDE_LIST } global_show_hide_mode = HIDE_LIST;
694
695 void help(void)
696 {
697         fprintf(outf,
698         "Usage: turbostat [OPTIONS][(--interval seconds) | COMMAND ...]\n"
699         "\n"
700         "Turbostat forks the specified COMMAND and prints statistics\n"
701         "when COMMAND completes.\n"
702         "If no COMMAND is specified, turbostat wakes every 5-seconds\n"
703         "to print statistics, until interrupted.\n"
704         "  -a, --add    add a counter\n"
705         "                 eg. --add msr0x10,u64,cpu,delta,MY_TSC\n"
706         "  -c, --cpu    cpu-set limit output to summary plus cpu-set:\n"
707         "                 {core | package | j,k,l..m,n-p }\n"
708         "  -d, --debug  displays usec, Time_Of_Day_Seconds and more debugging\n"
709         "  -D, --Dump   displays the raw counter values\n"
710         "  -e, --enable [all | column]\n"
711         "               shows all or the specified disabled column\n"
712         "  -H, --hide [column|column,column,...]\n"
713         "               hide the specified column(s)\n"
714         "  -i, --interval sec.subsec\n"
715         "               Override default 5-second measurement interval\n"
716         "  -J, --Joules displays energy in Joules instead of Watts\n"
717         "  -l, --list   list column headers only\n"
718         "  -n, --num_iterations num\n"
719         "               number of the measurement iterations\n"
720         "  -o, --out file\n"
721         "               create or truncate \"file\" for all output\n"
722         "  -q, --quiet  skip decoding system configuration header\n"
723         "  -s, --show [column|column,column,...]\n"
724         "               show only the specified column(s)\n"
725         "  -S, --Summary\n"
726         "               limits output to 1-line system summary per interval\n"
727         "  -T, --TCC temperature\n"
728         "               sets the Thermal Control Circuit temperature in\n"
729         "                 degrees Celsius\n"
730         "  -h, --help   print this help message\n"
731         "  -v, --version        print version information\n"
732         "\n"
733         "For more help, run \"man turbostat\"\n");
734 }
735
736 /*
737  * bic_lookup
738  * for all the strings in comma separate name_list,
739  * set the approprate bit in return value.
740  */
741 unsigned long long bic_lookup(char *name_list, enum show_hide_mode mode)
742 {
743         int i;
744         unsigned long long retval = 0;
745
746         while (name_list) {
747                 char *comma;
748
749                 comma = strchr(name_list, ',');
750
751                 if (comma)
752                         *comma = '\0';
753
754                 if (!strcmp(name_list, "all"))
755                         return ~0;
756
757                 for (i = 0; i < MAX_BIC; ++i) {
758                         if (!strcmp(name_list, bic[i].name)) {
759                                 retval |= (1ULL << i);
760                                 break;
761                         }
762                 }
763                 if (i == MAX_BIC) {
764                         if (mode == SHOW_LIST) {
765                                 fprintf(stderr, "Invalid counter name: %s\n", name_list);
766                                 exit(-1);
767                         }
768                         deferred_skip_names[deferred_skip_index++] = name_list;
769                         if (debug)
770                                 fprintf(stderr, "deferred \"%s\"\n", name_list);
771                         if (deferred_skip_index >= MAX_DEFERRED) {
772                                 fprintf(stderr, "More than max %d un-recognized --skip options '%s'\n",
773                                         MAX_DEFERRED, name_list);
774                                 help();
775                                 exit(1);
776                         }
777                 }
778
779                 name_list = comma;
780                 if (name_list)
781                         name_list++;
782
783         }
784         return retval;
785 }
786
787
788 void print_header(char *delim)
789 {
790         struct msr_counter *mp;
791         int printed = 0;
792
793         if (DO_BIC(BIC_USEC))
794                 outp += sprintf(outp, "%susec", (printed++ ? delim : ""));
795         if (DO_BIC(BIC_TOD))
796                 outp += sprintf(outp, "%sTime_Of_Day_Seconds", (printed++ ? delim : ""));
797         if (DO_BIC(BIC_Package))
798                 outp += sprintf(outp, "%sPackage", (printed++ ? delim : ""));
799         if (DO_BIC(BIC_Die))
800                 outp += sprintf(outp, "%sDie", (printed++ ? delim : ""));
801         if (DO_BIC(BIC_Node))
802                 outp += sprintf(outp, "%sNode", (printed++ ? delim : ""));
803         if (DO_BIC(BIC_Core))
804                 outp += sprintf(outp, "%sCore", (printed++ ? delim : ""));
805         if (DO_BIC(BIC_CPU))
806                 outp += sprintf(outp, "%sCPU", (printed++ ? delim : ""));
807         if (DO_BIC(BIC_APIC))
808                 outp += sprintf(outp, "%sAPIC", (printed++ ? delim : ""));
809         if (DO_BIC(BIC_X2APIC))
810                 outp += sprintf(outp, "%sX2APIC", (printed++ ? delim : ""));
811         if (DO_BIC(BIC_Avg_MHz))
812                 outp += sprintf(outp, "%sAvg_MHz", (printed++ ? delim : ""));
813         if (DO_BIC(BIC_Busy))
814                 outp += sprintf(outp, "%sBusy%%", (printed++ ? delim : ""));
815         if (DO_BIC(BIC_Bzy_MHz))
816                 outp += sprintf(outp, "%sBzy_MHz", (printed++ ? delim : ""));
817         if (DO_BIC(BIC_TSC_MHz))
818                 outp += sprintf(outp, "%sTSC_MHz", (printed++ ? delim : ""));
819
820         if (DO_BIC(BIC_IPC))
821                 outp += sprintf(outp, "%sIPC", (printed++ ? delim : ""));
822
823         if (DO_BIC(BIC_IRQ)) {
824                 if (sums_need_wide_columns)
825                         outp += sprintf(outp, "%s     IRQ", (printed++ ? delim : ""));
826                 else
827                         outp += sprintf(outp, "%sIRQ", (printed++ ? delim : ""));
828         }
829
830         if (DO_BIC(BIC_SMI))
831                 outp += sprintf(outp, "%sSMI", (printed++ ? delim : ""));
832
833         for (mp = sys.tp; mp; mp = mp->next) {
834
835                 if (mp->format == FORMAT_RAW) {
836                         if (mp->width == 64)
837                                 outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), mp->name);
838                         else
839                                 outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), mp->name);
840                 } else {
841                         if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
842                                 outp += sprintf(outp, "%s%8s", (printed++ ? delim : ""), mp->name);
843                         else
844                                 outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), mp->name);
845                 }
846         }
847
848         if (DO_BIC(BIC_CPU_c1))
849                 outp += sprintf(outp, "%sCPU%%c1", (printed++ ? delim : ""));
850         if (DO_BIC(BIC_CPU_c3))
851                 outp += sprintf(outp, "%sCPU%%c3", (printed++ ? delim : ""));
852         if (DO_BIC(BIC_CPU_c6))
853                 outp += sprintf(outp, "%sCPU%%c6", (printed++ ? delim : ""));
854         if (DO_BIC(BIC_CPU_c7))
855                 outp += sprintf(outp, "%sCPU%%c7", (printed++ ? delim : ""));
856
857         if (DO_BIC(BIC_Mod_c6))
858                 outp += sprintf(outp, "%sMod%%c6", (printed++ ? delim : ""));
859
860         if (DO_BIC(BIC_CoreTmp))
861                 outp += sprintf(outp, "%sCoreTmp", (printed++ ? delim : ""));
862
863         if (do_rapl && !rapl_joules) {
864                 if (DO_BIC(BIC_CorWatt) && (do_rapl & RAPL_PER_CORE_ENERGY))
865                         outp += sprintf(outp, "%sCorWatt", (printed++ ? delim : ""));
866         } else if (do_rapl && rapl_joules) {
867                 if (DO_BIC(BIC_Cor_J) && (do_rapl & RAPL_PER_CORE_ENERGY))
868                         outp += sprintf(outp, "%sCor_J", (printed++ ? delim : ""));
869         }
870
871         for (mp = sys.cp; mp; mp = mp->next) {
872                 if (mp->format == FORMAT_RAW) {
873                         if (mp->width == 64)
874                                 outp += sprintf(outp, "%s%18.18s", delim, mp->name);
875                         else
876                                 outp += sprintf(outp, "%s%10.10s", delim, mp->name);
877                 } else {
878                         if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
879                                 outp += sprintf(outp, "%s%8s", delim, mp->name);
880                         else
881                                 outp += sprintf(outp, "%s%s", delim, mp->name);
882                 }
883         }
884
885         if (DO_BIC(BIC_PkgTmp))
886                 outp += sprintf(outp, "%sPkgTmp", (printed++ ? delim : ""));
887
888         if (DO_BIC(BIC_GFX_rc6))
889                 outp += sprintf(outp, "%sGFX%%rc6", (printed++ ? delim : ""));
890
891         if (DO_BIC(BIC_GFXMHz))
892                 outp += sprintf(outp, "%sGFXMHz", (printed++ ? delim : ""));
893
894         if (DO_BIC(BIC_GFXACTMHz))
895                 outp += sprintf(outp, "%sGFXAMHz", (printed++ ? delim : ""));
896
897         if (DO_BIC(BIC_Totl_c0))
898                 outp += sprintf(outp, "%sTotl%%C0", (printed++ ? delim : ""));
899         if (DO_BIC(BIC_Any_c0))
900                 outp += sprintf(outp, "%sAny%%C0", (printed++ ? delim : ""));
901         if (DO_BIC(BIC_GFX_c0))
902                 outp += sprintf(outp, "%sGFX%%C0", (printed++ ? delim : ""));
903         if (DO_BIC(BIC_CPUGFX))
904                 outp += sprintf(outp, "%sCPUGFX%%", (printed++ ? delim : ""));
905
906         if (DO_BIC(BIC_Pkgpc2))
907                 outp += sprintf(outp, "%sPkg%%pc2", (printed++ ? delim : ""));
908         if (DO_BIC(BIC_Pkgpc3))
909                 outp += sprintf(outp, "%sPkg%%pc3", (printed++ ? delim : ""));
910         if (DO_BIC(BIC_Pkgpc6))
911                 outp += sprintf(outp, "%sPkg%%pc6", (printed++ ? delim : ""));
912         if (DO_BIC(BIC_Pkgpc7))
913                 outp += sprintf(outp, "%sPkg%%pc7", (printed++ ? delim : ""));
914         if (DO_BIC(BIC_Pkgpc8))
915                 outp += sprintf(outp, "%sPkg%%pc8", (printed++ ? delim : ""));
916         if (DO_BIC(BIC_Pkgpc9))
917                 outp += sprintf(outp, "%sPkg%%pc9", (printed++ ? delim : ""));
918         if (DO_BIC(BIC_Pkgpc10))
919                 outp += sprintf(outp, "%sPk%%pc10", (printed++ ? delim : ""));
920         if (DO_BIC(BIC_CPU_LPI))
921                 outp += sprintf(outp, "%sCPU%%LPI", (printed++ ? delim : ""));
922         if (DO_BIC(BIC_SYS_LPI))
923                 outp += sprintf(outp, "%sSYS%%LPI", (printed++ ? delim : ""));
924
925         if (do_rapl && !rapl_joules) {
926                 if (DO_BIC(BIC_PkgWatt))
927                         outp += sprintf(outp, "%sPkgWatt", (printed++ ? delim : ""));
928                 if (DO_BIC(BIC_CorWatt) && !(do_rapl & RAPL_PER_CORE_ENERGY))
929                         outp += sprintf(outp, "%sCorWatt", (printed++ ? delim : ""));
930                 if (DO_BIC(BIC_GFXWatt))
931                         outp += sprintf(outp, "%sGFXWatt", (printed++ ? delim : ""));
932                 if (DO_BIC(BIC_RAMWatt))
933                         outp += sprintf(outp, "%sRAMWatt", (printed++ ? delim : ""));
934                 if (DO_BIC(BIC_PKG__))
935                         outp += sprintf(outp, "%sPKG_%%", (printed++ ? delim : ""));
936                 if (DO_BIC(BIC_RAM__))
937                         outp += sprintf(outp, "%sRAM_%%", (printed++ ? delim : ""));
938         } else if (do_rapl && rapl_joules) {
939                 if (DO_BIC(BIC_Pkg_J))
940                         outp += sprintf(outp, "%sPkg_J", (printed++ ? delim : ""));
941                 if (DO_BIC(BIC_Cor_J) && !(do_rapl & RAPL_PER_CORE_ENERGY))
942                         outp += sprintf(outp, "%sCor_J", (printed++ ? delim : ""));
943                 if (DO_BIC(BIC_GFX_J))
944                         outp += sprintf(outp, "%sGFX_J", (printed++ ? delim : ""));
945                 if (DO_BIC(BIC_RAM_J))
946                         outp += sprintf(outp, "%sRAM_J", (printed++ ? delim : ""));
947                 if (DO_BIC(BIC_PKG__))
948                         outp += sprintf(outp, "%sPKG_%%", (printed++ ? delim : ""));
949                 if (DO_BIC(BIC_RAM__))
950                         outp += sprintf(outp, "%sRAM_%%", (printed++ ? delim : ""));
951         }
952         for (mp = sys.pp; mp; mp = mp->next) {
953                 if (mp->format == FORMAT_RAW) {
954                         if (mp->width == 64)
955                                 outp += sprintf(outp, "%s%18.18s", delim, mp->name);
956                         else
957                                 outp += sprintf(outp, "%s%10.10s", delim, mp->name);
958                 } else {
959                         if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
960                                 outp += sprintf(outp, "%s%8s", delim, mp->name);
961                         else
962                                 outp += sprintf(outp, "%s%s", delim, mp->name);
963                 }
964         }
965
966         outp += sprintf(outp, "\n");
967 }
968
969 int dump_counters(struct thread_data *t, struct core_data *c,
970         struct pkg_data *p)
971 {
972         int i;
973         struct msr_counter *mp;
974
975         outp += sprintf(outp, "t %p, c %p, p %p\n", t, c, p);
976
977         if (t) {
978                 outp += sprintf(outp, "CPU: %d flags 0x%x\n",
979                         t->cpu_id, t->flags);
980                 outp += sprintf(outp, "TSC: %016llX\n", t->tsc);
981                 outp += sprintf(outp, "aperf: %016llX\n", t->aperf);
982                 outp += sprintf(outp, "mperf: %016llX\n", t->mperf);
983                 outp += sprintf(outp, "c1: %016llX\n", t->c1);
984
985                 if (DO_BIC(BIC_IPC))
986                         outp += sprintf(outp, "IPC: %lld\n", t->instr_count);
987
988                 if (DO_BIC(BIC_IRQ))
989                         outp += sprintf(outp, "IRQ: %lld\n", t->irq_count);
990                 if (DO_BIC(BIC_SMI))
991                         outp += sprintf(outp, "SMI: %d\n", t->smi_count);
992
993                 for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
994                         outp += sprintf(outp, "tADDED [%d] msr0x%x: %08llX\n",
995                                 i, mp->msr_num, t->counter[i]);
996                 }
997         }
998
999         if (c) {
1000                 outp += sprintf(outp, "core: %d\n", c->core_id);
1001                 outp += sprintf(outp, "c3: %016llX\n", c->c3);
1002                 outp += sprintf(outp, "c6: %016llX\n", c->c6);
1003                 outp += sprintf(outp, "c7: %016llX\n", c->c7);
1004                 outp += sprintf(outp, "DTS: %dC\n", c->core_temp_c);
1005                 outp += sprintf(outp, "Joules: %0X\n", c->core_energy);
1006
1007                 for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
1008                         outp += sprintf(outp, "cADDED [%d] msr0x%x: %08llX\n",
1009                                 i, mp->msr_num, c->counter[i]);
1010                 }
1011                 outp += sprintf(outp, "mc6_us: %016llX\n", c->mc6_us);
1012         }
1013
1014         if (p) {
1015                 outp += sprintf(outp, "package: %d\n", p->package_id);
1016
1017                 outp += sprintf(outp, "Weighted cores: %016llX\n", p->pkg_wtd_core_c0);
1018                 outp += sprintf(outp, "Any cores: %016llX\n", p->pkg_any_core_c0);
1019                 outp += sprintf(outp, "Any GFX: %016llX\n", p->pkg_any_gfxe_c0);
1020                 outp += sprintf(outp, "CPU + GFX: %016llX\n", p->pkg_both_core_gfxe_c0);
1021
1022                 outp += sprintf(outp, "pc2: %016llX\n", p->pc2);
1023                 if (DO_BIC(BIC_Pkgpc3))
1024                         outp += sprintf(outp, "pc3: %016llX\n", p->pc3);
1025                 if (DO_BIC(BIC_Pkgpc6))
1026                         outp += sprintf(outp, "pc6: %016llX\n", p->pc6);
1027                 if (DO_BIC(BIC_Pkgpc7))
1028                         outp += sprintf(outp, "pc7: %016llX\n", p->pc7);
1029                 outp += sprintf(outp, "pc8: %016llX\n", p->pc8);
1030                 outp += sprintf(outp, "pc9: %016llX\n", p->pc9);
1031                 outp += sprintf(outp, "pc10: %016llX\n", p->pc10);
1032                 outp += sprintf(outp, "cpu_lpi: %016llX\n", p->cpu_lpi);
1033                 outp += sprintf(outp, "sys_lpi: %016llX\n", p->sys_lpi);
1034                 outp += sprintf(outp, "Joules PKG: %0llX\n", p->energy_pkg);
1035                 outp += sprintf(outp, "Joules COR: %0llX\n", p->energy_cores);
1036                 outp += sprintf(outp, "Joules GFX: %0llX\n", p->energy_gfx);
1037                 outp += sprintf(outp, "Joules RAM: %0llX\n", p->energy_dram);
1038                 outp += sprintf(outp, "Throttle PKG: %0llX\n",
1039                         p->rapl_pkg_perf_status);
1040                 outp += sprintf(outp, "Throttle RAM: %0llX\n",
1041                         p->rapl_dram_perf_status);
1042                 outp += sprintf(outp, "PTM: %dC\n", p->pkg_temp_c);
1043
1044                 for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
1045                         outp += sprintf(outp, "pADDED [%d] msr0x%x: %08llX\n",
1046                                 i, mp->msr_num, p->counter[i]);
1047                 }
1048         }
1049
1050         outp += sprintf(outp, "\n");
1051
1052         return 0;
1053 }
1054
1055 /*
1056  * column formatting convention & formats
1057  */
1058 int format_counters(struct thread_data *t, struct core_data *c,
1059         struct pkg_data *p)
1060 {
1061         double interval_float, tsc;
1062         char *fmt8;
1063         int i;
1064         struct msr_counter *mp;
1065         char *delim = "\t";
1066         int printed = 0;
1067
1068          /* if showing only 1st thread in core and this isn't one, bail out */
1069         if (show_core_only && !(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
1070                 return 0;
1071
1072          /* if showing only 1st thread in pkg and this isn't one, bail out */
1073         if (show_pkg_only && !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
1074                 return 0;
1075
1076         /*if not summary line and --cpu is used */
1077         if ((t != &average.threads) &&
1078                 (cpu_subset && !CPU_ISSET_S(t->cpu_id, cpu_subset_size, cpu_subset)))
1079                 return 0;
1080
1081         if (DO_BIC(BIC_USEC)) {
1082                 /* on each row, print how many usec each timestamp took to gather */
1083                 struct timeval tv;
1084
1085                 timersub(&t->tv_end, &t->tv_begin, &tv);
1086                 outp += sprintf(outp, "%5ld\t", tv.tv_sec * 1000000 + tv.tv_usec);
1087         }
1088
1089         /* Time_Of_Day_Seconds: on each row, print sec.usec last timestamp taken */
1090         if (DO_BIC(BIC_TOD))
1091                 outp += sprintf(outp, "%10ld.%06ld\t", t->tv_end.tv_sec, t->tv_end.tv_usec);
1092
1093         interval_float = t->tv_delta.tv_sec + t->tv_delta.tv_usec/1000000.0;
1094
1095         tsc = t->tsc * tsc_tweak;
1096
1097         /* topo columns, print blanks on 1st (average) line */
1098         if (t == &average.threads) {
1099                 if (DO_BIC(BIC_Package))
1100                         outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
1101                 if (DO_BIC(BIC_Die))
1102                         outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
1103                 if (DO_BIC(BIC_Node))
1104                         outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
1105                 if (DO_BIC(BIC_Core))
1106                         outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
1107                 if (DO_BIC(BIC_CPU))
1108                         outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
1109                 if (DO_BIC(BIC_APIC))
1110                         outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
1111                 if (DO_BIC(BIC_X2APIC))
1112                         outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
1113         } else {
1114                 if (DO_BIC(BIC_Package)) {
1115                         if (p)
1116                                 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->package_id);
1117                         else
1118                                 outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
1119                 }
1120                 if (DO_BIC(BIC_Die)) {
1121                         if (c)
1122                                 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), cpus[t->cpu_id].die_id);
1123                         else
1124                                 outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
1125                 }
1126                 if (DO_BIC(BIC_Node)) {
1127                         if (t)
1128                                 outp += sprintf(outp, "%s%d",
1129                                                 (printed++ ? delim : ""),
1130                                               cpus[t->cpu_id].physical_node_id);
1131                         else
1132                                 outp += sprintf(outp, "%s-",
1133                                                 (printed++ ? delim : ""));
1134                 }
1135                 if (DO_BIC(BIC_Core)) {
1136                         if (c)
1137                                 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), c->core_id);
1138                         else
1139                                 outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
1140                 }
1141                 if (DO_BIC(BIC_CPU))
1142                         outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->cpu_id);
1143                 if (DO_BIC(BIC_APIC))
1144                         outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->apic_id);
1145                 if (DO_BIC(BIC_X2APIC))
1146                         outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->x2apic_id);
1147         }
1148
1149         if (DO_BIC(BIC_Avg_MHz))
1150                 outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""),
1151                         1.0 / units * t->aperf / interval_float);
1152
1153         if (DO_BIC(BIC_Busy))
1154                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->mperf/tsc);
1155
1156         if (DO_BIC(BIC_Bzy_MHz)) {
1157                 if (has_base_hz)
1158                         outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), base_hz / units * t->aperf / t->mperf);
1159                 else
1160                         outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""),
1161                                 tsc / units * t->aperf / t->mperf / interval_float);
1162         }
1163
1164         if (DO_BIC(BIC_TSC_MHz))
1165                 outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), 1.0 * t->tsc/units/interval_float);
1166
1167         if (DO_BIC(BIC_IPC))
1168                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 1.0 * t->instr_count / t->aperf);
1169
1170         /* IRQ */
1171         if (DO_BIC(BIC_IRQ)) {
1172                 if (sums_need_wide_columns)
1173                         outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), t->irq_count);
1174                 else
1175                         outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), t->irq_count);
1176         }
1177
1178         /* SMI */
1179         if (DO_BIC(BIC_SMI))
1180                 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->smi_count);
1181
1182         /* Added counters */
1183         for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
1184                 if (mp->format == FORMAT_RAW) {
1185                         if (mp->width == 32)
1186                                 outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int) t->counter[i]);
1187                         else
1188                                 outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), t->counter[i]);
1189                 } else if (mp->format == FORMAT_DELTA) {
1190                         if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
1191                                 outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), t->counter[i]);
1192                         else
1193                                 outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), t->counter[i]);
1194                 } else if (mp->format == FORMAT_PERCENT) {
1195                         if (mp->type == COUNTER_USEC)
1196                                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), t->counter[i]/interval_float/10000);
1197                         else
1198                                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->counter[i]/tsc);
1199                 }
1200         }
1201
1202         /* C1 */
1203         if (DO_BIC(BIC_CPU_c1))
1204                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->c1/tsc);
1205
1206
1207         /* print per-core data only for 1st thread in core */
1208         if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
1209                 goto done;
1210
1211         if (DO_BIC(BIC_CPU_c3))
1212                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c3/tsc);
1213         if (DO_BIC(BIC_CPU_c6))
1214                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c6/tsc);
1215         if (DO_BIC(BIC_CPU_c7))
1216                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c7/tsc);
1217
1218         /* Mod%c6 */
1219         if (DO_BIC(BIC_Mod_c6))
1220                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->mc6_us / tsc);
1221
1222         if (DO_BIC(BIC_CoreTmp))
1223                 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), c->core_temp_c);
1224
1225         for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
1226                 if (mp->format == FORMAT_RAW) {
1227                         if (mp->width == 32)
1228                                 outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int) c->counter[i]);
1229                         else
1230                                 outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), c->counter[i]);
1231                 } else if (mp->format == FORMAT_DELTA) {
1232                         if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
1233                                 outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), c->counter[i]);
1234                         else
1235                                 outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), c->counter[i]);
1236                 } else if (mp->format == FORMAT_PERCENT) {
1237                         outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->counter[i]/tsc);
1238                 }
1239         }
1240
1241         fmt8 = "%s%.2f";
1242
1243         if (DO_BIC(BIC_CorWatt) && (do_rapl & RAPL_PER_CORE_ENERGY))
1244                 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), c->core_energy * rapl_energy_units / interval_float);
1245         if (DO_BIC(BIC_Cor_J) && (do_rapl & RAPL_PER_CORE_ENERGY))
1246                 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), c->core_energy * rapl_energy_units);
1247
1248         /* print per-package data only for 1st core in package */
1249         if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
1250                 goto done;
1251
1252         /* PkgTmp */
1253         if (DO_BIC(BIC_PkgTmp))
1254                 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->pkg_temp_c);
1255
1256         /* GFXrc6 */
1257         if (DO_BIC(BIC_GFX_rc6)) {
1258                 if (p->gfx_rc6_ms == -1) {      /* detect GFX counter reset */
1259                         outp += sprintf(outp, "%s**.**", (printed++ ? delim : ""));
1260                 } else {
1261                         outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""),
1262                                 p->gfx_rc6_ms / 10.0 / interval_float);
1263                 }
1264         }
1265
1266         /* GFXMHz */
1267         if (DO_BIC(BIC_GFXMHz))
1268                 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->gfx_mhz);
1269
1270         /* GFXACTMHz */
1271         if (DO_BIC(BIC_GFXACTMHz))
1272                 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->gfx_act_mhz);
1273
1274         /* Totl%C0, Any%C0 GFX%C0 CPUGFX% */
1275         if (DO_BIC(BIC_Totl_c0))
1276                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_wtd_core_c0/tsc);
1277         if (DO_BIC(BIC_Any_c0))
1278                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_any_core_c0/tsc);
1279         if (DO_BIC(BIC_GFX_c0))
1280                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_any_gfxe_c0/tsc);
1281         if (DO_BIC(BIC_CPUGFX))
1282                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_both_core_gfxe_c0/tsc);
1283
1284         if (DO_BIC(BIC_Pkgpc2))
1285                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc2/tsc);
1286         if (DO_BIC(BIC_Pkgpc3))
1287                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc3/tsc);
1288         if (DO_BIC(BIC_Pkgpc6))
1289                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc6/tsc);
1290         if (DO_BIC(BIC_Pkgpc7))
1291                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc7/tsc);
1292         if (DO_BIC(BIC_Pkgpc8))
1293                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc8/tsc);
1294         if (DO_BIC(BIC_Pkgpc9))
1295                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc9/tsc);
1296         if (DO_BIC(BIC_Pkgpc10))
1297                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc10/tsc);
1298
1299         if (DO_BIC(BIC_CPU_LPI))
1300                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->cpu_lpi / 1000000.0 / interval_float);
1301         if (DO_BIC(BIC_SYS_LPI))
1302                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->sys_lpi / 1000000.0 / interval_float);
1303
1304         if (DO_BIC(BIC_PkgWatt))
1305                 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_pkg * rapl_energy_units / interval_float);
1306         if (DO_BIC(BIC_CorWatt) && !(do_rapl & RAPL_PER_CORE_ENERGY))
1307                 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_cores * rapl_energy_units / interval_float);
1308         if (DO_BIC(BIC_GFXWatt))
1309                 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_gfx * rapl_energy_units / interval_float);
1310         if (DO_BIC(BIC_RAMWatt))
1311                 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_dram * rapl_dram_energy_units / interval_float);
1312         if (DO_BIC(BIC_Pkg_J))
1313                 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_pkg * rapl_energy_units);
1314         if (DO_BIC(BIC_Cor_J) && !(do_rapl & RAPL_PER_CORE_ENERGY))
1315                 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_cores * rapl_energy_units);
1316         if (DO_BIC(BIC_GFX_J))
1317                 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_gfx * rapl_energy_units);
1318         if (DO_BIC(BIC_RAM_J))
1319                 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_dram * rapl_dram_energy_units);
1320         if (DO_BIC(BIC_PKG__))
1321                 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float);
1322         if (DO_BIC(BIC_RAM__))
1323                 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), 100.0 * p->rapl_dram_perf_status * rapl_time_units / interval_float);
1324
1325         for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
1326                 if (mp->format == FORMAT_RAW) {
1327                         if (mp->width == 32)
1328                                 outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int) p->counter[i]);
1329                         else
1330                                 outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), p->counter[i]);
1331                 } else if (mp->format == FORMAT_DELTA) {
1332                         if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
1333                                 outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), p->counter[i]);
1334                         else
1335                                 outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), p->counter[i]);
1336                 } else if (mp->format == FORMAT_PERCENT) {
1337                         outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->counter[i]/tsc);
1338                 }
1339         }
1340
1341 done:
1342         if (*(outp - 1) != '\n')
1343                 outp += sprintf(outp, "\n");
1344
1345         return 0;
1346 }
1347
1348 void flush_output_stdout(void)
1349 {
1350         FILE *filep;
1351
1352         if (outf == stderr)
1353                 filep = stdout;
1354         else
1355                 filep = outf;
1356
1357         fputs(output_buffer, filep);
1358         fflush(filep);
1359
1360         outp = output_buffer;
1361 }
1362 void flush_output_stderr(void)
1363 {
1364         fputs(output_buffer, outf);
1365         fflush(outf);
1366         outp = output_buffer;
1367 }
1368 void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
1369 {
1370         static int printed;
1371
1372         if (!printed || !summary_only)
1373                 print_header("\t");
1374
1375         format_counters(&average.threads, &average.cores, &average.packages);
1376
1377         printed = 1;
1378
1379         if (summary_only)
1380                 return;
1381
1382         for_all_cpus(format_counters, t, c, p);
1383 }
1384
/*
 * DELTA_WRAP32(new, old): old = (new - old) modulo 2^32.
 *
 * Both values are shifted into the upper half of a 64-bit word before
 * subtracting, so a counter that wrapped past 32 bits between the two
 * samples still yields the correct positive difference.
 *
 * "old" must be a modifiable lvalue.  The arguments are parenthesized and
 * the body is a do/while(0) statement, so the macro is safe with
 * expression arguments and inside an unbraced if/else; callers supply the
 * terminating semicolon.
 */
#define DELTA_WRAP32(new, old)						\
	do {								\
		(old) = ((((unsigned long long)(new) << 32) -		\
			  ((unsigned long long)(old) << 32)) >> 32);	\
	} while (0)
1387
1388 int
1389 delta_package(struct pkg_data *new, struct pkg_data *old)
1390 {
1391         int i;
1392         struct msr_counter *mp;
1393
1394
1395         if (DO_BIC(BIC_Totl_c0))
1396                 old->pkg_wtd_core_c0 = new->pkg_wtd_core_c0 - old->pkg_wtd_core_c0;
1397         if (DO_BIC(BIC_Any_c0))
1398                 old->pkg_any_core_c0 = new->pkg_any_core_c0 - old->pkg_any_core_c0;
1399         if (DO_BIC(BIC_GFX_c0))
1400                 old->pkg_any_gfxe_c0 = new->pkg_any_gfxe_c0 - old->pkg_any_gfxe_c0;
1401         if (DO_BIC(BIC_CPUGFX))
1402                 old->pkg_both_core_gfxe_c0 = new->pkg_both_core_gfxe_c0 - old->pkg_both_core_gfxe_c0;
1403
1404         old->pc2 = new->pc2 - old->pc2;
1405         if (DO_BIC(BIC_Pkgpc3))
1406                 old->pc3 = new->pc3 - old->pc3;
1407         if (DO_BIC(BIC_Pkgpc6))
1408                 old->pc6 = new->pc6 - old->pc6;
1409         if (DO_BIC(BIC_Pkgpc7))
1410                 old->pc7 = new->pc7 - old->pc7;
1411         old->pc8 = new->pc8 - old->pc8;
1412         old->pc9 = new->pc9 - old->pc9;
1413         old->pc10 = new->pc10 - old->pc10;
1414         old->cpu_lpi = new->cpu_lpi - old->cpu_lpi;
1415         old->sys_lpi = new->sys_lpi - old->sys_lpi;
1416         old->pkg_temp_c = new->pkg_temp_c;
1417
1418         /* flag an error when rc6 counter resets/wraps */
1419         if (old->gfx_rc6_ms >  new->gfx_rc6_ms)
1420                 old->gfx_rc6_ms = -1;
1421         else
1422                 old->gfx_rc6_ms = new->gfx_rc6_ms - old->gfx_rc6_ms;
1423
1424         old->gfx_mhz = new->gfx_mhz;
1425         old->gfx_act_mhz = new->gfx_act_mhz;
1426
1427         old->energy_pkg = new->energy_pkg - old->energy_pkg;
1428         old->energy_cores = new->energy_cores - old->energy_cores;
1429         old->energy_gfx = new->energy_gfx - old->energy_gfx;
1430         old->energy_dram = new->energy_dram - old->energy_dram;
1431         old->rapl_pkg_perf_status = new->rapl_pkg_perf_status - old->rapl_pkg_perf_status;
1432         old->rapl_dram_perf_status = new->rapl_dram_perf_status - old->rapl_dram_perf_status;
1433
1434         for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
1435                 if (mp->format == FORMAT_RAW)
1436                         old->counter[i] = new->counter[i];
1437                 else
1438                         old->counter[i] = new->counter[i] - old->counter[i];
1439         }
1440
1441         return 0;
1442 }
1443
1444 void
1445 delta_core(struct core_data *new, struct core_data *old)
1446 {
1447         int i;
1448         struct msr_counter *mp;
1449
1450         old->c3 = new->c3 - old->c3;
1451         old->c6 = new->c6 - old->c6;
1452         old->c7 = new->c7 - old->c7;
1453         old->core_temp_c = new->core_temp_c;
1454         old->mc6_us = new->mc6_us - old->mc6_us;
1455
1456         DELTA_WRAP32(new->core_energy, old->core_energy);
1457
1458         for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
1459                 if (mp->format == FORMAT_RAW)
1460                         old->counter[i] = new->counter[i];
1461                 else
1462                         old->counter[i] = new->counter[i] - old->counter[i];
1463         }
1464 }
1465
1466 int soft_c1_residency_display(int bic)
1467 {
1468         if (!DO_BIC(BIC_CPU_c1) || use_c1_residency_msr)
1469                 return 0;
1470
1471         return DO_BIC_READ(bic);
1472 }
1473
1474 /*
1475  * old = new - old
1476  */
1477 int
1478 delta_thread(struct thread_data *new, struct thread_data *old,
1479         struct core_data *core_delta)
1480 {
1481         int i;
1482         struct msr_counter *mp;
1483
1484         /* we run cpuid just the 1st time, copy the results */
1485         if (DO_BIC(BIC_APIC))
1486                 new->apic_id = old->apic_id;
1487         if (DO_BIC(BIC_X2APIC))
1488                 new->x2apic_id = old->x2apic_id;
1489
1490         /*
1491          * the timestamps from start of measurement interval are in "old"
1492          * the timestamp from end of measurement interval are in "new"
1493          * over-write old w/ new so we can print end of interval values
1494          */
1495
1496         timersub(&new->tv_begin, &old->tv_begin, &old->tv_delta);
1497         old->tv_begin = new->tv_begin;
1498         old->tv_end = new->tv_end;
1499
1500         old->tsc = new->tsc - old->tsc;
1501
1502         /* check for TSC < 1 Mcycles over interval */
1503         if (old->tsc < (1000 * 1000))
1504                 errx(-3, "Insanely slow TSC rate, TSC stops in idle?\n"
1505                      "You can disable all c-states by booting with \"idle=poll\"\n"
1506                      "or just the deep ones with \"processor.max_cstate=1\"");
1507
1508         old->c1 = new->c1 - old->c1;
1509
1510         if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz) ||
1511             soft_c1_residency_display(BIC_Avg_MHz)) {
1512                 if ((new->aperf > old->aperf) && (new->mperf > old->mperf)) {
1513                         old->aperf = new->aperf - old->aperf;
1514                         old->mperf = new->mperf - old->mperf;
1515                 } else {
1516                         return -1;
1517                 }
1518         }
1519
1520
1521         if (use_c1_residency_msr) {
1522                 /*
1523                  * Some models have a dedicated C1 residency MSR,
1524                  * which should be more accurate than the derivation below.
1525                  */
1526         } else {
1527                 /*
1528                  * As counter collection is not atomic,
1529                  * it is possible for mperf's non-halted cycles + idle states
1530                  * to exceed TSC's all cycles: show c1 = 0% in that case.
1531                  */
1532                 if ((old->mperf + core_delta->c3 + core_delta->c6 + core_delta->c7) > (old->tsc * tsc_tweak))
1533                         old->c1 = 0;
1534                 else {
1535                         /* normal case, derive c1 */
1536                         old->c1 = (old->tsc * tsc_tweak) - old->mperf - core_delta->c3
1537                                 - core_delta->c6 - core_delta->c7;
1538                 }
1539         }
1540
1541         if (old->mperf == 0) {
1542                 if (debug > 1)
1543                         fprintf(outf, "cpu%d MPERF 0!\n", old->cpu_id);
1544                 old->mperf = 1; /* divide by 0 protection */
1545         }
1546
1547         if (DO_BIC(BIC_IPC))
1548                 old->instr_count = new->instr_count - old->instr_count;
1549
1550         if (DO_BIC(BIC_IRQ))
1551                 old->irq_count = new->irq_count - old->irq_count;
1552
1553         if (DO_BIC(BIC_SMI))
1554                 old->smi_count = new->smi_count - old->smi_count;
1555
1556         for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
1557                 if (mp->format == FORMAT_RAW)
1558                         old->counter[i] = new->counter[i];
1559                 else
1560                         old->counter[i] = new->counter[i] - old->counter[i];
1561         }
1562         return 0;
1563 }
1564
1565 int delta_cpu(struct thread_data *t, struct core_data *c,
1566         struct pkg_data *p, struct thread_data *t2,
1567         struct core_data *c2, struct pkg_data *p2)
1568 {
1569         int retval = 0;
1570
1571         /* calculate core delta only for 1st thread in core */
1572         if (t->flags & CPU_IS_FIRST_THREAD_IN_CORE)
1573                 delta_core(c, c2);
1574
1575         /* always calculate thread delta */
1576         retval = delta_thread(t, t2, c2);       /* c2 is core delta */
1577         if (retval)
1578                 return retval;
1579
1580         /* calculate package delta only for 1st core in package */
1581         if (t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)
1582                 retval = delta_package(p, p2);
1583
1584         return retval;
1585 }
1586
1587 void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
1588 {
1589         int i;
1590         struct msr_counter  *mp;
1591
1592         t->tv_begin.tv_sec = 0;
1593         t->tv_begin.tv_usec = 0;
1594         t->tv_end.tv_sec = 0;
1595         t->tv_end.tv_usec = 0;
1596         t->tv_delta.tv_sec = 0;
1597         t->tv_delta.tv_usec = 0;
1598
1599         t->tsc = 0;
1600         t->aperf = 0;
1601         t->mperf = 0;
1602         t->c1 = 0;
1603
1604         t->instr_count = 0;
1605
1606         t->irq_count = 0;
1607         t->smi_count = 0;
1608
1609         /* tells format_counters to dump all fields from this set */
1610         t->flags = CPU_IS_FIRST_THREAD_IN_CORE | CPU_IS_FIRST_CORE_IN_PACKAGE;
1611
1612         c->c3 = 0;
1613         c->c6 = 0;
1614         c->c7 = 0;
1615         c->mc6_us = 0;
1616         c->core_temp_c = 0;
1617         c->core_energy = 0;
1618
1619         p->pkg_wtd_core_c0 = 0;
1620         p->pkg_any_core_c0 = 0;
1621         p->pkg_any_gfxe_c0 = 0;
1622         p->pkg_both_core_gfxe_c0 = 0;
1623
1624         p->pc2 = 0;
1625         if (DO_BIC(BIC_Pkgpc3))
1626                 p->pc3 = 0;
1627         if (DO_BIC(BIC_Pkgpc6))
1628                 p->pc6 = 0;
1629         if (DO_BIC(BIC_Pkgpc7))
1630                 p->pc7 = 0;
1631         p->pc8 = 0;
1632         p->pc9 = 0;
1633         p->pc10 = 0;
1634         p->cpu_lpi = 0;
1635         p->sys_lpi = 0;
1636
1637         p->energy_pkg = 0;
1638         p->energy_dram = 0;
1639         p->energy_cores = 0;
1640         p->energy_gfx = 0;
1641         p->rapl_pkg_perf_status = 0;
1642         p->rapl_dram_perf_status = 0;
1643         p->pkg_temp_c = 0;
1644
1645         p->gfx_rc6_ms = 0;
1646         p->gfx_mhz = 0;
1647         p->gfx_act_mhz = 0;
1648         for (i = 0, mp = sys.tp; mp; i++, mp = mp->next)
1649                 t->counter[i] = 0;
1650
1651         for (i = 0, mp = sys.cp; mp; i++, mp = mp->next)
1652                 c->counter[i] = 0;
1653
1654         for (i = 0, mp = sys.pp; mp; i++, mp = mp->next)
1655                 p->counter[i] = 0;
1656 }
1657 int sum_counters(struct thread_data *t, struct core_data *c,
1658         struct pkg_data *p)
1659 {
1660         int i;
1661         struct msr_counter *mp;
1662
1663         /* copy un-changing apic_id's */
1664         if (DO_BIC(BIC_APIC))
1665                 average.threads.apic_id = t->apic_id;
1666         if (DO_BIC(BIC_X2APIC))
1667                 average.threads.x2apic_id = t->x2apic_id;
1668
1669         /* remember first tv_begin */
1670         if (average.threads.tv_begin.tv_sec == 0)
1671                 average.threads.tv_begin = t->tv_begin;
1672
1673         /* remember last tv_end */
1674         average.threads.tv_end = t->tv_end;
1675
1676         average.threads.tsc += t->tsc;
1677         average.threads.aperf += t->aperf;
1678         average.threads.mperf += t->mperf;
1679         average.threads.c1 += t->c1;
1680
1681         average.threads.instr_count += t->instr_count;
1682
1683         average.threads.irq_count += t->irq_count;
1684         average.threads.smi_count += t->smi_count;
1685
1686         for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
1687                 if (mp->format == FORMAT_RAW)
1688                         continue;
1689                 average.threads.counter[i] += t->counter[i];
1690         }
1691
1692         /* sum per-core values only for 1st thread in core */
1693         if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
1694                 return 0;
1695
1696         average.cores.c3 += c->c3;
1697         average.cores.c6 += c->c6;
1698         average.cores.c7 += c->c7;
1699         average.cores.mc6_us += c->mc6_us;
1700
1701         average.cores.core_temp_c = MAX(average.cores.core_temp_c, c->core_temp_c);
1702
1703         average.cores.core_energy += c->core_energy;
1704
1705         for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
1706                 if (mp->format == FORMAT_RAW)
1707                         continue;
1708                 average.cores.counter[i] += c->counter[i];
1709         }
1710
1711         /* sum per-pkg values only for 1st core in pkg */
1712         if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
1713                 return 0;
1714
1715         if (DO_BIC(BIC_Totl_c0))
1716                 average.packages.pkg_wtd_core_c0 += p->pkg_wtd_core_c0;
1717         if (DO_BIC(BIC_Any_c0))
1718                 average.packages.pkg_any_core_c0 += p->pkg_any_core_c0;
1719         if (DO_BIC(BIC_GFX_c0))
1720                 average.packages.pkg_any_gfxe_c0 += p->pkg_any_gfxe_c0;
1721         if (DO_BIC(BIC_CPUGFX))
1722                 average.packages.pkg_both_core_gfxe_c0 += p->pkg_both_core_gfxe_c0;
1723
1724         average.packages.pc2 += p->pc2;
1725         if (DO_BIC(BIC_Pkgpc3))
1726                 average.packages.pc3 += p->pc3;
1727         if (DO_BIC(BIC_Pkgpc6))
1728                 average.packages.pc6 += p->pc6;
1729         if (DO_BIC(BIC_Pkgpc7))
1730                 average.packages.pc7 += p->pc7;
1731         average.packages.pc8 += p->pc8;
1732         average.packages.pc9 += p->pc9;
1733         average.packages.pc10 += p->pc10;
1734
1735         average.packages.cpu_lpi = p->cpu_lpi;
1736         average.packages.sys_lpi = p->sys_lpi;
1737
1738         average.packages.energy_pkg += p->energy_pkg;
1739         average.packages.energy_dram += p->energy_dram;
1740         average.packages.energy_cores += p->energy_cores;
1741         average.packages.energy_gfx += p->energy_gfx;
1742
1743         average.packages.gfx_rc6_ms = p->gfx_rc6_ms;
1744         average.packages.gfx_mhz = p->gfx_mhz;
1745         average.packages.gfx_act_mhz = p->gfx_act_mhz;
1746
1747         average.packages.pkg_temp_c = MAX(average.packages.pkg_temp_c, p->pkg_temp_c);
1748
1749         average.packages.rapl_pkg_perf_status += p->rapl_pkg_perf_status;
1750         average.packages.rapl_dram_perf_status += p->rapl_dram_perf_status;
1751
1752         for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
1753                 if (mp->format == FORMAT_RAW)
1754                         continue;
1755                 average.packages.counter[i] += p->counter[i];
1756         }
1757         return 0;
1758 }
1759 /*
1760  * sum the counters for all cpus in the system
1761  * compute the weighted average
1762  */
1763 void compute_average(struct thread_data *t, struct core_data *c,
1764         struct pkg_data *p)
1765 {
1766         int i;
1767         struct msr_counter *mp;
1768
1769         clear_counters(&average.threads, &average.cores, &average.packages);
1770
1771         for_all_cpus(sum_counters, t, c, p);
1772
1773         /* Use the global time delta for the average. */
1774         average.threads.tv_delta = tv_delta;
1775
1776         average.threads.tsc /= topo.num_cpus;
1777         average.threads.aperf /= topo.num_cpus;
1778         average.threads.mperf /= topo.num_cpus;
1779         average.threads.instr_count /= topo.num_cpus;
1780         average.threads.c1 /= topo.num_cpus;
1781
1782         if (average.threads.irq_count > 9999999)
1783                 sums_need_wide_columns = 1;
1784
1785         average.cores.c3 /= topo.num_cores;
1786         average.cores.c6 /= topo.num_cores;
1787         average.cores.c7 /= topo.num_cores;
1788         average.cores.mc6_us /= topo.num_cores;
1789
1790         if (DO_BIC(BIC_Totl_c0))
1791                 average.packages.pkg_wtd_core_c0 /= topo.num_packages;
1792         if (DO_BIC(BIC_Any_c0))
1793                 average.packages.pkg_any_core_c0 /= topo.num_packages;
1794         if (DO_BIC(BIC_GFX_c0))
1795                 average.packages.pkg_any_gfxe_c0 /= topo.num_packages;
1796         if (DO_BIC(BIC_CPUGFX))
1797                 average.packages.pkg_both_core_gfxe_c0 /= topo.num_packages;
1798
1799         average.packages.pc2 /= topo.num_packages;
1800         if (DO_BIC(BIC_Pkgpc3))
1801                 average.packages.pc3 /= topo.num_packages;
1802         if (DO_BIC(BIC_Pkgpc6))
1803                 average.packages.pc6 /= topo.num_packages;
1804         if (DO_BIC(BIC_Pkgpc7))
1805                 average.packages.pc7 /= topo.num_packages;
1806
1807         average.packages.pc8 /= topo.num_packages;
1808         average.packages.pc9 /= topo.num_packages;
1809         average.packages.pc10 /= topo.num_packages;
1810
1811         for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
1812                 if (mp->format == FORMAT_RAW)
1813                         continue;
1814                 if (mp->type == COUNTER_ITEMS) {
1815                         if (average.threads.counter[i] > 9999999)
1816                                 sums_need_wide_columns = 1;
1817                         continue;
1818                 }
1819                 average.threads.counter[i] /= topo.num_cpus;
1820         }
1821         for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
1822                 if (mp->format == FORMAT_RAW)
1823                         continue;
1824                 if (mp->type == COUNTER_ITEMS) {
1825                         if (average.cores.counter[i] > 9999999)
1826                                 sums_need_wide_columns = 1;
1827                 }
1828                 average.cores.counter[i] /= topo.num_cores;
1829         }
1830         for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
1831                 if (mp->format == FORMAT_RAW)
1832                         continue;
1833                 if (mp->type == COUNTER_ITEMS) {
1834                         if (average.packages.counter[i] > 9999999)
1835                                 sums_need_wide_columns = 1;
1836                 }
1837                 average.packages.counter[i] /= topo.num_packages;
1838         }
1839 }
1840
/*
 * rdtsc()
 *
 * Execute the x86 RDTSC instruction and return the 64-bit result
 * assembled from EDX (high half) and EAX (low half).
 *
 * __asm__/__volatile__ are used rather than the bare asm/volatile
 * spelling so that this also builds in strict ISO C modes, where
 * "asm" is not a keyword.
 */
static unsigned long long rdtsc(void)
{
	unsigned int low, high;

	__asm__ __volatile__("rdtsc" : "=a" (low), "=d" (high));

	return low | ((unsigned long long)high) << 32;
}
1849
/*
 * fopen_or_die()
 *
 * fopen() "path" with "mode" and return the stream.
 * On failure, report the path via err() and exit with status 1.
 */
FILE *fopen_or_die(const char *path, const char *mode)
{
	FILE *stream;

	stream = fopen(path, mode);
	if (stream == NULL)
		err(1, "%s: open failed", path);

	return stream;
}
/*
 * snapshot_sysfs_counter()
 *
 * Open "path", parse one unsigned decimal integer from it and return
 * that value.  Exits (via fopen_or_die() or err()) when the file can
 * not be opened or does not start with a number.
 */
unsigned long long snapshot_sysfs_counter(char *path)
{
	FILE *fp;
	int retval;
	unsigned long long counter;

	fp = fopen_or_die(path, "r");

	/* %llu matches the unsigned destination; %lld would clamp large values */
	retval = fscanf(fp, "%llu", &counter);
	if (retval != 1)
		err(1, "snapshot_sysfs_counter(%s)", path);

	fclose(fp);

	return counter;
}
1882
1883 int get_mp(int cpu, struct msr_counter *mp, unsigned long long *counterp)
1884 {
1885         if (mp->msr_num != 0) {
1886                 if (get_msr(cpu, mp->msr_num, counterp))
1887                         return -1;
1888         } else {
1889                 char path[128 + PATH_BYTES];
1890
1891                 if (mp->flags & SYSFS_PERCPU) {
1892                         sprintf(path, "/sys/devices/system/cpu/cpu%d/%s",
1893                                  cpu, mp->path);
1894
1895                         *counterp = snapshot_sysfs_counter(path);
1896                 } else {
1897                         *counterp = snapshot_sysfs_counter(mp->path);
1898                 }
1899         }
1900
1901         return 0;
1902 }
1903
1904 int get_epb(int cpu)
1905 {
1906         char path[128 + PATH_BYTES];
1907         unsigned long long msr;
1908         int ret, epb = -1;
1909         FILE *fp;
1910
1911         sprintf(path, "/sys/devices/system/cpu/cpu%d/power/energy_perf_bias", cpu);
1912
1913         fp = fopen(path, "r");
1914         if (!fp)
1915                 goto msr_fallback;
1916
1917         ret = fscanf(fp, "%d", &epb);
1918         if (ret != 1)
1919                 err(1, "%s(%s)", __func__, path);
1920
1921         fclose(fp);
1922
1923         return epb;
1924
1925 msr_fallback:
1926         get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, &msr);
1927
1928         return msr & 0xf;
1929 }
1930
1931 void get_apic_id(struct thread_data *t)
1932 {
1933         unsigned int eax, ebx, ecx, edx;
1934
1935         if (DO_BIC(BIC_APIC)) {
1936                 eax = ebx = ecx = edx = 0;
1937                 __cpuid(1, eax, ebx, ecx, edx);
1938
1939                 t->apic_id = (ebx >> 24) & 0xff;
1940         }
1941
1942         if (!DO_BIC(BIC_X2APIC))
1943                 return;
1944
1945         if (authentic_amd || hygon_genuine) {
1946                 unsigned int topology_extensions;
1947
1948                 if (max_extended_level < 0x8000001e)
1949                         return;
1950
1951                 eax = ebx = ecx = edx = 0;
1952                 __cpuid(0x80000001, eax, ebx, ecx, edx);
1953                         topology_extensions = ecx & (1 << 22);
1954
1955                 if (topology_extensions == 0)
1956                         return;
1957
1958                 eax = ebx = ecx = edx = 0;
1959                 __cpuid(0x8000001e, eax, ebx, ecx, edx);
1960
1961                 t->x2apic_id = eax;
1962                 return;
1963         }
1964
1965         if (!genuine_intel)
1966                 return;
1967
1968         if (max_level < 0xb)
1969                 return;
1970
1971         ecx = 0;
1972         __cpuid(0xb, eax, ebx, ecx, edx);
1973         t->x2apic_id = edx;
1974
1975         if (debug && (t->apic_id != (t->x2apic_id & 0xff)))
1976                 fprintf(outf, "cpu%d: BIOS BUG: apic 0x%x x2apic 0x%x\n",
1977                                 t->cpu_id, t->apic_id, t->x2apic_id);
1978 }
1979
1980 /*
1981  * get_counters(...)
1982  * migrate to cpu
1983  * acquire and record local counters for that cpu
1984  */
1985 int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
1986 {
1987         int cpu = t->cpu_id;
1988         unsigned long long msr;
1989         int aperf_mperf_retry_count = 0;
1990         struct msr_counter *mp;
1991         int i;
1992
1993         if (cpu_migrate(cpu)) {
1994                 fprintf(outf, "get_counters: Could not migrate to CPU %d\n", cpu);
1995                 return -1;
1996         }
1997
1998         gettimeofday(&t->tv_begin, (struct timezone *)NULL);
1999
2000         if (first_counter_read)
2001                 get_apic_id(t);
2002 retry:
2003         t->tsc = rdtsc();       /* we are running on local CPU of interest */
2004
2005         if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz) ||
2006             soft_c1_residency_display(BIC_Avg_MHz)) {
2007                 unsigned long long tsc_before, tsc_between, tsc_after, aperf_time, mperf_time;
2008
2009                 /*
2010                  * The TSC, APERF and MPERF must be read together for
2011                  * APERF/MPERF and MPERF/TSC to give accurate results.
2012                  *
2013                  * Unfortunately, APERF and MPERF are read by
2014                  * individual system call, so delays may occur
2015                  * between them.  If the time to read them
2016                  * varies by a large amount, we re-read them.
2017                  */
2018
2019                 /*
2020                  * This initial dummy APERF read has been seen to
2021                  * reduce jitter in the subsequent reads.
2022                  */
2023
2024                 if (get_msr(cpu, MSR_IA32_APERF, &t->aperf))
2025                         return -3;
2026
2027                 t->tsc = rdtsc();       /* re-read close to APERF */
2028
2029                 tsc_before = t->tsc;
2030
2031                 if (get_msr(cpu, MSR_IA32_APERF, &t->aperf))
2032                         return -3;
2033
2034                 tsc_between = rdtsc();
2035
2036                 if (get_msr(cpu, MSR_IA32_MPERF, &t->mperf))
2037                         return -4;
2038
2039                 tsc_after = rdtsc();
2040
2041                 aperf_time = tsc_between - tsc_before;
2042                 mperf_time = tsc_after - tsc_between;
2043
2044                 /*
2045                  * If the system call latency to read APERF and MPERF
2046                  * differ by more than 2x, then try again.
2047                  */
2048                 if ((aperf_time > (2 * mperf_time)) || (mperf_time > (2 * aperf_time))) {
2049                         aperf_mperf_retry_count++;
2050                         if (aperf_mperf_retry_count < 5)
2051                                 goto retry;
2052                         else
2053                                 warnx("cpu%d jitter %lld %lld",
2054                                         cpu, aperf_time, mperf_time);
2055                 }
2056                 aperf_mperf_retry_count = 0;
2057
2058                 t->aperf = t->aperf * aperf_mperf_multiplier;
2059                 t->mperf = t->mperf * aperf_mperf_multiplier;
2060         }
2061
2062         if (DO_BIC(BIC_IPC))
2063                 if (read(get_instr_count_fd(cpu), &t->instr_count, sizeof(long long)) != sizeof(long long))
2064                         return -4;
2065
2066         if (DO_BIC(BIC_IRQ))
2067                 t->irq_count = irqs_per_cpu[cpu];
2068         if (DO_BIC(BIC_SMI)) {
2069                 if (get_msr(cpu, MSR_SMI_COUNT, &msr))
2070                         return -5;
2071                 t->smi_count = msr & 0xFFFFFFFF;
2072         }
2073         if (DO_BIC(BIC_CPU_c1) && use_c1_residency_msr) {
2074                 if (get_msr(cpu, MSR_CORE_C1_RES, &t->c1))
2075                         return -6;
2076         }
2077
2078         for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
2079                 if (get_mp(cpu, mp, &t->counter[i]))
2080                         return -10;
2081         }
2082
2083         /* collect core counters only for 1st thread in core */
2084         if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
2085                 goto done;
2086
2087         if (DO_BIC(BIC_CPU_c3) || soft_c1_residency_display(BIC_CPU_c3)) {
2088                 if (get_msr(cpu, MSR_CORE_C3_RESIDENCY, &c->c3))
2089                         return -6;
2090         }
2091
2092         if ((DO_BIC(BIC_CPU_c6) || soft_c1_residency_display(BIC_CPU_c6)) && !do_knl_cstates) {
2093                 if (get_msr(cpu, MSR_CORE_C6_RESIDENCY, &c->c6))
2094                         return -7;
2095         } else if (do_knl_cstates || soft_c1_residency_display(BIC_CPU_c6)) {
2096                 if (get_msr(cpu, MSR_KNL_CORE_C6_RESIDENCY, &c->c6))
2097                         return -7;
2098         }
2099
2100         if (DO_BIC(BIC_CPU_c7) || soft_c1_residency_display(BIC_CPU_c7)) {
2101                 if (get_msr(cpu, MSR_CORE_C7_RESIDENCY, &c->c7))
2102                         return -8;
2103                 else if (t->is_atom) {
2104                         /*
2105                          * For Atom CPUs that has core cstate deeper than c6,
2106                          * MSR_CORE_C6_RESIDENCY returns residency of cc6 and deeper.
2107                          * Minus CC7 (and deeper cstates) residency to get
2108                          * accturate cc6 residency.
2109                          */
2110                         c->c6 -= c->c7;
2111                 }
2112         }
2113
2114         if (DO_BIC(BIC_Mod_c6))
2115                 if (get_msr(cpu, MSR_MODULE_C6_RES_MS, &c->mc6_us))
2116                         return -8;
2117
2118         if (DO_BIC(BIC_CoreTmp)) {
2119                 if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr))
2120                         return -9;
2121                 c->core_temp_c = tcc_activation_temp - ((msr >> 16) & 0x7F);
2122         }
2123
2124         if (do_rapl & RAPL_AMD_F17H) {
2125                 if (get_msr(cpu, MSR_CORE_ENERGY_STAT, &msr))
2126                         return -14;
2127                 c->core_energy = msr & 0xFFFFFFFF;
2128         }
2129
2130         for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
2131                 if (get_mp(cpu, mp, &c->counter[i]))
2132                         return -10;
2133         }
2134
2135         /* collect package counters only for 1st core in package */
2136         if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
2137                 goto done;
2138
2139         if (DO_BIC(BIC_Totl_c0)) {
2140                 if (get_msr(cpu, MSR_PKG_WEIGHTED_CORE_C0_RES, &p->pkg_wtd_core_c0))
2141                         return -10;
2142         }
2143         if (DO_BIC(BIC_Any_c0)) {
2144                 if (get_msr(cpu, MSR_PKG_ANY_CORE_C0_RES, &p->pkg_any_core_c0))
2145                         return -11;
2146         }
2147         if (DO_BIC(BIC_GFX_c0)) {
2148                 if (get_msr(cpu, MSR_PKG_ANY_GFXE_C0_RES, &p->pkg_any_gfxe_c0))
2149                         return -12;
2150         }
2151         if (DO_BIC(BIC_CPUGFX)) {
2152                 if (get_msr(cpu, MSR_PKG_BOTH_CORE_GFXE_C0_RES, &p->pkg_both_core_gfxe_c0))
2153                         return -13;
2154         }
2155         if (DO_BIC(BIC_Pkgpc3))
2156                 if (get_msr(cpu, MSR_PKG_C3_RESIDENCY, &p->pc3))
2157                         return -9;
2158         if (DO_BIC(BIC_Pkgpc6)) {
2159                 if (do_slm_cstates) {
2160                         if (get_msr(cpu, MSR_ATOM_PKG_C6_RESIDENCY, &p->pc6))
2161                                 return -10;
2162                 } else {
2163                         if (get_msr(cpu, MSR_PKG_C6_RESIDENCY, &p->pc6))
2164                                 return -10;
2165                 }
2166         }
2167
2168         if (DO_BIC(BIC_Pkgpc2))
2169                 if (get_msr(cpu, MSR_PKG_C2_RESIDENCY, &p->pc2))
2170                         return -11;
2171         if (DO_BIC(BIC_Pkgpc7))
2172                 if (get_msr(cpu, MSR_PKG_C7_RESIDENCY, &p->pc7))
2173                         return -12;
2174         if (DO_BIC(BIC_Pkgpc8))
2175                 if (get_msr(cpu, MSR_PKG_C8_RESIDENCY, &p->pc8))
2176                         return -13;
2177         if (DO_BIC(BIC_Pkgpc9))
2178                 if (get_msr(cpu, MSR_PKG_C9_RESIDENCY, &p->pc9))
2179                         return -13;
2180         if (DO_BIC(BIC_Pkgpc10))
2181                 if (get_msr(cpu, MSR_PKG_C10_RESIDENCY, &p->pc10))
2182                         return -13;
2183
2184         if (DO_BIC(BIC_CPU_LPI))
2185                 p->cpu_lpi = cpuidle_cur_cpu_lpi_us;
2186         if (DO_BIC(BIC_SYS_LPI))
2187                 p->sys_lpi = cpuidle_cur_sys_lpi_us;
2188
2189         if (do_rapl & RAPL_PKG) {
2190                 if (get_msr_sum(cpu, MSR_PKG_ENERGY_STATUS, &msr))
2191                         return -13;
2192                 p->energy_pkg = msr;
2193         }
2194         if (do_rapl & RAPL_CORES_ENERGY_STATUS) {
2195                 if (get_msr_sum(cpu, MSR_PP0_ENERGY_STATUS, &msr))
2196                         return -14;
2197                 p->energy_cores = msr;
2198         }
2199         if (do_rapl & RAPL_DRAM) {
2200                 if (get_msr_sum(cpu, MSR_DRAM_ENERGY_STATUS, &msr))
2201                         return -15;
2202                 p->energy_dram = msr;
2203         }
2204         if (do_rapl & RAPL_GFX) {
2205                 if (get_msr_sum(cpu, MSR_PP1_ENERGY_STATUS, &msr))
2206                         return -16;
2207                 p->energy_gfx = msr;
2208         }
2209         if (do_rapl & RAPL_PKG_PERF_STATUS) {
2210                 if (get_msr_sum(cpu, MSR_PKG_PERF_STATUS, &msr))
2211                         return -16;
2212                 p->rapl_pkg_perf_status = msr;
2213         }
2214         if (do_rapl & RAPL_DRAM_PERF_STATUS) {
2215                 if (get_msr_sum(cpu, MSR_DRAM_PERF_STATUS, &msr))
2216                         return -16;
2217                 p->rapl_dram_perf_status = msr;
2218         }
2219         if (do_rapl & RAPL_AMD_F17H) {
2220                 if (get_msr_sum(cpu, MSR_PKG_ENERGY_STAT, &msr))
2221                         return -13;
2222                 p->energy_pkg = msr;
2223         }
2224         if (DO_BIC(BIC_PkgTmp)) {
2225                 if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr))
2226                         return -17;
2227                 p->pkg_temp_c = tcc_activation_temp - ((msr >> 16) & 0x7F);
2228         }
2229
2230         if (DO_BIC(BIC_GFX_rc6))
2231                 p->gfx_rc6_ms = gfx_cur_rc6_ms;
2232
2233         if (DO_BIC(BIC_GFXMHz))
2234                 p->gfx_mhz = gfx_cur_mhz;
2235
2236         if (DO_BIC(BIC_GFXACTMHz))
2237                 p->gfx_act_mhz = gfx_act_mhz;
2238
2239         for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
2240                 if (get_mp(cpu, mp, &p->counter[i]))
2241                         return -10;
2242         }
2243 done:
2244         gettimeofday(&t->tv_end, (struct timezone *)NULL);
2245
2246         return 0;
2247 }
2248
/*
 * MSR_PKG_CST_CONFIG_CONTROL decoding for pkg_cstate_limit:
 * If you change the values, note they are used both in comparisons
 * (>= PCL__7) and to index pkg_cstate_limit_strings[].
 */

#define PCLUKN 0 /* Unknown */
#define PCLRSV 1 /* Reserved */
#define PCL__0 2 /* PC0 */
#define PCL__1 3 /* PC1 */
#define PCL__2 4 /* PC2 */
#define PCL__3 5 /* PC3 */
#define PCL__4 6 /* PC4 */
#define PCL__6 7 /* PC6 */
#define PCL_6N 8 /* PC6 No Retention */
#define PCL_6R 9 /* PC6 Retention */
#define PCL__7 10 /* PC7 */
#define PCL_7S 11 /* PC7 Shrink */
#define PCL__8 12 /* PC8 */
#define PCL__9 13 /* PC9 */
#define PCL_10 14 /* PC10 */
#define PCLUNL 15 /* Unlimited */

int pkg_cstate_limit = PCLUKN;

/*
 * Printable name of each PCL* value.  Designated initializers tie every
 * string to its constant, so the table cannot drift out of step with
 * the numbering above.
 */
char *pkg_cstate_limit_strings[] = {
	[PCLUKN] = "unknown",
	[PCLRSV] = "reserved",
	[PCL__0] = "pc0",
	[PCL__1] = "pc1",
	[PCL__2] = "pc2",
	[PCL__3] = "pc3",
	[PCL__4] = "pc4",
	[PCL__6] = "pc6",
	[PCL_6N] = "pc6n",
	[PCL_6R] = "pc6r",
	[PCL__7] = "pc7",
	[PCL_7S] = "pc7s",
	[PCL__8] = "pc8",
	[PCL__9] = "pc9",
	[PCL_10] = "pc10",
	[PCLUNL] = "unlimited",
};

/* per-model tables, each holding 16 PCL* values */
int nhm_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCL__3, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
int snb_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCL__7, PCL_7S, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
int hsw_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL__3, PCL__6, PCL__7, PCL_7S, PCL__8, PCL__9, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
int slv_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCLRSV, PCLRSV, PCL__4, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7};
int amt_pkg_cstate_limits[16] = {PCLUNL, PCL__1, PCL__2, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
int phi_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
int glm_pkg_cstate_limits[16] = {PCLUNL, PCL__1, PCL__3, PCL__6, PCL__7, PCL_7S, PCL__8, PCL__9, PCL_10, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
int skx_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
int icx_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL__6, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
2285
2286 static void
2287 calculate_tsc_tweak()
2288 {
2289         tsc_tweak = base_hz / tsc_hz;
2290 }
2291
2292 void prewake_cstate_probe(unsigned int family, unsigned int model);
2293
2294 static void
2295 dump_nhm_platform_info(void)
2296 {
2297         unsigned long long msr;
2298         unsigned int ratio;
2299
2300         get_msr(base_cpu, MSR_PLATFORM_INFO, &msr);
2301
2302         fprintf(outf, "cpu%d: MSR_PLATFORM_INFO: 0x%08llx\n", base_cpu, msr);
2303
2304         ratio = (msr >> 40) & 0xFF;
2305         fprintf(outf, "%d * %.1f = %.1f MHz max efficiency frequency\n",
2306                 ratio, bclk, ratio * bclk);
2307
2308         ratio = (msr >> 8) & 0xFF;
2309         fprintf(outf, "%d * %.1f = %.1f MHz base frequency\n",
2310                 ratio, bclk, ratio * bclk);
2311
2312         get_msr(base_cpu, MSR_IA32_POWER_CTL, &msr);
2313         fprintf(outf, "cpu%d: MSR_IA32_POWER_CTL: 0x%08llx (C1E auto-promotion: %sabled)\n",
2314                 base_cpu, msr, msr & 0x2 ? "EN" : "DIS");
2315
2316         /* C-state Pre-wake Disable (CSTATE_PREWAKE_DISABLE) */
2317         if (dis_cstate_prewake)
2318                 fprintf(outf, "C-state Pre-wake: %sabled\n",
2319                         msr & 0x40000000 ? "DIS" : "EN");
2320
2321         return;
2322 }
2323
2324 static void
2325 dump_hsw_turbo_ratio_limits(void)
2326 {
2327         unsigned long long msr;
2328         unsigned int ratio;
2329
2330         get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT2, &msr);
2331
2332         fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT2: 0x%08llx\n", base_cpu, msr);
2333
2334         ratio = (msr >> 8) & 0xFF;
2335         if (ratio)
2336                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 18 active cores\n",
2337                         ratio, bclk, ratio * bclk);
2338
2339         ratio = (msr >> 0) & 0xFF;
2340         if (ratio)
2341                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 17 active cores\n",
2342                         ratio, bclk, ratio * bclk);
2343         return;
2344 }
2345
2346 static void
2347 dump_ivt_turbo_ratio_limits(void)
2348 {
2349         unsigned long long msr;
2350         unsigned int ratio;
2351
2352         get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT1, &msr);
2353
2354         fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", base_cpu, msr);
2355
2356         ratio = (msr >> 56) & 0xFF;
2357         if (ratio)
2358                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 16 active cores\n",
2359                         ratio, bclk, ratio * bclk);
2360
2361         ratio = (msr >> 48) & 0xFF;
2362         if (ratio)
2363                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 15 active cores\n",
2364                         ratio, bclk, ratio * bclk);
2365
2366         ratio = (msr >> 40) & 0xFF;
2367         if (ratio)
2368                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 14 active cores\n",
2369                         ratio, bclk, ratio * bclk);
2370
2371         ratio = (msr >> 32) & 0xFF;
2372         if (ratio)
2373                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 13 active cores\n",
2374                         ratio, bclk, ratio * bclk);
2375
2376         ratio = (msr >> 24) & 0xFF;
2377         if (ratio)
2378                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 12 active cores\n",
2379                         ratio, bclk, ratio * bclk);
2380
2381         ratio = (msr >> 16) & 0xFF;
2382         if (ratio)
2383                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 11 active cores\n",
2384                         ratio, bclk, ratio * bclk);
2385
2386         ratio = (msr >> 8) & 0xFF;
2387         if (ratio)
2388                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 10 active cores\n",
2389                         ratio, bclk, ratio * bclk);
2390
2391         ratio = (msr >> 0) & 0xFF;
2392         if (ratio)
2393                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 9 active cores\n",
2394                         ratio, bclk, ratio * bclk);
2395         return;
2396 }
2397 int has_turbo_ratio_group_limits(int family, int model)
2398 {
2399
2400         if (!genuine_intel)
2401                 return 0;
2402
2403         switch (model) {
2404         case INTEL_FAM6_ATOM_GOLDMONT:
2405         case INTEL_FAM6_SKYLAKE_X:
2406         case INTEL_FAM6_ICELAKE_X:
2407         case INTEL_FAM6_ATOM_GOLDMONT_D:
2408         case INTEL_FAM6_ATOM_TREMONT_D:
2409                 return 1;
2410         }
2411         return 0;
2412 }
2413
2414 static void
2415 dump_turbo_ratio_limits(int family, int model)
2416 {
2417         unsigned long long msr, core_counts;
2418         unsigned int ratio, group_size;
2419
2420         get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT, &msr);
2421         fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n", base_cpu, msr);
2422
2423         if (has_turbo_ratio_group_limits(family, model)) {
2424                 get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT1, &core_counts);
2425                 fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", base_cpu, core_counts);
2426         } else {
2427                 core_counts = 0x0807060504030201;
2428         }
2429
2430         ratio = (msr >> 56) & 0xFF;
2431         group_size = (core_counts >> 56) & 0xFF;
2432         if (ratio)
2433                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
2434                         ratio, bclk, ratio * bclk, group_size);
2435
2436         ratio = (msr >> 48) & 0xFF;
2437         group_size = (core_counts >> 48) & 0xFF;
2438         if (ratio)
2439                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
2440                         ratio, bclk, ratio * bclk, group_size);
2441
2442         ratio = (msr >> 40) & 0xFF;
2443         group_size = (core_counts >> 40) & 0xFF;
2444         if (ratio)
2445                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
2446                         ratio, bclk, ratio * bclk, group_size);
2447
2448         ratio = (msr >> 32) & 0xFF;
2449         group_size = (core_counts >> 32) & 0xFF;
2450         if (ratio)
2451                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
2452                         ratio, bclk, ratio * bclk, group_size);
2453
2454         ratio = (msr >> 24) & 0xFF;
2455         group_size = (core_counts >> 24) & 0xFF;
2456         if (ratio)
2457                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
2458                         ratio, bclk, ratio * bclk, group_size);
2459
2460         ratio = (msr >> 16) & 0xFF;
2461         group_size = (core_counts >> 16) & 0xFF;
2462         if (ratio)
2463                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
2464                         ratio, bclk, ratio * bclk, group_size);
2465
2466         ratio = (msr >> 8) & 0xFF;
2467         group_size = (core_counts >> 8) & 0xFF;
2468         if (ratio)
2469                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
2470                         ratio, bclk, ratio * bclk, group_size);
2471
2472         ratio = (msr >> 0) & 0xFF;
2473         group_size = (core_counts >> 0) & 0xFF;
2474         if (ratio)
2475                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
2476                         ratio, bclk, ratio * bclk, group_size);
2477         return;
2478 }
2479
2480 static void
2481 dump_atom_turbo_ratio_limits(void)
2482 {
2483         unsigned long long msr;
2484         unsigned int ratio;
2485
2486         get_msr(base_cpu, MSR_ATOM_CORE_RATIOS, &msr);
2487         fprintf(outf, "cpu%d: MSR_ATOM_CORE_RATIOS: 0x%08llx\n", base_cpu, msr & 0xFFFFFFFF);
2488
2489         ratio = (msr >> 0) & 0x3F;
2490         if (ratio)
2491                 fprintf(outf, "%d * %.1f = %.1f MHz minimum operating frequency\n",
2492                         ratio, bclk, ratio * bclk);
2493
2494         ratio = (msr >> 8) & 0x3F;
2495         if (ratio)
2496                 fprintf(outf, "%d * %.1f = %.1f MHz low frequency mode (LFM)\n",
2497                         ratio, bclk, ratio * bclk);
2498
2499         ratio = (msr >> 16) & 0x3F;
2500         if (ratio)
2501                 fprintf(outf, "%d * %.1f = %.1f MHz base frequency\n",
2502                         ratio, bclk, ratio * bclk);
2503
2504         get_msr(base_cpu, MSR_ATOM_CORE_TURBO_RATIOS, &msr);
2505         fprintf(outf, "cpu%d: MSR_ATOM_CORE_TURBO_RATIOS: 0x%08llx\n", base_cpu, msr & 0xFFFFFFFF);
2506
2507         ratio = (msr >> 24) & 0x3F;
2508         if (ratio)
2509                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 4 active cores\n",
2510                         ratio, bclk, ratio * bclk);
2511
2512         ratio = (msr >> 16) & 0x3F;
2513         if (ratio)
2514                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 3 active cores\n",
2515                         ratio, bclk, ratio * bclk);
2516
2517         ratio = (msr >> 8) & 0x3F;
2518         if (ratio)
2519                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 2 active cores\n",
2520                         ratio, bclk, ratio * bclk);
2521
2522         ratio = (msr >> 0) & 0x3F;
2523         if (ratio)
2524                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 1 active core\n",
2525                         ratio, bclk, ratio * bclk);
2526 }
2527
2528 static void
2529 dump_knl_turbo_ratio_limits(void)
2530 {
2531         const unsigned int buckets_no = 7;
2532
2533         unsigned long long msr;
2534         int delta_cores, delta_ratio;
2535         int i, b_nr;
2536         unsigned int cores[buckets_no];
2537         unsigned int ratio[buckets_no];
2538
2539         get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT, &msr);
2540
2541         fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n",
2542                 base_cpu, msr);
2543
2544         /*
2545          * Turbo encoding in KNL is as follows:
2546          * [0] -- Reserved
2547          * [7:1] -- Base value of number of active cores of bucket 1.
2548          * [15:8] -- Base value of freq ratio of bucket 1.
2549          * [20:16] -- +ve delta of number of active cores of bucket 2.
2550          * i.e. active cores of bucket 2 =
2551          * active cores of bucket 1 + delta
2552          * [23:21] -- Negative delta of freq ratio of bucket 2.
2553          * i.e. freq ratio of bucket 2 =
2554          * freq ratio of bucket 1 - delta
2555          * [28:24]-- +ve delta of number of active cores of bucket 3.
2556          * [31:29]-- -ve delta of freq ratio of bucket 3.
2557          * [36:32]-- +ve delta of number of active cores of bucket 4.
2558          * [39:37]-- -ve delta of freq ratio of bucket 4.
2559          * [44:40]-- +ve delta of number of active cores of bucket 5.
2560          * [47:45]-- -ve delta of freq ratio of bucket 5.
2561          * [52:48]-- +ve delta of number of active cores of bucket 6.
2562          * [55:53]-- -ve delta of freq ratio of bucket 6.
2563          * [60:56]-- +ve delta of number of active cores of bucket 7.
2564          * [63:61]-- -ve delta of freq ratio of bucket 7.
2565          */
2566
2567         b_nr = 0;
2568         cores[b_nr] = (msr & 0xFF) >> 1;
2569         ratio[b_nr] = (msr >> 8) & 0xFF;
2570
2571         for (i = 16; i < 64; i += 8) {
2572                 delta_cores = (msr >> i) & 0x1F;
2573                 delta_ratio = (msr >> (i + 5)) & 0x7;
2574
2575                 cores[b_nr + 1] = cores[b_nr] + delta_cores;
2576                 ratio[b_nr + 1] = ratio[b_nr] - delta_ratio;
2577                 b_nr++;
2578         }
2579
2580         for (i = buckets_no - 1; i >= 0; i--)
2581                 if (i > 0 ? ratio[i] != ratio[i - 1] : 1)
2582                         fprintf(outf,
2583                                 "%d * %.1f = %.1f MHz max turbo %d active cores\n",
2584                                 ratio[i], bclk, ratio[i] * bclk, cores[i]);
2585 }
2586
2587 static void
2588 dump_nhm_cst_cfg(void)
2589 {
2590         unsigned long long msr;
2591
2592         get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr);
2593
2594         fprintf(outf, "cpu%d: MSR_PKG_CST_CONFIG_CONTROL: 0x%08llx", base_cpu, msr);
2595
2596         fprintf(outf, " (%s%s%s%s%slocked, pkg-cstate-limit=%d (%s)",
2597                 (msr & SNB_C3_AUTO_UNDEMOTE) ? "UNdemote-C3, " : "",
2598                 (msr & SNB_C1_AUTO_UNDEMOTE) ? "UNdemote-C1, " : "",
2599                 (msr & NHM_C3_AUTO_DEMOTE) ? "demote-C3, " : "",
2600                 (msr & NHM_C1_AUTO_DEMOTE) ? "demote-C1, " : "",
2601                 (msr & (1 << 15)) ? "" : "UN",
2602                 (unsigned int)msr & 0xF,
2603                 pkg_cstate_limit_strings[pkg_cstate_limit]);
2604
2605 #define AUTOMATIC_CSTATE_CONVERSION             (1UL << 16)
2606         if (has_automatic_cstate_conversion) {
2607                 fprintf(outf, ", automatic c-state conversion=%s",
2608                         (msr & AUTOMATIC_CSTATE_CONVERSION) ? "on" : "off");
2609         }
2610
2611         fprintf(outf, ")\n");
2612
2613         return;
2614 }
2615
2616 static void
2617 dump_config_tdp(void)
2618 {
2619         unsigned long long msr;
2620
2621         get_msr(base_cpu, MSR_CONFIG_TDP_NOMINAL, &msr);
2622         fprintf(outf, "cpu%d: MSR_CONFIG_TDP_NOMINAL: 0x%08llx", base_cpu, msr);
2623         fprintf(outf, " (base_ratio=%d)\n", (unsigned int)msr & 0xFF);
2624
2625         get_msr(base_cpu, MSR_CONFIG_TDP_LEVEL_1, &msr);
2626         fprintf(outf, "cpu%d: MSR_CONFIG_TDP_LEVEL_1: 0x%08llx (", base_cpu, msr);
2627         if (msr) {
2628                 fprintf(outf, "PKG_MIN_PWR_LVL1=%d ", (unsigned int)(msr >> 48) & 0x7FFF);
2629                 fprintf(outf, "PKG_MAX_PWR_LVL1=%d ", (unsigned int)(msr >> 32) & 0x7FFF);
2630                 fprintf(outf, "LVL1_RATIO=%d ", (unsigned int)(msr >> 16) & 0xFF);
2631                 fprintf(outf, "PKG_TDP_LVL1=%d", (unsigned int)(msr) & 0x7FFF);
2632         }
2633         fprintf(outf, ")\n");
2634
2635         get_msr(base_cpu, MSR_CONFIG_TDP_LEVEL_2, &msr);
2636         fprintf(outf, "cpu%d: MSR_CONFIG_TDP_LEVEL_2: 0x%08llx (", base_cpu, msr);
2637         if (msr) {
2638                 fprintf(outf, "PKG_MIN_PWR_LVL2=%d ", (unsigned int)(msr >> 48) & 0x7FFF);
2639                 fprintf(outf, "PKG_MAX_PWR_LVL2=%d ", (unsigned int)(msr >> 32) & 0x7FFF);
2640                 fprintf(outf, "LVL2_RATIO=%d ", (unsigned int)(msr >> 16) & 0xFF);
2641                 fprintf(outf, "PKG_TDP_LVL2=%d", (unsigned int)(msr) & 0x7FFF);
2642         }
2643         fprintf(outf, ")\n");
2644
2645         get_msr(base_cpu, MSR_CONFIG_TDP_CONTROL, &msr);
2646         fprintf(outf, "cpu%d: MSR_CONFIG_TDP_CONTROL: 0x%08llx (", base_cpu, msr);
2647         if ((msr) & 0x3)
2648                 fprintf(outf, "TDP_LEVEL=%d ", (unsigned int)(msr) & 0x3);
2649         fprintf(outf, " lock=%d", (unsigned int)(msr >> 31) & 1);
2650         fprintf(outf, ")\n");
2651
2652         get_msr(base_cpu, MSR_TURBO_ACTIVATION_RATIO, &msr);
2653         fprintf(outf, "cpu%d: MSR_TURBO_ACTIVATION_RATIO: 0x%08llx (", base_cpu, msr);
2654         fprintf(outf, "MAX_NON_TURBO_RATIO=%d", (unsigned int)(msr) & 0xFF);
2655         fprintf(outf, " lock=%d", (unsigned int)(msr >> 31) & 1);
2656         fprintf(outf, ")\n");
2657 }
2658
2659 unsigned int irtl_time_units[] = {1, 32, 1024, 32768, 1048576, 33554432, 0, 0 };
2660
2661 void print_irtl(void)
2662 {
2663         unsigned long long msr;
2664
2665         get_msr(base_cpu, MSR_PKGC3_IRTL, &msr);
2666         fprintf(outf, "cpu%d: MSR_PKGC3_IRTL: 0x%08llx (", base_cpu, msr);
2667         fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
2668                 (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
2669
2670         get_msr(base_cpu, MSR_PKGC6_IRTL, &msr);
2671         fprintf(outf, "cpu%d: MSR_PKGC6_IRTL: 0x%08llx (", base_cpu, msr);
2672         fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
2673                 (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
2674
2675         get_msr(base_cpu, MSR_PKGC7_IRTL, &msr);
2676         fprintf(outf, "cpu%d: MSR_PKGC7_IRTL: 0x%08llx (", base_cpu, msr);
2677         fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
2678                 (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
2679
2680         if (!do_irtl_hsw)
2681                 return;
2682
2683         get_msr(base_cpu, MSR_PKGC8_IRTL, &msr);
2684         fprintf(outf, "cpu%d: MSR_PKGC8_IRTL: 0x%08llx (", base_cpu, msr);
2685         fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
2686                 (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
2687
2688         get_msr(base_cpu, MSR_PKGC9_IRTL, &msr);
2689         fprintf(outf, "cpu%d: MSR_PKGC9_IRTL: 0x%08llx (", base_cpu, msr);
2690         fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
2691                 (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
2692
2693         get_msr(base_cpu, MSR_PKGC10_IRTL, &msr);
2694         fprintf(outf, "cpu%d: MSR_PKGC10_IRTL: 0x%08llx (", base_cpu, msr);
2695         fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
2696                 (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
2697
2698 }
2699 void free_fd_percpu(void)
2700 {
2701         int i;
2702
2703         for (i = 0; i < topo.max_cpu_num + 1; ++i) {
2704                 if (fd_percpu[i] != 0)
2705                         close(fd_percpu[i]);
2706         }
2707
2708         free(fd_percpu);
2709 }
2710
2711 void free_all_buffers(void)
2712 {
2713         int i;
2714
2715         CPU_FREE(cpu_present_set);
2716         cpu_present_set = NULL;
2717         cpu_present_setsize = 0;
2718
2719         CPU_FREE(cpu_affinity_set);
2720         cpu_affinity_set = NULL;
2721         cpu_affinity_setsize = 0;
2722
2723         free(thread_even);
2724         free(core_even);
2725         free(package_even);
2726
2727         thread_even = NULL;
2728         core_even = NULL;
2729         package_even = NULL;
2730
2731         free(thread_odd);
2732         free(core_odd);
2733         free(package_odd);
2734
2735         thread_odd = NULL;
2736         core_odd = NULL;
2737         package_odd = NULL;
2738
2739         free(output_buffer);
2740         output_buffer = NULL;
2741         outp = NULL;
2742
2743         free_fd_percpu();
2744
2745         free(irq_column_2_cpu);
2746         free(irqs_per_cpu);
2747
2748         for (i = 0; i <= topo.max_cpu_num; ++i) {
2749                 if (cpus[i].put_ids)
2750                         CPU_FREE(cpus[i].put_ids);
2751         }
2752         free(cpus);
2753 }
2754
2755
/*
 * Read one decimal int from the file whose path is built printf-style
 * from fmt and the following arguments.
 *
 * Returns 0 when the file cannot be opened, otherwise the parsed value.
 * Terminates the program via err() when the file opens but does not start
 * with a parsable number.
 */
int parse_int_file(const char *fmt, ...)
{
	char path[PATH_MAX];
	va_list ap;
	FILE *fp;
	int number;
	int matched;

	va_start(ap, fmt);
	vsnprintf(path, sizeof(path), fmt, ap);
	va_end(ap);

	fp = fopen(path, "r");
	if (fp == NULL)
		return 0;

	matched = fscanf(fp, "%d", &number);
	if (matched != 1)
		err(1, "%s: failed to parse number from file", path);

	fclose(fp);
	return number;
}
2779
/*
 * cpu_is_first_core_in_package(cpu)
 * return 1 if cpu equals the first integer found in its
 * topology/core_siblings_list file, else 0
 */
int cpu_is_first_core_in_package(int cpu)
{
	int first_sibling;

	first_sibling = parse_int_file("/sys/devices/system/cpu/cpu%d/topology/core_siblings_list", cpu);

	return first_sibling == cpu;
}
2788
/*
 * Return the integer stored in cpu's topology/physical_package_id sysfs
 * file, as parsed by parse_int_file().
 */
int get_physical_package_id(int cpu)
{
	int pkg_id;

	pkg_id = parse_int_file("/sys/devices/system/cpu/cpu%d/topology/physical_package_id", cpu);

	return pkg_id;
}
2793
/*
 * Return the integer stored in cpu's topology/die_id sysfs file, as parsed
 * by parse_int_file().
 */
int get_die_id(int cpu)
{
	int die_id;

	die_id = parse_int_file("/sys/devices/system/cpu/cpu%d/topology/die_id", cpu);

	return die_id;
}
2798
/*
 * Return the integer stored in cpu's topology/core_id sysfs file, as parsed
 * by parse_int_file().
 */
int get_core_id(int cpu)
{
	int core_id;

	core_id = parse_int_file("/sys/devices/system/cpu/cpu%d/topology/core_id", cpu);

	return core_id;
}
2803
2804 void set_node_data(void)
2805 {
2806         int pkg, node, lnode, cpu, cpux;
2807         int cpu_count;
2808
2809         /* initialize logical_node_id */
2810         for (cpu = 0; cpu <= topo.max_cpu_num; ++cpu)
2811                 cpus[cpu].logical_node_id = -1;
2812
2813         cpu_count = 0;
2814         for (pkg = 0; pkg < topo.num_packages; pkg++) {
2815                 lnode = 0;
2816                 for (cpu = 0; cpu <= topo.max_cpu_num; ++cpu) {
2817                         if (cpus[cpu].physical_package_id != pkg)
2818                                 continue;
2819                         /* find a cpu with an unset logical_node_id */
2820                         if (cpus[cpu].logical_node_id != -1)
2821                                 continue;
2822                         cpus[cpu].logical_node_id = lnode;
2823                         node = cpus[cpu].physical_node_id;
2824                         cpu_count++;
2825                         /*
2826                          * find all matching cpus on this pkg and set
2827                          * the logical_node_id
2828                          */
2829                         for (cpux = cpu; cpux <= topo.max_cpu_num; cpux++) {
2830                                 if ((cpus[cpux].physical_package_id == pkg) &&
2831                                    (cpus[cpux].physical_node_id == node)) {
2832                                         cpus[cpux].logical_node_id = lnode;
2833                                         cpu_count++;
2834                                 }
2835                         }
2836                         lnode++;
2837                         if (lnode > topo.nodes_per_pkg)
2838                                 topo.nodes_per_pkg = lnode;
2839                 }
2840                 if (cpu_count >= topo.max_cpu_num)
2841                         break;
2842         }
2843 }
2844
2845 int get_physical_node_id(struct cpu_topology *thiscpu)
2846 {
2847         char path[80];
2848         FILE *filep;
2849         int i;
2850         int cpu = thiscpu->logical_cpu_id;
2851
2852         for (i = 0; i <= topo.max_cpu_num; i++) {
2853                 sprintf(path, "/sys/devices/system/cpu/cpu%d/node%i/cpulist",
2854                         cpu, i);
2855                 filep = fopen(path, "r");
2856                 if (!filep)
2857                         continue;
2858                 fclose(filep);
2859                 return i;
2860         }
2861         return -1;
2862 }
2863
2864 int get_thread_siblings(struct cpu_topology *thiscpu)
2865 {
2866         char path[80], character;
2867         FILE *filep;
2868         unsigned long map;
2869         int so, shift, sib_core;
2870         int cpu = thiscpu->logical_cpu_id;
2871         int offset = topo.max_cpu_num + 1;
2872         size_t size;
2873         int thread_id = 0;
2874
2875         thiscpu->put_ids = CPU_ALLOC((topo.max_cpu_num + 1));
2876         if (thiscpu->thread_id < 0)
2877                 thiscpu->thread_id = thread_id++;
2878         if (!thiscpu->put_ids)
2879                 return -1;
2880
2881         size = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
2882         CPU_ZERO_S(size, thiscpu->put_ids);
2883
2884         sprintf(path,
2885                 "/sys/devices/system/cpu/cpu%d/topology/thread_siblings", cpu);
2886         filep = fopen(path, "r");
2887
2888         if (!filep) {
2889                 warnx("%s: open failed", path);
2890                 return -1;
2891         }
2892         do {
2893                 offset -= BITMASK_SIZE;
2894                 if (fscanf(filep, "%lx%c", &map, &character) != 2)
2895                         err(1, "%s: failed to parse file", path);
2896                 for (shift = 0; shift < BITMASK_SIZE; shift++) {
2897                         if ((map >> shift) & 0x1) {
2898                                 so = shift + offset;
2899                                 sib_core = get_core_id(so);
2900                                 if (sib_core == thiscpu->physical_core_id) {
2901                                         CPU_SET_S(so, size, thiscpu->put_ids);
2902                                         if ((so != cpu) &&
2903                                             (cpus[so].thread_id < 0))
2904                                                 cpus[so].thread_id =
2905                                                                     thread_id++;
2906                                 }
2907                         }
2908                 }
2909         } while (!strncmp(&character, ",", 1));
2910         fclose(filep);
2911
2912         return CPU_COUNT_S(size, thiscpu->put_ids);
2913 }
2914
2915 /*
2916  * run func(thread, core, package) in topology order
2917  * skip non-present cpus
2918  */
2919
2920 int for_all_cpus_2(int (func)(struct thread_data *, struct core_data *,
2921         struct pkg_data *, struct thread_data *, struct core_data *,
2922         struct pkg_data *), struct thread_data *thread_base,
2923         struct core_data *core_base, struct pkg_data *pkg_base,
2924         struct thread_data *thread_base2, struct core_data *core_base2,
2925         struct pkg_data *pkg_base2)
2926 {
2927         int retval, pkg_no, node_no, core_no, thread_no;
2928
2929         for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) {
2930                 for (node_no = 0; node_no < topo.nodes_per_pkg; ++node_no) {
2931                         for (core_no = 0; core_no < topo.cores_per_node;
2932                              ++core_no) {
2933                                 for (thread_no = 0; thread_no <
2934                                         topo.threads_per_core; ++thread_no) {
2935                                         struct thread_data *t, *t2;
2936                                         struct core_data *c, *c2;
2937                                         struct pkg_data *p, *p2;
2938
2939                                         t = GET_THREAD(thread_base, thread_no,
2940                                                        core_no, node_no,
2941                                                        pkg_no);
2942
2943                                         if (cpu_is_not_present(t->cpu_id))
2944                                                 continue;
2945
2946                                         t2 = GET_THREAD(thread_base2, thread_no,
2947                                                         core_no, node_no,
2948                                                         pkg_no);
2949
2950                                         c = GET_CORE(core_base, core_no,
2951                                                      node_no, pkg_no);
2952                                         c2 = GET_CORE(core_base2, core_no,
2953                                                       node_no,
2954                                                       pkg_no);
2955
2956                                         p = GET_PKG(pkg_base, pkg_no);
2957                                         p2 = GET_PKG(pkg_base2, pkg_no);
2958
2959                                         retval = func(t, c, p, t2, c2, p2);
2960                                         if (retval)
2961                                                 return retval;
2962                                 }
2963                         }
2964                 }
2965         }
2966         return 0;
2967 }
2968
2969 /*
2970  * run func(cpu) on every cpu in /proc/stat
2971  * return max_cpu number
2972  */
2973 int for_all_proc_cpus(int (func)(int))
2974 {
2975         FILE *fp;
2976         int cpu_num;
2977         int retval;
2978
2979         fp = fopen_or_die(proc_stat, "r");
2980
2981         retval = fscanf(fp, "cpu %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n");
2982         if (retval != 0)
2983                 err(1, "%s: failed to parse format", proc_stat);
2984
2985         while (1) {
2986                 retval = fscanf(fp, "cpu%u %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n", &cpu_num);
2987                 if (retval != 1)
2988                         break;
2989
2990                 retval = func(cpu_num);
2991                 if (retval) {
2992                         fclose(fp);
2993                         return(retval);
2994                 }
2995         }
2996         fclose(fp);
2997         return 0;
2998 }
2999
3000 void re_initialize(void)
3001 {
3002         free_all_buffers();
3003         setup_all_buffers();
3004         fprintf(outf, "turbostat: re-initialized with num_cpus %d\n", topo.num_cpus);
3005 }
3006
3007 void set_max_cpu_num(void)
3008 {
3009         FILE *filep;
3010         int base_cpu;
3011         unsigned long dummy;
3012         char pathname[64];
3013
3014         base_cpu = sched_getcpu();
3015         if (base_cpu < 0)
3016                 err(1, "cannot find calling cpu ID");
3017         sprintf(pathname,
3018                 "/sys/devices/system/cpu/cpu%d/topology/thread_siblings",
3019                 base_cpu);
3020
3021         filep = fopen_or_die(pathname, "r");
3022         topo.max_cpu_num = 0;
3023         while (fscanf(filep, "%lx,", &dummy) == 1)
3024                 topo.max_cpu_num += BITMASK_SIZE;
3025         fclose(filep);
3026         topo.max_cpu_num--; /* 0 based */
3027 }
3028
3029 /*
3030  * count_cpus()
3031  * remember the last one seen, it will be the max
3032  */
3033 int count_cpus(int cpu)
3034 {
3035         topo.num_cpus++;
3036         return 0;
3037 }
3038 int mark_cpu_present(int cpu)
3039 {
3040         CPU_SET_S(cpu, cpu_present_setsize, cpu_present_set);
3041         return 0;
3042 }
3043
3044 int init_thread_id(int cpu)
3045 {
3046         cpus[cpu].thread_id = -1;
3047         return 0;
3048 }
3049
3050 /*
3051  * snapshot_proc_interrupts()
3052  *
3053  * read and record summary of /proc/interrupts
3054  *
3055  * return 1 if config change requires a restart, else return 0
3056  */
3057 int snapshot_proc_interrupts(void)
3058 {
3059         static FILE *fp;
3060         int column, retval;
3061
3062         if (fp == NULL)
3063                 fp = fopen_or_die("/proc/interrupts", "r");
3064         else
3065                 rewind(fp);
3066
3067         /* read 1st line of /proc/interrupts to get cpu* name for each column */
3068         for (column = 0; column < topo.num_cpus; ++column) {
3069                 int cpu_number;
3070
3071                 retval = fscanf(fp, " CPU%d", &cpu_number);
3072                 if (retval != 1)
3073                         break;
3074
3075                 if (cpu_number > topo.max_cpu_num) {
3076                         warn("/proc/interrupts: cpu%d: > %d", cpu_number, topo.max_cpu_num);
3077                         return 1;
3078                 }
3079
3080                 irq_column_2_cpu[column] = cpu_number;
3081                 irqs_per_cpu[cpu_number] = 0;
3082         }
3083
3084         /* read /proc/interrupt count lines and sum up irqs per cpu */
3085         while (1) {
3086                 int column;
3087                 char buf[64];
3088
3089                 retval = fscanf(fp, " %s:", buf);       /* flush irq# "N:" */
3090                 if (retval != 1)
3091                         break;
3092
3093                 /* read the count per cpu */
3094                 for (column = 0; column < topo.num_cpus; ++column) {
3095
3096                         int cpu_number, irq_count;
3097
3098                         retval = fscanf(fp, " %d", &irq_count);
3099                         if (retval != 1)
3100                                 break;
3101
3102                         cpu_number = irq_column_2_cpu[column];
3103                         irqs_per_cpu[cpu_number] += irq_count;
3104
3105                 }
3106
3107                 while (getc(fp) != '\n')
3108                         ;       /* flush interrupt description */
3109
3110         }
3111         return 0;
3112 }
3113 /*
3114  * snapshot_gfx_rc6_ms()
3115  *
3116  * record snapshot of
3117  * /sys/class/drm/card0/power/rc6_residency_ms
3118  *
3119  * return 1 if config change requires a restart, else return 0
3120  */
3121 int snapshot_gfx_rc6_ms(void)
3122 {
3123         FILE *fp;
3124         int retval;
3125
3126         fp = fopen_or_die("/sys/class/drm/card0/power/rc6_residency_ms", "r");
3127
3128         retval = fscanf(fp, "%lld", &gfx_cur_rc6_ms);
3129         if (retval != 1)
3130                 err(1, "GFX rc6");
3131
3132         fclose(fp);
3133
3134         return 0;
3135 }
3136 /*
3137  * snapshot_gfx_mhz()
3138  *
3139  * record snapshot of
3140  * /sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz
3141  *
3142  * return 1 if config change requires a restart, else return 0
3143  */
3144 int snapshot_gfx_mhz(void)
3145 {
3146         static FILE *fp;
3147         int retval;
3148
3149         if (fp == NULL)
3150                 fp = fopen_or_die("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", "r");
3151         else {
3152                 rewind(fp);
3153                 fflush(fp);
3154         }
3155
3156         retval = fscanf(fp, "%d", &gfx_cur_mhz);
3157         if (retval != 1)
3158                 err(1, "GFX MHz");
3159
3160         return 0;
3161 }
3162
3163 /*
3164  * snapshot_gfx_cur_mhz()
3165  *
3166  * record snapshot of
3167  * /sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz
3168  *
3169  * return 1 if config change requires a restart, else return 0
3170  */
3171 int snapshot_gfx_act_mhz(void)
3172 {
3173         static FILE *fp;
3174         int retval;
3175
3176         if (fp == NULL)
3177                 fp = fopen_or_die("/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz", "r");
3178         else {
3179                 rewind(fp);
3180                 fflush(fp);
3181         }
3182
3183         retval = fscanf(fp, "%d", &gfx_act_mhz);
3184         if (retval != 1)
3185                 err(1, "GFX ACT MHz");
3186
3187         return 0;
3188 }
3189
3190 /*
3191  * snapshot_cpu_lpi()
3192  *
3193  * record snapshot of
3194  * /sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us
3195  */
3196 int snapshot_cpu_lpi_us(void)
3197 {
3198         FILE *fp;
3199         int retval;
3200
3201         fp = fopen_or_die("/sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us", "r");
3202
3203         retval = fscanf(fp, "%lld", &cpuidle_cur_cpu_lpi_us);
3204         if (retval != 1) {
3205                 fprintf(stderr, "Disabling Low Power Idle CPU output\n");
3206                 BIC_NOT_PRESENT(BIC_CPU_LPI);
3207                 fclose(fp);
3208                 return -1;
3209         }
3210
3211         fclose(fp);
3212
3213         return 0;
3214 }
3215 /*
3216  * snapshot_sys_lpi()
3217  *
3218  * record snapshot of sys_lpi_file
3219  */
3220 int snapshot_sys_lpi_us(void)
3221 {
3222         FILE *fp;
3223         int retval;
3224
3225         fp = fopen_or_die(sys_lpi_file, "r");
3226
3227         retval = fscanf(fp, "%lld", &cpuidle_cur_sys_lpi_us);
3228         if (retval != 1) {
3229                 fprintf(stderr, "Disabling Low Power Idle System output\n");
3230                 BIC_NOT_PRESENT(BIC_SYS_LPI);
3231                 fclose(fp);
3232                 return -1;
3233         }
3234         fclose(fp);
3235
3236         return 0;
3237 }
3238 /*
3239  * snapshot /proc and /sys files
3240  *
3241  * return 1 if configuration restart needed, else return 0
3242  */
3243 int snapshot_proc_sysfs_files(void)
3244 {
3245         if (DO_BIC(BIC_IRQ))
3246                 if (snapshot_proc_interrupts())
3247                         return 1;
3248
3249         if (DO_BIC(BIC_GFX_rc6))
3250                 snapshot_gfx_rc6_ms();
3251
3252         if (DO_BIC(BIC_GFXMHz))
3253                 snapshot_gfx_mhz();
3254
3255         if (DO_BIC(BIC_GFXACTMHz))
3256                 snapshot_gfx_act_mhz();
3257
3258         if (DO_BIC(BIC_CPU_LPI))
3259                 snapshot_cpu_lpi_us();
3260
3261         if (DO_BIC(BIC_SYS_LPI))
3262                 snapshot_sys_lpi_us();
3263
3264         return 0;
3265 }
3266
3267 int exit_requested;
3268
3269 static void signal_handler (int signal)
3270 {
3271         switch (signal) {
3272         case SIGINT:
3273                 exit_requested = 1;
3274                 if (debug)
3275                         fprintf(stderr, " SIGINT\n");
3276                 break;
3277         case SIGUSR1:
3278                 if (debug > 1)
3279                         fprintf(stderr, "SIGUSR1\n");
3280                 break;
3281         }
3282 }
3283
3284 void setup_signal_handler(void)
3285 {
3286         struct sigaction sa;
3287
3288         memset(&sa, 0, sizeof(sa));
3289
3290         sa.sa_handler = &signal_handler;
3291
3292         if (sigaction(SIGINT, &sa, NULL) < 0)
3293                 err(1, "sigaction SIGINT");
3294         if (sigaction(SIGUSR1, &sa, NULL) < 0)
3295                 err(1, "sigaction SIGUSR1");
3296 }
3297
3298 void do_sleep(void)
3299 {
3300         struct timeval tout;
3301         struct timespec rest;
3302         fd_set readfds;
3303         int retval;
3304
3305         FD_ZERO(&readfds);
3306         FD_SET(0, &readfds);
3307
3308         if (ignore_stdin) {
3309                 nanosleep(&interval_ts, NULL);
3310                 return;
3311         }
3312
3313         tout = interval_tv;
3314         retval = select(1, &readfds, NULL, NULL, &tout);
3315
3316         if (retval == 1) {
3317                 switch (getc(stdin)) {
3318                 case 'q':
3319                         exit_requested = 1;
3320                         break;
3321                 case EOF:
3322                         /*
3323                          * 'stdin' is a pipe closed on the other end. There
3324                          * won't be any further input.
3325                          */
3326                         ignore_stdin = 1;
3327                         /* Sleep the rest of the time */
3328                         rest.tv_sec = (tout.tv_sec + tout.tv_usec / 1000000);
3329                         rest.tv_nsec = (tout.tv_usec % 1000000) * 1000;
3330                         nanosleep(&rest, NULL);
3331                 }
3332         }
3333 }
3334
3335 int get_msr_sum(int cpu, off_t offset, unsigned long long *msr)
3336 {
3337         int ret, idx;
3338         unsigned long long msr_cur, msr_last;
3339
3340         if (!per_cpu_msr_sum)
3341                 return 1;
3342
3343         idx = offset_to_idx(offset);
3344         if (idx < 0)
3345                 return idx;
3346         /* get_msr_sum() = sum + (get_msr() - last) */
3347         ret = get_msr(cpu, offset, &msr_cur);
3348         if (ret)
3349                 return ret;
3350         msr_last = per_cpu_msr_sum[cpu].entries[idx].last;
3351         DELTA_WRAP32(msr_cur, msr_last);
3352         *msr = msr_last + per_cpu_msr_sum[cpu].entries[idx].sum;
3353
3354         return 0;
3355 }
3356
3357 timer_t timerid;
3358
3359 /* Timer callback, update the sum of MSRs periodically. */
3360 static int update_msr_sum(struct thread_data *t, struct core_data *c, struct pkg_data *p)
3361 {
3362         int i, ret;
3363         int cpu = t->cpu_id;
3364
3365         for (i = IDX_PKG_ENERGY; i < IDX_COUNT; i++) {
3366                 unsigned long long msr_cur, msr_last;
3367                 off_t offset;
3368
3369                 if (!idx_valid(i))
3370                         continue;
3371                 offset = idx_to_offset(i);
3372                 if (offset < 0)
3373                         continue;
3374                 ret = get_msr(cpu, offset, &msr_cur);
3375                 if (ret) {
3376                         fprintf(outf, "Can not update msr(0x%llx)\n",
3377                                 (unsigned long long)offset);
3378                         continue;
3379                 }
3380
3381                 msr_last = per_cpu_msr_sum[cpu].entries[i].last;
3382                 per_cpu_msr_sum[cpu].entries[i].last = msr_cur & 0xffffffff;
3383
3384                 DELTA_WRAP32(msr_cur, msr_last);
3385                 per_cpu_msr_sum[cpu].entries[i].sum += msr_last;
3386         }
3387         return 0;
3388 }
3389
3390 static void
3391 msr_record_handler(union sigval v)
3392 {
3393         for_all_cpus(update_msr_sum, EVEN_COUNTERS);
3394 }
3395
3396 void msr_sum_record(void)
3397 {
3398         struct itimerspec its;
3399         struct sigevent sev;
3400
3401         per_cpu_msr_sum = calloc(topo.max_cpu_num + 1, sizeof(struct msr_sum_array));
3402         if (!per_cpu_msr_sum) {
3403                 fprintf(outf, "Can not allocate memory for long time MSR.\n");
3404                 return;
3405         }
3406         /*
3407          * Signal handler might be restricted, so use thread notifier instead.
3408          */
3409         memset(&sev, 0, sizeof(struct sigevent));
3410         sev.sigev_notify = SIGEV_THREAD;
3411         sev.sigev_notify_function = msr_record_handler;
3412
3413         sev.sigev_value.sival_ptr = &timerid;
3414         if (timer_create(CLOCK_REALTIME, &sev, &timerid) == -1) {
3415                 fprintf(outf, "Can not create timer.\n");
3416                 goto release_msr;
3417         }
3418
3419         its.it_value.tv_sec = 0;
3420         its.it_value.tv_nsec = 1;
3421         /*
3422          * A wraparound time has been calculated early.
3423          * Some sources state that the peak power for a
3424          * microprocessor is usually 1.5 times the TDP rating,
3425          * use 2 * TDP for safety.
3426          */
3427         its.it_interval.tv_sec = rapl_joule_counter_range / 2;
3428         its.it_interval.tv_nsec = 0;
3429
3430         if (timer_settime(timerid, 0, &its, NULL) == -1) {
3431                 fprintf(outf, "Can not set timer.\n");
3432                 goto release_timer;
3433         }
3434         return;
3435
3436  release_timer:
3437         timer_delete(timerid);
3438  release_msr:
3439         free(per_cpu_msr_sum);
3440 }
3441
3442 void turbostat_loop()
3443 {
3444         int retval;
3445         int restarted = 0;
3446         int done_iters = 0;
3447
3448         setup_signal_handler();
3449
3450 restart:
3451         restarted++;
3452
3453         snapshot_proc_sysfs_files();
3454         retval = for_all_cpus(get_counters, EVEN_COUNTERS);
3455         first_counter_read = 0;
3456         if (retval < -1) {
3457                 exit(retval);
3458         } else if (retval == -1) {
3459                 if (restarted > 10) {
3460                         exit(retval);
3461                 }
3462                 re_initialize();
3463                 goto restart;
3464         }
3465         restarted = 0;
3466         done_iters = 0;
3467         gettimeofday(&tv_even, (struct timezone *)NULL);
3468
3469         while (1) {
3470                 if (for_all_proc_cpus(cpu_is_not_present)) {
3471                         re_initialize();
3472                         goto restart;
3473                 }
3474                 do_sleep();
3475                 if (snapshot_proc_sysfs_files())
3476                         goto restart;
3477                 retval = for_all_cpus(get_counters, ODD_COUNTERS);
3478                 if (retval < -1) {
3479                         exit(retval);
3480                 } else if (retval == -1) {
3481                         re_initialize();
3482                         goto restart;
3483                 }
3484                 gettimeofday(&tv_odd, (struct timezone *)NULL);
3485                 timersub(&tv_odd, &tv_even, &tv_delta);
3486                 if (for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS)) {
3487                         re_initialize();
3488                         goto restart;
3489                 }
3490                 compute_average(EVEN_COUNTERS);
3491                 format_all_counters(EVEN_COUNTERS);
3492                 flush_output_stdout();
3493                 if (exit_requested)
3494                         break;
3495                 if (num_iterations && ++done_iters >= num_iterations)
3496                         break;
3497                 do_sleep();
3498                 if (snapshot_proc_sysfs_files())
3499                         goto restart;
3500                 retval = for_all_cpus(get_counters, EVEN_COUNTERS);
3501                 if (retval < -1) {
3502                         exit(retval);
3503                 } else if (retval == -1) {
3504                         re_initialize();
3505                         goto restart;
3506                 }
3507                 gettimeofday(&tv_even, (struct timezone *)NULL);
3508                 timersub(&tv_even, &tv_odd, &tv_delta);
3509                 if (for_all_cpus_2(delta_cpu, EVEN_COUNTERS, ODD_COUNTERS)) {
3510                         re_initialize();
3511                         goto restart;
3512                 }
3513                 compute_average(ODD_COUNTERS);
3514                 format_all_counters(ODD_COUNTERS);
3515                 flush_output_stdout();
3516                 if (exit_requested)
3517                         break;
3518                 if (num_iterations && ++done_iters >= num_iterations)
3519                         break;
3520         }
3521 }
3522
3523 void check_dev_msr()
3524 {
3525         struct stat sb;
3526         char pathname[32];
3527
3528         sprintf(pathname, "/dev/cpu/%d/msr", base_cpu);
3529         if (stat(pathname, &sb))
3530                 if (system("/sbin/modprobe msr > /dev/null 2>&1"))
3531                         err(-5, "no /dev/cpu/0/msr, Try \"# modprobe msr\" ");
3532 }
3533
3534 /*
3535  * check for CAP_SYS_RAWIO
3536  * return 0 on success
3537  * return 1 on fail
3538  */
3539 int check_for_cap_sys_rawio(void)
3540 {
3541         cap_t caps;
3542         cap_flag_value_t cap_flag_value;
3543
3544         caps = cap_get_proc();
3545         if (caps == NULL)
3546                 err(-6, "cap_get_proc\n");
3547
3548         if (cap_get_flag(caps, CAP_SYS_RAWIO, CAP_EFFECTIVE, &cap_flag_value))
3549                 err(-6, "cap_get\n");
3550
3551         if (cap_flag_value != CAP_SET) {
3552                 warnx("capget(CAP_SYS_RAWIO) failed,"
3553                         " try \"# setcap cap_sys_rawio=ep %s\"", progname);
3554                 return 1;
3555         }
3556
3557         if (cap_free(caps) == -1)
3558                 err(-6, "cap_free\n");
3559
3560         return 0;
3561 }
3562 void check_permissions(void)
3563 {
3564         int do_exit = 0;
3565         char pathname[32];
3566
3567         /* check for CAP_SYS_RAWIO */
3568         do_exit += check_for_cap_sys_rawio();
3569
3570         /* test file permissions */
3571         sprintf(pathname, "/dev/cpu/%d/msr", base_cpu);
3572         if (euidaccess(pathname, R_OK)) {
3573                 do_exit++;
3574                 warn("/dev/cpu/0/msr open failed, try chown or chmod +r /dev/cpu/*/msr");
3575         }
3576
3577         /* if all else fails, thell them to be root */
3578         if (do_exit)
3579                 if (getuid() != 0)
3580                         warnx("... or simply run as root");
3581
3582         if (do_exit)
3583                 exit(-6);
3584 }
3585
3586 /*
3587  * NHM adds support for additional MSRs:
3588  *
3589  * MSR_SMI_COUNT                   0x00000034
3590  *
3591  * MSR_PLATFORM_INFO               0x000000ce
3592  * MSR_PKG_CST_CONFIG_CONTROL     0x000000e2
3593  *
3594  * MSR_MISC_PWR_MGMT               0x000001aa
3595  *
3596  * MSR_PKG_C3_RESIDENCY            0x000003f8
3597  * MSR_PKG_C6_RESIDENCY            0x000003f9
3598  * MSR_CORE_C3_RESIDENCY           0x000003fc
3599  * MSR_CORE_C6_RESIDENCY           0x000003fd
3600  *
3601  * Side effect:
3602  * sets global pkg_cstate_limit to decode MSR_PKG_CST_CONFIG_CONTROL
3603  * sets has_misc_feature_control
3604  */
3605 int probe_nhm_msrs(unsigned int family, unsigned int model)
3606 {
3607         unsigned long long msr;
3608         unsigned int base_ratio;
3609         int *pkg_cstate_limits;
3610
3611         if (!genuine_intel)
3612                 return 0;
3613
3614         if (family != 6)
3615                 return 0;
3616
3617         bclk = discover_bclk(family, model);
3618
3619         switch (model) {
3620         case INTEL_FAM6_NEHALEM:        /* Core i7 and i5 Processor - Clarksfield, Lynnfield, Jasper Forest */
3621         case INTEL_FAM6_NEHALEM_EX:     /* Nehalem-EX Xeon - Beckton */
3622                 pkg_cstate_limits = nhm_pkg_cstate_limits;
3623                 break;
3624         case INTEL_FAM6_SANDYBRIDGE:    /* SNB */
3625         case INTEL_FAM6_SANDYBRIDGE_X:  /* SNB Xeon */
3626         case INTEL_FAM6_IVYBRIDGE:      /* IVB */
3627         case INTEL_FAM6_IVYBRIDGE_X:    /* IVB Xeon */
3628                 pkg_cstate_limits = snb_pkg_cstate_limits;
3629                 has_misc_feature_control = 1;
3630                 break;
3631         case INTEL_FAM6_HASWELL:        /* HSW */
3632         case INTEL_FAM6_HASWELL_G:      /* HSW */
3633         case INTEL_FAM6_HASWELL_X:      /* HSX */
3634         case INTEL_FAM6_HASWELL_L:      /* HSW */
3635         case INTEL_FAM6_BROADWELL:      /* BDW */
3636         case INTEL_FAM6_BROADWELL_G:    /* BDW */
3637         case INTEL_FAM6_BROADWELL_X:    /* BDX */
3638         case INTEL_FAM6_SKYLAKE_L:      /* SKL */
3639         case INTEL_FAM6_CANNONLAKE_L:   /* CNL */
3640                 pkg_cstate_limits = hsw_pkg_cstate_limits;
3641                 has_misc_feature_control = 1;
3642                 break;
3643         case INTEL_FAM6_SKYLAKE_X:      /* SKX */
3644                 pkg_cstate_limits = skx_pkg_cstate_limits;
3645                 has_misc_feature_control = 1;
3646                 break;
3647         case INTEL_FAM6_ICELAKE_X:      /* ICX */
3648                 pkg_cstate_limits = icx_pkg_cstate_limits;
3649                 has_misc_feature_control = 1;
3650                 break;
3651         case INTEL_FAM6_ATOM_SILVERMONT:        /* BYT */
3652                 no_MSR_MISC_PWR_MGMT = 1;
3653         case INTEL_FAM6_ATOM_SILVERMONT_D:      /* AVN */
3654                 pkg_cstate_limits = slv_pkg_cstate_limits;
3655                 break;
3656         case INTEL_FAM6_ATOM_AIRMONT:   /* AMT */
3657                 pkg_cstate_limits = amt_pkg_cstate_limits;
3658                 no_MSR_MISC_PWR_MGMT = 1;
3659                 break;
3660         case INTEL_FAM6_XEON_PHI_KNL:   /* PHI */
3661                 pkg_cstate_limits = phi_pkg_cstate_limits;
3662                 break;
3663         case INTEL_FAM6_ATOM_GOLDMONT:  /* BXT */
3664         case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
3665         case INTEL_FAM6_ATOM_GOLDMONT_D:        /* DNV */
3666         case INTEL_FAM6_ATOM_TREMONT:   /* EHL */
3667         case INTEL_FAM6_ATOM_TREMONT_D: /* JVL */
3668                 pkg_cstate_limits = glm_pkg_cstate_limits;
3669                 break;
3670         default:
3671                 return 0;
3672         }
3673         get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr);
3674         pkg_cstate_limit = pkg_cstate_limits[msr & 0xF];
3675
3676         get_msr(base_cpu, MSR_PLATFORM_INFO, &msr);
3677         base_ratio = (msr >> 8) & 0xFF;
3678
3679         base_hz = base_ratio * bclk * 1000000;
3680         has_base_hz = 1;
3681         return 1;
3682 }
3683 /*
3684  * SLV client has support for unique MSRs:
3685  *
3686  * MSR_CC6_DEMOTION_POLICY_CONFIG
3687  * MSR_MC6_DEMOTION_POLICY_CONFIG
3688  */
3689
3690 int has_slv_msrs(unsigned int family, unsigned int model)
3691 {
3692         if (!genuine_intel)
3693                 return 0;
3694
3695         switch (model) {
3696         case INTEL_FAM6_ATOM_SILVERMONT:
3697         case INTEL_FAM6_ATOM_SILVERMONT_MID:
3698         case INTEL_FAM6_ATOM_AIRMONT_MID:
3699                 return 1;
3700         }
3701         return 0;
3702 }
3703 int is_dnv(unsigned int family, unsigned int model)
3704 {
3705
3706         if (!genuine_intel)
3707                 return 0;
3708
3709         switch (model) {
3710         case INTEL_FAM6_ATOM_GOLDMONT_D:
3711                 return 1;
3712         }
3713         return 0;
3714 }
3715 int is_bdx(unsigned int family, unsigned int model)
3716 {
3717
3718         if (!genuine_intel)
3719                 return 0;
3720
3721         switch (model) {
3722         case INTEL_FAM6_BROADWELL_X:
3723                 return 1;
3724         }
3725         return 0;
3726 }
3727 int is_skx(unsigned int family, unsigned int model)
3728 {
3729
3730         if (!genuine_intel)
3731                 return 0;
3732
3733         switch (model) {
3734         case INTEL_FAM6_SKYLAKE_X:
3735                 return 1;
3736         }
3737         return 0;
3738 }
3739
3740 int is_icx(unsigned int family, unsigned int model)
3741 {
3742
3743         if (!genuine_intel)
3744                 return 0;
3745
3746         switch (model) {
3747         case INTEL_FAM6_ICELAKE_X:
3748                 return 1;
3749         }
3750         return 0;
3751 }
3752
3753 int is_ehl(unsigned int family, unsigned int model)
3754 {
3755         if (!genuine_intel)
3756                 return 0;
3757
3758         switch (model) {
3759         case INTEL_FAM6_ATOM_TREMONT:
3760                 return 1;
3761         }
3762         return 0;
3763 }
3764 int is_jvl(unsigned int family, unsigned int model)
3765 {
3766         if (!genuine_intel)
3767                 return 0;
3768
3769         switch (model) {
3770         case INTEL_FAM6_ATOM_TREMONT_D:
3771                 return 1;
3772         }
3773         return 0;
3774 }
3775
3776 int has_turbo_ratio_limit(unsigned int family, unsigned int model)
3777 {
3778         if (has_slv_msrs(family, model))
3779                 return 0;
3780
3781         switch (model) {
3782         /* Nehalem compatible, but do not include turbo-ratio limit support */
3783         case INTEL_FAM6_NEHALEM_EX:     /* Nehalem-EX Xeon - Beckton */
3784         case INTEL_FAM6_XEON_PHI_KNL:   /* PHI - Knights Landing (different MSR definition) */
3785                 return 0;
3786         default:
3787                 return 1;
3788         }
3789 }
/*
 * has_atom_turbo_ratio_limit()
 *
 * Returns 1 exactly when has_slv_msrs() reports the model, else 0.
 */
int has_atom_turbo_ratio_limit(unsigned int family, unsigned int model)
{
        return has_slv_msrs(family, model) ? 1 : 0;
}
3797 int has_ivt_turbo_ratio_limit(unsigned int family, unsigned int model)
3798 {
3799         if (!genuine_intel)
3800                 return 0;
3801
3802         if (family != 6)
3803                 return 0;
3804
3805         switch (model) {
3806         case INTEL_FAM6_IVYBRIDGE_X:    /* IVB Xeon */
3807         case INTEL_FAM6_HASWELL_X:      /* HSW Xeon */
3808                 return 1;
3809         default:
3810                 return 0;
3811         }
3812 }
3813 int has_hsw_turbo_ratio_limit(unsigned int family, unsigned int model)
3814 {
3815         if (!genuine_intel)
3816                 return 0;
3817
3818         if (family != 6)
3819                 return 0;
3820
3821         switch (model) {
3822         case INTEL_FAM6_HASWELL_X:      /* HSW Xeon */
3823                 return 1;
3824         default:
3825                 return 0;
3826         }
3827 }
3828
3829 int has_knl_turbo_ratio_limit(unsigned int family, unsigned int model)
3830 {
3831         if (!genuine_intel)
3832                 return 0;
3833
3834         if (family != 6)
3835                 return 0;
3836
3837         switch (model) {
3838         case INTEL_FAM6_XEON_PHI_KNL:   /* Knights Landing */
3839                 return 1;
3840         default:
3841                 return 0;
3842         }
3843 }
3844 int has_glm_turbo_ratio_limit(unsigned int family, unsigned int model)
3845 {
3846         if (!genuine_intel)
3847                 return 0;
3848
3849         if (family != 6)
3850                 return 0;
3851
3852         switch (model) {
3853         case INTEL_FAM6_ATOM_GOLDMONT:
3854         case INTEL_FAM6_SKYLAKE_X:
3855         case INTEL_FAM6_ICELAKE_X:
3856                 return 1;
3857         default:
3858                 return 0;
3859         }
3860 }
3861 int has_config_tdp(unsigned int family, unsigned int model)
3862 {
3863         if (!genuine_intel)
3864                 return 0;
3865
3866         if (family != 6)
3867                 return 0;
3868
3869         switch (model) {
3870         case INTEL_FAM6_IVYBRIDGE:      /* IVB */
3871         case INTEL_FAM6_HASWELL:        /* HSW */
3872         case INTEL_FAM6_HASWELL_X:      /* HSX */
3873         case INTEL_FAM6_HASWELL_L:      /* HSW */
3874         case INTEL_FAM6_HASWELL_G:      /* HSW */
3875         case INTEL_FAM6_BROADWELL:      /* BDW */
3876         case INTEL_FAM6_BROADWELL_G:    /* BDW */
3877         case INTEL_FAM6_BROADWELL_X:    /* BDX */
3878         case INTEL_FAM6_SKYLAKE_L:      /* SKL */
3879         case INTEL_FAM6_CANNONLAKE_L:   /* CNL */
3880         case INTEL_FAM6_SKYLAKE_X:      /* SKX */
3881         case INTEL_FAM6_ICELAKE_X:      /* ICX */
3882
3883         case INTEL_FAM6_XEON_PHI_KNL:   /* Knights Landing */
3884                 return 1;
3885         default:
3886                 return 0;
3887         }
3888 }
3889
3890 /*
3891  * tcc_offset_bits:
3892  * 0: Tcc Offset not supported (Default)
3893  * 6: Bit 29:24 of MSR_PLATFORM_INFO
3894  * 4: Bit 27:24 of MSR_PLATFORM_INFO
3895  */
3896 void check_tcc_offset(int model)
3897 {
3898         unsigned long long msr;
3899
3900         if (!genuine_intel)
3901                 return;
3902
3903         switch (model) {
3904         case INTEL_FAM6_SKYLAKE_L:
3905         case INTEL_FAM6_SKYLAKE:
3906         case INTEL_FAM6_KABYLAKE_L:
3907         case INTEL_FAM6_KABYLAKE:
3908         case INTEL_FAM6_ICELAKE_L:
3909         case INTEL_FAM6_ICELAKE:
3910         case INTEL_FAM6_TIGERLAKE_L:
3911         case INTEL_FAM6_TIGERLAKE:
3912         case INTEL_FAM6_COMETLAKE:
3913                 if (!get_msr(base_cpu, MSR_PLATFORM_INFO, &msr)) {
3914                         msr = (msr >> 30) & 1;
3915                         if (msr)
3916                                 tcc_offset_bits = 6;
3917                 }
3918                 return;
3919         default:
3920                 return;
3921         }
3922 }
3923
/*
 * remove_underbar()
 *
 * Delete every '_' from the NUL-terminated string 's', in place.  The
 * remaining characters keep their order and the result is NUL-terminated.
 */
static void
remove_underbar(char *s)
{
        const char *src;
        char *dst = s;

        for (src = s; *src != '\0'; src++) {
                if (*src == '_')
                        continue;
                *dst++ = *src;
        }

        *dst = '\0';
}
3937
3938 static void
3939 dump_cstate_pstate_config_info(unsigned int family, unsigned int model)
3940 {
3941         if (!do_nhm_platform_info)
3942                 return;
3943
3944         dump_nhm_platform_info();
3945
3946         if (has_hsw_turbo_ratio_limit(family, model))
3947                 dump_hsw_turbo_ratio_limits();
3948
3949         if (has_ivt_turbo_ratio_limit(family, model))
3950                 dump_ivt_turbo_ratio_limits();
3951
3952         if (has_turbo_ratio_limit(family, model))
3953                 dump_turbo_ratio_limits(family, model);
3954
3955         if (has_atom_turbo_ratio_limit(family, model))
3956                 dump_atom_turbo_ratio_limits();
3957
3958         if (has_knl_turbo_ratio_limit(family, model))
3959                 dump_knl_turbo_ratio_limits();
3960
3961         if (has_config_tdp(family, model))
3962                 dump_config_tdp();
3963
3964         dump_nhm_cst_cfg();
3965 }
3966
3967 static void dump_sysfs_file(char *path)
3968 {
3969         FILE *input;
3970         char cpuidle_buf[64];
3971
3972         input = fopen(path, "r");
3973         if (input == NULL) {
3974                 if (debug)
3975                         fprintf(outf, "NSFOD %s\n", path);
3976                 return;
3977         }
3978         if (!fgets(cpuidle_buf, sizeof(cpuidle_buf), input))
3979                 err(1, "%s: failed to read file", path);
3980         fclose(input);
3981
3982         fprintf(outf, "%s: %s", strrchr(path, '/') + 1, cpuidle_buf);
3983 }
3984 static void
3985 dump_sysfs_cstate_config(void)
3986 {
3987         char path[64];
3988         char name_buf[16];
3989         char desc[64];
3990         FILE *input;
3991         int state;
3992         char *sp;
3993
3994         if (access("/sys/devices/system/cpu/cpuidle", R_OK)) {
3995                 fprintf(outf, "cpuidle not loaded\n");
3996                 return;
3997         }
3998
3999         dump_sysfs_file("/sys/devices/system/cpu/cpuidle/current_driver");
4000         dump_sysfs_file("/sys/devices/system/cpu/cpuidle/current_governor");
4001         dump_sysfs_file("/sys/devices/system/cpu/cpuidle/current_governor_ro");
4002
4003         for (state = 0; state < 10; ++state) {
4004
4005                 sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name",
4006                         base_cpu, state);
4007                 input = fopen(path, "r");
4008                 if (input == NULL)
4009                         continue;
4010                 if (!fgets(name_buf, sizeof(name_buf), input))
4011                         err(1, "%s: failed to read file", path);
4012
4013                  /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */
4014                 sp = strchr(name_buf, '-');
4015                 if (!sp)
4016                         sp = strchrnul(name_buf, '\n');
4017                 *sp = '\0';
4018                 fclose(input);
4019
4020                 remove_underbar(name_buf);
4021
4022                 sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/desc",
4023                         base_cpu, state);
4024                 input = fopen(path, "r");
4025                 if (input == NULL)
4026                         continue;
4027                 if (!fgets(desc, sizeof(desc), input))
4028                         err(1, "%s: failed to read file", path);
4029
4030                 fprintf(outf, "cpu%d: %s: %s", base_cpu, name_buf, desc);
4031                 fclose(input);
4032         }
4033 }
4034 static void
4035 dump_sysfs_pstate_config(void)
4036 {
4037         char path[64];
4038         char driver_buf[64];
4039         char governor_buf[64];
4040         FILE *input;
4041         int turbo;
4042
4043         sprintf(path, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_driver",
4044                         base_cpu);
4045         input = fopen(path, "r");
4046         if (input == NULL) {
4047                 fprintf(outf, "NSFOD %s\n", path);
4048                 return;
4049         }
4050         if (!fgets(driver_buf, sizeof(driver_buf), input))
4051                 err(1, "%s: failed to read file", path);
4052         fclose(input);
4053
4054         sprintf(path, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_governor",
4055                         base_cpu);
4056         input = fopen(path, "r");
4057         if (input == NULL) {
4058                 fprintf(outf, "NSFOD %s\n", path);
4059                 return;
4060         }
4061         if (!fgets(governor_buf, sizeof(governor_buf), input))
4062                 err(1, "%s: failed to read file", path);
4063         fclose(input);
4064
4065         fprintf(outf, "cpu%d: cpufreq driver: %s", base_cpu, driver_buf);
4066         fprintf(outf, "cpu%d: cpufreq governor: %s", base_cpu, governor_buf);
4067
4068         sprintf(path, "/sys/devices/system/cpu/cpufreq/boost");
4069         input = fopen(path, "r");
4070         if (input != NULL) {
4071                 if (fscanf(input, "%d", &turbo) != 1)
4072                         err(1, "%s: failed to parse number from file", path);
4073                 fprintf(outf, "cpufreq boost: %d\n", turbo);
4074                 fclose(input);
4075         }
4076
4077         sprintf(path, "/sys/devices/system/cpu/intel_pstate/no_turbo");
4078         input = fopen(path, "r");
4079         if (input != NULL) {
4080                 if (fscanf(input, "%d", &turbo) != 1)
4081                         err(1, "%s: failed to parse number from file", path);
4082                 fprintf(outf, "cpufreq intel_pstate no_turbo: %d\n", turbo);
4083                 fclose(input);
4084         }
4085 }
4086
4087
4088 /*
4089  * print_epb()
4090  * Decode the ENERGY_PERF_BIAS MSR
4091  */
4092 int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p)
4093 {
4094         char *epb_string;
4095         int cpu, epb;
4096
4097         if (!has_epb)
4098                 return 0;
4099
4100         cpu = t->cpu_id;
4101
4102         /* EPB is per-package */
4103         if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
4104                 return 0;
4105
4106         if (cpu_migrate(cpu)) {
4107                 fprintf(outf, "print_epb: Could not migrate to CPU %d\n", cpu);
4108                 return -1;
4109         }
4110
4111         epb = get_epb(cpu);
4112         if (epb < 0)
4113                 return 0;
4114
4115         switch (epb) {
4116         case ENERGY_PERF_BIAS_PERFORMANCE:
4117                 epb_string = "performance";
4118                 break;
4119         case ENERGY_PERF_BIAS_NORMAL:
4120                 epb_string = "balanced";
4121                 break;
4122         case ENERGY_PERF_BIAS_POWERSAVE:
4123                 epb_string = "powersave";
4124                 break;
4125         default:
4126                 epb_string = "custom";
4127                 break;
4128         }
4129         fprintf(outf, "cpu%d: EPB: %d (%s)\n", cpu, epb, epb_string);
4130
4131         return 0;
4132 }
4133 /*
4134  * print_hwp()
4135  * Decode the MSR_HWP_CAPABILITIES
4136  */
4137 int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p)
4138 {
4139         unsigned long long msr;
4140         int cpu;
4141
4142         if (!has_hwp)
4143                 return 0;
4144
4145         cpu = t->cpu_id;
4146
4147         /* MSR_HWP_CAPABILITIES is per-package */
4148         if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
4149                 return 0;
4150
4151         if (cpu_migrate(cpu)) {
4152                 fprintf(outf, "print_hwp: Could not migrate to CPU %d\n", cpu);
4153                 return -1;
4154         }
4155
4156         if (get_msr(cpu, MSR_PM_ENABLE, &msr))
4157                 return 0;
4158
4159         fprintf(outf, "cpu%d: MSR_PM_ENABLE: 0x%08llx (%sHWP)\n",
4160                 cpu, msr, (msr & (1 << 0)) ? "" : "No-");
4161
4162         /* MSR_PM_ENABLE[1] == 1 if HWP is enabled and MSRs visible */
4163         if ((msr & (1 << 0)) == 0)
4164                 return 0;
4165
4166         if (get_msr(cpu, MSR_HWP_CAPABILITIES, &msr))
4167                 return 0;
4168
4169         fprintf(outf, "cpu%d: MSR_HWP_CAPABILITIES: 0x%08llx "
4170                         "(high %d guar %d eff %d low %d)\n",
4171                         cpu, msr,
4172                         (unsigned int)HWP_HIGHEST_PERF(msr),
4173                         (unsigned int)HWP_GUARANTEED_PERF(msr),
4174                         (unsigned int)HWP_MOSTEFFICIENT_PERF(msr),
4175                         (unsigned int)HWP_LOWEST_PERF(msr));
4176
4177         if (get_msr(cpu, MSR_HWP_REQUEST, &msr))
4178                 return 0;
4179
4180         fprintf(outf, "cpu%d: MSR_HWP_REQUEST: 0x%08llx "
4181                         "(min %d max %d des %d epp 0x%x window 0x%x pkg 0x%x)\n",
4182                         cpu, msr,
4183                         (unsigned int)(((msr) >> 0) & 0xff),
4184                         (unsigned int)(((msr) >> 8) & 0xff),
4185                         (unsigned int)(((msr) >> 16) & 0xff),
4186                         (unsigned int)(((msr) >> 24) & 0xff),
4187                         (unsigned int)(((msr) >> 32) & 0xff3),
4188                         (unsigned int)(((msr) >> 42) & 0x1));
4189
4190         if (has_hwp_pkg) {
4191                 if (get_msr(cpu, MSR_HWP_REQUEST_PKG, &msr))
4192                         return 0;
4193
4194                 fprintf(outf, "cpu%d: MSR_HWP_REQUEST_PKG: 0x%08llx "
4195                         "(min %d max %d des %d epp 0x%x window 0x%x)\n",
4196                         cpu, msr,
4197                         (unsigned int)(((msr) >> 0) & 0xff),
4198                         (unsigned int)(((msr) >> 8) & 0xff),
4199                         (unsigned int)(((msr) >> 16) & 0xff),
4200                         (unsigned int)(((msr) >> 24) & 0xff),
4201                         (unsigned int)(((msr) >> 32) & 0xff3));
4202         }
4203         if (has_hwp_notify) {
4204                 if (get_msr(cpu, MSR_HWP_INTERRUPT, &msr))
4205                         return 0;
4206
4207                 fprintf(outf, "cpu%d: MSR_HWP_INTERRUPT: 0x%08llx "
4208                         "(%s_Guaranteed_Perf_Change, %s_Excursion_Min)\n",
4209                         cpu, msr,
4210                         ((msr) & 0x1) ? "EN" : "Dis",
4211                         ((msr) & 0x2) ? "EN" : "Dis");
4212         }
4213         if (get_msr(cpu, MSR_HWP_STATUS, &msr))
4214                 return 0;
4215
4216         fprintf(outf, "cpu%d: MSR_HWP_STATUS: 0x%08llx "
4217                         "(%sGuaranteed_Perf_Change, %sExcursion_Min)\n",
4218                         cpu, msr,
4219                         ((msr) & 0x1) ? "" : "No-",
4220                         ((msr) & 0x2) ? "" : "No-");
4221
4222         return 0;
4223 }
4224
4225 /*
4226  * print_perf_limit()
4227  */
4228 int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data *p)
4229 {
4230         unsigned long long msr;
4231         int cpu;
4232
4233         cpu = t->cpu_id;
4234
4235         /* per-package */
4236         if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
4237                 return 0;
4238
4239         if (cpu_migrate(cpu)) {
4240                 fprintf(outf, "print_perf_limit: Could not migrate to CPU %d\n", cpu);
4241                 return -1;
4242         }
4243
4244         if (do_core_perf_limit_reasons) {
4245                 get_msr(cpu, MSR_CORE_PERF_LIMIT_REASONS, &msr);
4246                 fprintf(outf, "cpu%d: MSR_CORE_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
4247                 fprintf(outf, " (Active: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)",
4248                         (msr & 1 << 15) ? "bit15, " : "",
4249                         (msr & 1 << 14) ? "bit14, " : "",
4250                         (msr & 1 << 13) ? "Transitions, " : "",
4251                         (msr & 1 << 12) ? "MultiCoreTurbo, " : "",
4252                         (msr & 1 << 11) ? "PkgPwrL2, " : "",
4253                         (msr & 1 << 10) ? "PkgPwrL1, " : "",
4254                         (msr & 1 << 9) ? "CorePwr, " : "",
4255                         (msr & 1 << 8) ? "Amps, " : "",
4256                         (msr & 1 << 6) ? "VR-Therm, " : "",
4257                         (msr & 1 << 5) ? "Auto-HWP, " : "",
4258                         (msr & 1 << 4) ? "Graphics, " : "",
4259                         (msr & 1 << 2) ? "bit2, " : "",
4260                         (msr & 1 << 1) ? "ThermStatus, " : "",
4261                         (msr & 1 << 0) ? "PROCHOT, " : "");
4262                 fprintf(outf, " (Logged: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)\n",
4263                         (msr & 1 << 31) ? "bit31, " : "",
4264                         (msr & 1 << 30) ? "bit30, " : "",
4265                         (msr & 1 << 29) ? "Transitions, " : "",
4266                         (msr & 1 << 28) ? "MultiCoreTurbo, " : "",
4267                         (msr & 1 << 27) ? "PkgPwrL2, " : "",
4268                         (msr & 1 << 26) ? "PkgPwrL1, " : "",
4269                         (msr & 1 << 25) ? "CorePwr, " : "",
4270                         (msr & 1 << 24) ? "Amps, " : "",
4271                         (msr & 1 << 22) ? "VR-Therm, " : "",
4272                         (msr & 1 << 21) ? "Auto-HWP, " : "",
4273                         (msr & 1 << 20) ? "Graphics, " : "",
4274                         (msr & 1 << 18) ? "bit18, " : "",
4275                         (msr & 1 << 17) ? "ThermStatus, " : "",
4276                         (msr & 1 << 16) ? "PROCHOT, " : "");
4277
4278         }
4279         if (do_gfx_perf_limit_reasons) {
4280                 get_msr(cpu, MSR_GFX_PERF_LIMIT_REASONS, &msr);
4281                 fprintf(outf, "cpu%d: MSR_GFX_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
4282                 fprintf(outf, " (Active: %s%s%s%s%s%s%s%s)",
4283                         (msr & 1 << 0) ? "PROCHOT, " : "",
4284                         (msr & 1 << 1) ? "ThermStatus, " : "",
4285                         (msr & 1 << 4) ? "Graphics, " : "",
4286                         (msr & 1 << 6) ? "VR-Therm, " : "",
4287                         (msr & 1 << 8) ? "Amps, " : "",
4288                         (msr & 1 << 9) ? "GFXPwr, " : "",
4289                         (msr & 1 << 10) ? "PkgPwrL1, " : "",
4290                         (msr & 1 << 11) ? "PkgPwrL2, " : "");
4291                 fprintf(outf, " (Logged: %s%s%s%s%s%s%s%s)\n",
4292                         (msr & 1 << 16) ? "PROCHOT, " : "",
4293                         (msr & 1 << 17) ? "ThermStatus, " : "",
4294                         (msr & 1 << 20) ? "Graphics, " : "",
4295                         (msr & 1 << 22) ? "VR-Therm, " : "",
4296                         (msr & 1 << 24) ? "Amps, " : "",
4297                         (msr & 1 << 25) ? "GFXPwr, " : "",
4298                         (msr & 1 << 26) ? "PkgPwrL1, " : "",
4299                         (msr & 1 << 27) ? "PkgPwrL2, " : "");
4300         }
4301         if (do_ring_perf_limit_reasons) {
4302                 get_msr(cpu, MSR_RING_PERF_LIMIT_REASONS, &msr);
4303                 fprintf(outf, "cpu%d: MSR_RING_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
4304                 fprintf(outf, " (Active: %s%s%s%s%s%s)",
4305                         (msr & 1 << 0) ? "PROCHOT, " : "",
4306                         (msr & 1 << 1) ? "ThermStatus, " : "",
4307                         (msr & 1 << 6) ? "VR-Therm, " : "",
4308                         (msr & 1 << 8) ? "Amps, " : "",
4309                         (msr & 1 << 10) ? "PkgPwrL1, " : "",
4310                         (msr & 1 << 11) ? "PkgPwrL2, " : "");
4311                 fprintf(outf, " (Logged: %s%s%s%s%s%s)\n",
4312                         (msr & 1 << 16) ? "PROCHOT, " : "",
4313                         (msr & 1 << 17) ? "ThermStatus, " : "",
4314                         (msr & 1 << 22) ? "VR-Therm, " : "",
4315                         (msr & 1 << 24) ? "Amps, " : "",
4316                         (msr & 1 << 26) ? "PkgPwrL1, " : "",
4317                         (msr & 1 << 27) ? "PkgPwrL2, " : "");
4318         }
4319         return 0;
4320 }
4321
4322 #define RAPL_POWER_GRANULARITY  0x7FFF  /* 15 bit power granularity */
4323 #define RAPL_TIME_GRANULARITY   0x3F /* 6 bit time granularity */
4324
4325 double get_tdp_intel(unsigned int model)
4326 {
4327         unsigned long long msr;
4328
4329         if (do_rapl & RAPL_PKG_POWER_INFO)
4330                 if (!get_msr(base_cpu, MSR_PKG_POWER_INFO, &msr))
4331                         return ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units;
4332
4333         switch (model) {
4334         case INTEL_FAM6_ATOM_SILVERMONT:
4335         case INTEL_FAM6_ATOM_SILVERMONT_D:
4336                 return 30.0;
4337         default:
4338                 return 135.0;
4339         }
4340 }
4341
/*
 * get_tdp_amd()
 * Return the TDP, in Watts, assumed for AMD parts.  The value does not
 * depend on @family.
 */
double get_tdp_amd(unsigned int family)
{
	/* This is the max stock TDP of HEDT/Server Fam17h+ chips */
	const double max_stock_tdp = 280.0;

	return max_stock_tdp;
}
4347
4348 /*
4349  * rapl_dram_energy_units_probe()
4350  * Energy units are either hard-coded, or come from RAPL Energy Unit MSR.
4351  */
4352 static double
4353 rapl_dram_energy_units_probe(int  model, double rapl_energy_units)
4354 {
4355         /* only called for genuine_intel, family 6 */
4356
4357         switch (model) {
4358         case INTEL_FAM6_HASWELL_X:      /* HSX */
4359         case INTEL_FAM6_BROADWELL_X:    /* BDX */
4360         case INTEL_FAM6_SKYLAKE_X:      /* SKX */
4361         case INTEL_FAM6_XEON_PHI_KNL:   /* KNL */
4362                 return (rapl_dram_energy_units = 15.3 / 1000000);
4363         default:
4364                 return (rapl_energy_units);
4365         }
4366 }
4367
4368 void rapl_probe_intel(unsigned int family, unsigned int model)
4369 {
4370         unsigned long long msr;
4371         unsigned int time_unit;
4372         double tdp;
4373
4374         if (family != 6)
4375                 return;
4376
4377         switch (model) {
4378         case INTEL_FAM6_SANDYBRIDGE:
4379         case INTEL_FAM6_IVYBRIDGE:
4380         case INTEL_FAM6_HASWELL:        /* HSW */
4381         case INTEL_FAM6_HASWELL_L:      /* HSW */
4382         case INTEL_FAM6_HASWELL_G:      /* HSW */
4383         case INTEL_FAM6_BROADWELL:      /* BDW */
4384         case INTEL_FAM6_BROADWELL_G:    /* BDW */
4385                 do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_GFX | RAPL_PKG_POWER_INFO;
4386                 if (rapl_joules) {
4387                         BIC_PRESENT(BIC_Pkg_J);
4388                         BIC_PRESENT(BIC_Cor_J);
4389                         BIC_PRESENT(BIC_GFX_J);
4390                 } else {
4391                         BIC_PRESENT(BIC_PkgWatt);
4392                         BIC_PRESENT(BIC_CorWatt);
4393                         BIC_PRESENT(BIC_GFXWatt);
4394                 }
4395                 break;
4396         case INTEL_FAM6_ATOM_GOLDMONT:  /* BXT */
4397         case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
4398                 do_rapl = RAPL_PKG | RAPL_PKG_POWER_INFO;
4399                 if (rapl_joules)
4400                         BIC_PRESENT(BIC_Pkg_J);
4401                 else
4402                         BIC_PRESENT(BIC_PkgWatt);
4403                 break;
4404         case INTEL_FAM6_ATOM_TREMONT:   /* EHL */
4405                 do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_GFX | RAPL_PKG_POWER_INFO;
4406                 if (rapl_joules) {
4407                         BIC_PRESENT(BIC_Pkg_J);
4408                         BIC_PRESENT(BIC_Cor_J);
4409                         BIC_PRESENT(BIC_RAM_J);
4410                         BIC_PRESENT(BIC_GFX_J);
4411                 } else {
4412                         BIC_PRESENT(BIC_PkgWatt);
4413                         BIC_PRESENT(BIC_CorWatt);
4414                         BIC_PRESENT(BIC_RAMWatt);
4415                         BIC_PRESENT(BIC_GFXWatt);
4416                 }
4417                 break;
4418         case INTEL_FAM6_ATOM_TREMONT_D: /* JVL */
4419                 do_rapl = RAPL_PKG | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO;
4420                 BIC_PRESENT(BIC_PKG__);
4421                 if (rapl_joules)
4422                         BIC_PRESENT(BIC_Pkg_J);
4423                 else
4424                         BIC_PRESENT(BIC_PkgWatt);
4425                 break;
4426         case INTEL_FAM6_SKYLAKE_L:      /* SKL */
4427         case INTEL_FAM6_CANNONLAKE_L:   /* CNL */
4428                 do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_GFX | RAPL_PKG_POWER_INFO;
4429                 BIC_PRESENT(BIC_PKG__);
4430                 BIC_PRESENT(BIC_RAM__);
4431                 if (rapl_joules) {
4432                         BIC_PRESENT(BIC_Pkg_J);
4433                         BIC_PRESENT(BIC_Cor_J);
4434                         BIC_PRESENT(BIC_RAM_J);
4435                         BIC_PRESENT(BIC_GFX_J);
4436                 } else {
4437                         BIC_PRESENT(BIC_PkgWatt);
4438                         BIC_PRESENT(BIC_CorWatt);
4439                         BIC_PRESENT(BIC_RAMWatt);
4440                         BIC_PRESENT(BIC_GFXWatt);
4441                 }
4442                 break;
4443         case INTEL_FAM6_HASWELL_X:      /* HSX */
4444         case INTEL_FAM6_BROADWELL_X:    /* BDX */
4445         case INTEL_FAM6_SKYLAKE_X:      /* SKX */
4446         case INTEL_FAM6_ICELAKE_X:      /* ICX */
4447         case INTEL_FAM6_XEON_PHI_KNL:   /* KNL */
4448                 do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO;
4449                 BIC_PRESENT(BIC_PKG__);
4450                 BIC_PRESENT(BIC_RAM__);
4451                 if (rapl_joules) {
4452                         BIC_PRESENT(BIC_Pkg_J);
4453                         BIC_PRESENT(BIC_RAM_J);
4454                 } else {
4455                         BIC_PRESENT(BIC_PkgWatt);
4456                         BIC_PRESENT(BIC_RAMWatt);
4457                 }
4458                 break;
4459         case INTEL_FAM6_SANDYBRIDGE_X:
4460         case INTEL_FAM6_IVYBRIDGE_X:
4461                 do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_PKG_PERF_STATUS | RAPL_DRAM_PERF_STATUS | RAPL_PKG_POWER_INFO;
4462                 BIC_PRESENT(BIC_PKG__);
4463                 BIC_PRESENT(BIC_RAM__);
4464                 if (rapl_joules) {
4465                         BIC_PRESENT(BIC_Pkg_J);
4466                         BIC_PRESENT(BIC_Cor_J);
4467                         BIC_PRESENT(BIC_RAM_J);
4468                 } else {
4469                         BIC_PRESENT(BIC_PkgWatt);
4470                         BIC_PRESENT(BIC_CorWatt);
4471                         BIC_PRESENT(BIC_RAMWatt);
4472                 }
4473                 break;
4474         case INTEL_FAM6_ATOM_SILVERMONT:        /* BYT */
4475         case INTEL_FAM6_ATOM_SILVERMONT_D:      /* AVN */
4476                 do_rapl = RAPL_PKG | RAPL_CORES;
4477                 if (rapl_joules) {
4478                         BIC_PRESENT(BIC_Pkg_J);
4479                         BIC_PRESENT(BIC_Cor_J);
4480                 } else {
4481                         BIC_PRESENT(BIC_PkgWatt);
4482                         BIC_PRESENT(BIC_CorWatt);
4483                 }
4484                 break;
4485         case INTEL_FAM6_ATOM_GOLDMONT_D:        /* DNV */
4486                 do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO | RAPL_CORES_ENERGY_STATUS;
4487                 BIC_PRESENT(BIC_PKG__);
4488                 BIC_PRESENT(BIC_RAM__);
4489                 if (rapl_joules) {
4490                         BIC_PRESENT(BIC_Pkg_J);
4491                         BIC_PRESENT(BIC_Cor_J);
4492                         BIC_PRESENT(BIC_RAM_J);
4493                 } else {
4494                         BIC_PRESENT(BIC_PkgWatt);
4495                         BIC_PRESENT(BIC_CorWatt);
4496                         BIC_PRESENT(BIC_RAMWatt);
4497                 }
4498                 break;
4499         default:
4500                 return;
4501         }
4502
4503         /* units on package 0, verify later other packages match */
4504         if (get_msr(base_cpu, MSR_RAPL_POWER_UNIT, &msr))
4505                 return;
4506
4507         rapl_power_units = 1.0 / (1 << (msr & 0xF));
4508         if (model == INTEL_FAM6_ATOM_SILVERMONT)
4509                 rapl_energy_units = 1.0 * (1 << (msr >> 8 & 0x1F)) / 1000000;
4510         else
4511                 rapl_energy_units = 1.0 / (1 << (msr >> 8 & 0x1F));
4512
4513         rapl_dram_energy_units = rapl_dram_energy_units_probe(model, rapl_energy_units);
4514
4515         time_unit = msr >> 16 & 0xF;
4516         if (time_unit == 0)
4517                 time_unit = 0xA;
4518
4519         rapl_time_units = 1.0 / (1 << (time_unit));
4520
4521         tdp = get_tdp_intel(model);
4522
4523         rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp;
4524         if (!quiet)
4525                 fprintf(outf, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp);
4526 }
4527
4528 void rapl_probe_amd(unsigned int family, unsigned int model)
4529 {
4530         unsigned long long msr;
4531         unsigned int eax, ebx, ecx, edx;
4532         unsigned int has_rapl = 0;
4533         double tdp;
4534
4535         if (max_extended_level >= 0x80000007) {
4536                 __cpuid(0x80000007, eax, ebx, ecx, edx);
4537                 /* RAPL (Fam 17h+) */
4538                 has_rapl = edx & (1 << 14);
4539         }
4540
4541         if (!has_rapl || family < 0x17)
4542                 return;
4543
4544         do_rapl = RAPL_AMD_F17H | RAPL_PER_CORE_ENERGY;
4545         if (rapl_joules) {
4546                 BIC_PRESENT(BIC_Pkg_J);
4547                 BIC_PRESENT(BIC_Cor_J);
4548         } else {
4549                 BIC_PRESENT(BIC_PkgWatt);
4550                 BIC_PRESENT(BIC_CorWatt);
4551         }
4552
4553         if (get_msr(base_cpu, MSR_RAPL_PWR_UNIT, &msr))
4554                 return;
4555
4556         rapl_time_units = ldexp(1.0, -(msr >> 16 & 0xf));
4557         rapl_energy_units = ldexp(1.0, -(msr >> 8 & 0x1f));
4558         rapl_power_units = ldexp(1.0, -(msr & 0xf));
4559
4560         tdp = get_tdp_amd(family);
4561
4562         rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp;
4563         if (!quiet)
4564                 fprintf(outf, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp);
4565 }
4566
4567 /*
4568  * rapl_probe()
4569  *
4570  * sets do_rapl, rapl_power_units, rapl_energy_units, rapl_time_units
4571  */
4572 void rapl_probe(unsigned int family, unsigned int model)
4573 {
4574         if (genuine_intel)
4575                 rapl_probe_intel(family, model);
4576         if (authentic_amd || hygon_genuine)
4577                 rapl_probe_amd(family, model);
4578 }
4579
4580 void perf_limit_reasons_probe(unsigned int family, unsigned int model)
4581 {
4582         if (!genuine_intel)
4583                 return;
4584
4585         if (family != 6)
4586                 return;
4587
4588         switch (model) {
4589         case INTEL_FAM6_HASWELL:        /* HSW */
4590         case INTEL_FAM6_HASWELL_L:      /* HSW */
4591         case INTEL_FAM6_HASWELL_G:      /* HSW */
4592                 do_gfx_perf_limit_reasons = 1;
4593         case INTEL_FAM6_HASWELL_X:      /* HSX */
4594                 do_core_perf_limit_reasons = 1;
4595                 do_ring_perf_limit_reasons = 1;
4596         default:
4597                 return;
4598         }
4599 }
4600
4601 void automatic_cstate_conversion_probe(unsigned int family, unsigned int model)
4602 {
4603         if (is_skx(family, model) || is_bdx(family, model) ||
4604             is_icx(family, model))
4605                 has_automatic_cstate_conversion = 1;
4606 }
4607
4608 void prewake_cstate_probe(unsigned int family, unsigned int model)
4609 {
4610         if (is_icx(family, model))
4611                 dis_cstate_prewake = 1;
4612 }
4613
4614 int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p)
4615 {
4616         unsigned long long msr;
4617         unsigned int dts, dts2;
4618         int cpu;
4619
4620         if (!(do_dts || do_ptm))
4621                 return 0;
4622
4623         cpu = t->cpu_id;
4624
4625         /* DTS is per-core, no need to print for each thread */
4626         if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
4627                 return 0;
4628
4629         if (cpu_migrate(cpu)) {
4630                 fprintf(outf, "print_thermal: Could not migrate to CPU %d\n", cpu);
4631                 return -1;
4632         }
4633
4634         if (do_ptm && (t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) {
4635                 if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr))
4636                         return 0;
4637
4638                 dts = (msr >> 16) & 0x7F;
4639                 fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_STATUS: 0x%08llx (%d C)\n",
4640                         cpu, msr, tcc_activation_temp - dts);
4641
4642                 if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, &msr))
4643                         return 0;
4644
4645                 dts = (msr >> 16) & 0x7F;
4646                 dts2 = (msr >> 8) & 0x7F;
4647                 fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
4648                         cpu, msr, tcc_activation_temp - dts, tcc_activation_temp - dts2);
4649         }
4650
4651
4652         if (do_dts && debug) {
4653                 unsigned int resolution;
4654
4655                 if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr))
4656                         return 0;
4657
4658                 dts = (msr >> 16) & 0x7F;
4659                 resolution = (msr >> 27) & 0xF;
4660                 fprintf(outf, "cpu%d: MSR_IA32_THERM_STATUS: 0x%08llx (%d C +/- %d)\n",
4661                         cpu, msr, tcc_activation_temp - dts, resolution);
4662
4663                 if (get_msr(cpu, MSR_IA32_THERM_INTERRUPT, &msr))
4664                         return 0;
4665
4666                 dts = (msr >> 16) & 0x7F;
4667                 dts2 = (msr >> 8) & 0x7F;
4668                 fprintf(outf, "cpu%d: MSR_IA32_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
4669                         cpu, msr, tcc_activation_temp - dts, tcc_activation_temp - dts2);
4670         }
4671
4672         return 0;
4673 }
4674
4675 void print_power_limit_msr(int cpu, unsigned long long msr, char *label)
4676 {
4677         fprintf(outf, "cpu%d: %s: %sabled (%f Watts, %f sec, clamp %sabled)\n",
4678                 cpu, label,
4679                 ((msr >> 15) & 1) ? "EN" : "DIS",
4680                 ((msr >> 0) & 0x7FFF) * rapl_power_units,
4681                 (1.0 + (((msr >> 22) & 0x3)/4.0)) * (1 << ((msr >> 17) & 0x1F)) * rapl_time_units,
4682                 (((msr >> 16) & 1) ? "EN" : "DIS"));
4683
4684         return;
4685 }
4686
4687 int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
4688 {
4689         unsigned long long msr;
4690         const char *msr_name;
4691         int cpu;
4692
4693         if (!do_rapl)
4694                 return 0;
4695
4696         /* RAPL counters are per package, so print only for 1st thread/package */
4697         if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
4698                 return 0;
4699
4700         cpu = t->cpu_id;
4701         if (cpu_migrate(cpu)) {
4702                 fprintf(outf, "print_rapl: Could not migrate to CPU %d\n", cpu);
4703                 return -1;
4704         }
4705
4706         if (do_rapl & RAPL_AMD_F17H) {
4707                 msr_name = "MSR_RAPL_PWR_UNIT";
4708                 if (get_msr(cpu, MSR_RAPL_PWR_UNIT, &msr))
4709                         return -1;
4710         } else {
4711                 msr_name = "MSR_RAPL_POWER_UNIT";
4712                 if (get_msr(cpu, MSR_RAPL_POWER_UNIT, &msr))
4713                         return -1;
4714         }
4715
4716         fprintf(outf, "cpu%d: %s: 0x%08llx (%f Watts, %f Joules, %f sec.)\n", cpu, msr_name, msr,
4717                 rapl_power_units, rapl_energy_units, rapl_time_units);
4718
4719         if (do_rapl & RAPL_PKG_POWER_INFO) {
4720
4721                 if (get_msr(cpu, MSR_PKG_POWER_INFO, &msr))
4722                         return -5;
4723
4724
4725                 fprintf(outf, "cpu%d: MSR_PKG_POWER_INFO: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n",
4726                         cpu, msr,
4727                         ((msr >>  0) & RAPL_POWER_GRANULARITY) * rapl_power_units,
4728                         ((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units,
4729                         ((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units,
4730                         ((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units);
4731
4732         }
4733         if (do_rapl & RAPL_PKG) {
4734
4735                 if (get_msr(cpu, MSR_PKG_POWER_LIMIT, &msr))
4736                         return -9;
4737
4738                 fprintf(outf, "cpu%d: MSR_PKG_POWER_LIMIT: 0x%08llx (%slocked)\n",
4739                         cpu, msr, (msr >> 63) & 1 ? "" : "UN");
4740
4741                 print_power_limit_msr(cpu, msr, "PKG Limit #1");
4742                 fprintf(outf, "cpu%d: PKG Limit #2: %sabled (%f Watts, %f* sec, clamp %sabled)\n",
4743                         cpu,
4744                         ((msr >> 47) & 1) ? "EN" : "DIS",
4745                         ((msr >> 32) & 0x7FFF) * rapl_power_units,
4746                         (1.0 + (((msr >> 54) & 0x3)/4.0)) * (1 << ((msr >> 49) & 0x1F)) * rapl_time_units,
4747                         ((msr >> 48) & 1) ? "EN" : "DIS");
4748         }
4749
4750         if (do_rapl & RAPL_DRAM_POWER_INFO) {
4751                 if (get_msr(cpu, MSR_DRAM_POWER_INFO, &msr))
4752                         return -6;
4753
4754                 fprintf(outf, "cpu%d: MSR_DRAM_POWER_INFO,: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n",
4755                         cpu, msr,
4756                         ((msr >>  0) & RAPL_POWER_GRANULARITY) * rapl_power_units,
4757                         ((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units,
4758                         ((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units,
4759                         ((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units);
4760         }
4761         if (do_rapl & RAPL_DRAM) {
4762                 if (get_msr(cpu, MSR_DRAM_POWER_LIMIT, &msr))
4763                         return -9;
4764                 fprintf(outf, "cpu%d: MSR_DRAM_POWER_LIMIT: 0x%08llx (%slocked)\n",
4765                                 cpu, msr, (msr >> 31) & 1 ? "" : "UN");
4766
4767                 print_power_limit_msr(cpu, msr, "DRAM Limit");
4768         }
4769         if (do_rapl & RAPL_CORE_POLICY) {
4770                 if (get_msr(cpu, MSR_PP0_POLICY, &msr))
4771                         return -7;
4772
4773                 fprintf(outf, "cpu%d: MSR_PP0_POLICY: %lld\n", cpu, msr & 0xF);
4774         }
4775         if (do_rapl & RAPL_CORES_POWER_LIMIT) {
4776                 if (get_msr(cpu, MSR_PP0_POWER_LIMIT, &msr))
4777                         return -9;
4778                 fprintf(outf, "cpu%d: MSR_PP0_POWER_LIMIT: 0x%08llx (%slocked)\n",
4779                                 cpu, msr, (msr >> 31) & 1 ? "" : "UN");
4780                 print_power_limit_msr(cpu, msr, "Cores Limit");
4781         }
4782         if (do_rapl & RAPL_GFX) {
4783                 if (get_msr(cpu, MSR_PP1_POLICY, &msr))
4784                         return -8;
4785
4786                 fprintf(outf, "cpu%d: MSR_PP1_POLICY: %lld\n", cpu, msr & 0xF);
4787
4788                 if (get_msr(cpu, MSR_PP1_POWER_LIMIT, &msr))
4789                         return -9;
4790                 fprintf(outf, "cpu%d: MSR_PP1_POWER_LIMIT: 0x%08llx (%slocked)\n",
4791                                 cpu, msr, (msr >> 31) & 1 ? "" : "UN");
4792                 print_power_limit_msr(cpu, msr, "GFX Limit");
4793         }
4794         return 0;
4795 }
4796
4797 /*
4798  * SNB adds support for additional MSRs:
4799  *
4800  * MSR_PKG_C7_RESIDENCY            0x000003fa
4801  * MSR_CORE_C7_RESIDENCY           0x000003fe
4802  * MSR_PKG_C2_RESIDENCY            0x0000060d
4803  */
4804
4805 int has_snb_msrs(unsigned int family, unsigned int model)
4806 {
4807         if (!genuine_intel)
4808                 return 0;
4809
4810         switch (model) {
4811         case INTEL_FAM6_SANDYBRIDGE:
4812         case INTEL_FAM6_SANDYBRIDGE_X:
4813         case INTEL_FAM6_IVYBRIDGE:              /* IVB */
4814         case INTEL_FAM6_IVYBRIDGE_X:            /* IVB Xeon */
4815         case INTEL_FAM6_HASWELL:                /* HSW */
4816         case INTEL_FAM6_HASWELL_X:              /* HSW */
4817         case INTEL_FAM6_HASWELL_L:              /* HSW */
4818         case INTEL_FAM6_HASWELL_G:              /* HSW */
4819         case INTEL_FAM6_BROADWELL:              /* BDW */
4820         case INTEL_FAM6_BROADWELL_G:            /* BDW */
4821         case INTEL_FAM6_BROADWELL_X:            /* BDX */
4822         case INTEL_FAM6_SKYLAKE_L:              /* SKL */
4823         case INTEL_FAM6_CANNONLAKE_L:           /* CNL */
4824         case INTEL_FAM6_SKYLAKE_X:              /* SKX */
4825         case INTEL_FAM6_ICELAKE_X:              /* ICX */
4826         case INTEL_FAM6_ATOM_GOLDMONT:          /* BXT */
4827         case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
4828         case INTEL_FAM6_ATOM_GOLDMONT_D:        /* DNV */
4829         case INTEL_FAM6_ATOM_TREMONT:           /* EHL */
4830         case INTEL_FAM6_ATOM_TREMONT_D:         /* JVL */
4831                 return 1;
4832         }
4833         return 0;
4834 }
4835
4836 /*
4837  * HSW ULT added support for C8/C9/C10 MSRs:
4838  *
4839  * MSR_PKG_C8_RESIDENCY         0x00000630
4840  * MSR_PKG_C9_RESIDENCY         0x00000631
4841  * MSR_PKG_C10_RESIDENCY        0x00000632
4842  *
4843  * MSR_PKGC8_IRTL               0x00000633
4844  * MSR_PKGC9_IRTL               0x00000634
4845  * MSR_PKGC10_IRTL              0x00000635
4846  *
4847  */
4848 int has_c8910_msrs(unsigned int family, unsigned int model)
4849 {
4850         if (!genuine_intel)
4851                 return 0;
4852
4853         switch (model) {
4854         case INTEL_FAM6_HASWELL_L:      /* HSW */
4855         case INTEL_FAM6_BROADWELL:      /* BDW */
4856         case INTEL_FAM6_SKYLAKE_L:      /* SKL */
4857         case INTEL_FAM6_CANNONLAKE_L:   /* CNL */
4858         case INTEL_FAM6_ATOM_GOLDMONT:  /* BXT */
4859         case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
4860         case INTEL_FAM6_ATOM_TREMONT:   /* EHL */
4861                 return 1;
4862         }
4863         return 0;
4864 }
4865
4866 /*
4867  * SKL adds support for additional MSRS:
4868  *
4869  * MSR_PKG_WEIGHTED_CORE_C0_RES    0x00000658
4870  * MSR_PKG_ANY_CORE_C0_RES         0x00000659
4871  * MSR_PKG_ANY_GFXE_C0_RES         0x0000065A
4872  * MSR_PKG_BOTH_CORE_GFXE_C0_RES   0x0000065B
4873  */
4874 int has_skl_msrs(unsigned int family, unsigned int model)
4875 {
4876         if (!genuine_intel)
4877                 return 0;
4878
4879         switch (model) {
4880         case INTEL_FAM6_SKYLAKE_L:      /* SKL */
4881         case INTEL_FAM6_CANNONLAKE_L:   /* CNL */
4882                 return 1;
4883         }
4884         return 0;
4885 }
4886
4887 int is_slm(unsigned int family, unsigned int model)
4888 {
4889         if (!genuine_intel)
4890                 return 0;
4891         switch (model) {
4892         case INTEL_FAM6_ATOM_SILVERMONT:        /* BYT */
4893         case INTEL_FAM6_ATOM_SILVERMONT_D:      /* AVN */
4894                 return 1;
4895         }
4896         return 0;
4897 }
4898
4899 int is_knl(unsigned int family, unsigned int model)
4900 {
4901         if (!genuine_intel)
4902                 return 0;
4903         switch (model) {
4904         case INTEL_FAM6_XEON_PHI_KNL:   /* KNL */
4905                 return 1;
4906         }
4907         return 0;
4908 }
4909
4910 int is_cnl(unsigned int family, unsigned int model)
4911 {
4912         if (!genuine_intel)
4913                 return 0;
4914
4915         switch (model) {
4916         case INTEL_FAM6_CANNONLAKE_L: /* CNL */
4917                 return 1;
4918         }
4919
4920         return 0;
4921 }
4922
/*
 * Returns the APERF/MPERF multiplier for this CPU: 1024 when is_knl()
 * accepts the family/model pair, 1 for everything else.
 */
unsigned int get_aperf_mperf_multiplier(unsigned int family, unsigned int model)
{
	return is_knl(family, model) ? 1024 : 1;
}
4929
4930 #define SLM_BCLK_FREQS 5
4931 double slm_freq_table[SLM_BCLK_FREQS] = { 83.3, 100.0, 133.3, 116.7, 80.0};
4932
4933 double slm_bclk(void)
4934 {
4935         unsigned long long msr = 3;
4936         unsigned int i;
4937         double freq;
4938
4939         if (get_msr(base_cpu, MSR_FSB_FREQ, &msr))
4940                 fprintf(outf, "SLM BCLK: unknown\n");
4941
4942         i = msr & 0xf;
4943         if (i >= SLM_BCLK_FREQS) {
4944                 fprintf(outf, "SLM BCLK[%d] invalid\n", i);
4945                 i = 3;
4946         }
4947         freq = slm_freq_table[i];
4948
4949         if (!quiet)
4950                 fprintf(outf, "SLM BCLK: %.1f Mhz\n", freq);
4951
4952         return freq;
4953 }
4954
/*
 * Pick the bus clock for this CPU:
 *   - 100.00 when has_snb_msrs() or is_knl() accepts the model,
 *   - the value returned by slm_bclk() when is_slm() accepts it,
 *   - 133.33 otherwise.
 */
double discover_bclk(unsigned int family, unsigned int model)
{
	if (has_snb_msrs(family, model) || is_knl(family, model))
		return 100.00;

	if (is_slm(family, model))
		return slm_bclk();

	return 133.33;
}
4964
4965 int get_cpu_type(struct thread_data *t, struct core_data *c, struct pkg_data *p)
4966 {
4967         unsigned int eax, ebx, ecx, edx;
4968
4969         if (!genuine_intel)
4970                 return 0;
4971
4972         if (cpu_migrate(t->cpu_id)) {
4973                 fprintf(outf, "Could not migrate to CPU %d\n", t->cpu_id);
4974                 return -1;
4975         }
4976
4977         if (max_level < 0x1a)
4978                 return 0;
4979
4980         __cpuid(0x1a, eax, ebx, ecx, edx);
4981         eax = (eax >> 24) & 0xFF;
4982         if (eax == 0x20 )
4983                 t->is_atom = true;
4984         return 0;
4985 }
4986
4987 /*
4988  * MSR_IA32_TEMPERATURE_TARGET indicates the temperature where
4989  * the Thermal Control Circuit (TCC) activates.
4990  * This is usually equal to tjMax.
4991  *
4992  * Older processors do not have this MSR, so there we guess,
4993  * but also allow cmdline over-ride with -T.
4994  *
4995  * Several MSR temperature values are in units of degrees-C
4996  * below this value, including the Digital Thermal Sensor (DTS),
4997  * Package Thermal Management Sensor (PTM), and thermal event thresholds.
4998  */
4999 int set_temperature_target(struct thread_data *t, struct core_data *c, struct pkg_data *p)
5000 {
5001         unsigned long long msr;
5002         unsigned int target_c_local, tcc_offset;
5003         int cpu;
5004
5005         /* tcc_activation_temp is used only for dts or ptm */
5006         if (!(do_dts || do_ptm))
5007                 return 0;
5008
5009         /* this is a per-package concept */
5010         if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
5011                 return 0;
5012
5013         cpu = t->cpu_id;
5014         if (cpu_migrate(cpu)) {
5015                 fprintf(outf, "Could not migrate to CPU %d\n", cpu);
5016                 return -1;
5017         }
5018
5019         if (tcc_activation_temp_override != 0) {
5020                 tcc_activation_temp = tcc_activation_temp_override;
5021                 fprintf(outf, "cpu%d: Using cmdline TCC Target (%d C)\n",
5022                         cpu, tcc_activation_temp);
5023                 return 0;
5024         }
5025
5026         /* Temperature Target MSR is Nehalem and newer only */
5027         if (!do_nhm_platform_info)
5028                 goto guess;
5029
5030         if (get_msr(base_cpu, MSR_IA32_TEMPERATURE_TARGET, &msr))
5031                 goto guess;
5032
5033         target_c_local = (msr >> 16) & 0xFF;
5034
5035         if (!quiet) {
5036                 switch (tcc_offset_bits) {
5037                 case 4:
5038                         tcc_offset = (msr >> 24) & 0xF;
5039                         fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C) (%d default - %d offset)\n",
5040                         cpu, msr, target_c_local - tcc_offset, target_c_local, tcc_offset);
5041                         break;
5042                 case 6:
5043                         tcc_offset = (msr >> 24) & 0x3F;
5044                         fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C) (%d default - %d offset)\n",
5045                         cpu, msr, target_c_local - tcc_offset, target_c_local, tcc_offset);
5046                         break;
5047                 default:
5048                         fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C)\n",
5049                         cpu, msr, target_c_local);
5050                         break;
5051                 }
5052         }
5053
5054         if (!target_c_local)
5055                 goto guess;
5056
5057         tcc_activation_temp = target_c_local;
5058
5059         return 0;
5060
5061 guess:
5062         tcc_activation_temp = TJMAX_DEFAULT;
5063         fprintf(outf, "cpu%d: Guessing tjMax %d C, Please use -T to specify\n",
5064                 cpu, tcc_activation_temp);
5065
5066         return 0;
5067 }
5068
5069 void decode_feature_control_msr(void)
5070 {
5071         unsigned long long msr;
5072
5073         if (!get_msr(base_cpu, MSR_IA32_FEAT_CTL, &msr))
5074                 fprintf(outf, "cpu%d: MSR_IA32_FEATURE_CONTROL: 0x%08llx (%sLocked %s)\n",
5075                         base_cpu, msr,
5076                         msr & FEAT_CTL_LOCKED ? "" : "UN-",
5077                         msr & (1 << 18) ? "SGX" : "");
5078 }
5079
5080 void decode_misc_enable_msr(void)
5081 {
5082         unsigned long long msr;
5083
5084         if (!genuine_intel)
5085                 return;
5086
5087         if (!get_msr(base_cpu, MSR_IA32_MISC_ENABLE, &msr))
5088                 fprintf(outf, "cpu%d: MSR_IA32_MISC_ENABLE: 0x%08llx (%sTCC %sEIST %sMWAIT %sPREFETCH %sTURBO)\n",
5089                         base_cpu, msr,
5090                         msr & MSR_IA32_MISC_ENABLE_TM1 ? "" : "No-",
5091                         msr & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP ? "" : "No-",
5092                         msr & MSR_IA32_MISC_ENABLE_MWAIT ? "" : "No-",
5093                         msr & MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE ? "No-" : "",
5094                         msr & MSR_IA32_MISC_ENABLE_TURBO_DISABLE ? "No-" : "");
5095 }
5096
5097 void decode_misc_feature_control(void)
5098 {
5099         unsigned long long msr;
5100
5101         if (!has_misc_feature_control)
5102                 return;
5103
5104         if (!get_msr(base_cpu, MSR_MISC_FEATURE_CONTROL, &msr))
5105                 fprintf(outf, "cpu%d: MSR_MISC_FEATURE_CONTROL: 0x%08llx (%sL2-Prefetch %sL2-Prefetch-pair %sL1-Prefetch %sL1-IP-Prefetch)\n",
5106                         base_cpu, msr,
5107                         msr & (0 << 0) ? "No-" : "",
5108                         msr & (1 << 0) ? "No-" : "",
5109                         msr & (2 << 0) ? "No-" : "",
5110                         msr & (3 << 0) ? "No-" : "");
5111 }
5112 /*
5113  * Decode MSR_MISC_PWR_MGMT
5114  *
5115  * Decode the bits according to the Nehalem documentation
5116  * bit[0] seems to continue to have same meaning going forward
5117  * bit[1] less so...
5118  */
5119 void decode_misc_pwr_mgmt_msr(void)
5120 {
5121         unsigned long long msr;
5122
5123         if (!do_nhm_platform_info)
5124                 return;
5125
5126         if (no_MSR_MISC_PWR_MGMT)
5127                 return;
5128
5129         if (!get_msr(base_cpu, MSR_MISC_PWR_MGMT, &msr))
5130                 fprintf(outf, "cpu%d: MSR_MISC_PWR_MGMT: 0x%08llx (%sable-EIST_Coordination %sable-EPB %sable-OOB)\n",
5131                         base_cpu, msr,
5132                         msr & (1 << 0) ? "DIS" : "EN",
5133                         msr & (1 << 1) ? "EN" : "DIS",
5134                         msr & (1 << 8) ? "EN" : "DIS");
5135 }
5136 /*
5137  * Decode MSR_CC6_DEMOTION_POLICY_CONFIG, MSR_MC6_DEMOTION_POLICY_CONFIG
5138  *
5139  * This MSRs are present on Silvermont processors,
5140  * Intel Atom processor E3000 series (Baytrail), and friends.
5141  */
5142 void decode_c6_demotion_policy_msr(void)
5143 {
5144         unsigned long long msr;
5145
5146         if (!get_msr(base_cpu, MSR_CC6_DEMOTION_POLICY_CONFIG, &msr))
5147                 fprintf(outf, "cpu%d: MSR_CC6_DEMOTION_POLICY_CONFIG: 0x%08llx (%sable-CC6-Demotion)\n",
5148                         base_cpu, msr, msr & (1 << 0) ? "EN" : "DIS");
5149
5150         if (!get_msr(base_cpu, MSR_MC6_DEMOTION_POLICY_CONFIG, &msr))
5151                 fprintf(outf, "cpu%d: MSR_MC6_DEMOTION_POLICY_CONFIG: 0x%08llx (%sable-MC6-Demotion)\n",
5152                         base_cpu, msr, msr & (1 << 0) ? "EN" : "DIS");
5153 }
5154
5155 /*
5156  * When models are the same, for the purpose of turbostat, reuse
5157  */
5158 unsigned int intel_model_duplicates(unsigned int model)
5159 {
5160
5161         switch(model) {
5162         case INTEL_FAM6_NEHALEM_EP:     /* Core i7, Xeon 5500 series - Bloomfield, Gainstown NHM-EP */
5163         case INTEL_FAM6_NEHALEM:        /* Core i7 and i5 Processor - Clarksfield, Lynnfield, Jasper Forest */
5164         case 0x1F:      /* Core i7 and i5 Processor - Nehalem */
5165         case INTEL_FAM6_WESTMERE:       /* Westmere Client - Clarkdale, Arrandale */
5166         case INTEL_FAM6_WESTMERE_EP:    /* Westmere EP - Gulftown */
5167                 return INTEL_FAM6_NEHALEM;
5168
5169         case INTEL_FAM6_NEHALEM_EX:     /* Nehalem-EX Xeon - Beckton */
5170         case INTEL_FAM6_WESTMERE_EX:    /* Westmere-EX Xeon - Eagleton */
5171                 return INTEL_FAM6_NEHALEM_EX;
5172
5173         case INTEL_FAM6_XEON_PHI_KNM:
5174                 return INTEL_FAM6_XEON_PHI_KNL;
5175
5176         case INTEL_FAM6_BROADWELL_X:
5177         case INTEL_FAM6_BROADWELL_D:    /* BDX-DE */
5178                 return INTEL_FAM6_BROADWELL_X;
5179
5180         case INTEL_FAM6_SKYLAKE_L:
5181         case INTEL_FAM6_SKYLAKE:
5182         case INTEL_FAM6_KABYLAKE_L:
5183         case INTEL_FAM6_KABYLAKE:
5184         case INTEL_FAM6_COMETLAKE_L:
5185         case INTEL_FAM6_COMETLAKE:
5186                 return INTEL_FAM6_SKYLAKE_L;
5187
5188         case INTEL_FAM6_ICELAKE_L:
5189         case INTEL_FAM6_ICELAKE_NNPI:
5190         case INTEL_FAM6_TIGERLAKE_L:
5191         case INTEL_FAM6_TIGERLAKE:
5192         case INTEL_FAM6_ROCKETLAKE:
5193         case INTEL_FAM6_LAKEFIELD:
5194         case INTEL_FAM6_ALDERLAKE:
5195         case INTEL_FAM6_ALDERLAKE_L:
5196                 return INTEL_FAM6_CANNONLAKE_L;
5197
5198         case INTEL_FAM6_ATOM_TREMONT_L:
5199                 return INTEL_FAM6_ATOM_TREMONT;
5200
5201         case INTEL_FAM6_ICELAKE_D:
5202         case INTEL_FAM6_SAPPHIRERAPIDS_X:
5203                 return INTEL_FAM6_ICELAKE_X;
5204         }
5205         return model;
5206 }
5207
5208 void print_dev_latency(void)
5209 {
5210         char *path = "/dev/cpu_dma_latency";
5211         int fd;
5212         int value;
5213         int retval;
5214
5215         fd = open(path, O_RDONLY);
5216         if (fd < 0) {
5217                 warn("fopen %s\n", path);
5218                 return;
5219         }
5220
5221         retval = read(fd, (void *)&value, sizeof(int));
5222         if (retval != sizeof(int)) {
5223                 warn("read %s\n", path);
5224                 close(fd);
5225                 return;
5226         }
5227         fprintf(outf, "/dev/cpu_dma_latency: %d usec (%s)\n",
5228                 value, value == 2000000000 ? "default" : "constrained");
5229
5230         close(fd);
5231 }
5232
5233
5234 /*
5235  * Linux-perf manages the the HW instructions-retired counter
5236  * by enabling when requested, and hiding rollover
5237  */
5238 void linux_perf_init(void)
5239 {
5240         if (!BIC_IS_ENABLED(BIC_IPC))
5241                 return;
5242
5243         if (access("/proc/sys/kernel/perf_event_paranoid", F_OK))
5244                 return;
5245
5246         fd_instr_count_percpu = calloc(topo.max_cpu_num + 1, sizeof(int));
5247         if (fd_instr_count_percpu == NULL)
5248                 err(-1, "calloc fd_instr_count_percpu");
5249
5250         BIC_PRESENT(BIC_IPC);
5251 }
5252
5253 void process_cpuid()
5254 {
5255         unsigned int eax, ebx, ecx, edx;
5256         unsigned int fms, family, model, stepping, ecx_flags, edx_flags;
5257         unsigned int has_turbo;
5258         unsigned long long ucode_patch = 0;
5259
5260         eax = ebx = ecx = edx = 0;
5261
5262         __cpuid(0, max_level, ebx, ecx, edx);
5263
5264         if (ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69)
5265                 genuine_intel = 1;
5266         else if (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65)
5267                 authentic_amd = 1;
5268         else if (ebx == 0x6f677948 && ecx == 0x656e6975 && edx == 0x6e65476e)
5269                 hygon_genuine = 1;
5270
5271         if (!quiet)
5272                 fprintf(outf, "CPUID(0): %.4s%.4s%.4s 0x%x CPUID levels\n",
5273                         (char *)&ebx, (char *)&edx, (char *)&ecx, max_level);
5274
5275         __cpuid(1, fms, ebx, ecx, edx);
5276         family = (fms >> 8) & 0xf;
5277         model = (fms >> 4) & 0xf;
5278         stepping = fms & 0xf;
5279         if (family == 0xf)
5280                 family += (fms >> 20) & 0xff;
5281         if (family >= 6)
5282                 model += ((fms >> 16) & 0xf) << 4;
5283         ecx_flags = ecx;
5284         edx_flags = edx;
5285
5286         if (get_msr(sched_getcpu(), MSR_IA32_UCODE_REV, &ucode_patch))
5287                 warnx("get_msr(UCODE)\n");
5288
5289         /*
5290          * check max extended function levels of CPUID.
5291          * This is needed to check for invariant TSC.
5292          * This check is valid for both Intel and AMD.
5293          */
5294         ebx = ecx = edx = 0;
5295         __cpuid(0x80000000, max_extended_level, ebx, ecx, edx);
5296
5297         if (!quiet) {
5298                 fprintf(outf, "CPUID(1): family:model:stepping 0x%x:%x:%x (%d:%d:%d) microcode 0x%x\n",
5299                         family, model, stepping, family, model, stepping, (unsigned int)((ucode_patch >> 32) & 0xFFFFFFFF));
5300                 fprintf(outf, "CPUID(0x80000000): max_extended_levels: 0x%x\n", max_extended_level);
5301                 fprintf(outf, "CPUID(1): %s %s %s %s %s %s %s %s %s %s\n",
5302                         ecx_flags & (1 << 0) ? "SSE3" : "-",
5303                         ecx_flags & (1 << 3) ? "MONITOR" : "-",
5304                         ecx_flags & (1 << 6) ? "SMX" : "-",
5305                         ecx_flags & (1 << 7) ? "EIST" : "-",
5306                         ecx_flags & (1 << 8) ? "TM2" : "-",
5307                         edx_flags & (1 << 4) ? "TSC" : "-",
5308                         edx_flags & (1 << 5) ? "MSR" : "-",
5309                         edx_flags & (1 << 22) ? "ACPI-TM" : "-",
5310                         edx_flags & (1 << 28) ? "HT" : "-",
5311                         edx_flags & (1 << 29) ? "TM" : "-");
5312         }
5313         if (genuine_intel) {
5314                 model_orig = model;
5315                 model = intel_model_duplicates(model);
5316         }
5317
5318         if (!(edx_flags & (1 << 5)))
5319                 errx(1, "CPUID: no MSR");
5320
5321         if (max_extended_level >= 0x80000007) {
5322
5323                 /*
5324                  * Non-Stop TSC is advertised by CPUID.EAX=0x80000007: EDX.bit8
5325                  * this check is valid for both Intel and AMD
5326                  */
5327                 __cpuid(0x80000007, eax, ebx, ecx, edx);
5328                 has_invariant_tsc = edx & (1 << 8);
5329         }
5330
5331         /*
5332          * APERF/MPERF is advertised by CPUID.EAX=0x6: ECX.bit0
5333          * this check is valid for both Intel and AMD
5334          */
5335
5336         __cpuid(0x6, eax, ebx, ecx, edx);
5337         has_aperf = ecx & (1 << 0);
5338         if (has_aperf) {
5339                 BIC_PRESENT(BIC_Avg_MHz);
5340                 BIC_PRESENT(BIC_Busy);
5341                 BIC_PRESENT(BIC_Bzy_MHz);
5342         }
5343         do_dts = eax & (1 << 0);
5344         if (do_dts)
5345                 BIC_PRESENT(BIC_CoreTmp);
5346         has_turbo = eax & (1 << 1);
5347         do_ptm = eax & (1 << 6);
5348         if (do_ptm)
5349                 BIC_PRESENT(BIC_PkgTmp);
5350         has_hwp = eax & (1 << 7);
5351         has_hwp_notify = eax & (1 << 8);
5352         has_hwp_activity_window = eax & (1 << 9);
5353         has_hwp_epp = eax & (1 << 10);
5354         has_hwp_pkg = eax & (1 << 11);
5355         has_epb = ecx & (1 << 3);
5356
5357         if (!quiet)
5358                 fprintf(outf, "CPUID(6): %sAPERF, %sTURBO, %sDTS, %sPTM, %sHWP, "
5359                         "%sHWPnotify, %sHWPwindow, %sHWPepp, %sHWPpkg, %sEPB\n",
5360                         has_aperf ? "" : "No-",
5361                         has_turbo ? "" : "No-",
5362                         do_dts ? "" : "No-",
5363                         do_ptm ? "" : "No-",
5364                         has_hwp ? "" : "No-",
5365                         has_hwp_notify ? "" : "No-",
5366                         has_hwp_activity_window ? "" : "No-",
5367                         has_hwp_epp ? "" : "No-",
5368                         has_hwp_pkg ? "" : "No-",
5369                         has_epb ? "" : "No-");
5370
5371         if (!quiet)
5372                 decode_misc_enable_msr();
5373
5374
5375         if (max_level >= 0x7 && !quiet) {
5376                 int has_sgx;
5377
5378                 ecx = 0;
5379
5380                 __cpuid_count(0x7, 0, eax, ebx, ecx, edx);
5381
5382                 has_sgx = ebx & (1 << 2);
5383                 fprintf(outf, "CPUID(7): %sSGX\n", has_sgx ? "" : "No-");
5384
5385                 if (has_sgx)
5386                         decode_feature_control_msr();
5387         }
5388
5389         if (max_level >= 0x15) {
5390                 unsigned int eax_crystal;
5391                 unsigned int ebx_tsc;
5392
5393                 /*
5394                  * CPUID 15H TSC/Crystal ratio, possibly Crystal Hz
5395                  */
5396                 eax_crystal = ebx_tsc = crystal_hz = edx = 0;
5397                 __cpuid(0x15, eax_crystal, ebx_tsc, crystal_hz, edx);
5398
5399                 if (ebx_tsc != 0) {
5400
5401                         if (!quiet && (ebx != 0))
5402                                 fprintf(outf, "CPUID(0x15): eax_crystal: %d ebx_tsc: %d ecx_crystal_hz: %d\n",
5403                                         eax_crystal, ebx_tsc, crystal_hz);
5404
5405                         if (crystal_hz == 0)
5406                                 switch(model) {
5407                                 case INTEL_FAM6_SKYLAKE_L:      /* SKL */
5408                                         crystal_hz = 24000000;  /* 24.0 MHz */
5409                                         break;
5410                                 case INTEL_FAM6_ATOM_GOLDMONT_D:        /* DNV */
5411                                         crystal_hz = 25000000;  /* 25.0 MHz */
5412                                         break;
5413                                 case INTEL_FAM6_ATOM_GOLDMONT:  /* BXT */
5414                                 case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
5415                                         crystal_hz = 19200000;  /* 19.2 MHz */
5416                                         break;
5417                                 default:
5418                                         crystal_hz = 0;
5419                         }
5420
5421                         if (crystal_hz) {
5422                                 tsc_hz =  (unsigned long long) crystal_hz * ebx_tsc / eax_crystal;
5423                                 if (!quiet)
5424                                         fprintf(outf, "TSC: %lld MHz (%d Hz * %d / %d / 1000000)\n",
5425                                                 tsc_hz / 1000000, crystal_hz, ebx_tsc,  eax_crystal);
5426                         }
5427                 }
5428         }
5429         if (max_level >= 0x16) {
5430                 unsigned int base_mhz, max_mhz, bus_mhz, edx;
5431
5432                 /*
5433                  * CPUID 16H Base MHz, Max MHz, Bus MHz
5434                  */
5435                 base_mhz = max_mhz = bus_mhz = edx = 0;
5436
5437                 __cpuid(0x16, base_mhz, max_mhz, bus_mhz, edx);
5438                 if (!quiet)
5439                         fprintf(outf, "CPUID(0x16): base_mhz: %d max_mhz: %d bus_mhz: %d\n",
5440                                 base_mhz, max_mhz, bus_mhz);
5441         }
5442
5443         if (has_aperf)
5444                 aperf_mperf_multiplier = get_aperf_mperf_multiplier(family, model);
5445
5446         BIC_PRESENT(BIC_IRQ);
5447         BIC_PRESENT(BIC_TSC_MHz);
5448
5449         if (probe_nhm_msrs(family, model)) {
5450                 do_nhm_platform_info = 1;
5451                 BIC_PRESENT(BIC_CPU_c1);
5452                 BIC_PRESENT(BIC_CPU_c3);
5453                 BIC_PRESENT(BIC_CPU_c6);
5454                 BIC_PRESENT(BIC_SMI);
5455         }
5456         do_snb_cstates = has_snb_msrs(family, model);
5457
5458         if (do_snb_cstates)
5459                 BIC_PRESENT(BIC_CPU_c7);
5460
5461         do_irtl_snb = has_snb_msrs(family, model);
5462         if (do_snb_cstates && (pkg_cstate_limit >= PCL__2))
5463                 BIC_PRESENT(BIC_Pkgpc2);
5464         if (pkg_cstate_limit >= PCL__3)
5465                 BIC_PRESENT(BIC_Pkgpc3);
5466         if (pkg_cstate_limit >= PCL__6)
5467                 BIC_PRESENT(BIC_Pkgpc6);
5468         if (do_snb_cstates && (pkg_cstate_limit >= PCL__7))
5469                 BIC_PRESENT(BIC_Pkgpc7);
5470         if (has_slv_msrs(family, model)) {
5471                 BIC_NOT_PRESENT(BIC_Pkgpc2);
5472                 BIC_NOT_PRESENT(BIC_Pkgpc3);
5473                 BIC_PRESENT(BIC_Pkgpc6);
5474                 BIC_NOT_PRESENT(BIC_Pkgpc7);
5475                 BIC_PRESENT(BIC_Mod_c6);
5476                 use_c1_residency_msr = 1;
5477         }
5478         if (is_jvl(family, model)) {
5479                 BIC_NOT_PRESENT(BIC_CPU_c3);
5480                 BIC_NOT_PRESENT(BIC_CPU_c7);
5481                 BIC_NOT_PRESENT(BIC_Pkgpc2);
5482                 BIC_NOT_PRESENT(BIC_Pkgpc3);
5483                 BIC_NOT_PRESENT(BIC_Pkgpc6);
5484                 BIC_NOT_PRESENT(BIC_Pkgpc7);
5485         }
5486         if (is_dnv(family, model)) {
5487                 BIC_PRESENT(BIC_CPU_c1);
5488                 BIC_NOT_PRESENT(BIC_CPU_c3);
5489                 BIC_NOT_PRESENT(BIC_Pkgpc3);
5490                 BIC_NOT_PRESENT(BIC_CPU_c7);
5491                 BIC_NOT_PRESENT(BIC_Pkgpc7);
5492                 use_c1_residency_msr = 1;
5493         }
5494         if (is_skx(family, model) || is_icx(family, model)) {
5495                 BIC_NOT_PRESENT(BIC_CPU_c3);
5496                 BIC_NOT_PRESENT(BIC_Pkgpc3);
5497                 BIC_NOT_PRESENT(BIC_CPU_c7);
5498                 BIC_NOT_PRESENT(BIC_Pkgpc7);
5499         }
5500         if (is_bdx(family, model)) {
5501                 BIC_NOT_PRESENT(BIC_CPU_c7);
5502                 BIC_NOT_PRESENT(BIC_Pkgpc7);
5503         }
5504         if (has_c8910_msrs(family, model)) {
5505                 if (pkg_cstate_limit >= PCL__8)
5506                         BIC_PRESENT(BIC_Pkgpc8);
5507                 if (pkg_cstate_limit >= PCL__9)
5508                         BIC_PRESENT(BIC_Pkgpc9);
5509                 if (pkg_cstate_limit >= PCL_10)
5510                         BIC_PRESENT(BIC_Pkgpc10);
5511         }
5512         do_irtl_hsw = has_c8910_msrs(family, model);
5513         if (has_skl_msrs(family, model)) {
5514                 BIC_PRESENT(BIC_Totl_c0);
5515                 BIC_PRESENT(BIC_Any_c0);
5516                 BIC_PRESENT(BIC_GFX_c0);
5517                 BIC_PRESENT(BIC_CPUGFX);
5518         }
5519         do_slm_cstates = is_slm(family, model);
5520         do_knl_cstates  = is_knl(family, model);
5521
5522         if (do_slm_cstates || do_knl_cstates || is_cnl(family, model) ||
5523             is_ehl(family, model))
5524                 BIC_NOT_PRESENT(BIC_CPU_c3);
5525
5526         if (!quiet)
5527                 decode_misc_pwr_mgmt_msr();
5528
5529         if (!quiet && has_slv_msrs(family, model))
5530                 decode_c6_demotion_policy_msr();
5531
5532         rapl_probe(family, model);
5533         perf_limit_reasons_probe(family, model);
5534         automatic_cstate_conversion_probe(family, model);
5535
5536         check_tcc_offset(model_orig);
5537
5538         if (!quiet)
5539                 dump_cstate_pstate_config_info(family, model);
5540
5541         if (!quiet)
5542                 print_dev_latency();
5543         if (!quiet)
5544                 dump_sysfs_cstate_config();
5545         if (!quiet)
5546                 dump_sysfs_pstate_config();
5547
5548         if (has_skl_msrs(family, model) || is_ehl(family, model))
5549                 calculate_tsc_tweak();
5550
5551         if (!access("/sys/class/drm/card0/power/rc6_residency_ms", R_OK))
5552                 BIC_PRESENT(BIC_GFX_rc6);
5553
5554         if (!access("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", R_OK))
5555                 BIC_PRESENT(BIC_GFXMHz);
5556
5557         if (!access("/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz", R_OK))
5558                 BIC_PRESENT(BIC_GFXACTMHz);
5559
5560         if (!access("/sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us", R_OK))
5561                 BIC_PRESENT(BIC_CPU_LPI);
5562         else
5563                 BIC_NOT_PRESENT(BIC_CPU_LPI);
5564
5565         if (!access(sys_lpi_file_sysfs, R_OK)) {
5566                 sys_lpi_file = sys_lpi_file_sysfs;
5567                 BIC_PRESENT(BIC_SYS_LPI);
5568         } else if (!access(sys_lpi_file_debugfs, R_OK)) {
5569                 sys_lpi_file = sys_lpi_file_debugfs;
5570                 BIC_PRESENT(BIC_SYS_LPI);
5571         } else {
5572                 sys_lpi_file_sysfs = NULL;
5573                 BIC_NOT_PRESENT(BIC_SYS_LPI);
5574         }
5575
5576         if (!quiet)
5577                 decode_misc_feature_control();
5578
5579         return;
5580 }
5581
/*
 * in /dev/cpu/ return success for names that are numbers
 * ie. filter out ".", "..", "microcode".
 *
 * Returns 1 when the first character of the entry name is a decimal
 * digit, 0 otherwise.
 */
int dir_filter(const struct dirent *dirp)
{
	/*
	 * <ctype.h> functions take an int holding an unsigned char value
	 * (or EOF).  d_name is plain char, which may be signed, so convert
	 * before the call.
	 */
	return isdigit((unsigned char)dirp->d_name[0]) ? 1 : 0;
}
5593
/* Stub: ignores its argument and always returns 0. */
int open_dev_cpu_msr(int dummy1)
{
	(void)dummy1;

	return 0;
}
5598
/*
 * topology_probe()
 *
 * Discover the CPU topology and record it in the global "topo" and
 * "cpus[]" structures.
 *
 * Steps, in order:
 *  - initialize topo.num_cpus / topo.max_cpu_num and allocate cpus[]
 *    with topo.max_cpu_num + 1 entries
 *  - allocate cpu_present_set and cpu_affinity_set, sized the same way
 *  - terminate via err() if a cpu set in cpu_subset is not present
 *  - for every present cpu, record package, die, node, core and sibling
 *    information, tracking the largest id seen at each level
 *  - derive topo.cores_per_node, topo.num_die, topo.num_packages and
 *    topo.threads_per_core from those maxima, and mark the matching
 *    BIC_* columns present unless summary_only is set
 *
 * Allocation failures terminate the program through err().
 * When debug is non-zero, one line per present cpu is printed to outf.
 */
void topology_probe()
{
        int i;
        int max_core_id = 0;
        int max_package_id = 0;
        int max_die_id = 0;
        int max_siblings = 0;

        /* Initialize num_cpus, max_cpu_num */
        set_max_cpu_num();
        topo.num_cpus = 0;
        for_all_proc_cpus(count_cpus);
        if (!summary_only && topo.num_cpus > 1)
                BIC_PRESENT(BIC_CPU);

        if (debug > 1)
                fprintf(outf, "num_cpus %d max_cpu_num %d\n", topo.num_cpus, topo.max_cpu_num);

        /* cpus[] is indexed by cpu number, so it needs max_cpu_num + 1 slots */
        cpus = calloc(1, (topo.max_cpu_num  + 1) * sizeof(struct cpu_topology));
        if (cpus == NULL)
                err(1, "calloc cpus");

        /*
         * Allocate and initialize cpu_present_set
         */
        cpu_present_set = CPU_ALLOC((topo.max_cpu_num + 1));
        if (cpu_present_set == NULL)
                err(3, "CPU_ALLOC");
        cpu_present_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
        CPU_ZERO_S(cpu_present_setsize, cpu_present_set);
        for_all_proc_cpus(mark_cpu_present);

        /*
         * Validate that all cpus in cpu_subset are also in cpu_present_set
         */
        for (i = 0; i < CPU_SUBSET_MAXCPUS; ++i) {
                if (CPU_ISSET_S(i, cpu_subset_size, cpu_subset))
                        if (!CPU_ISSET_S(i, cpu_present_setsize, cpu_present_set))
                                err(1, "cpu%d not present", i);
        }

        /*
         * Allocate and initialize cpu_affinity_set
         */
        cpu_affinity_set = CPU_ALLOC((topo.max_cpu_num + 1));
        if (cpu_affinity_set == NULL)
                err(3, "CPU_ALLOC");
        cpu_affinity_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
        CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set);

        for_all_proc_cpus(init_thread_id);

        /*
         * For online cpus
         * find max_core_id, max_package_id
         */
        for (i = 0; i <= topo.max_cpu_num; ++i) {
                int siblings;

                if (cpu_is_not_present(i)) {
                        if (debug > 1)
                                fprintf(outf, "cpu%d NOT PRESENT\n", i);
                        continue;
                }

                cpus[i].logical_cpu_id = i;

                /* get package information */
                cpus[i].physical_package_id = get_physical_package_id(i);
                if (cpus[i].physical_package_id > max_package_id)
                        max_package_id = cpus[i].physical_package_id;

                /* get die information */
                cpus[i].die_id = get_die_id(i);
                if (cpus[i].die_id > max_die_id)
                        max_die_id = cpus[i].die_id;

                /* get numa node information */
                cpus[i].physical_node_id = get_physical_node_id(&cpus[i]);
                if (cpus[i].physical_node_id > topo.max_node_num)
                        topo.max_node_num = cpus[i].physical_node_id;

                /* get core information */
                cpus[i].physical_core_id = get_core_id(i);
                if (cpus[i].physical_core_id > max_core_id)
                        max_core_id = cpus[i].physical_core_id;

                /* get thread information */
                siblings = get_thread_siblings(&cpus[i]);
                if (siblings > max_siblings)
                        max_siblings = siblings;
                /* each cpu with thread_id 0 is counted as one core */
                if (cpus[i].thread_id == 0)
                        topo.num_cores++;
        }

        /* ids start at 0, so each count below is the largest id seen plus one */
        topo.cores_per_node = max_core_id + 1;
        if (debug > 1)
                fprintf(outf, "max_core_id %d, sizing for %d cores per package\n",
                        max_core_id, topo.cores_per_node);
        if (!summary_only && topo.cores_per_node > 1)
                BIC_PRESENT(BIC_Core);

        topo.num_die = max_die_id + 1;
        if (debug > 1)
                fprintf(outf, "max_die_id %d, sizing for %d die\n",
                                max_die_id, topo.num_die);
        if (!summary_only && topo.num_die > 1)
                BIC_PRESENT(BIC_Die);

        topo.num_packages = max_package_id + 1;
        if (debug > 1)
                fprintf(outf, "max_package_id %d, sizing for %d packages\n",
                        max_package_id, topo.num_packages);
        if (!summary_only && topo.num_packages > 1)
                BIC_PRESENT(BIC_Package);

        set_node_data();
        if (debug > 1)
                fprintf(outf, "nodes_per_pkg %d\n", topo.nodes_per_pkg);
        if (!summary_only && topo.nodes_per_pkg > 1)
                BIC_PRESENT(BIC_Node);

        topo.threads_per_core = max_siblings;
        if (debug > 1)
                fprintf(outf, "max_siblings %d\n", max_siblings);

        /* the per-cpu topology dump below is only produced when debugging */
        if (debug < 1)
                return;

        for (i = 0; i <= topo.max_cpu_num; ++i) {
                if (cpu_is_not_present(i))
                        continue;
                fprintf(outf,
                        "cpu %d pkg %d die %d node %d lnode %d core %d thread %d\n",
                        i, cpus[i].physical_package_id, cpus[i].die_id,
                        cpus[i].physical_node_id,
                        cpus[i].logical_node_id,
                        cpus[i].physical_core_id,
                        cpus[i].thread_id);
        }

}
5741
5742 void
5743 allocate_counters(struct thread_data **t, struct core_data **c,
5744                   struct pkg_data **p)
5745 {
5746         int i;
5747         int num_cores = topo.cores_per_node * topo.nodes_per_pkg *
5748                         topo.num_packages;
5749         int num_threads = topo.threads_per_core * num_cores;
5750
5751         *t = calloc(num_threads, sizeof(struct thread_data));
5752         if (*t == NULL)
5753                 goto error;
5754
5755         for (i = 0; i < num_threads; i++)
5756                 (*t)[i].cpu_id = -1;
5757
5758         *c = calloc(num_cores, sizeof(struct core_data));
5759         if (*c == NULL)
5760                 goto error;
5761
5762         for (i = 0; i < num_cores; i++)
5763                 (*c)[i].core_id = -1;
5764
5765         *p = calloc(topo.num_packages, sizeof(struct pkg_data));
5766         if (*p == NULL)
5767                 goto error;
5768
5769         for (i = 0; i < topo.num_packages; i++)
5770                 (*p)[i].package_id = i;
5771
5772         return;
5773 error:
5774         err(1, "calloc counters");
5775 }
5776 /*
5777  * init_counter()
5778  *
5779  * set FIRST_THREAD_IN_CORE and FIRST_CORE_IN_PACKAGE
5780  */
5781 void init_counter(struct thread_data *thread_base, struct core_data *core_base,
5782         struct pkg_data *pkg_base, int cpu_id)
5783 {
5784         int pkg_id = cpus[cpu_id].physical_package_id;
5785         int node_id = cpus[cpu_id].logical_node_id;
5786         int core_id = cpus[cpu_id].physical_core_id;
5787         int thread_id = cpus[cpu_id].thread_id;
5788         struct thread_data *t;
5789         struct core_data *c;
5790         struct pkg_data *p;
5791
5792
5793         /* Workaround for systems where physical_node_id==-1
5794          * and logical_node_id==(-1 - topo.num_cpus)
5795          */
5796         if (node_id < 0)
5797                 node_id = 0;
5798
5799         t = GET_THREAD(thread_base, thread_id, core_id, node_id, pkg_id);
5800         c = GET_CORE(core_base, core_id, node_id, pkg_id);
5801         p = GET_PKG(pkg_base, pkg_id);
5802
5803         t->cpu_id = cpu_id;
5804         if (thread_id == 0) {
5805                 t->flags |= CPU_IS_FIRST_THREAD_IN_CORE;
5806                 if (cpu_is_first_core_in_package(cpu_id))
5807                         t->flags |= CPU_IS_FIRST_CORE_IN_PACKAGE;
5808         }
5809
5810         c->core_id = core_id;
5811         p->package_id = pkg_id;
5812 }
5813
5814
/*
 * initialize_counters()
 *
 * Run init_counter() for cpu_id on the EVEN and then the ODD counter set.
 * Always returns 0.
 */
int initialize_counters(int cpu_id)
{
        init_counter(EVEN_COUNTERS, cpu_id);
        init_counter(ODD_COUNTERS, cpu_id);
        return 0;
}
5821
5822 void allocate_output_buffer()
5823 {
5824         output_buffer = calloc(1, (1 + topo.num_cpus) * 2048);
5825         outp = output_buffer;
5826         if (outp == NULL)
5827                 err(-1, "calloc output buffer");
5828 }
5829 void allocate_fd_percpu(void)
5830 {
5831         fd_percpu = calloc(topo.max_cpu_num + 1, sizeof(int));
5832         if (fd_percpu == NULL)
5833                 err(-1, "calloc fd_percpu");
5834 }
5835 void allocate_irq_buffers(void)
5836 {
5837         irq_column_2_cpu = calloc(topo.num_cpus, sizeof(int));
5838         if (irq_column_2_cpu == NULL)
5839                 err(-1, "calloc %d", topo.num_cpus);
5840
5841         irqs_per_cpu = calloc(topo.max_cpu_num + 1, sizeof(int));
5842         if (irqs_per_cpu == NULL)
5843                 err(-1, "calloc %d", topo.max_cpu_num + 1);
5844 }
/*
 * setup_all_buffers()
 *
 * Probe the topology, allocate every buffer that is sized from it,
 * then initialize the counter sets for each cpu.
 */
void setup_all_buffers(void)
{
        /* must run first: the allocators below size their buffers from topo */
        topology_probe();
        allocate_irq_buffers();
        allocate_fd_percpu();
        allocate_counters(&thread_even, &core_even, &package_even);
        allocate_counters(&thread_odd, &core_odd, &package_odd);
        allocate_output_buffer();
        /* counters must exist before initialize_counters() fills them in */
        for_all_proc_cpus(initialize_counters);
}
5855
5856 void set_base_cpu(void)
5857 {
5858         base_cpu = sched_getcpu();
5859         if (base_cpu < 0)
5860                 err(-ENODEV, "No valid cpus found");
5861
5862         if (debug > 1)
5863                 fprintf(outf, "base_cpu = %d\n", base_cpu);
5864 }
5865
5866 void turbostat_init()
5867 {
5868         setup_all_buffers();
5869         set_base_cpu();
5870         check_dev_msr();
5871         check_permissions();
5872         process_cpuid();
5873         linux_perf_init();
5874
5875
5876         if (!quiet)
5877                 for_all_cpus(print_hwp, ODD_COUNTERS);
5878
5879         if (!quiet)
5880                 for_all_cpus(print_epb, ODD_COUNTERS);
5881
5882         if (!quiet)
5883                 for_all_cpus(print_perf_limit, ODD_COUNTERS);
5884
5885         if (!quiet)
5886                 for_all_cpus(print_rapl, ODD_COUNTERS);
5887
5888         for_all_cpus(set_temperature_target, ODD_COUNTERS);
5889
5890         for_all_cpus(get_cpu_type, ODD_COUNTERS);
5891         for_all_cpus(get_cpu_type, EVEN_COUNTERS);
5892
5893         if (!quiet)
5894                 for_all_cpus(print_thermal, ODD_COUNTERS);
5895
5896         if (!quiet && do_irtl_snb)
5897                 print_irtl();
5898 }
5899
5900 int fork_it(char **argv)
5901 {
5902         pid_t child_pid;
5903         int status;
5904
5905         snapshot_proc_sysfs_files();
5906         status = for_all_cpus(get_counters, EVEN_COUNTERS);
5907         first_counter_read = 0;
5908         if (status)
5909                 exit(status);
5910         /* clear affinity side-effect of get_counters() */
5911         sched_setaffinity(0, cpu_present_setsize, cpu_present_set);
5912         gettimeofday(&tv_even, (struct timezone *)NULL);
5913
5914         child_pid = fork();
5915         if (!child_pid) {
5916                 /* child */
5917                 execvp(argv[0], argv);
5918                 err(errno, "exec %s", argv[0]);
5919         } else {
5920
5921                 /* parent */
5922                 if (child_pid == -1)
5923                         err(1, "fork");
5924
5925                 signal(SIGINT, SIG_IGN);
5926                 signal(SIGQUIT, SIG_IGN);
5927                 if (waitpid(child_pid, &status, 0) == -1)
5928                         err(status, "waitpid");
5929
5930                 if (WIFEXITED(status))
5931                         status = WEXITSTATUS(status);
5932         }
5933         /*
5934          * n.b. fork_it() does not check for errors from for_all_cpus()
5935          * because re-starting is problematic when forking
5936          */
5937         snapshot_proc_sysfs_files();
5938         for_all_cpus(get_counters, ODD_COUNTERS);
5939         gettimeofday(&tv_odd, (struct timezone *)NULL);
5940         timersub(&tv_odd, &tv_even, &tv_delta);
5941         if (for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS))
5942                 fprintf(outf, "%s: Counter reset detected\n", progname);
5943         else {
5944                 compute_average(EVEN_COUNTERS);
5945                 format_all_counters(EVEN_COUNTERS);
5946         }
5947
5948         fprintf(outf, "%.6f sec\n", tv_delta.tv_sec + tv_delta.tv_usec/1000000.0);
5949
5950         flush_output_stderr();
5951
5952         return status;
5953 }
5954
5955 int get_and_dump_counters(void)
5956 {
5957         int status;
5958
5959         snapshot_proc_sysfs_files();
5960         status = for_all_cpus(get_counters, ODD_COUNTERS);
5961         if (status)
5962                 return status;
5963
5964         status = for_all_cpus(dump_counters, ODD_COUNTERS);
5965         if (status)
5966                 return status;
5967
5968         flush_output_stdout();
5969
5970         return status;
5971 }
5972
5973 void print_version() {
5974         fprintf(outf, "turbostat version 21.03.12"
5975                 " - Len Brown <lenb@kernel.org>\n");
5976 }
5977
5978 int add_counter(unsigned int msr_num, char *path, char *name,
5979         unsigned int width, enum counter_scope scope,
5980         enum counter_type type, enum counter_format format, int flags)
5981 {
5982         struct msr_counter *msrp;
5983
5984         msrp = calloc(1, sizeof(struct msr_counter));
5985         if (msrp == NULL) {
5986                 perror("calloc");
5987                 exit(1);
5988         }
5989
5990         msrp->msr_num = msr_num;
5991         strncpy(msrp->name, name, NAME_BYTES - 1);
5992         if (path)
5993                 strncpy(msrp->path, path, PATH_BYTES - 1);
5994         msrp->width = width;
5995         msrp->type = type;
5996         msrp->format = format;
5997         msrp->flags = flags;
5998
5999         switch (scope) {
6000
6001         case SCOPE_CPU:
6002                 msrp->next = sys.tp;
6003                 sys.tp = msrp;
6004                 sys.added_thread_counters++;
6005                 if (sys.added_thread_counters > MAX_ADDED_THREAD_COUNTERS) {
6006                         fprintf(stderr, "exceeded max %d added thread counters\n",
6007                                 MAX_ADDED_COUNTERS);
6008                         exit(-1);
6009                 }
6010                 break;
6011
6012         case SCOPE_CORE:
6013                 msrp->next = sys.cp;
6014                 sys.cp = msrp;
6015                 sys.added_core_counters++;
6016                 if (sys.added_core_counters > MAX_ADDED_COUNTERS) {
6017                         fprintf(stderr, "exceeded max %d added core counters\n",
6018                                 MAX_ADDED_COUNTERS);
6019                         exit(-1);
6020                 }
6021                 break;
6022
6023         case SCOPE_PACKAGE:
6024                 msrp->next = sys.pp;
6025                 sys.pp = msrp;
6026                 sys.added_package_counters++;
6027                 if (sys.added_package_counters > MAX_ADDED_COUNTERS) {
6028                         fprintf(stderr, "exceeded max %d added package counters\n",
6029                                 MAX_ADDED_COUNTERS);
6030                         exit(-1);
6031                 }
6032                 break;
6033         }
6034
6035         return 0;
6036 }
6037
6038 void parse_add_command(char *add_command)
6039 {
6040         int msr_num = 0;
6041         char *path = NULL;
6042         char name_buffer[NAME_BYTES] = "";
6043         int width = 64;
6044         int fail = 0;
6045         enum counter_scope scope = SCOPE_CPU;
6046         enum counter_type type = COUNTER_CYCLES;
6047         enum counter_format format = FORMAT_DELTA;
6048
6049         while (add_command) {
6050
6051                 if (sscanf(add_command, "msr0x%x", &msr_num) == 1)
6052                         goto next;
6053
6054                 if (sscanf(add_command, "msr%d", &msr_num) == 1)
6055                         goto next;
6056
6057                 if (*add_command == '/') {
6058                         path = add_command;
6059                         goto next;
6060                 }
6061
6062                 if (sscanf(add_command, "u%d", &width) == 1) {
6063                         if ((width == 32) || (width == 64))
6064                                 goto next;
6065                         width = 64;
6066                 }
6067                 if (!strncmp(add_command, "cpu", strlen("cpu"))) {
6068                         scope = SCOPE_CPU;
6069                         goto next;
6070                 }
6071                 if (!strncmp(add_command, "core", strlen("core"))) {
6072                         scope = SCOPE_CORE;
6073                         goto next;
6074                 }
6075                 if (!strncmp(add_command, "package", strlen("package"))) {
6076                         scope = SCOPE_PACKAGE;
6077                         goto next;
6078                 }
6079                 if (!strncmp(add_command, "cycles", strlen("cycles"))) {
6080                         type = COUNTER_CYCLES;
6081                         goto next;
6082                 }
6083                 if (!strncmp(add_command, "seconds", strlen("seconds"))) {
6084                         type = COUNTER_SECONDS;
6085                         goto next;
6086                 }
6087                 if (!strncmp(add_command, "usec", strlen("usec"))) {
6088                         type = COUNTER_USEC;
6089                         goto next;
6090                 }
6091                 if (!strncmp(add_command, "raw", strlen("raw"))) {
6092                         format = FORMAT_RAW;
6093                         goto next;
6094                 }
6095                 if (!strncmp(add_command, "delta", strlen("delta"))) {
6096                         format = FORMAT_DELTA;
6097                         goto next;
6098                 }
6099                 if (!strncmp(add_command, "percent", strlen("percent"))) {
6100                         format = FORMAT_PERCENT;
6101                         goto next;
6102                 }
6103
6104                 if (sscanf(add_command, "%18s,%*s", name_buffer) == 1) {        /* 18 < NAME_BYTES */
6105                         char *eos;
6106
6107                         eos = strchr(name_buffer, ',');
6108                         if (eos)
6109                                 *eos = '\0';
6110                         goto next;
6111                 }
6112
6113 next:
6114                 add_command = strchr(add_command, ',');
6115                 if (add_command) {
6116                         *add_command = '\0';
6117                         add_command++;
6118                 }
6119
6120         }
6121         if ((msr_num == 0) && (path == NULL)) {
6122                 fprintf(stderr, "--add: (msrDDD | msr0xXXX | /path_to_counter ) required\n");
6123                 fail++;
6124         }
6125
6126         /* generate default column header */
6127         if (*name_buffer == '\0') {
6128                 if (width == 32)
6129                         sprintf(name_buffer, "M0x%x%s", msr_num, format == FORMAT_PERCENT ? "%" : "");
6130                 else
6131                         sprintf(name_buffer, "M0X%x%s", msr_num, format == FORMAT_PERCENT ? "%" : "");
6132         }
6133
6134         if (add_counter(msr_num, path, name_buffer, width, scope, type, format, 0))
6135                 fail++;
6136
6137         if (fail) {
6138                 help();
6139                 exit(1);
6140         }
6141 }
6142
6143 int is_deferred_skip(char *name)
6144 {
6145         int i;
6146
6147         for (i = 0; i < deferred_skip_index; ++i)
6148                 if (!strcmp(name, deferred_skip_names[i]))
6149                         return 1;
6150         return 0;
6151 }
6152
6153 void probe_sysfs(void)
6154 {
6155         char path[64];
6156         char name_buf[16];
6157         FILE *input;
6158         int state;
6159         char *sp;
6160
6161         if (!DO_BIC(BIC_sysfs))
6162                 return;
6163
6164         for (state = 10; state >= 0; --state) {
6165
6166                 sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name",
6167                         base_cpu, state);
6168                 input = fopen(path, "r");
6169                 if (input == NULL)
6170                         continue;
6171                 if (!fgets(name_buf, sizeof(name_buf), input))
6172                         err(1, "%s: failed to read file", path);
6173
6174                  /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */
6175                 sp = strchr(name_buf, '-');
6176                 if (!sp)
6177                         sp = strchrnul(name_buf, '\n');
6178                 *sp = '%';
6179                 *(sp + 1) = '\0';
6180
6181                 remove_underbar(name_buf);
6182
6183                 fclose(input);
6184
6185                 sprintf(path, "cpuidle/state%d/time", state);
6186
6187                 if (is_deferred_skip(name_buf))
6188                         continue;
6189
6190                 add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_USEC,
6191                                 FORMAT_PERCENT, SYSFS_PERCPU);
6192         }
6193
6194         for (state = 10; state >= 0; --state) {
6195
6196                 sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name",
6197                         base_cpu, state);
6198                 input = fopen(path, "r");
6199                 if (input == NULL)
6200                         continue;
6201                 if (!fgets(name_buf, sizeof(name_buf), input))
6202                         err(1, "%s: failed to read file", path);
6203                  /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */
6204                 sp = strchr(name_buf, '-');
6205                 if (!sp)
6206                         sp = strchrnul(name_buf, '\n');
6207                 *sp = '\0';
6208                 fclose(input);
6209
6210                 remove_underbar(name_buf);
6211
6212                 sprintf(path, "cpuidle/state%d/usage", state);
6213
6214                 if (is_deferred_skip(name_buf))
6215                         continue;
6216
6217                 add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_ITEMS,
6218                                 FORMAT_DELTA, SYSFS_PERCPU);
6219         }
6220
6221 }
6222
6223
6224 /*
6225  * parse cpuset with following syntax
6226  * 1,2,4..6,8-10 and set bits in cpu_subset
6227  */
6228 void parse_cpu_command(char *optarg)
6229 {
6230         unsigned int start, end;
6231         char *next;
6232
6233         if (!strcmp(optarg, "core")) {
6234                 if (cpu_subset)
6235                         goto error;
6236                 show_core_only++;
6237                 return;
6238         }
6239         if (!strcmp(optarg, "package")) {
6240                 if (cpu_subset)
6241                         goto error;
6242                 show_pkg_only++;
6243                 return;
6244         }
6245         if (show_core_only || show_pkg_only)
6246                 goto error;
6247
6248         cpu_subset = CPU_ALLOC(CPU_SUBSET_MAXCPUS);
6249         if (cpu_subset == NULL)
6250                 err(3, "CPU_ALLOC");
6251         cpu_subset_size = CPU_ALLOC_SIZE(CPU_SUBSET_MAXCPUS);
6252
6253         CPU_ZERO_S(cpu_subset_size, cpu_subset);
6254
6255         next = optarg;
6256
6257         while (next && *next) {
6258
6259                 if (*next == '-')       /* no negative cpu numbers */
6260                         goto error;
6261
6262                 start = strtoul(next, &next, 10);
6263
6264                 if (start >= CPU_SUBSET_MAXCPUS)
6265                         goto error;
6266                 CPU_SET_S(start, cpu_subset_size, cpu_subset);
6267
6268                 if (*next == '\0')
6269                         break;
6270
6271                 if (*next == ',') {
6272                         next += 1;
6273                         continue;
6274                 }
6275
6276                 if (*next == '-') {
6277                         next += 1;      /* start range */
6278                 } else if (*next == '.') {
6279                         next += 1;
6280                         if (*next == '.')
6281                                 next += 1;      /* start range */
6282                         else
6283                                 goto error;
6284                 }
6285
6286                 end = strtoul(next, &next, 10);
6287                 if (end <= start)
6288                         goto error;
6289
6290                 while (++start <= end) {
6291                         if (start >= CPU_SUBSET_MAXCPUS)
6292                                 goto error;
6293                         CPU_SET_S(start, cpu_subset_size, cpu_subset);
6294                 }
6295
6296                 if (*next == ',')
6297                         next += 1;
6298                 else if (*next != '\0')
6299                         goto error;
6300         }
6301
6302         return;
6303
6304 error:
6305         fprintf(stderr, "\"--cpu %s\" malformed\n", optarg);
6306         help();
6307         exit(-1);
6308 }
6309
6310
6311 void cmdline(int argc, char **argv)
6312 {
6313         int opt;
6314         int option_index = 0;
6315         static struct option long_options[] = {
6316                 {"add",         required_argument,      0, 'a'},
6317                 {"cpu",         required_argument,      0, 'c'},
6318                 {"Dump",        no_argument,            0, 'D'},
6319                 {"debug",       no_argument,            0, 'd'},        /* internal, not documented */
6320                 {"enable",      required_argument,      0, 'e'},
6321                 {"interval",    required_argument,      0, 'i'},
6322                 {"IPC", no_argument,                    0, 'I'},
6323                 {"num_iterations",      required_argument,      0, 'n'},
6324                 {"help",        no_argument,            0, 'h'},
6325                 {"hide",        required_argument,      0, 'H'},        // meh, -h taken by --help
6326                 {"Joules",      no_argument,            0, 'J'},
6327                 {"list",        no_argument,            0, 'l'},
6328                 {"out",         required_argument,      0, 'o'},
6329                 {"quiet",       no_argument,            0, 'q'},
6330                 {"show",        required_argument,      0, 's'},
6331                 {"Summary",     no_argument,            0, 'S'},
6332                 {"TCC",         required_argument,      0, 'T'},
6333                 {"version",     no_argument,            0, 'v' },
6334                 {0,             0,                      0,  0 }
6335         };
6336
6337         progname = argv[0];
6338
6339         while ((opt = getopt_long_only(argc, argv, "+C:c:Dde:hi:Jn:o:qST:v",
6340                                 long_options, &option_index)) != -1) {
6341                 switch (opt) {
6342                 case 'a':
6343                         parse_add_command(optarg);
6344                         break;
6345                 case 'c':
6346                         parse_cpu_command(optarg);
6347                         break;
6348                 case 'D':
6349                         dump_only++;
6350                         break;
6351                 case 'e':
6352                         /* --enable specified counter */
6353                         bic_enabled = bic_enabled | bic_lookup(optarg, SHOW_LIST);
6354                         break;
6355                 case 'd':
6356                         debug++;
6357                         ENABLE_BIC(BIC_DISABLED_BY_DEFAULT);
6358                         break;
6359                 case 'H':
6360                         /*
6361                          * --hide: do not show those specified
6362                          *  multiple invocations simply clear more bits in enabled mask
6363                          */
6364                         bic_enabled &= ~bic_lookup(optarg, HIDE_LIST);
6365                         break;
6366                 case 'h':
6367                 default:
6368                         help();
6369                         exit(1);
6370                 case 'i':
6371                         {
6372                                 double interval = strtod(optarg, NULL);
6373
6374                                 if (interval < 0.001) {
6375                                         fprintf(outf, "interval %f seconds is too small\n",
6376                                                 interval);
6377                                         exit(2);
6378                                 }
6379
6380                                 interval_tv.tv_sec = interval_ts.tv_sec = interval;
6381                                 interval_tv.tv_usec = (interval - interval_tv.tv_sec) * 1000000;
6382                                 interval_ts.tv_nsec = (interval - interval_ts.tv_sec) * 1000000000;
6383                         }
6384                         break;
6385                 case 'J':
6386                         rapl_joules++;
6387                         break;
6388                 case 'l':
6389                         ENABLE_BIC(BIC_DISABLED_BY_DEFAULT);
6390                         list_header_only++;
6391                         quiet++;
6392                         break;
6393                 case 'o':
6394                         outf = fopen_or_die(optarg, "w");
6395                         break;
6396                 case 'q':
6397                         quiet = 1;
6398                         break;
6399                 case 'n':
6400                         num_iterations = strtod(optarg, NULL);
6401
6402                         if (num_iterations <= 0) {
6403                                 fprintf(outf, "iterations %d should be positive number\n",
6404                                         num_iterations);
6405                                 exit(2);
6406                         }
6407                         break;
6408                 case 's':
6409                         /*
6410                          * --show: show only those specified
6411                          *  The 1st invocation will clear and replace the enabled mask
6412                          *  subsequent invocations can add to it.
6413                          */
6414                         if (shown == 0)
6415                                 bic_enabled = bic_lookup(optarg, SHOW_LIST);
6416                         else
6417                                 bic_enabled |= bic_lookup(optarg, SHOW_LIST);
6418                         shown = 1;
6419                         break;
6420                 case 'S':
6421                         summary_only++;
6422                         break;
6423                 case 'T':
6424                         tcc_activation_temp_override = atoi(optarg);
6425                         break;
6426                 case 'v':
6427                         print_version();
6428                         exit(0);
6429                         break;
6430                 }
6431         }
6432 }
6433
6434 int main(int argc, char **argv)
6435 {
6436         outf = stderr;
6437         cmdline(argc, argv);
6438
6439         if (!quiet)
6440                 print_version();
6441
6442         probe_sysfs();
6443
6444         turbostat_init();
6445
6446         /* dump counters and exit */
6447         if (dump_only)
6448                 return get_and_dump_counters();
6449
6450         /* list header and exit */
6451         if (list_header_only) {
6452                 print_header(",");
6453                 flush_output_stdout();
6454                 return 0;
6455         }
6456
6457         msr_sum_record();
6458         /*
6459          * if any params left, it must be a command to fork
6460          */
6461         if (argc - optind)
6462                 return fork_it(argv + optind);
6463         else
6464                 turbostat_loop();
6465
6466         return 0;
6467 }