tools/power turbostat: Add Ice Lake NNPI support
[linux-2.6-microblaze.git] / tools / power / x86 / turbostat / turbostat.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * turbostat -- show CPU frequency and C-state residency
4  * on modern Intel and AMD processors.
5  *
6  * Copyright (c) 2013 Intel Corporation.
7  * Len Brown <len.brown@intel.com>
8  */
9
10 #define _GNU_SOURCE
11 #include MSRHEADER
12 #include INTEL_FAMILY_HEADER
13 #include <stdarg.h>
14 #include <stdio.h>
15 #include <err.h>
16 #include <unistd.h>
17 #include <sys/types.h>
18 #include <sys/wait.h>
19 #include <sys/stat.h>
20 #include <sys/select.h>
21 #include <sys/resource.h>
22 #include <fcntl.h>
23 #include <signal.h>
24 #include <sys/time.h>
25 #include <stdlib.h>
26 #include <getopt.h>
27 #include <dirent.h>
28 #include <string.h>
29 #include <ctype.h>
30 #include <sched.h>
31 #include <time.h>
32 #include <cpuid.h>
33 #include <linux/capability.h>
34 #include <errno.h>
35 #include <math.h>
36
37 char *proc_stat = "/proc/stat";
38 FILE *outf;
39 int *fd_percpu;
40 struct timeval interval_tv = {5, 0};
41 struct timespec interval_ts = {5, 0};
42 struct timespec one_msec = {0, 1000000};
43 unsigned int num_iterations;
44 unsigned int debug;
45 unsigned int quiet;
46 unsigned int shown;
47 unsigned int sums_need_wide_columns;
48 unsigned int rapl_joules;
49 unsigned int summary_only;
50 unsigned int list_header_only;
51 unsigned int dump_only;
52 unsigned int do_snb_cstates;
53 unsigned int do_knl_cstates;
54 unsigned int do_slm_cstates;
55 unsigned int use_c1_residency_msr;
56 unsigned int has_aperf;
57 unsigned int has_epb;
58 unsigned int do_irtl_snb;
59 unsigned int do_irtl_hsw;
60 unsigned int units = 1000000;   /* MHz etc */
61 unsigned int genuine_intel;
62 unsigned int authentic_amd;
63 unsigned int max_level, max_extended_level;
64 unsigned int has_invariant_tsc;
65 unsigned int do_nhm_platform_info;
66 unsigned int no_MSR_MISC_PWR_MGMT;
67 unsigned int aperf_mperf_multiplier = 1;
68 double bclk;
69 double base_hz;
70 unsigned int has_base_hz;
71 double tsc_tweak = 1.0;
72 unsigned int show_pkg_only;
73 unsigned int show_core_only;
74 char *output_buffer, *outp;
75 unsigned int do_rapl;
76 unsigned int do_dts;
77 unsigned int do_ptm;
78 unsigned long long  gfx_cur_rc6_ms;
79 unsigned long long cpuidle_cur_cpu_lpi_us;
80 unsigned long long cpuidle_cur_sys_lpi_us;
81 unsigned int gfx_cur_mhz;
82 unsigned int tcc_activation_temp;
83 unsigned int tcc_activation_temp_override;
84 double rapl_power_units, rapl_time_units;
85 double rapl_dram_energy_units, rapl_energy_units;
86 double rapl_joule_counter_range;
87 unsigned int do_core_perf_limit_reasons;
88 unsigned int has_automatic_cstate_conversion;
89 unsigned int do_gfx_perf_limit_reasons;
90 unsigned int do_ring_perf_limit_reasons;
91 unsigned int crystal_hz;
92 unsigned long long tsc_hz;
93 int base_cpu;
94 double discover_bclk(unsigned int family, unsigned int model);
95 unsigned int has_hwp;   /* IA32_PM_ENABLE, IA32_HWP_CAPABILITIES */
96                         /* IA32_HWP_REQUEST, IA32_HWP_STATUS */
97 unsigned int has_hwp_notify;            /* IA32_HWP_INTERRUPT */
98 unsigned int has_hwp_activity_window;   /* IA32_HWP_REQUEST[bits 41:32] */
99 unsigned int has_hwp_epp;               /* IA32_HWP_REQUEST[bits 31:24] */
100 unsigned int has_hwp_pkg;               /* IA32_HWP_REQUEST_PKG */
101 unsigned int has_misc_feature_control;
102 unsigned int first_counter_read = 1;
103
104 #define RAPL_PKG                (1 << 0)
105                                         /* 0x610 MSR_PKG_POWER_LIMIT */
106                                         /* 0x611 MSR_PKG_ENERGY_STATUS */
107 #define RAPL_PKG_PERF_STATUS    (1 << 1)
108                                         /* 0x613 MSR_PKG_PERF_STATUS */
109 #define RAPL_PKG_POWER_INFO     (1 << 2)
110                                         /* 0x614 MSR_PKG_POWER_INFO */
111
112 #define RAPL_DRAM               (1 << 3)
113                                         /* 0x618 MSR_DRAM_POWER_LIMIT */
114                                         /* 0x619 MSR_DRAM_ENERGY_STATUS */
115 #define RAPL_DRAM_PERF_STATUS   (1 << 4)
116                                         /* 0x61b MSR_DRAM_PERF_STATUS */
117 #define RAPL_DRAM_POWER_INFO    (1 << 5)
118                                         /* 0x61c MSR_DRAM_POWER_INFO */
119
120 #define RAPL_CORES_POWER_LIMIT  (1 << 6)
121                                         /* 0x638 MSR_PP0_POWER_LIMIT */
122 #define RAPL_CORE_POLICY        (1 << 7)
123                                         /* 0x63a MSR_PP0_POLICY */
124
125 #define RAPL_GFX                (1 << 8)
126                                         /* 0x640 MSR_PP1_POWER_LIMIT */
127                                         /* 0x641 MSR_PP1_ENERGY_STATUS */
128                                         /* 0x642 MSR_PP1_POLICY */
129
130 #define RAPL_CORES_ENERGY_STATUS        (1 << 9)
131                                         /* 0x639 MSR_PP0_ENERGY_STATUS */
132 #define RAPL_PER_CORE_ENERGY    (1 << 10)
133                                         /* Indicates cores energy collection is per-core,
134                                          * not per-package. */
135 #define RAPL_AMD_F17H           (1 << 11)
136                                         /* 0xc0010299 MSR_RAPL_PWR_UNIT */
137                                         /* 0xc001029a MSR_CORE_ENERGY_STAT */
138                                         /* 0xc001029b MSR_PKG_ENERGY_STAT */
139 #define RAPL_CORES (RAPL_CORES_ENERGY_STATUS | RAPL_CORES_POWER_LIMIT)
140 #define TJMAX_DEFAULT   100
141
142 /* MSRs that are not yet in the kernel-provided header. */
143 #define MSR_RAPL_PWR_UNIT       0xc0010299
144 #define MSR_CORE_ENERGY_STAT    0xc001029a
145 #define MSR_PKG_ENERGY_STAT     0xc001029b
146
147 #define MAX(a, b) ((a) > (b) ? (a) : (b))
148
149 /*
150  * buffer size used by sscanf() for added column names
151  * Usually truncated to 7 characters, but also handles 18 columns for raw 64-bit counters
152  */
153 #define NAME_BYTES 20
154 #define PATH_BYTES 128
155
156 int backwards_count;
157 char *progname;
158
159 #define CPU_SUBSET_MAXCPUS      1024    /* need to use before probe... */
160 cpu_set_t *cpu_present_set, *cpu_affinity_set, *cpu_subset;
161 size_t cpu_present_setsize, cpu_affinity_setsize, cpu_subset_size;
162 #define MAX_ADDED_COUNTERS 8
163 #define MAX_ADDED_THREAD_COUNTERS 24
164 #define BITMASK_SIZE 32
165
/*
 * Per-thread (logical CPU) counter record.
 * thread_even and thread_odd are the two base pointers to these records;
 * GET_THREAD() computes the address of an individual entry from a base.
 */
struct thread_data {
        struct timeval tv_begin;
        struct timeval tv_end;
        struct timeval tv_delta;
        unsigned long long tsc;
        unsigned long long aperf;
        unsigned long long mperf;
        unsigned long long c1;
        unsigned long long  irq_count;
        unsigned int smi_count;
        unsigned int cpu_id;    /* passed to cpu_is_not_present() by for_all_cpus() */
        unsigned int apic_id;
        unsigned int x2apic_id;
        unsigned int flags;     /* bitmask of the CPU_IS_* values below */
#define CPU_IS_FIRST_THREAD_IN_CORE     0x2
#define CPU_IS_FIRST_CORE_IN_PACKAGE    0x4
        /* one slot per added thread counter, at most MAX_ADDED_THREAD_COUNTERS */
        unsigned long long counter[MAX_ADDED_THREAD_COUNTERS];
} *thread_even, *thread_odd;
184
/*
 * Per-core counter record.
 * core_even and core_odd are the two base pointers to these records;
 * GET_CORE() computes the address of an individual entry from a base.
 */
struct core_data {
        unsigned long long c3;
        unsigned long long c6;
        unsigned long long c7;
        unsigned long long mc6_us;      /* duplicate as per-core for now, even though per module */
        unsigned int core_temp_c;
        unsigned int core_energy;       /* MSR_CORE_ENERGY_STAT */
        unsigned int core_id;
        /* one slot per added core counter, at most MAX_ADDED_COUNTERS */
        unsigned long long counter[MAX_ADDED_COUNTERS];
} *core_even, *core_odd;
195
/*
 * Per-package counter record.
 * package_even and package_odd are the two base pointers to these records;
 * GET_PKG() computes the address of an individual entry from a base.
 */
struct pkg_data {
        unsigned long long pc2;
        unsigned long long pc3;
        unsigned long long pc6;
        unsigned long long pc7;
        unsigned long long pc8;
        unsigned long long pc9;
        unsigned long long pc10;
        unsigned long long cpu_lpi;
        unsigned long long sys_lpi;
        unsigned long long pkg_wtd_core_c0;
        unsigned long long pkg_any_core_c0;
        unsigned long long pkg_any_gfxe_c0;
        unsigned long long pkg_both_core_gfxe_c0;
        long long gfx_rc6_ms;           /* signed, unlike the other counters */
        unsigned int gfx_mhz;
        unsigned int package_id;
        unsigned int energy_pkg;        /* MSR_PKG_ENERGY_STATUS */
        unsigned int energy_dram;       /* MSR_DRAM_ENERGY_STATUS */
        unsigned int energy_cores;      /* MSR_PP0_ENERGY_STATUS */
        unsigned int energy_gfx;        /* MSR_PP1_ENERGY_STATUS */
        unsigned int rapl_pkg_perf_status;      /* MSR_PKG_PERF_STATUS */
        unsigned int rapl_dram_perf_status;     /* MSR_DRAM_PERF_STATUS */
        unsigned int pkg_temp_c;
        /* one slot per added package counter, at most MAX_ADDED_COUNTERS */
        unsigned long long counter[MAX_ADDED_COUNTERS];
} *package_even, *package_odd;
222
223 #define ODD_COUNTERS thread_odd, core_odd, package_odd
224 #define EVEN_COUNTERS thread_even, core_even, package_even
225
/*
 * GET_THREAD - address of one thread_data entry in a flat array.
 * The array is laid out package-major, then node, then core, then thread;
 * the strides come from topo.nodes_per_pkg, topo.cores_per_node and
 * topo.threads_per_core.
 */
#define GET_THREAD(thread_base, thread_no, core_no, node_no, pkg_no)          \
        ((thread_base) +                                                      \
         ((pkg_no) *                                                          \
          topo.nodes_per_pkg * topo.cores_per_node * topo.threads_per_core) + \
         ((node_no) * topo.cores_per_node * topo.threads_per_core) +          \
         ((core_no) * topo.threads_per_core) +                                \
         (thread_no))

/*
 * GET_CORE - address of one core_data entry in a flat array laid out
 * package-major, then node, then core.
 */
#define GET_CORE(core_base, core_no, node_no, pkg_no)                   \
        ((core_base) +                                                  \
         ((pkg_no) *  topo.nodes_per_pkg * topo.cores_per_node) +       \
         ((node_no) * topo.cores_per_node) +                            \
         (core_no))


/*
 * GET_PKG - address of entry pkg_no in the array starting at pkg_base.
 * Both arguments are parenthesized so that expression arguments
 * (e.g. "cond ? a : b") keep their intended meaning after expansion.
 */
#define GET_PKG(pkg_base, pkg_no) ((pkg_base) + (pkg_no))
242
243 enum counter_scope {SCOPE_CPU, SCOPE_CORE, SCOPE_PACKAGE};
244 enum counter_type {COUNTER_ITEMS, COUNTER_CYCLES, COUNTER_SECONDS, COUNTER_USEC};
245 enum counter_format {FORMAT_RAW, FORMAT_DELTA, FORMAT_PERCENT};
246
247 struct msr_counter {
248         unsigned int msr_num;
249         char name[NAME_BYTES];
250         char path[PATH_BYTES];
251         unsigned int width;
252         enum counter_type type;
253         enum counter_format format;
254         struct msr_counter *next;
255         unsigned int flags;
256 #define FLAGS_HIDE      (1 << 0)
257 #define FLAGS_SHOW      (1 << 1)
258 #define SYSFS_PERCPU    (1 << 1)
259 };
260
/*
 * Bookkeeping for counters added at run time (single instance: sys).
 * tp, cp and pp are heads of singly linked msr_counter lists (linked via
 * ->next); print_header() walks them in that order.
 * NOTE(review): presumably thread-, core- and package-scope respectively,
 * matching the three added_*_counters fields -- confirm against the code
 * that populates them.
 */
struct sys_counters {
        unsigned int added_thread_counters;
        unsigned int added_core_counters;
        unsigned int added_package_counters;
        struct msr_counter *tp;
        struct msr_counter *cp;
        struct msr_counter *pp;
} sys;
269
/*
 * One record of each scope (thread, core, package) bundled together.
 * The single instance is named average.
 */
struct system_summary {
        struct thread_data threads;
        struct core_data cores;
        struct pkg_data packages;
} average;
275
/*
 * Topology identifiers for one CPU; cpus is the base pointer to these.
 * NOTE(review): presumably indexed by logical CPU number -- the code that
 * allocates and fills it is not in this part of the file.
 */
struct cpu_topology {
        int physical_package_id;
        int die_id;
        int logical_cpu_id;
        int physical_node_id;
        int logical_node_id;    /* 0-based count within the package */
        int physical_core_id;
        int thread_id;
        cpu_set_t *put_ids; /* Processing Unit/Thread IDs */
} *cpus;
286
/*
 * System-wide topology dimensions (single instance: topo).
 * nodes_per_pkg, cores_per_node and threads_per_core are the strides used
 * by GET_THREAD()/GET_CORE(); together with num_packages they are also the
 * loop bounds in for_all_cpus().
 */
struct topo_params {
        int num_packages;
        int num_die;
        int num_cpus;
        int num_cores;
        int max_cpu_num;
        int max_node_num;
        int nodes_per_pkg;
        int cores_per_node;
        int threads_per_core;
} topo;
298
299 struct timeval tv_even, tv_odd, tv_delta;
300
301 int *irq_column_2_cpu;  /* /proc/interrupts column numbers */
302 int *irqs_per_cpu;              /* indexed by cpu_num */
303
304 void setup_all_buffers(void);
305
306 int cpu_is_not_present(int cpu)
307 {
308         return !CPU_ISSET_S(cpu, cpu_present_setsize, cpu_present_set);
309 }
310 /*
311  * run func(thread, core, package) in topology order
312  * skip non-present cpus
313  */
314
315 int for_all_cpus(int (func)(struct thread_data *, struct core_data *, struct pkg_data *),
316         struct thread_data *thread_base, struct core_data *core_base, struct pkg_data *pkg_base)
317 {
318         int retval, pkg_no, core_no, thread_no, node_no;
319
320         for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) {
321                 for (node_no = 0; node_no < topo.nodes_per_pkg; node_no++) {
322                         for (core_no = 0; core_no < topo.cores_per_node; ++core_no) {
323                                 for (thread_no = 0; thread_no <
324                                         topo.threads_per_core; ++thread_no) {
325                                         struct thread_data *t;
326                                         struct core_data *c;
327                                         struct pkg_data *p;
328
329                                         t = GET_THREAD(thread_base, thread_no,
330                                                        core_no, node_no,
331                                                        pkg_no);
332
333                                         if (cpu_is_not_present(t->cpu_id))
334                                                 continue;
335
336                                         c = GET_CORE(core_base, core_no,
337                                                      node_no, pkg_no);
338                                         p = GET_PKG(pkg_base, pkg_no);
339
340                                         retval = func(t, c, p);
341                                         if (retval)
342                                                 return retval;
343                                 }
344                         }
345                 }
346         }
347         return 0;
348 }
349
350 int cpu_migrate(int cpu)
351 {
352         CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set);
353         CPU_SET_S(cpu, cpu_affinity_setsize, cpu_affinity_set);
354         if (sched_setaffinity(0, cpu_affinity_setsize, cpu_affinity_set) == -1)
355                 return -1;
356         else
357                 return 0;
358 }
359 int get_msr_fd(int cpu)
360 {
361         char pathname[32];
362         int fd;
363
364         fd = fd_percpu[cpu];
365
366         if (fd)
367                 return fd;
368
369         sprintf(pathname, "/dev/cpu/%d/msr", cpu);
370         fd = open(pathname, O_RDONLY);
371         if (fd < 0)
372                 err(-1, "%s open failed, try chown or chmod +r /dev/cpu/*/msr, or run as root", pathname);
373
374         fd_percpu[cpu] = fd;
375
376         return fd;
377 }
378
/*
 * Read the 64-bit MSR at @offset on @cpu into *@msr.
 * Exits via err() unless exactly sizeof(*msr) bytes are read, so the only
 * value ever returned is 0.
 */
int get_msr(int cpu, off_t offset, unsigned long long *msr)
{
        int fd = get_msr_fd(cpu);
        ssize_t nread = pread(fd, msr, sizeof(*msr), offset);

        if (nread != sizeof(*msr))
                err(-1, "cpu%d: msr offset 0x%llx read failed", cpu, (unsigned long long)offset);

        return 0;
}
390
/*
 * Built-in counters (columns).
 *
 * This list matches the column headers, except
 * 1. built-in only, the sysfs counters are not here -- we learn of those at run-time
 * 2. Core and CPU are moved to the end, we can't have strings that contain them
 *    matching on them for --show and --hide.
 *
 * The position of an entry is significant: bic_lookup() maps entry i to
 * bit (1ULL << i), so the order here must stay in step with the BIC_*
 * bit definitions that follow the table.
 */
struct msr_counter bic[] = {
        { 0x0, "usec" },
        { 0x0, "Time_Of_Day_Seconds" },
        { 0x0, "Package" },
        { 0x0, "Node" },
        { 0x0, "Avg_MHz" },
        { 0x0, "Busy%" },
        { 0x0, "Bzy_MHz" },
        { 0x0, "TSC_MHz" },
        { 0x0, "IRQ" },
        { 0x0, "SMI", "", 32, 0, FORMAT_DELTA, NULL},
        { 0x0, "sysfs" },
        { 0x0, "CPU%c1" },
        { 0x0, "CPU%c3" },
        { 0x0, "CPU%c6" },
        { 0x0, "CPU%c7" },
        { 0x0, "ThreadC" },
        { 0x0, "CoreTmp" },
        { 0x0, "CoreCnt" },
        { 0x0, "PkgTmp" },
        { 0x0, "GFX%rc6" },
        { 0x0, "GFXMHz" },
        { 0x0, "Pkg%pc2" },
        { 0x0, "Pkg%pc3" },
        { 0x0, "Pkg%pc6" },
        { 0x0, "Pkg%pc7" },
        { 0x0, "Pkg%pc8" },
        { 0x0, "Pkg%pc9" },
        { 0x0, "Pk%pc10" },
        { 0x0, "CPU%LPI" },
        { 0x0, "SYS%LPI" },
        { 0x0, "PkgWatt" },
        { 0x0, "CorWatt" },
        { 0x0, "GFXWatt" },
        { 0x0, "PkgCnt" },
        { 0x0, "RAMWatt" },
        { 0x0, "PKG_%" },
        { 0x0, "RAM_%" },
        { 0x0, "Pkg_J" },
        { 0x0, "Cor_J" },
        { 0x0, "GFX_J" },
        { 0x0, "RAM_J" },
        { 0x0, "Mod%c6" },
        { 0x0, "Totl%C0" },
        { 0x0, "Any%C0" },
        { 0x0, "GFX%C0" },
        { 0x0, "CPUGFX%" },
        { 0x0, "Core" },
        { 0x0, "CPU" },
        { 0x0, "APIC" },
        { 0x0, "X2APIC" },
        { 0x0, "Die" },
};
450
/* Number of entries in the built-in counter table bic[]. */
#define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter))

/*
 * One bit per built-in counter.  Bit n corresponds to bic[n], so these
 * values must stay in the same order as the table.
 */
#define BIC_USEC        (1ULL << 0)
#define BIC_TOD         (1ULL << 1)
#define BIC_Package     (1ULL << 2)
#define BIC_Node        (1ULL << 3)
#define BIC_Avg_MHz     (1ULL << 4)
#define BIC_Busy        (1ULL << 5)
#define BIC_Bzy_MHz     (1ULL << 6)
#define BIC_TSC_MHz     (1ULL << 7)
#define BIC_IRQ         (1ULL << 8)
#define BIC_SMI         (1ULL << 9)
#define BIC_sysfs       (1ULL << 10)
#define BIC_CPU_c1      (1ULL << 11)
#define BIC_CPU_c3      (1ULL << 12)
#define BIC_CPU_c6      (1ULL << 13)
#define BIC_CPU_c7      (1ULL << 14)
#define BIC_ThreadC     (1ULL << 15)
#define BIC_CoreTmp     (1ULL << 16)
#define BIC_CoreCnt     (1ULL << 17)
#define BIC_PkgTmp      (1ULL << 18)
#define BIC_GFX_rc6     (1ULL << 19)
#define BIC_GFXMHz      (1ULL << 20)
#define BIC_Pkgpc2      (1ULL << 21)
#define BIC_Pkgpc3      (1ULL << 22)
#define BIC_Pkgpc6      (1ULL << 23)
#define BIC_Pkgpc7      (1ULL << 24)
#define BIC_Pkgpc8      (1ULL << 25)
#define BIC_Pkgpc9      (1ULL << 26)
#define BIC_Pkgpc10     (1ULL << 27)
#define BIC_CPU_LPI     (1ULL << 28)
#define BIC_SYS_LPI     (1ULL << 29)
#define BIC_PkgWatt     (1ULL << 30)
#define BIC_CorWatt     (1ULL << 31)
#define BIC_GFXWatt     (1ULL << 32)
#define BIC_PkgCnt      (1ULL << 33)
#define BIC_RAMWatt     (1ULL << 34)
#define BIC_PKG__       (1ULL << 35)
#define BIC_RAM__       (1ULL << 36)
#define BIC_Pkg_J       (1ULL << 37)
#define BIC_Cor_J       (1ULL << 38)
#define BIC_GFX_J       (1ULL << 39)
#define BIC_RAM_J       (1ULL << 40)
#define BIC_Mod_c6      (1ULL << 41)
#define BIC_Totl_c0     (1ULL << 42)
#define BIC_Any_c0      (1ULL << 43)
#define BIC_GFX_c0      (1ULL << 44)
#define BIC_CPUGFX      (1ULL << 45)
#define BIC_Core        (1ULL << 46)
#define BIC_CPU         (1ULL << 47)
#define BIC_APIC        (1ULL << 48)
#define BIC_X2APIC      (1ULL << 49)
#define BIC_Die         (1ULL << 50)

#define BIC_DISABLED_BY_DEFAULT (BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC)

/* bic_enabled: columns that are enabled; bic_present: columns marked present. */
unsigned long long bic_enabled = (0xFFFFFFFFFFFFFFFFULL & ~BIC_DISABLED_BY_DEFAULT);
unsigned long long bic_present = BIC_USEC | BIC_TOD | BIC_sysfs | BIC_APIC | BIC_X2APIC;

/*
 * Arguments are parenthesized so that a combined mask such as (A | B)
 * behaves as one operand: without that, DO_BIC(A | B) is always non-zero
 * and BIC_NOT_PRESENT(A | B) clears only A.
 *
 * DO_BIC          - non-zero when any given bit is both enabled and present
 * ENABLE_BIC      - set the given bit(s) in bic_enabled
 * BIC_PRESENT     - set the given bit(s) in bic_present
 * BIC_NOT_PRESENT - clear the given bit(s) in bic_present
 */
#define DO_BIC(COUNTER_NAME) (bic_enabled & bic_present & (COUNTER_NAME))
#define ENABLE_BIC(COUNTER_NAME) (bic_enabled |= (COUNTER_NAME))
#define BIC_PRESENT(COUNTER_BIT) (bic_present |= (COUNTER_BIT))
#define BIC_NOT_PRESENT(COUNTER_BIT) (bic_present &= ~(COUNTER_BIT))
513
514
515 #define MAX_DEFERRED 16
516 char *deferred_skip_names[MAX_DEFERRED];
517 int deferred_skip_index;
518
519 /*
520  * HIDE_LIST - hide this list of counters, show the rest [default]
521  * SHOW_LIST - show this list of counters, hide the rest
522  */
523 enum show_hide_mode { SHOW_LIST, HIDE_LIST } global_show_hide_mode = HIDE_LIST;
524
525 void help(void)
526 {
527         fprintf(outf,
528         "Usage: turbostat [OPTIONS][(--interval seconds) | COMMAND ...]\n"
529         "\n"
530         "Turbostat forks the specified COMMAND and prints statistics\n"
531         "when COMMAND completes.\n"
532         "If no COMMAND is specified, turbostat wakes every 5-seconds\n"
533         "to print statistics, until interrupted.\n"
534         "  -a, --add    add a counter\n"
535         "                 eg. --add msr0x10,u64,cpu,delta,MY_TSC\n"
536         "  -c, --cpu    cpu-set limit output to summary plus cpu-set:\n"
537         "                 {core | package | j,k,l..m,n-p }\n"
538         "  -d, --debug  displays usec, Time_Of_Day_Seconds and more debugging\n"
539         "  -D, --Dump   displays the raw counter values\n"
540         "  -e, --enable [all | column]\n"
541         "               shows all or the specified disabled column\n"
542         "  -H, --hide [column|column,column,...]\n"
543         "               hide the specified column(s)\n"
544         "  -i, --interval sec.subsec\n"
545         "               Override default 5-second measurement interval\n"
546         "  -J, --Joules displays energy in Joules instead of Watts\n"
547         "  -l, --list   list column headers only\n"
548         "  -n, --num_iterations num\n"
549         "               number of the measurement iterations\n"
550         "  -o, --out file\n"
551         "               create or truncate \"file\" for all output\n"
552         "  -q, --quiet  skip decoding system configuration header\n"
553         "  -s, --show [column|column,column,...]\n"
554         "               show only the specified column(s)\n"
555         "  -S, --Summary\n"
556         "               limits output to 1-line system summary per interval\n"
557         "  -T, --TCC temperature\n"
558         "               sets the Thermal Control Circuit temperature in\n"
559         "                 degrees Celsius\n"
560         "  -h, --help   print this help message\n"
561         "  -v, --version        print version information\n"
562         "\n"
563         "For more help, run \"man turbostat\"\n");
564 }
565
566 /*
567  * bic_lookup
568  * for all the strings in comma separate name_list,
569  * set the approprate bit in return value.
570  */
571 unsigned long long bic_lookup(char *name_list, enum show_hide_mode mode)
572 {
573         int i;
574         unsigned long long retval = 0;
575
576         while (name_list) {
577                 char *comma;
578
579                 comma = strchr(name_list, ',');
580
581                 if (comma)
582                         *comma = '\0';
583
584                 if (!strcmp(name_list, "all"))
585                         return ~0;
586
587                 for (i = 0; i < MAX_BIC; ++i) {
588                         if (!strcmp(name_list, bic[i].name)) {
589                                 retval |= (1ULL << i);
590                                 break;
591                         }
592                 }
593                 if (i == MAX_BIC) {
594                         if (mode == SHOW_LIST) {
595                                 fprintf(stderr, "Invalid counter name: %s\n", name_list);
596                                 exit(-1);
597                         }
598                         deferred_skip_names[deferred_skip_index++] = name_list;
599                         if (debug)
600                                 fprintf(stderr, "deferred \"%s\"\n", name_list);
601                         if (deferred_skip_index >= MAX_DEFERRED) {
602                                 fprintf(stderr, "More than max %d un-recognized --skip options '%s'\n",
603                                         MAX_DEFERRED, name_list);
604                                 help();
605                                 exit(1);
606                         }
607                 }
608
609                 name_list = comma;
610                 if (name_list)
611                         name_list++;
612
613         }
614         return retval;
615 }
616
617
618 void print_header(char *delim)
619 {
620         struct msr_counter *mp;
621         int printed = 0;
622
623         if (DO_BIC(BIC_USEC))
624                 outp += sprintf(outp, "%susec", (printed++ ? delim : ""));
625         if (DO_BIC(BIC_TOD))
626                 outp += sprintf(outp, "%sTime_Of_Day_Seconds", (printed++ ? delim : ""));
627         if (DO_BIC(BIC_Package))
628                 outp += sprintf(outp, "%sPackage", (printed++ ? delim : ""));
629         if (DO_BIC(BIC_Die))
630                 outp += sprintf(outp, "%sDie", (printed++ ? delim : ""));
631         if (DO_BIC(BIC_Node))
632                 outp += sprintf(outp, "%sNode", (printed++ ? delim : ""));
633         if (DO_BIC(BIC_Core))
634                 outp += sprintf(outp, "%sCore", (printed++ ? delim : ""));
635         if (DO_BIC(BIC_CPU))
636                 outp += sprintf(outp, "%sCPU", (printed++ ? delim : ""));
637         if (DO_BIC(BIC_APIC))
638                 outp += sprintf(outp, "%sAPIC", (printed++ ? delim : ""));
639         if (DO_BIC(BIC_X2APIC))
640                 outp += sprintf(outp, "%sX2APIC", (printed++ ? delim : ""));
641         if (DO_BIC(BIC_Avg_MHz))
642                 outp += sprintf(outp, "%sAvg_MHz", (printed++ ? delim : ""));
643         if (DO_BIC(BIC_Busy))
644                 outp += sprintf(outp, "%sBusy%%", (printed++ ? delim : ""));
645         if (DO_BIC(BIC_Bzy_MHz))
646                 outp += sprintf(outp, "%sBzy_MHz", (printed++ ? delim : ""));
647         if (DO_BIC(BIC_TSC_MHz))
648                 outp += sprintf(outp, "%sTSC_MHz", (printed++ ? delim : ""));
649
650         if (DO_BIC(BIC_IRQ)) {
651                 if (sums_need_wide_columns)
652                         outp += sprintf(outp, "%s     IRQ", (printed++ ? delim : ""));
653                 else
654                         outp += sprintf(outp, "%sIRQ", (printed++ ? delim : ""));
655         }
656
657         if (DO_BIC(BIC_SMI))
658                 outp += sprintf(outp, "%sSMI", (printed++ ? delim : ""));
659
660         for (mp = sys.tp; mp; mp = mp->next) {
661
662                 if (mp->format == FORMAT_RAW) {
663                         if (mp->width == 64)
664                                 outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), mp->name);
665                         else
666                                 outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), mp->name);
667                 } else {
668                         if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
669                                 outp += sprintf(outp, "%s%8s", (printed++ ? delim : ""), mp->name);
670                         else
671                                 outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), mp->name);
672                 }
673         }
674
675         if (DO_BIC(BIC_CPU_c1))
676                 outp += sprintf(outp, "%sCPU%%c1", (printed++ ? delim : ""));
677         if (DO_BIC(BIC_CPU_c3))
678                 outp += sprintf(outp, "%sCPU%%c3", (printed++ ? delim : ""));
679         if (DO_BIC(BIC_CPU_c6))
680                 outp += sprintf(outp, "%sCPU%%c6", (printed++ ? delim : ""));
681         if (DO_BIC(BIC_CPU_c7))
682                 outp += sprintf(outp, "%sCPU%%c7", (printed++ ? delim : ""));
683
684         if (DO_BIC(BIC_Mod_c6))
685                 outp += sprintf(outp, "%sMod%%c6", (printed++ ? delim : ""));
686
687         if (DO_BIC(BIC_CoreTmp))
688                 outp += sprintf(outp, "%sCoreTmp", (printed++ ? delim : ""));
689
690         if (do_rapl && !rapl_joules) {
691                 if (DO_BIC(BIC_CorWatt) && (do_rapl & RAPL_PER_CORE_ENERGY))
692                         outp += sprintf(outp, "%sCorWatt", (printed++ ? delim : ""));
693         } else if (do_rapl && rapl_joules) {
694                 if (DO_BIC(BIC_Cor_J) && (do_rapl & RAPL_PER_CORE_ENERGY))
695                         outp += sprintf(outp, "%sCor_J", (printed++ ? delim : ""));
696         }
697
698         for (mp = sys.cp; mp; mp = mp->next) {
699                 if (mp->format == FORMAT_RAW) {
700                         if (mp->width == 64)
701                                 outp += sprintf(outp, "%s%18.18s", delim, mp->name);
702                         else
703                                 outp += sprintf(outp, "%s%10.10s", delim, mp->name);
704                 } else {
705                         if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
706                                 outp += sprintf(outp, "%s%8s", delim, mp->name);
707                         else
708                                 outp += sprintf(outp, "%s%s", delim, mp->name);
709                 }
710         }
711
712         if (DO_BIC(BIC_PkgTmp))
713                 outp += sprintf(outp, "%sPkgTmp", (printed++ ? delim : ""));
714
715         if (DO_BIC(BIC_GFX_rc6))
716                 outp += sprintf(outp, "%sGFX%%rc6", (printed++ ? delim : ""));
717
718         if (DO_BIC(BIC_GFXMHz))
719                 outp += sprintf(outp, "%sGFXMHz", (printed++ ? delim : ""));
720
721         if (DO_BIC(BIC_Totl_c0))
722                 outp += sprintf(outp, "%sTotl%%C0", (printed++ ? delim : ""));
723         if (DO_BIC(BIC_Any_c0))
724                 outp += sprintf(outp, "%sAny%%C0", (printed++ ? delim : ""));
725         if (DO_BIC(BIC_GFX_c0))
726                 outp += sprintf(outp, "%sGFX%%C0", (printed++ ? delim : ""));
727         if (DO_BIC(BIC_CPUGFX))
728                 outp += sprintf(outp, "%sCPUGFX%%", (printed++ ? delim : ""));
729
730         if (DO_BIC(BIC_Pkgpc2))
731                 outp += sprintf(outp, "%sPkg%%pc2", (printed++ ? delim : ""));
732         if (DO_BIC(BIC_Pkgpc3))
733                 outp += sprintf(outp, "%sPkg%%pc3", (printed++ ? delim : ""));
734         if (DO_BIC(BIC_Pkgpc6))
735                 outp += sprintf(outp, "%sPkg%%pc6", (printed++ ? delim : ""));
736         if (DO_BIC(BIC_Pkgpc7))
737                 outp += sprintf(outp, "%sPkg%%pc7", (printed++ ? delim : ""));
738         if (DO_BIC(BIC_Pkgpc8))
739                 outp += sprintf(outp, "%sPkg%%pc8", (printed++ ? delim : ""));
740         if (DO_BIC(BIC_Pkgpc9))
741                 outp += sprintf(outp, "%sPkg%%pc9", (printed++ ? delim : ""));
742         if (DO_BIC(BIC_Pkgpc10))
743                 outp += sprintf(outp, "%sPk%%pc10", (printed++ ? delim : ""));
744         if (DO_BIC(BIC_CPU_LPI))
745                 outp += sprintf(outp, "%sCPU%%LPI", (printed++ ? delim : ""));
746         if (DO_BIC(BIC_SYS_LPI))
747                 outp += sprintf(outp, "%sSYS%%LPI", (printed++ ? delim : ""));
748
749         if (do_rapl && !rapl_joules) {
750                 if (DO_BIC(BIC_PkgWatt))
751                         outp += sprintf(outp, "%sPkgWatt", (printed++ ? delim : ""));
752                 if (DO_BIC(BIC_CorWatt) && !(do_rapl & RAPL_PER_CORE_ENERGY))
753                         outp += sprintf(outp, "%sCorWatt", (printed++ ? delim : ""));
754                 if (DO_BIC(BIC_GFXWatt))
755                         outp += sprintf(outp, "%sGFXWatt", (printed++ ? delim : ""));
756                 if (DO_BIC(BIC_RAMWatt))
757                         outp += sprintf(outp, "%sRAMWatt", (printed++ ? delim : ""));
758                 if (DO_BIC(BIC_PKG__))
759                         outp += sprintf(outp, "%sPKG_%%", (printed++ ? delim : ""));
760                 if (DO_BIC(BIC_RAM__))
761                         outp += sprintf(outp, "%sRAM_%%", (printed++ ? delim : ""));
762         } else if (do_rapl && rapl_joules) {
763                 if (DO_BIC(BIC_Pkg_J))
764                         outp += sprintf(outp, "%sPkg_J", (printed++ ? delim : ""));
765                 if (DO_BIC(BIC_Cor_J) && !(do_rapl & RAPL_PER_CORE_ENERGY))
766                         outp += sprintf(outp, "%sCor_J", (printed++ ? delim : ""));
767                 if (DO_BIC(BIC_GFX_J))
768                         outp += sprintf(outp, "%sGFX_J", (printed++ ? delim : ""));
769                 if (DO_BIC(BIC_RAM_J))
770                         outp += sprintf(outp, "%sRAM_J", (printed++ ? delim : ""));
771                 if (DO_BIC(BIC_PKG__))
772                         outp += sprintf(outp, "%sPKG_%%", (printed++ ? delim : ""));
773                 if (DO_BIC(BIC_RAM__))
774                         outp += sprintf(outp, "%sRAM_%%", (printed++ ? delim : ""));
775         }
776         for (mp = sys.pp; mp; mp = mp->next) {
777                 if (mp->format == FORMAT_RAW) {
778                         if (mp->width == 64)
779                                 outp += sprintf(outp, "%s%18.18s", delim, mp->name);
780                         else
781                                 outp += sprintf(outp, "%s%10.10s", delim, mp->name);
782                 } else {
783                         if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
784                                 outp += sprintf(outp, "%s%8s", delim, mp->name);
785                         else
786                                 outp += sprintf(outp, "%s%s", delim, mp->name);
787                 }
788         }
789
790         outp += sprintf(outp, "\n");
791 }
792
793 int dump_counters(struct thread_data *t, struct core_data *c,
794         struct pkg_data *p)
795 {
796         int i;
797         struct msr_counter *mp;
798
799         outp += sprintf(outp, "t %p, c %p, p %p\n", t, c, p);
800
801         if (t) {
802                 outp += sprintf(outp, "CPU: %d flags 0x%x\n",
803                         t->cpu_id, t->flags);
804                 outp += sprintf(outp, "TSC: %016llX\n", t->tsc);
805                 outp += sprintf(outp, "aperf: %016llX\n", t->aperf);
806                 outp += sprintf(outp, "mperf: %016llX\n", t->mperf);
807                 outp += sprintf(outp, "c1: %016llX\n", t->c1);
808
809                 if (DO_BIC(BIC_IRQ))
810                         outp += sprintf(outp, "IRQ: %lld\n", t->irq_count);
811                 if (DO_BIC(BIC_SMI))
812                         outp += sprintf(outp, "SMI: %d\n", t->smi_count);
813
814                 for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
815                         outp += sprintf(outp, "tADDED [%d] msr0x%x: %08llX\n",
816                                 i, mp->msr_num, t->counter[i]);
817                 }
818         }
819
820         if (c) {
821                 outp += sprintf(outp, "core: %d\n", c->core_id);
822                 outp += sprintf(outp, "c3: %016llX\n", c->c3);
823                 outp += sprintf(outp, "c6: %016llX\n", c->c6);
824                 outp += sprintf(outp, "c7: %016llX\n", c->c7);
825                 outp += sprintf(outp, "DTS: %dC\n", c->core_temp_c);
826                 outp += sprintf(outp, "Joules: %0X\n", c->core_energy);
827
828                 for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
829                         outp += sprintf(outp, "cADDED [%d] msr0x%x: %08llX\n",
830                                 i, mp->msr_num, c->counter[i]);
831                 }
832                 outp += sprintf(outp, "mc6_us: %016llX\n", c->mc6_us);
833         }
834
835         if (p) {
836                 outp += sprintf(outp, "package: %d\n", p->package_id);
837
838                 outp += sprintf(outp, "Weighted cores: %016llX\n", p->pkg_wtd_core_c0);
839                 outp += sprintf(outp, "Any cores: %016llX\n", p->pkg_any_core_c0);
840                 outp += sprintf(outp, "Any GFX: %016llX\n", p->pkg_any_gfxe_c0);
841                 outp += sprintf(outp, "CPU + GFX: %016llX\n", p->pkg_both_core_gfxe_c0);
842
843                 outp += sprintf(outp, "pc2: %016llX\n", p->pc2);
844                 if (DO_BIC(BIC_Pkgpc3))
845                         outp += sprintf(outp, "pc3: %016llX\n", p->pc3);
846                 if (DO_BIC(BIC_Pkgpc6))
847                         outp += sprintf(outp, "pc6: %016llX\n", p->pc6);
848                 if (DO_BIC(BIC_Pkgpc7))
849                         outp += sprintf(outp, "pc7: %016llX\n", p->pc7);
850                 outp += sprintf(outp, "pc8: %016llX\n", p->pc8);
851                 outp += sprintf(outp, "pc9: %016llX\n", p->pc9);
852                 outp += sprintf(outp, "pc10: %016llX\n", p->pc10);
853                 outp += sprintf(outp, "cpu_lpi: %016llX\n", p->cpu_lpi);
854                 outp += sprintf(outp, "sys_lpi: %016llX\n", p->sys_lpi);
855                 outp += sprintf(outp, "Joules PKG: %0X\n", p->energy_pkg);
856                 outp += sprintf(outp, "Joules COR: %0X\n", p->energy_cores);
857                 outp += sprintf(outp, "Joules GFX: %0X\n", p->energy_gfx);
858                 outp += sprintf(outp, "Joules RAM: %0X\n", p->energy_dram);
859                 outp += sprintf(outp, "Throttle PKG: %0X\n",
860                         p->rapl_pkg_perf_status);
861                 outp += sprintf(outp, "Throttle RAM: %0X\n",
862                         p->rapl_dram_perf_status);
863                 outp += sprintf(outp, "PTM: %dC\n", p->pkg_temp_c);
864
865                 for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
866                         outp += sprintf(outp, "pADDED [%d] msr0x%x: %08llX\n",
867                                 i, mp->msr_num, p->counter[i]);
868                 }
869         }
870
871         outp += sprintf(outp, "\n");
872
873         return 0;
874 }
875
876 /*
877  * column formatting convention & formats
878  */
879 int format_counters(struct thread_data *t, struct core_data *c,
880         struct pkg_data *p)
881 {
882         double interval_float, tsc;
883         char *fmt8;
884         int i;
885         struct msr_counter *mp;
886         char *delim = "\t";
887         int printed = 0;
888
889          /* if showing only 1st thread in core and this isn't one, bail out */
890         if (show_core_only && !(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
891                 return 0;
892
893          /* if showing only 1st thread in pkg and this isn't one, bail out */
894         if (show_pkg_only && !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
895                 return 0;
896
897         /*if not summary line and --cpu is used */
898         if ((t != &average.threads) &&
899                 (cpu_subset && !CPU_ISSET_S(t->cpu_id, cpu_subset_size, cpu_subset)))
900                 return 0;
901
902         if (DO_BIC(BIC_USEC)) {
903                 /* on each row, print how many usec each timestamp took to gather */
904                 struct timeval tv;
905
906                 timersub(&t->tv_end, &t->tv_begin, &tv);
907                 outp += sprintf(outp, "%5ld\t", tv.tv_sec * 1000000 + tv.tv_usec);
908         }
909
910         /* Time_Of_Day_Seconds: on each row, print sec.usec last timestamp taken */
911         if (DO_BIC(BIC_TOD))
912                 outp += sprintf(outp, "%10ld.%06ld\t", t->tv_end.tv_sec, t->tv_end.tv_usec);
913
914         interval_float = t->tv_delta.tv_sec + t->tv_delta.tv_usec/1000000.0;
915
916         tsc = t->tsc * tsc_tweak;
917
918         /* topo columns, print blanks on 1st (average) line */
919         if (t == &average.threads) {
920                 if (DO_BIC(BIC_Package))
921                         outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
922                 if (DO_BIC(BIC_Die))
923                         outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
924                 if (DO_BIC(BIC_Node))
925                         outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
926                 if (DO_BIC(BIC_Core))
927                         outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
928                 if (DO_BIC(BIC_CPU))
929                         outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
930                 if (DO_BIC(BIC_APIC))
931                         outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
932                 if (DO_BIC(BIC_X2APIC))
933                         outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
934         } else {
935                 if (DO_BIC(BIC_Package)) {
936                         if (p)
937                                 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->package_id);
938                         else
939                                 outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
940                 }
941                 if (DO_BIC(BIC_Die)) {
942                         if (c)
943                                 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), cpus[t->cpu_id].die_id);
944                         else
945                                 outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
946                 }
947                 if (DO_BIC(BIC_Node)) {
948                         if (t)
949                                 outp += sprintf(outp, "%s%d",
950                                                 (printed++ ? delim : ""),
951                                               cpus[t->cpu_id].physical_node_id);
952                         else
953                                 outp += sprintf(outp, "%s-",
954                                                 (printed++ ? delim : ""));
955                 }
956                 if (DO_BIC(BIC_Core)) {
957                         if (c)
958                                 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), c->core_id);
959                         else
960                                 outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
961                 }
962                 if (DO_BIC(BIC_CPU))
963                         outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->cpu_id);
964                 if (DO_BIC(BIC_APIC))
965                         outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->apic_id);
966                 if (DO_BIC(BIC_X2APIC))
967                         outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->x2apic_id);
968         }
969
970         if (DO_BIC(BIC_Avg_MHz))
971                 outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""),
972                         1.0 / units * t->aperf / interval_float);
973
974         if (DO_BIC(BIC_Busy))
975                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->mperf/tsc);
976
977         if (DO_BIC(BIC_Bzy_MHz)) {
978                 if (has_base_hz)
979                         outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), base_hz / units * t->aperf / t->mperf);
980                 else
981                         outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""),
982                                 tsc / units * t->aperf / t->mperf / interval_float);
983         }
984
985         if (DO_BIC(BIC_TSC_MHz))
986                 outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), 1.0 * t->tsc/units/interval_float);
987
988         /* IRQ */
989         if (DO_BIC(BIC_IRQ)) {
990                 if (sums_need_wide_columns)
991                         outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), t->irq_count);
992                 else
993                         outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), t->irq_count);
994         }
995
996         /* SMI */
997         if (DO_BIC(BIC_SMI))
998                 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->smi_count);
999
1000         /* Added counters */
1001         for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
1002                 if (mp->format == FORMAT_RAW) {
1003                         if (mp->width == 32)
1004                                 outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int) t->counter[i]);
1005                         else
1006                                 outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), t->counter[i]);
1007                 } else if (mp->format == FORMAT_DELTA) {
1008                         if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
1009                                 outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), t->counter[i]);
1010                         else
1011                                 outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), t->counter[i]);
1012                 } else if (mp->format == FORMAT_PERCENT) {
1013                         if (mp->type == COUNTER_USEC)
1014                                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), t->counter[i]/interval_float/10000);
1015                         else
1016                                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->counter[i]/tsc);
1017                 }
1018         }
1019
1020         /* C1 */
1021         if (DO_BIC(BIC_CPU_c1))
1022                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->c1/tsc);
1023
1024
1025         /* print per-core data only for 1st thread in core */
1026         if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
1027                 goto done;
1028
1029         if (DO_BIC(BIC_CPU_c3))
1030                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c3/tsc);
1031         if (DO_BIC(BIC_CPU_c6))
1032                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c6/tsc);
1033         if (DO_BIC(BIC_CPU_c7))
1034                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c7/tsc);
1035
1036         /* Mod%c6 */
1037         if (DO_BIC(BIC_Mod_c6))
1038                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->mc6_us / tsc);
1039
1040         if (DO_BIC(BIC_CoreTmp))
1041                 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), c->core_temp_c);
1042
1043         for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
1044                 if (mp->format == FORMAT_RAW) {
1045                         if (mp->width == 32)
1046                                 outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int) c->counter[i]);
1047                         else
1048                                 outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), c->counter[i]);
1049                 } else if (mp->format == FORMAT_DELTA) {
1050                         if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
1051                                 outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), c->counter[i]);
1052                         else
1053                                 outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), c->counter[i]);
1054                 } else if (mp->format == FORMAT_PERCENT) {
1055                         outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->counter[i]/tsc);
1056                 }
1057         }
1058
1059         /*
1060          * If measurement interval exceeds minimum RAPL Joule Counter range,
1061          * indicate that results are suspect by printing "**" in fraction place.
1062          */
1063         if (interval_float < rapl_joule_counter_range)
1064                 fmt8 = "%s%.2f";
1065         else
1066                 fmt8 = "%6.0f**";
1067
1068         if (DO_BIC(BIC_CorWatt) && (do_rapl & RAPL_PER_CORE_ENERGY))
1069                 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), c->core_energy * rapl_energy_units / interval_float);
1070         if (DO_BIC(BIC_Cor_J) && (do_rapl & RAPL_PER_CORE_ENERGY))
1071                 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), c->core_energy * rapl_energy_units);
1072
1073         /* print per-package data only for 1st core in package */
1074         if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
1075                 goto done;
1076
1077         /* PkgTmp */
1078         if (DO_BIC(BIC_PkgTmp))
1079                 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->pkg_temp_c);
1080
1081         /* GFXrc6 */
1082         if (DO_BIC(BIC_GFX_rc6)) {
1083                 if (p->gfx_rc6_ms == -1) {      /* detect GFX counter reset */
1084                         outp += sprintf(outp, "%s**.**", (printed++ ? delim : ""));
1085                 } else {
1086                         outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""),
1087                                 p->gfx_rc6_ms / 10.0 / interval_float);
1088                 }
1089         }
1090
1091         /* GFXMHz */
1092         if (DO_BIC(BIC_GFXMHz))
1093                 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->gfx_mhz);
1094
1095         /* Totl%C0, Any%C0 GFX%C0 CPUGFX% */
1096         if (DO_BIC(BIC_Totl_c0))
1097                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_wtd_core_c0/tsc);
1098         if (DO_BIC(BIC_Any_c0))
1099                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_any_core_c0/tsc);
1100         if (DO_BIC(BIC_GFX_c0))
1101                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_any_gfxe_c0/tsc);
1102         if (DO_BIC(BIC_CPUGFX))
1103                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_both_core_gfxe_c0/tsc);
1104
1105         if (DO_BIC(BIC_Pkgpc2))
1106                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc2/tsc);
1107         if (DO_BIC(BIC_Pkgpc3))
1108                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc3/tsc);
1109         if (DO_BIC(BIC_Pkgpc6))
1110                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc6/tsc);
1111         if (DO_BIC(BIC_Pkgpc7))
1112                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc7/tsc);
1113         if (DO_BIC(BIC_Pkgpc8))
1114                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc8/tsc);
1115         if (DO_BIC(BIC_Pkgpc9))
1116                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc9/tsc);
1117         if (DO_BIC(BIC_Pkgpc10))
1118                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc10/tsc);
1119
1120         if (DO_BIC(BIC_CPU_LPI))
1121                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->cpu_lpi / 1000000.0 / interval_float);
1122         if (DO_BIC(BIC_SYS_LPI))
1123                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->sys_lpi / 1000000.0 / interval_float);
1124
1125         if (DO_BIC(BIC_PkgWatt))
1126                 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_pkg * rapl_energy_units / interval_float);
1127         if (DO_BIC(BIC_CorWatt) && !(do_rapl & RAPL_PER_CORE_ENERGY))
1128                 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_cores * rapl_energy_units / interval_float);
1129         if (DO_BIC(BIC_GFXWatt))
1130                 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_gfx * rapl_energy_units / interval_float);
1131         if (DO_BIC(BIC_RAMWatt))
1132                 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_dram * rapl_dram_energy_units / interval_float);
1133         if (DO_BIC(BIC_Pkg_J))
1134                 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_pkg * rapl_energy_units);
1135         if (DO_BIC(BIC_Cor_J) && !(do_rapl & RAPL_PER_CORE_ENERGY))
1136                 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_cores * rapl_energy_units);
1137         if (DO_BIC(BIC_GFX_J))
1138                 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_gfx * rapl_energy_units);
1139         if (DO_BIC(BIC_RAM_J))
1140                 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_dram * rapl_dram_energy_units);
1141         if (DO_BIC(BIC_PKG__))
1142                 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float);
1143         if (DO_BIC(BIC_RAM__))
1144                 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), 100.0 * p->rapl_dram_perf_status * rapl_time_units / interval_float);
1145
1146         for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
1147                 if (mp->format == FORMAT_RAW) {
1148                         if (mp->width == 32)
1149                                 outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int) p->counter[i]);
1150                         else
1151                                 outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), p->counter[i]);
1152                 } else if (mp->format == FORMAT_DELTA) {
1153                         if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
1154                                 outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), p->counter[i]);
1155                         else
1156                                 outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), p->counter[i]);
1157                 } else if (mp->format == FORMAT_PERCENT) {
1158                         outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->counter[i]/tsc);
1159                 }
1160         }
1161
1162 done:
1163         if (*(outp - 1) != '\n')
1164                 outp += sprintf(outp, "\n");
1165
1166         return 0;
1167 }
1168
1169 void flush_output_stdout(void)
1170 {
1171         FILE *filep;
1172
1173         if (outf == stderr)
1174                 filep = stdout;
1175         else
1176                 filep = outf;
1177
1178         fputs(output_buffer, filep);
1179         fflush(filep);
1180
1181         outp = output_buffer;
1182 }
1183 void flush_output_stderr(void)
1184 {
1185         fputs(output_buffer, outf);
1186         fflush(outf);
1187         outp = output_buffer;
1188 }
1189 void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
1190 {
1191         static int printed;
1192
1193         if (!printed || !summary_only)
1194                 print_header("\t");
1195
1196         format_counters(&average.threads, &average.cores, &average.packages);
1197
1198         printed = 1;
1199
1200         if (summary_only)
1201                 return;
1202
1203         for_all_cpus(format_counters, t, c, p);
1204 }
1205
/*
 * DELTA_WRAP32(new, old) - old = new - old for a counter that may have wrapped
 *
 * When new is not greater than old, 2^32 is added before subtracting and
 * the result is stored back into old.
 *
 * Wrapped in do { } while (0) so it expands to exactly one statement and
 * is safe in unbraced if/else bodies.  Both arguments are evaluated more
 * than once, so they must be free of side effects; old must be an lvalue.
 */
#define DELTA_WRAP32(new, old)					\
	do {							\
		if ((new) > (old)) {				\
			(old) = (new) - (old);			\
		} else {					\
			(old) = 0x100000000 + (new) - (old);	\
		}						\
	} while (0)
1212
1213 int
1214 delta_package(struct pkg_data *new, struct pkg_data *old)
1215 {
1216         int i;
1217         struct msr_counter *mp;
1218
1219
1220         if (DO_BIC(BIC_Totl_c0))
1221                 old->pkg_wtd_core_c0 = new->pkg_wtd_core_c0 - old->pkg_wtd_core_c0;
1222         if (DO_BIC(BIC_Any_c0))
1223                 old->pkg_any_core_c0 = new->pkg_any_core_c0 - old->pkg_any_core_c0;
1224         if (DO_BIC(BIC_GFX_c0))
1225                 old->pkg_any_gfxe_c0 = new->pkg_any_gfxe_c0 - old->pkg_any_gfxe_c0;
1226         if (DO_BIC(BIC_CPUGFX))
1227                 old->pkg_both_core_gfxe_c0 = new->pkg_both_core_gfxe_c0 - old->pkg_both_core_gfxe_c0;
1228
1229         old->pc2 = new->pc2 - old->pc2;
1230         if (DO_BIC(BIC_Pkgpc3))
1231                 old->pc3 = new->pc3 - old->pc3;
1232         if (DO_BIC(BIC_Pkgpc6))
1233                 old->pc6 = new->pc6 - old->pc6;
1234         if (DO_BIC(BIC_Pkgpc7))
1235                 old->pc7 = new->pc7 - old->pc7;
1236         old->pc8 = new->pc8 - old->pc8;
1237         old->pc9 = new->pc9 - old->pc9;
1238         old->pc10 = new->pc10 - old->pc10;
1239         old->cpu_lpi = new->cpu_lpi - old->cpu_lpi;
1240         old->sys_lpi = new->sys_lpi - old->sys_lpi;
1241         old->pkg_temp_c = new->pkg_temp_c;
1242
1243         /* flag an error when rc6 counter resets/wraps */
1244         if (old->gfx_rc6_ms >  new->gfx_rc6_ms)
1245                 old->gfx_rc6_ms = -1;
1246         else
1247                 old->gfx_rc6_ms = new->gfx_rc6_ms - old->gfx_rc6_ms;
1248
1249         old->gfx_mhz = new->gfx_mhz;
1250
1251         DELTA_WRAP32(new->energy_pkg, old->energy_pkg);
1252         DELTA_WRAP32(new->energy_cores, old->energy_cores);
1253         DELTA_WRAP32(new->energy_gfx, old->energy_gfx);
1254         DELTA_WRAP32(new->energy_dram, old->energy_dram);
1255         DELTA_WRAP32(new->rapl_pkg_perf_status, old->rapl_pkg_perf_status);
1256         DELTA_WRAP32(new->rapl_dram_perf_status, old->rapl_dram_perf_status);
1257
1258         for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
1259                 if (mp->format == FORMAT_RAW)
1260                         old->counter[i] = new->counter[i];
1261                 else
1262                         old->counter[i] = new->counter[i] - old->counter[i];
1263         }
1264
1265         return 0;
1266 }
1267
1268 void
1269 delta_core(struct core_data *new, struct core_data *old)
1270 {
1271         int i;
1272         struct msr_counter *mp;
1273
1274         old->c3 = new->c3 - old->c3;
1275         old->c6 = new->c6 - old->c6;
1276         old->c7 = new->c7 - old->c7;
1277         old->core_temp_c = new->core_temp_c;
1278         old->mc6_us = new->mc6_us - old->mc6_us;
1279
1280         DELTA_WRAP32(new->core_energy, old->core_energy);
1281
1282         for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
1283                 if (mp->format == FORMAT_RAW)
1284                         old->counter[i] = new->counter[i];
1285                 else
1286                         old->counter[i] = new->counter[i] - old->counter[i];
1287         }
1288 }
1289
1290 /*
1291  * old = new - old
1292  */
1293 int
1294 delta_thread(struct thread_data *new, struct thread_data *old,
1295         struct core_data *core_delta)
1296 {
1297         int i;
1298         struct msr_counter *mp;
1299
1300         /* we run cpuid just the 1st time, copy the results */
1301         if (DO_BIC(BIC_APIC))
1302                 new->apic_id = old->apic_id;
1303         if (DO_BIC(BIC_X2APIC))
1304                 new->x2apic_id = old->x2apic_id;
1305
1306         /*
1307          * the timestamps from start of measurement interval are in "old"
1308          * the timestamp from end of measurement interval are in "new"
1309          * over-write old w/ new so we can print end of interval values
1310          */
1311
1312         timersub(&new->tv_begin, &old->tv_begin, &old->tv_delta);
1313         old->tv_begin = new->tv_begin;
1314         old->tv_end = new->tv_end;
1315
1316         old->tsc = new->tsc - old->tsc;
1317
1318         /* check for TSC < 1 Mcycles over interval */
1319         if (old->tsc < (1000 * 1000))
1320                 errx(-3, "Insanely slow TSC rate, TSC stops in idle?\n"
1321                      "You can disable all c-states by booting with \"idle=poll\"\n"
1322                      "or just the deep ones with \"processor.max_cstate=1\"");
1323
1324         old->c1 = new->c1 - old->c1;
1325
1326         if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz)) {
1327                 if ((new->aperf > old->aperf) && (new->mperf > old->mperf)) {
1328                         old->aperf = new->aperf - old->aperf;
1329                         old->mperf = new->mperf - old->mperf;
1330                 } else {
1331                         return -1;
1332                 }
1333         }
1334
1335
1336         if (use_c1_residency_msr) {
1337                 /*
1338                  * Some models have a dedicated C1 residency MSR,
1339                  * which should be more accurate than the derivation below.
1340                  */
1341         } else {
1342                 /*
1343                  * As counter collection is not atomic,
1344                  * it is possible for mperf's non-halted cycles + idle states
1345                  * to exceed TSC's all cycles: show c1 = 0% in that case.
1346                  */
1347                 if ((old->mperf + core_delta->c3 + core_delta->c6 + core_delta->c7) > (old->tsc * tsc_tweak))
1348                         old->c1 = 0;
1349                 else {
1350                         /* normal case, derive c1 */
1351                         old->c1 = (old->tsc * tsc_tweak) - old->mperf - core_delta->c3
1352                                 - core_delta->c6 - core_delta->c7;
1353                 }
1354         }
1355
1356         if (old->mperf == 0) {
1357                 if (debug > 1)
1358                         fprintf(outf, "cpu%d MPERF 0!\n", old->cpu_id);
1359                 old->mperf = 1; /* divide by 0 protection */
1360         }
1361
1362         if (DO_BIC(BIC_IRQ))
1363                 old->irq_count = new->irq_count - old->irq_count;
1364
1365         if (DO_BIC(BIC_SMI))
1366                 old->smi_count = new->smi_count - old->smi_count;
1367
1368         for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
1369                 if (mp->format == FORMAT_RAW)
1370                         old->counter[i] = new->counter[i];
1371                 else
1372                         old->counter[i] = new->counter[i] - old->counter[i];
1373         }
1374         return 0;
1375 }
1376
1377 int delta_cpu(struct thread_data *t, struct core_data *c,
1378         struct pkg_data *p, struct thread_data *t2,
1379         struct core_data *c2, struct pkg_data *p2)
1380 {
1381         int retval = 0;
1382
1383         /* calculate core delta only for 1st thread in core */
1384         if (t->flags & CPU_IS_FIRST_THREAD_IN_CORE)
1385                 delta_core(c, c2);
1386
1387         /* always calculate thread delta */
1388         retval = delta_thread(t, t2, c2);       /* c2 is core delta */
1389         if (retval)
1390                 return retval;
1391
1392         /* calculate package delta only for 1st core in package */
1393         if (t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)
1394                 retval = delta_package(p, p2);
1395
1396         return retval;
1397 }
1398
1399 void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
1400 {
1401         int i;
1402         struct msr_counter  *mp;
1403
1404         t->tv_begin.tv_sec = 0;
1405         t->tv_begin.tv_usec = 0;
1406         t->tv_end.tv_sec = 0;
1407         t->tv_end.tv_usec = 0;
1408         t->tv_delta.tv_sec = 0;
1409         t->tv_delta.tv_usec = 0;
1410
1411         t->tsc = 0;
1412         t->aperf = 0;
1413         t->mperf = 0;
1414         t->c1 = 0;
1415
1416         t->irq_count = 0;
1417         t->smi_count = 0;
1418
1419         /* tells format_counters to dump all fields from this set */
1420         t->flags = CPU_IS_FIRST_THREAD_IN_CORE | CPU_IS_FIRST_CORE_IN_PACKAGE;
1421
1422         c->c3 = 0;
1423         c->c6 = 0;
1424         c->c7 = 0;
1425         c->mc6_us = 0;
1426         c->core_temp_c = 0;
1427         c->core_energy = 0;
1428
1429         p->pkg_wtd_core_c0 = 0;
1430         p->pkg_any_core_c0 = 0;
1431         p->pkg_any_gfxe_c0 = 0;
1432         p->pkg_both_core_gfxe_c0 = 0;
1433
1434         p->pc2 = 0;
1435         if (DO_BIC(BIC_Pkgpc3))
1436                 p->pc3 = 0;
1437         if (DO_BIC(BIC_Pkgpc6))
1438                 p->pc6 = 0;
1439         if (DO_BIC(BIC_Pkgpc7))
1440                 p->pc7 = 0;
1441         p->pc8 = 0;
1442         p->pc9 = 0;
1443         p->pc10 = 0;
1444         p->cpu_lpi = 0;
1445         p->sys_lpi = 0;
1446
1447         p->energy_pkg = 0;
1448         p->energy_dram = 0;
1449         p->energy_cores = 0;
1450         p->energy_gfx = 0;
1451         p->rapl_pkg_perf_status = 0;
1452         p->rapl_dram_perf_status = 0;
1453         p->pkg_temp_c = 0;
1454
1455         p->gfx_rc6_ms = 0;
1456         p->gfx_mhz = 0;
1457         for (i = 0, mp = sys.tp; mp; i++, mp = mp->next)
1458                 t->counter[i] = 0;
1459
1460         for (i = 0, mp = sys.cp; mp; i++, mp = mp->next)
1461                 c->counter[i] = 0;
1462
1463         for (i = 0, mp = sys.pp; mp; i++, mp = mp->next)
1464                 p->counter[i] = 0;
1465 }
1466 int sum_counters(struct thread_data *t, struct core_data *c,
1467         struct pkg_data *p)
1468 {
1469         int i;
1470         struct msr_counter *mp;
1471
1472         /* copy un-changing apic_id's */
1473         if (DO_BIC(BIC_APIC))
1474                 average.threads.apic_id = t->apic_id;
1475         if (DO_BIC(BIC_X2APIC))
1476                 average.threads.x2apic_id = t->x2apic_id;
1477
1478         /* remember first tv_begin */
1479         if (average.threads.tv_begin.tv_sec == 0)
1480                 average.threads.tv_begin = t->tv_begin;
1481
1482         /* remember last tv_end */
1483         average.threads.tv_end = t->tv_end;
1484
1485         average.threads.tsc += t->tsc;
1486         average.threads.aperf += t->aperf;
1487         average.threads.mperf += t->mperf;
1488         average.threads.c1 += t->c1;
1489
1490         average.threads.irq_count += t->irq_count;
1491         average.threads.smi_count += t->smi_count;
1492
1493         for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
1494                 if (mp->format == FORMAT_RAW)
1495                         continue;
1496                 average.threads.counter[i] += t->counter[i];
1497         }
1498
1499         /* sum per-core values only for 1st thread in core */
1500         if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
1501                 return 0;
1502
1503         average.cores.c3 += c->c3;
1504         average.cores.c6 += c->c6;
1505         average.cores.c7 += c->c7;
1506         average.cores.mc6_us += c->mc6_us;
1507
1508         average.cores.core_temp_c = MAX(average.cores.core_temp_c, c->core_temp_c);
1509
1510         average.cores.core_energy += c->core_energy;
1511
1512         for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
1513                 if (mp->format == FORMAT_RAW)
1514                         continue;
1515                 average.cores.counter[i] += c->counter[i];
1516         }
1517
1518         /* sum per-pkg values only for 1st core in pkg */
1519         if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
1520                 return 0;
1521
1522         if (DO_BIC(BIC_Totl_c0))
1523                 average.packages.pkg_wtd_core_c0 += p->pkg_wtd_core_c0;
1524         if (DO_BIC(BIC_Any_c0))
1525                 average.packages.pkg_any_core_c0 += p->pkg_any_core_c0;
1526         if (DO_BIC(BIC_GFX_c0))
1527                 average.packages.pkg_any_gfxe_c0 += p->pkg_any_gfxe_c0;
1528         if (DO_BIC(BIC_CPUGFX))
1529                 average.packages.pkg_both_core_gfxe_c0 += p->pkg_both_core_gfxe_c0;
1530
1531         average.packages.pc2 += p->pc2;
1532         if (DO_BIC(BIC_Pkgpc3))
1533                 average.packages.pc3 += p->pc3;
1534         if (DO_BIC(BIC_Pkgpc6))
1535                 average.packages.pc6 += p->pc6;
1536         if (DO_BIC(BIC_Pkgpc7))
1537                 average.packages.pc7 += p->pc7;
1538         average.packages.pc8 += p->pc8;
1539         average.packages.pc9 += p->pc9;
1540         average.packages.pc10 += p->pc10;
1541
1542         average.packages.cpu_lpi = p->cpu_lpi;
1543         average.packages.sys_lpi = p->sys_lpi;
1544
1545         average.packages.energy_pkg += p->energy_pkg;
1546         average.packages.energy_dram += p->energy_dram;
1547         average.packages.energy_cores += p->energy_cores;
1548         average.packages.energy_gfx += p->energy_gfx;
1549
1550         average.packages.gfx_rc6_ms = p->gfx_rc6_ms;
1551         average.packages.gfx_mhz = p->gfx_mhz;
1552
1553         average.packages.pkg_temp_c = MAX(average.packages.pkg_temp_c, p->pkg_temp_c);
1554
1555         average.packages.rapl_pkg_perf_status += p->rapl_pkg_perf_status;
1556         average.packages.rapl_dram_perf_status += p->rapl_dram_perf_status;
1557
1558         for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
1559                 if (mp->format == FORMAT_RAW)
1560                         continue;
1561                 average.packages.counter[i] += p->counter[i];
1562         }
1563         return 0;
1564 }
1565 /*
1566  * sum the counters for all cpus in the system
1567  * compute the weighted average
1568  */
1569 void compute_average(struct thread_data *t, struct core_data *c,
1570         struct pkg_data *p)
1571 {
1572         int i;
1573         struct msr_counter *mp;
1574
1575         clear_counters(&average.threads, &average.cores, &average.packages);
1576
1577         for_all_cpus(sum_counters, t, c, p);
1578
1579         /* Use the global time delta for the average. */
1580         average.threads.tv_delta = tv_delta;
1581
1582         average.threads.tsc /= topo.num_cpus;
1583         average.threads.aperf /= topo.num_cpus;
1584         average.threads.mperf /= topo.num_cpus;
1585         average.threads.c1 /= topo.num_cpus;
1586
1587         if (average.threads.irq_count > 9999999)
1588                 sums_need_wide_columns = 1;
1589
1590         average.cores.c3 /= topo.num_cores;
1591         average.cores.c6 /= topo.num_cores;
1592         average.cores.c7 /= topo.num_cores;
1593         average.cores.mc6_us /= topo.num_cores;
1594
1595         if (DO_BIC(BIC_Totl_c0))
1596                 average.packages.pkg_wtd_core_c0 /= topo.num_packages;
1597         if (DO_BIC(BIC_Any_c0))
1598                 average.packages.pkg_any_core_c0 /= topo.num_packages;
1599         if (DO_BIC(BIC_GFX_c0))
1600                 average.packages.pkg_any_gfxe_c0 /= topo.num_packages;
1601         if (DO_BIC(BIC_CPUGFX))
1602                 average.packages.pkg_both_core_gfxe_c0 /= topo.num_packages;
1603
1604         average.packages.pc2 /= topo.num_packages;
1605         if (DO_BIC(BIC_Pkgpc3))
1606                 average.packages.pc3 /= topo.num_packages;
1607         if (DO_BIC(BIC_Pkgpc6))
1608                 average.packages.pc6 /= topo.num_packages;
1609         if (DO_BIC(BIC_Pkgpc7))
1610                 average.packages.pc7 /= topo.num_packages;
1611
1612         average.packages.pc8 /= topo.num_packages;
1613         average.packages.pc9 /= topo.num_packages;
1614         average.packages.pc10 /= topo.num_packages;
1615
1616         for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
1617                 if (mp->format == FORMAT_RAW)
1618                         continue;
1619                 if (mp->type == COUNTER_ITEMS) {
1620                         if (average.threads.counter[i] > 9999999)
1621                                 sums_need_wide_columns = 1;
1622                         continue;
1623                 }
1624                 average.threads.counter[i] /= topo.num_cpus;
1625         }
1626         for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
1627                 if (mp->format == FORMAT_RAW)
1628                         continue;
1629                 if (mp->type == COUNTER_ITEMS) {
1630                         if (average.cores.counter[i] > 9999999)
1631                                 sums_need_wide_columns = 1;
1632                 }
1633                 average.cores.counter[i] /= topo.num_cores;
1634         }
1635         for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
1636                 if (mp->format == FORMAT_RAW)
1637                         continue;
1638                 if (mp->type == COUNTER_ITEMS) {
1639                         if (average.packages.counter[i] > 9999999)
1640                                 sums_need_wide_columns = 1;
1641                 }
1642                 average.packages.counter[i] /= topo.num_packages;
1643         }
1644 }
1645
/*
 * rdtsc()
 *
 * Execute the x86 RDTSC instruction on the current CPU and return the
 * 64-bit value it leaves in EDX:EAX.
 *
 * Uses the __asm__/__volatile__ spellings so the function also builds
 * when the compiler runs in a strict ISO mode, where the plain "asm"
 * keyword is not recognized.
 */
static unsigned long long rdtsc(void)
{
        unsigned int lo, hi;

        __asm__ __volatile__("rdtsc" : "=a" (lo), "=d" (hi));

        /* EDX holds the upper 32 bits, EAX the lower 32 bits */
        return ((unsigned long long)hi << 32) | lo;
}
1654
/*
 * fopen_or_die()
 *
 * fopen() wrapper: returns the opened stream, or reports the failing
 * path through err() and exits with status 1.
 */
FILE *fopen_or_die(const char *path, const char *mode)
{
        FILE *stream;

        stream = fopen(path, mode);
        if (stream == NULL)
                err(1, "%s: open failed", path);

        return stream;
}
/*
 * snapshot_sysfs_counter()
 *
 * Open the file at path, parse one integer from it and return it.
 * Exits via err() if the file cannot be opened or no number can be read.
 *
 * The value is read with "%llu" to match the unsigned long long
 * destination, so counters above LLONG_MAX are read in full.
 */
unsigned long long snapshot_sysfs_counter(char *path)
{
        FILE *fp;
        int retval;
        unsigned long long counter;

        fp = fopen_or_die(path, "r");

        retval = fscanf(fp, "%llu", &counter);
        if (retval != 1)
                err(1, "snapshot_sysfs_counter(%s)", path);

        fclose(fp);

        return counter;
}
1687
1688 int get_mp(int cpu, struct msr_counter *mp, unsigned long long *counterp)
1689 {
1690         if (mp->msr_num != 0) {
1691                 if (get_msr(cpu, mp->msr_num, counterp))
1692                         return -1;
1693         } else {
1694                 char path[128 + PATH_BYTES];
1695
1696                 if (mp->flags & SYSFS_PERCPU) {
1697                         sprintf(path, "/sys/devices/system/cpu/cpu%d/%s",
1698                                  cpu, mp->path);
1699
1700                         *counterp = snapshot_sysfs_counter(path);
1701                 } else {
1702                         *counterp = snapshot_sysfs_counter(mp->path);
1703                 }
1704         }
1705
1706         return 0;
1707 }
1708
1709 void get_apic_id(struct thread_data *t)
1710 {
1711         unsigned int eax, ebx, ecx, edx;
1712
1713         if (DO_BIC(BIC_APIC)) {
1714                 eax = ebx = ecx = edx = 0;
1715                 __cpuid(1, eax, ebx, ecx, edx);
1716
1717                 t->apic_id = (ebx >> 24) & 0xff;
1718         }
1719
1720         if (!DO_BIC(BIC_X2APIC))
1721                 return;
1722
1723         if (authentic_amd) {
1724                 unsigned int topology_extensions;
1725
1726                 if (max_extended_level < 0x8000001e)
1727                         return;
1728
1729                 eax = ebx = ecx = edx = 0;
1730                 __cpuid(0x80000001, eax, ebx, ecx, edx);
1731                         topology_extensions = ecx & (1 << 22);
1732
1733                 if (topology_extensions == 0)
1734                         return;
1735
1736                 eax = ebx = ecx = edx = 0;
1737                 __cpuid(0x8000001e, eax, ebx, ecx, edx);
1738
1739                 t->x2apic_id = eax;
1740                 return;
1741         }
1742
1743         if (!genuine_intel)
1744                 return;
1745
1746         if (max_level < 0xb)
1747                 return;
1748
1749         ecx = 0;
1750         __cpuid(0xb, eax, ebx, ecx, edx);
1751         t->x2apic_id = edx;
1752
1753         if (debug && (t->apic_id != (t->x2apic_id & 0xff)))
1754                 fprintf(outf, "cpu%d: BIOS BUG: apic 0x%x x2apic 0x%x\n",
1755                                 t->cpu_id, t->apic_id, t->x2apic_id);
1756 }
1757
1758 /*
1759  * get_counters(...)
1760  * migrate to cpu
1761  * acquire and record local counters for that cpu
1762  */
1763 int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
1764 {
1765         int cpu = t->cpu_id;
1766         unsigned long long msr;
1767         int aperf_mperf_retry_count = 0;
1768         struct msr_counter *mp;
1769         int i;
1770
1771         if (cpu_migrate(cpu)) {
1772                 fprintf(outf, "Could not migrate to CPU %d\n", cpu);
1773                 return -1;
1774         }
1775
1776         gettimeofday(&t->tv_begin, (struct timezone *)NULL);
1777
1778         if (first_counter_read)
1779                 get_apic_id(t);
1780 retry:
1781         t->tsc = rdtsc();       /* we are running on local CPU of interest */
1782
1783         if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz)) {
1784                 unsigned long long tsc_before, tsc_between, tsc_after, aperf_time, mperf_time;
1785
1786                 /*
1787                  * The TSC, APERF and MPERF must be read together for
1788                  * APERF/MPERF and MPERF/TSC to give accurate results.
1789                  *
1790                  * Unfortunately, APERF and MPERF are read by
1791                  * individual system call, so delays may occur
1792                  * between them.  If the time to read them
1793                  * varies by a large amount, we re-read them.
1794                  */
1795
1796                 /*
1797                  * This initial dummy APERF read has been seen to
1798                  * reduce jitter in the subsequent reads.
1799                  */
1800
1801                 if (get_msr(cpu, MSR_IA32_APERF, &t->aperf))
1802                         return -3;
1803
1804                 t->tsc = rdtsc();       /* re-read close to APERF */
1805
1806                 tsc_before = t->tsc;
1807
1808                 if (get_msr(cpu, MSR_IA32_APERF, &t->aperf))
1809                         return -3;
1810
1811                 tsc_between = rdtsc();
1812
1813                 if (get_msr(cpu, MSR_IA32_MPERF, &t->mperf))
1814                         return -4;
1815
1816                 tsc_after = rdtsc();
1817
1818                 aperf_time = tsc_between - tsc_before;
1819                 mperf_time = tsc_after - tsc_between;
1820
1821                 /*
1822                  * If the system call latency to read APERF and MPERF
1823                  * differ by more than 2x, then try again.
1824                  */
1825                 if ((aperf_time > (2 * mperf_time)) || (mperf_time > (2 * aperf_time))) {
1826                         aperf_mperf_retry_count++;
1827                         if (aperf_mperf_retry_count < 5)
1828                                 goto retry;
1829                         else
1830                                 warnx("cpu%d jitter %lld %lld",
1831                                         cpu, aperf_time, mperf_time);
1832                 }
1833                 aperf_mperf_retry_count = 0;
1834
1835                 t->aperf = t->aperf * aperf_mperf_multiplier;
1836                 t->mperf = t->mperf * aperf_mperf_multiplier;
1837         }
1838
1839         if (DO_BIC(BIC_IRQ))
1840                 t->irq_count = irqs_per_cpu[cpu];
1841         if (DO_BIC(BIC_SMI)) {
1842                 if (get_msr(cpu, MSR_SMI_COUNT, &msr))
1843                         return -5;
1844                 t->smi_count = msr & 0xFFFFFFFF;
1845         }
1846         if (DO_BIC(BIC_CPU_c1) && use_c1_residency_msr) {
1847                 if (get_msr(cpu, MSR_CORE_C1_RES, &t->c1))
1848                         return -6;
1849         }
1850
1851         for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
1852                 if (get_mp(cpu, mp, &t->counter[i]))
1853                         return -10;
1854         }
1855
1856         /* collect core counters only for 1st thread in core */
1857         if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
1858                 goto done;
1859
1860         if (DO_BIC(BIC_CPU_c3)) {
1861                 if (get_msr(cpu, MSR_CORE_C3_RESIDENCY, &c->c3))
1862                         return -6;
1863         }
1864
1865         if (DO_BIC(BIC_CPU_c6) && !do_knl_cstates) {
1866                 if (get_msr(cpu, MSR_CORE_C6_RESIDENCY, &c->c6))
1867                         return -7;
1868         } else if (do_knl_cstates) {
1869                 if (get_msr(cpu, MSR_KNL_CORE_C6_RESIDENCY, &c->c6))
1870                         return -7;
1871         }
1872
1873         if (DO_BIC(BIC_CPU_c7))
1874                 if (get_msr(cpu, MSR_CORE_C7_RESIDENCY, &c->c7))
1875                         return -8;
1876
1877         if (DO_BIC(BIC_Mod_c6))
1878                 if (get_msr(cpu, MSR_MODULE_C6_RES_MS, &c->mc6_us))
1879                         return -8;
1880
1881         if (DO_BIC(BIC_CoreTmp)) {
1882                 if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr))
1883                         return -9;
1884                 c->core_temp_c = tcc_activation_temp - ((msr >> 16) & 0x7F);
1885         }
1886
1887         if (do_rapl & RAPL_AMD_F17H) {
1888                 if (get_msr(cpu, MSR_CORE_ENERGY_STAT, &msr))
1889                         return -14;
1890                 c->core_energy = msr & 0xFFFFFFFF;
1891         }
1892
1893         for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
1894                 if (get_mp(cpu, mp, &c->counter[i]))
1895                         return -10;
1896         }
1897
1898         /* collect package counters only for 1st core in package */
1899         if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
1900                 goto done;
1901
1902         if (DO_BIC(BIC_Totl_c0)) {
1903                 if (get_msr(cpu, MSR_PKG_WEIGHTED_CORE_C0_RES, &p->pkg_wtd_core_c0))
1904                         return -10;
1905         }
1906         if (DO_BIC(BIC_Any_c0)) {
1907                 if (get_msr(cpu, MSR_PKG_ANY_CORE_C0_RES, &p->pkg_any_core_c0))
1908                         return -11;
1909         }
1910         if (DO_BIC(BIC_GFX_c0)) {
1911                 if (get_msr(cpu, MSR_PKG_ANY_GFXE_C0_RES, &p->pkg_any_gfxe_c0))
1912                         return -12;
1913         }
1914         if (DO_BIC(BIC_CPUGFX)) {
1915                 if (get_msr(cpu, MSR_PKG_BOTH_CORE_GFXE_C0_RES, &p->pkg_both_core_gfxe_c0))
1916                         return -13;
1917         }
1918         if (DO_BIC(BIC_Pkgpc3))
1919                 if (get_msr(cpu, MSR_PKG_C3_RESIDENCY, &p->pc3))
1920                         return -9;
1921         if (DO_BIC(BIC_Pkgpc6)) {
1922                 if (do_slm_cstates) {
1923                         if (get_msr(cpu, MSR_ATOM_PKG_C6_RESIDENCY, &p->pc6))
1924                                 return -10;
1925                 } else {
1926                         if (get_msr(cpu, MSR_PKG_C6_RESIDENCY, &p->pc6))
1927                                 return -10;
1928                 }
1929         }
1930
1931         if (DO_BIC(BIC_Pkgpc2))
1932                 if (get_msr(cpu, MSR_PKG_C2_RESIDENCY, &p->pc2))
1933                         return -11;
1934         if (DO_BIC(BIC_Pkgpc7))
1935                 if (get_msr(cpu, MSR_PKG_C7_RESIDENCY, &p->pc7))
1936                         return -12;
1937         if (DO_BIC(BIC_Pkgpc8))
1938                 if (get_msr(cpu, MSR_PKG_C8_RESIDENCY, &p->pc8))
1939                         return -13;
1940         if (DO_BIC(BIC_Pkgpc9))
1941                 if (get_msr(cpu, MSR_PKG_C9_RESIDENCY, &p->pc9))
1942                         return -13;
1943         if (DO_BIC(BIC_Pkgpc10))
1944                 if (get_msr(cpu, MSR_PKG_C10_RESIDENCY, &p->pc10))
1945                         return -13;
1946
1947         if (DO_BIC(BIC_CPU_LPI))
1948                 p->cpu_lpi = cpuidle_cur_cpu_lpi_us;
1949         if (DO_BIC(BIC_SYS_LPI))
1950                 p->sys_lpi = cpuidle_cur_sys_lpi_us;
1951
1952         if (do_rapl & RAPL_PKG) {
1953                 if (get_msr(cpu, MSR_PKG_ENERGY_STATUS, &msr))
1954                         return -13;
1955                 p->energy_pkg = msr & 0xFFFFFFFF;
1956         }
1957         if (do_rapl & RAPL_CORES_ENERGY_STATUS) {
1958                 if (get_msr(cpu, MSR_PP0_ENERGY_STATUS, &msr))
1959                         return -14;
1960                 p->energy_cores = msr & 0xFFFFFFFF;
1961         }
1962         if (do_rapl & RAPL_DRAM) {
1963                 if (get_msr(cpu, MSR_DRAM_ENERGY_STATUS, &msr))
1964                         return -15;
1965                 p->energy_dram = msr & 0xFFFFFFFF;
1966         }
1967         if (do_rapl & RAPL_GFX) {
1968                 if (get_msr(cpu, MSR_PP1_ENERGY_STATUS, &msr))
1969                         return -16;
1970                 p->energy_gfx = msr & 0xFFFFFFFF;
1971         }
1972         if (do_rapl & RAPL_PKG_PERF_STATUS) {
1973                 if (get_msr(cpu, MSR_PKG_PERF_STATUS, &msr))
1974                         return -16;
1975                 p->rapl_pkg_perf_status = msr & 0xFFFFFFFF;
1976         }
1977         if (do_rapl & RAPL_DRAM_PERF_STATUS) {
1978                 if (get_msr(cpu, MSR_DRAM_PERF_STATUS, &msr))
1979                         return -16;
1980                 p->rapl_dram_perf_status = msr & 0xFFFFFFFF;
1981         }
1982         if (do_rapl & RAPL_AMD_F17H) {
1983                 if (get_msr(cpu, MSR_PKG_ENERGY_STAT, &msr))
1984                         return -13;
1985                 p->energy_pkg = msr & 0xFFFFFFFF;
1986         }
1987         if (DO_BIC(BIC_PkgTmp)) {
1988                 if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr))
1989                         return -17;
1990                 p->pkg_temp_c = tcc_activation_temp - ((msr >> 16) & 0x7F);
1991         }
1992
1993         if (DO_BIC(BIC_GFX_rc6))
1994                 p->gfx_rc6_ms = gfx_cur_rc6_ms;
1995
1996         if (DO_BIC(BIC_GFXMHz))
1997                 p->gfx_mhz = gfx_cur_mhz;
1998
1999         for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
2000                 if (get_mp(cpu, mp, &p->counter[i]))
2001                         return -10;
2002         }
2003 done:
2004         gettimeofday(&t->tv_end, (struct timezone *)NULL);
2005
2006         return 0;
2007 }
2008
/*
 * MSR_PKG_CST_CONFIG_CONTROL decoding for pkg_cstate_limit:
 * If you change the values, note they are used both in comparisons
 * (>= PCL__7) and to index pkg_cstate_limit_strings[].
 */

#define PCLUKN 0 /* Unknown */
#define PCLRSV 1 /* Reserved */
#define PCL__0 2 /* PC0 */
#define PCL__1 3 /* PC1 */
#define PCL__2 4 /* PC2 */
#define PCL__3 5 /* PC3 */
#define PCL__4 6 /* PC4 */
#define PCL__6 7 /* PC6 */
#define PCL_6N 8 /* PC6 No Retention */
#define PCL_6R 9 /* PC6 Retention */
#define PCL__7 10 /* PC7 */
#define PCL_7S 11 /* PC7 Shrink */
#define PCL__8 12 /* PC8 */
#define PCL__9 13 /* PC9 */
#define PCL_10 14 /* PC10 */
#define PCLUNL 15 /* Unlimited */

int pkg_cstate_limit = PCLUKN;

/*
 * Printable name of each PCL* value, indexed by that value.
 * Designated initializers tie every string to its constant, so
 * PCLUKN prints as "unknown" and PCLRSV as "reserved".
 */
char *pkg_cstate_limit_strings[] = {
        [PCLUKN] = "unknown",
        [PCLRSV] = "reserved",
        [PCL__0] = "pc0",
        [PCL__1] = "pc1",
        [PCL__2] = "pc2",
        [PCL__3] = "pc3",
        [PCL__4] = "pc4",
        [PCL__6] = "pc6",
        [PCL_6N] = "pc6n",
        [PCL_6R] = "pc6r",
        [PCL__7] = "pc7",
        [PCL_7S] = "pc7s",
        [PCL__8] = "pc8",
        [PCL__9] = "pc9",
        [PCL_10] = "pc10",
        [PCLUNL] = "unlimited",
};

/* Per-platform tables of 16 PCL* values each. */
int nhm_pkg_cstate_limits[16] = {
        PCL__0, PCL__1, PCL__3, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLUNL,
        PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
int snb_pkg_cstate_limits[16] = {
        PCL__0, PCL__2, PCL_6N, PCL_6R, PCL__7, PCL_7S, PCLRSV, PCLUNL,
        PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
int hsw_pkg_cstate_limits[16] = {
        PCL__0, PCL__2, PCL__3, PCL__6, PCL__7, PCL_7S, PCL__8, PCL__9,
        PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
int slv_pkg_cstate_limits[16] = {
        PCL__0, PCL__1, PCLRSV, PCLRSV, PCL__4, PCLRSV, PCL__6, PCL__7,
        PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7};
int amt_pkg_cstate_limits[16] = {
        PCLUNL, PCL__1, PCL__2, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7,
        PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
int phi_pkg_cstate_limits[16] = {
        PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL,
        PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
int glm_pkg_cstate_limits[16] = {
        PCLUNL, PCL__1, PCL__3, PCL__6, PCL__7, PCL_7S, PCL__8, PCL__9,
        PCL_10, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
int skx_pkg_cstate_limits[16] = {
        PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL,
        PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
2044
2045
2046 static void
2047 calculate_tsc_tweak()
2048 {
2049         tsc_tweak = base_hz / tsc_hz;
2050 }
2051
2052 static void
2053 dump_nhm_platform_info(void)
2054 {
2055         unsigned long long msr;
2056         unsigned int ratio;
2057
2058         get_msr(base_cpu, MSR_PLATFORM_INFO, &msr);
2059
2060         fprintf(outf, "cpu%d: MSR_PLATFORM_INFO: 0x%08llx\n", base_cpu, msr);
2061
2062         ratio = (msr >> 40) & 0xFF;
2063         fprintf(outf, "%d * %.1f = %.1f MHz max efficiency frequency\n",
2064                 ratio, bclk, ratio * bclk);
2065
2066         ratio = (msr >> 8) & 0xFF;
2067         fprintf(outf, "%d * %.1f = %.1f MHz base frequency\n",
2068                 ratio, bclk, ratio * bclk);
2069
2070         get_msr(base_cpu, MSR_IA32_POWER_CTL, &msr);
2071         fprintf(outf, "cpu%d: MSR_IA32_POWER_CTL: 0x%08llx (C1E auto-promotion: %sabled)\n",
2072                 base_cpu, msr, msr & 0x2 ? "EN" : "DIS");
2073
2074         return;
2075 }
2076
2077 static void
2078 dump_hsw_turbo_ratio_limits(void)
2079 {
2080         unsigned long long msr;
2081         unsigned int ratio;
2082
2083         get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT2, &msr);
2084
2085         fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT2: 0x%08llx\n", base_cpu, msr);
2086
2087         ratio = (msr >> 8) & 0xFF;
2088         if (ratio)
2089                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 18 active cores\n",
2090                         ratio, bclk, ratio * bclk);
2091
2092         ratio = (msr >> 0) & 0xFF;
2093         if (ratio)
2094                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 17 active cores\n",
2095                         ratio, bclk, ratio * bclk);
2096         return;
2097 }
2098
2099 static void
2100 dump_ivt_turbo_ratio_limits(void)
2101 {
2102         unsigned long long msr;
2103         unsigned int ratio;
2104
2105         get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT1, &msr);
2106
2107         fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", base_cpu, msr);
2108
2109         ratio = (msr >> 56) & 0xFF;
2110         if (ratio)
2111                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 16 active cores\n",
2112                         ratio, bclk, ratio * bclk);
2113
2114         ratio = (msr >> 48) & 0xFF;
2115         if (ratio)
2116                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 15 active cores\n",
2117                         ratio, bclk, ratio * bclk);
2118
2119         ratio = (msr >> 40) & 0xFF;
2120         if (ratio)
2121                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 14 active cores\n",
2122                         ratio, bclk, ratio * bclk);
2123
2124         ratio = (msr >> 32) & 0xFF;
2125         if (ratio)
2126                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 13 active cores\n",
2127                         ratio, bclk, ratio * bclk);
2128
2129         ratio = (msr >> 24) & 0xFF;
2130         if (ratio)
2131                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 12 active cores\n",
2132                         ratio, bclk, ratio * bclk);
2133
2134         ratio = (msr >> 16) & 0xFF;
2135         if (ratio)
2136                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 11 active cores\n",
2137                         ratio, bclk, ratio * bclk);
2138
2139         ratio = (msr >> 8) & 0xFF;
2140         if (ratio)
2141                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 10 active cores\n",
2142                         ratio, bclk, ratio * bclk);
2143
2144         ratio = (msr >> 0) & 0xFF;
2145         if (ratio)
2146                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 9 active cores\n",
2147                         ratio, bclk, ratio * bclk);
2148         return;
2149 }
2150 int has_turbo_ratio_group_limits(int family, int model)
2151 {
2152
2153         if (!genuine_intel)
2154                 return 0;
2155
2156         switch (model) {
2157         case INTEL_FAM6_ATOM_GOLDMONT:
2158         case INTEL_FAM6_SKYLAKE_X:
2159         case INTEL_FAM6_ATOM_GOLDMONT_X:
2160                 return 1;
2161         }
2162         return 0;
2163 }
2164
2165 static void
2166 dump_turbo_ratio_limits(int family, int model)
2167 {
2168         unsigned long long msr, core_counts;
2169         unsigned int ratio, group_size;
2170
2171         get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT, &msr);
2172         fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n", base_cpu, msr);
2173
2174         if (has_turbo_ratio_group_limits(family, model)) {
2175                 get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT1, &core_counts);
2176                 fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", base_cpu, core_counts);
2177         } else {
2178                 core_counts = 0x0807060504030201;
2179         }
2180
2181         ratio = (msr >> 56) & 0xFF;
2182         group_size = (core_counts >> 56) & 0xFF;
2183         if (ratio)
2184                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
2185                         ratio, bclk, ratio * bclk, group_size);
2186
2187         ratio = (msr >> 48) & 0xFF;
2188         group_size = (core_counts >> 48) & 0xFF;
2189         if (ratio)
2190                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
2191                         ratio, bclk, ratio * bclk, group_size);
2192
2193         ratio = (msr >> 40) & 0xFF;
2194         group_size = (core_counts >> 40) & 0xFF;
2195         if (ratio)
2196                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
2197                         ratio, bclk, ratio * bclk, group_size);
2198
2199         ratio = (msr >> 32) & 0xFF;
2200         group_size = (core_counts >> 32) & 0xFF;
2201         if (ratio)
2202                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
2203                         ratio, bclk, ratio * bclk, group_size);
2204
2205         ratio = (msr >> 24) & 0xFF;
2206         group_size = (core_counts >> 24) & 0xFF;
2207         if (ratio)
2208                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
2209                         ratio, bclk, ratio * bclk, group_size);
2210
2211         ratio = (msr >> 16) & 0xFF;
2212         group_size = (core_counts >> 16) & 0xFF;
2213         if (ratio)
2214                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
2215                         ratio, bclk, ratio * bclk, group_size);
2216
2217         ratio = (msr >> 8) & 0xFF;
2218         group_size = (core_counts >> 8) & 0xFF;
2219         if (ratio)
2220                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
2221                         ratio, bclk, ratio * bclk, group_size);
2222
2223         ratio = (msr >> 0) & 0xFF;
2224         group_size = (core_counts >> 0) & 0xFF;
2225         if (ratio)
2226                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
2227                         ratio, bclk, ratio * bclk, group_size);
2228         return;
2229 }
2230
2231 static void
2232 dump_atom_turbo_ratio_limits(void)
2233 {
2234         unsigned long long msr;
2235         unsigned int ratio;
2236
2237         get_msr(base_cpu, MSR_ATOM_CORE_RATIOS, &msr);
2238         fprintf(outf, "cpu%d: MSR_ATOM_CORE_RATIOS: 0x%08llx\n", base_cpu, msr & 0xFFFFFFFF);
2239
2240         ratio = (msr >> 0) & 0x3F;
2241         if (ratio)
2242                 fprintf(outf, "%d * %.1f = %.1f MHz minimum operating frequency\n",
2243                         ratio, bclk, ratio * bclk);
2244
2245         ratio = (msr >> 8) & 0x3F;
2246         if (ratio)
2247                 fprintf(outf, "%d * %.1f = %.1f MHz low frequency mode (LFM)\n",
2248                         ratio, bclk, ratio * bclk);
2249
2250         ratio = (msr >> 16) & 0x3F;
2251         if (ratio)
2252                 fprintf(outf, "%d * %.1f = %.1f MHz base frequency\n",
2253                         ratio, bclk, ratio * bclk);
2254
2255         get_msr(base_cpu, MSR_ATOM_CORE_TURBO_RATIOS, &msr);
2256         fprintf(outf, "cpu%d: MSR_ATOM_CORE_TURBO_RATIOS: 0x%08llx\n", base_cpu, msr & 0xFFFFFFFF);
2257
2258         ratio = (msr >> 24) & 0x3F;
2259         if (ratio)
2260                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 4 active cores\n",
2261                         ratio, bclk, ratio * bclk);
2262
2263         ratio = (msr >> 16) & 0x3F;
2264         if (ratio)
2265                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 3 active cores\n",
2266                         ratio, bclk, ratio * bclk);
2267
2268         ratio = (msr >> 8) & 0x3F;
2269         if (ratio)
2270                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 2 active cores\n",
2271                         ratio, bclk, ratio * bclk);
2272
2273         ratio = (msr >> 0) & 0x3F;
2274         if (ratio)
2275                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 1 active core\n",
2276                         ratio, bclk, ratio * bclk);
2277 }
2278
2279 static void
2280 dump_knl_turbo_ratio_limits(void)
2281 {
2282         const unsigned int buckets_no = 7;
2283
2284         unsigned long long msr;
2285         int delta_cores, delta_ratio;
2286         int i, b_nr;
2287         unsigned int cores[buckets_no];
2288         unsigned int ratio[buckets_no];
2289
2290         get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT, &msr);
2291
2292         fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n",
2293                 base_cpu, msr);
2294
2295         /**
2296          * Turbo encoding in KNL is as follows:
2297          * [0] -- Reserved
2298          * [7:1] -- Base value of number of active cores of bucket 1.
2299          * [15:8] -- Base value of freq ratio of bucket 1.
2300          * [20:16] -- +ve delta of number of active cores of bucket 2.
2301          * i.e. active cores of bucket 2 =
2302          * active cores of bucket 1 + delta
2303          * [23:21] -- Negative delta of freq ratio of bucket 2.
2304          * i.e. freq ratio of bucket 2 =
2305          * freq ratio of bucket 1 - delta
2306          * [28:24]-- +ve delta of number of active cores of bucket 3.
2307          * [31:29]-- -ve delta of freq ratio of bucket 3.
2308          * [36:32]-- +ve delta of number of active cores of bucket 4.
2309          * [39:37]-- -ve delta of freq ratio of bucket 4.
2310          * [44:40]-- +ve delta of number of active cores of bucket 5.
2311          * [47:45]-- -ve delta of freq ratio of bucket 5.
2312          * [52:48]-- +ve delta of number of active cores of bucket 6.
2313          * [55:53]-- -ve delta of freq ratio of bucket 6.
2314          * [60:56]-- +ve delta of number of active cores of bucket 7.
2315          * [63:61]-- -ve delta of freq ratio of bucket 7.
2316          */
2317
2318         b_nr = 0;
2319         cores[b_nr] = (msr & 0xFF) >> 1;
2320         ratio[b_nr] = (msr >> 8) & 0xFF;
2321
2322         for (i = 16; i < 64; i += 8) {
2323                 delta_cores = (msr >> i) & 0x1F;
2324                 delta_ratio = (msr >> (i + 5)) & 0x7;
2325
2326                 cores[b_nr + 1] = cores[b_nr] + delta_cores;
2327                 ratio[b_nr + 1] = ratio[b_nr] - delta_ratio;
2328                 b_nr++;
2329         }
2330
2331         for (i = buckets_no - 1; i >= 0; i--)
2332                 if (i > 0 ? ratio[i] != ratio[i - 1] : 1)
2333                         fprintf(outf,
2334                                 "%d * %.1f = %.1f MHz max turbo %d active cores\n",
2335                                 ratio[i], bclk, ratio[i] * bclk, cores[i]);
2336 }
2337
2338 static void
2339 dump_nhm_cst_cfg(void)
2340 {
2341         unsigned long long msr;
2342
2343         get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr);
2344
2345         fprintf(outf, "cpu%d: MSR_PKG_CST_CONFIG_CONTROL: 0x%08llx", base_cpu, msr);
2346
2347         fprintf(outf, " (%s%s%s%s%slocked, pkg-cstate-limit=%d (%s)",
2348                 (msr & SNB_C3_AUTO_UNDEMOTE) ? "UNdemote-C3, " : "",
2349                 (msr & SNB_C1_AUTO_UNDEMOTE) ? "UNdemote-C1, " : "",
2350                 (msr & NHM_C3_AUTO_DEMOTE) ? "demote-C3, " : "",
2351                 (msr & NHM_C1_AUTO_DEMOTE) ? "demote-C1, " : "",
2352                 (msr & (1 << 15)) ? "" : "UN",
2353                 (unsigned int)msr & 0xF,
2354                 pkg_cstate_limit_strings[pkg_cstate_limit]);
2355
2356 #define AUTOMATIC_CSTATE_CONVERSION             (1UL << 16)
2357         if (has_automatic_cstate_conversion) {
2358                 fprintf(outf, ", automatic c-state conversion=%s",
2359                         (msr & AUTOMATIC_CSTATE_CONVERSION) ? "on" : "off");
2360         }
2361
2362         fprintf(outf, ")\n");
2363
2364         return;
2365 }
2366
2367 static void
2368 dump_config_tdp(void)
2369 {
2370         unsigned long long msr;
2371
2372         get_msr(base_cpu, MSR_CONFIG_TDP_NOMINAL, &msr);
2373         fprintf(outf, "cpu%d: MSR_CONFIG_TDP_NOMINAL: 0x%08llx", base_cpu, msr);
2374         fprintf(outf, " (base_ratio=%d)\n", (unsigned int)msr & 0xFF);
2375
2376         get_msr(base_cpu, MSR_CONFIG_TDP_LEVEL_1, &msr);
2377         fprintf(outf, "cpu%d: MSR_CONFIG_TDP_LEVEL_1: 0x%08llx (", base_cpu, msr);
2378         if (msr) {
2379                 fprintf(outf, "PKG_MIN_PWR_LVL1=%d ", (unsigned int)(msr >> 48) & 0x7FFF);
2380                 fprintf(outf, "PKG_MAX_PWR_LVL1=%d ", (unsigned int)(msr >> 32) & 0x7FFF);
2381                 fprintf(outf, "LVL1_RATIO=%d ", (unsigned int)(msr >> 16) & 0xFF);
2382                 fprintf(outf, "PKG_TDP_LVL1=%d", (unsigned int)(msr) & 0x7FFF);
2383         }
2384         fprintf(outf, ")\n");
2385
2386         get_msr(base_cpu, MSR_CONFIG_TDP_LEVEL_2, &msr);
2387         fprintf(outf, "cpu%d: MSR_CONFIG_TDP_LEVEL_2: 0x%08llx (", base_cpu, msr);
2388         if (msr) {
2389                 fprintf(outf, "PKG_MIN_PWR_LVL2=%d ", (unsigned int)(msr >> 48) & 0x7FFF);
2390                 fprintf(outf, "PKG_MAX_PWR_LVL2=%d ", (unsigned int)(msr >> 32) & 0x7FFF);
2391                 fprintf(outf, "LVL2_RATIO=%d ", (unsigned int)(msr >> 16) & 0xFF);
2392                 fprintf(outf, "PKG_TDP_LVL2=%d", (unsigned int)(msr) & 0x7FFF);
2393         }
2394         fprintf(outf, ")\n");
2395
2396         get_msr(base_cpu, MSR_CONFIG_TDP_CONTROL, &msr);
2397         fprintf(outf, "cpu%d: MSR_CONFIG_TDP_CONTROL: 0x%08llx (", base_cpu, msr);
2398         if ((msr) & 0x3)
2399                 fprintf(outf, "TDP_LEVEL=%d ", (unsigned int)(msr) & 0x3);
2400         fprintf(outf, " lock=%d", (unsigned int)(msr >> 31) & 1);
2401         fprintf(outf, ")\n");
2402
2403         get_msr(base_cpu, MSR_TURBO_ACTIVATION_RATIO, &msr);
2404         fprintf(outf, "cpu%d: MSR_TURBO_ACTIVATION_RATIO: 0x%08llx (", base_cpu, msr);
2405         fprintf(outf, "MAX_NON_TURBO_RATIO=%d", (unsigned int)(msr) & 0xFF);
2406         fprintf(outf, " lock=%d", (unsigned int)(msr >> 31) & 1);
2407         fprintf(outf, ")\n");
2408 }
2409
2410 unsigned int irtl_time_units[] = {1, 32, 1024, 32768, 1048576, 33554432, 0, 0 };
2411
2412 void print_irtl(void)
2413 {
2414         unsigned long long msr;
2415
2416         get_msr(base_cpu, MSR_PKGC3_IRTL, &msr);
2417         fprintf(outf, "cpu%d: MSR_PKGC3_IRTL: 0x%08llx (", base_cpu, msr);
2418         fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
2419                 (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
2420
2421         get_msr(base_cpu, MSR_PKGC6_IRTL, &msr);
2422         fprintf(outf, "cpu%d: MSR_PKGC6_IRTL: 0x%08llx (", base_cpu, msr);
2423         fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
2424                 (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
2425
2426         get_msr(base_cpu, MSR_PKGC7_IRTL, &msr);
2427         fprintf(outf, "cpu%d: MSR_PKGC7_IRTL: 0x%08llx (", base_cpu, msr);
2428         fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
2429                 (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
2430
2431         if (!do_irtl_hsw)
2432                 return;
2433
2434         get_msr(base_cpu, MSR_PKGC8_IRTL, &msr);
2435         fprintf(outf, "cpu%d: MSR_PKGC8_IRTL: 0x%08llx (", base_cpu, msr);
2436         fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
2437                 (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
2438
2439         get_msr(base_cpu, MSR_PKGC9_IRTL, &msr);
2440         fprintf(outf, "cpu%d: MSR_PKGC9_IRTL: 0x%08llx (", base_cpu, msr);
2441         fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
2442                 (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
2443
2444         get_msr(base_cpu, MSR_PKGC10_IRTL, &msr);
2445         fprintf(outf, "cpu%d: MSR_PKGC10_IRTL: 0x%08llx (", base_cpu, msr);
2446         fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
2447                 (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
2448
2449 }
2450 void free_fd_percpu(void)
2451 {
2452         int i;
2453
2454         for (i = 0; i < topo.max_cpu_num + 1; ++i) {
2455                 if (fd_percpu[i] != 0)
2456                         close(fd_percpu[i]);
2457         }
2458
2459         free(fd_percpu);
2460 }
2461
2462 void free_all_buffers(void)
2463 {
2464         int i;
2465
2466         CPU_FREE(cpu_present_set);
2467         cpu_present_set = NULL;
2468         cpu_present_setsize = 0;
2469
2470         CPU_FREE(cpu_affinity_set);
2471         cpu_affinity_set = NULL;
2472         cpu_affinity_setsize = 0;
2473
2474         free(thread_even);
2475         free(core_even);
2476         free(package_even);
2477
2478         thread_even = NULL;
2479         core_even = NULL;
2480         package_even = NULL;
2481
2482         free(thread_odd);
2483         free(core_odd);
2484         free(package_odd);
2485
2486         thread_odd = NULL;
2487         core_odd = NULL;
2488         package_odd = NULL;
2489
2490         free(output_buffer);
2491         output_buffer = NULL;
2492         outp = NULL;
2493
2494         free_fd_percpu();
2495
2496         free(irq_column_2_cpu);
2497         free(irqs_per_cpu);
2498
2499         for (i = 0; i <= topo.max_cpu_num; ++i) {
2500                 if (cpus[i].put_ids)
2501                         CPU_FREE(cpus[i].put_ids);
2502         }
2503         free(cpus);
2504 }
2505
2506
/*
 * parse_int_file(fmt, ...)
 *
 * Build a path from the printf-style arguments and read one decimal int
 * from that file.
 *
 * Returns 0 when the file can not be opened, otherwise the parsed value.
 * Exits via err() when the file opens but no int can be parsed from it.
 */
int parse_int_file(const char *fmt, ...)
{
	char path[PATH_MAX];
	va_list ap;
	FILE *fp;
	int parsed;

	va_start(ap, fmt);
	vsnprintf(path, sizeof(path), fmt, ap);
	va_end(ap);

	fp = fopen(path, "r");
	if (fp == NULL)
		return 0;

	if (fscanf(fp, "%d", &parsed) != 1)
		err(1, "%s: failed to parse number from file", path);

	fclose(fp);

	return parsed;
}
2530
/*
 * cpu_is_first_core_in_package(cpu)
 *
 * Return 1 when the first number in this cpu's core_siblings_list
 * equals cpu itself, else 0.
 */
int cpu_is_first_core_in_package(int cpu)
{
	int first_sibling;

	first_sibling = parse_int_file("/sys/devices/system/cpu/cpu%d/topology/core_siblings_list", cpu);

	return cpu == first_sibling;
}
2539
/* Return the value read from this cpu's topology/physical_package_id file. */
int get_physical_package_id(int cpu)
{
	int package_id;

	package_id = parse_int_file("/sys/devices/system/cpu/cpu%d/topology/physical_package_id", cpu);

	return package_id;
}
2544
/* Return the value read from this cpu's topology/die_id file. */
int get_die_id(int cpu)
{
	int die_id;

	die_id = parse_int_file("/sys/devices/system/cpu/cpu%d/topology/die_id", cpu);

	return die_id;
}
2549
/* Return the value read from this cpu's topology/core_id file. */
int get_core_id(int cpu)
{
	int core_id;

	core_id = parse_int_file("/sys/devices/system/cpu/cpu%d/topology/core_id", cpu);

	return core_id;
}
2554
2555 void set_node_data(void)
2556 {
2557         int pkg, node, lnode, cpu, cpux;
2558         int cpu_count;
2559
2560         /* initialize logical_node_id */
2561         for (cpu = 0; cpu <= topo.max_cpu_num; ++cpu)
2562                 cpus[cpu].logical_node_id = -1;
2563
2564         cpu_count = 0;
2565         for (pkg = 0; pkg < topo.num_packages; pkg++) {
2566                 lnode = 0;
2567                 for (cpu = 0; cpu <= topo.max_cpu_num; ++cpu) {
2568                         if (cpus[cpu].physical_package_id != pkg)
2569                                 continue;
2570                         /* find a cpu with an unset logical_node_id */
2571                         if (cpus[cpu].logical_node_id != -1)
2572                                 continue;
2573                         cpus[cpu].logical_node_id = lnode;
2574                         node = cpus[cpu].physical_node_id;
2575                         cpu_count++;
2576                         /*
2577                          * find all matching cpus on this pkg and set
2578                          * the logical_node_id
2579                          */
2580                         for (cpux = cpu; cpux <= topo.max_cpu_num; cpux++) {
2581                                 if ((cpus[cpux].physical_package_id == pkg) &&
2582                                    (cpus[cpux].physical_node_id == node)) {
2583                                         cpus[cpux].logical_node_id = lnode;
2584                                         cpu_count++;
2585                                 }
2586                         }
2587                         lnode++;
2588                         if (lnode > topo.nodes_per_pkg)
2589                                 topo.nodes_per_pkg = lnode;
2590                 }
2591                 if (cpu_count >= topo.max_cpu_num)
2592                         break;
2593         }
2594 }
2595
2596 int get_physical_node_id(struct cpu_topology *thiscpu)
2597 {
2598         char path[80];
2599         FILE *filep;
2600         int i;
2601         int cpu = thiscpu->logical_cpu_id;
2602
2603         for (i = 0; i <= topo.max_cpu_num; i++) {
2604                 sprintf(path, "/sys/devices/system/cpu/cpu%d/node%i/cpulist",
2605                         cpu, i);
2606                 filep = fopen(path, "r");
2607                 if (!filep)
2608                         continue;
2609                 fclose(filep);
2610                 return i;
2611         }
2612         return -1;
2613 }
2614
2615 int get_thread_siblings(struct cpu_topology *thiscpu)
2616 {
2617         char path[80], character;
2618         FILE *filep;
2619         unsigned long map;
2620         int so, shift, sib_core;
2621         int cpu = thiscpu->logical_cpu_id;
2622         int offset = topo.max_cpu_num + 1;
2623         size_t size;
2624         int thread_id = 0;
2625
2626         thiscpu->put_ids = CPU_ALLOC((topo.max_cpu_num + 1));
2627         if (thiscpu->thread_id < 0)
2628                 thiscpu->thread_id = thread_id++;
2629         if (!thiscpu->put_ids)
2630                 return -1;
2631
2632         size = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
2633         CPU_ZERO_S(size, thiscpu->put_ids);
2634
2635         sprintf(path,
2636                 "/sys/devices/system/cpu/cpu%d/topology/thread_siblings", cpu);
2637         filep = fopen_or_die(path, "r");
2638         do {
2639                 offset -= BITMASK_SIZE;
2640                 if (fscanf(filep, "%lx%c", &map, &character) != 2)
2641                         err(1, "%s: failed to parse file", path);
2642                 for (shift = 0; shift < BITMASK_SIZE; shift++) {
2643                         if ((map >> shift) & 0x1) {
2644                                 so = shift + offset;
2645                                 sib_core = get_core_id(so);
2646                                 if (sib_core == thiscpu->physical_core_id) {
2647                                         CPU_SET_S(so, size, thiscpu->put_ids);
2648                                         if ((so != cpu) &&
2649                                             (cpus[so].thread_id < 0))
2650                                                 cpus[so].thread_id =
2651                                                                     thread_id++;
2652                                 }
2653                         }
2654                 }
2655         } while (!strncmp(&character, ",", 1));
2656         fclose(filep);
2657
2658         return CPU_COUNT_S(size, thiscpu->put_ids);
2659 }
2660
2661 /*
2662  * run func(thread, core, package) in topology order
2663  * skip non-present cpus
2664  */
2665
2666 int for_all_cpus_2(int (func)(struct thread_data *, struct core_data *,
2667         struct pkg_data *, struct thread_data *, struct core_data *,
2668         struct pkg_data *), struct thread_data *thread_base,
2669         struct core_data *core_base, struct pkg_data *pkg_base,
2670         struct thread_data *thread_base2, struct core_data *core_base2,
2671         struct pkg_data *pkg_base2)
2672 {
2673         int retval, pkg_no, node_no, core_no, thread_no;
2674
2675         for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) {
2676                 for (node_no = 0; node_no < topo.nodes_per_pkg; ++node_no) {
2677                         for (core_no = 0; core_no < topo.cores_per_node;
2678                              ++core_no) {
2679                                 for (thread_no = 0; thread_no <
2680                                         topo.threads_per_core; ++thread_no) {
2681                                         struct thread_data *t, *t2;
2682                                         struct core_data *c, *c2;
2683                                         struct pkg_data *p, *p2;
2684
2685                                         t = GET_THREAD(thread_base, thread_no,
2686                                                        core_no, node_no,
2687                                                        pkg_no);
2688
2689                                         if (cpu_is_not_present(t->cpu_id))
2690                                                 continue;
2691
2692                                         t2 = GET_THREAD(thread_base2, thread_no,
2693                                                         core_no, node_no,
2694                                                         pkg_no);
2695
2696                                         c = GET_CORE(core_base, core_no,
2697                                                      node_no, pkg_no);
2698                                         c2 = GET_CORE(core_base2, core_no,
2699                                                       node_no,
2700                                                       pkg_no);
2701
2702                                         p = GET_PKG(pkg_base, pkg_no);
2703                                         p2 = GET_PKG(pkg_base2, pkg_no);
2704
2705                                         retval = func(t, c, p, t2, c2, p2);
2706                                         if (retval)
2707                                                 return retval;
2708                                 }
2709                         }
2710                 }
2711         }
2712         return 0;
2713 }
2714
2715 /*
2716  * run func(cpu) on every cpu in /proc/stat
2717  * return max_cpu number
2718  */
2719 int for_all_proc_cpus(int (func)(int))
2720 {
2721         FILE *fp;
2722         int cpu_num;
2723         int retval;
2724
2725         fp = fopen_or_die(proc_stat, "r");
2726
2727         retval = fscanf(fp, "cpu %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n");
2728         if (retval != 0)
2729                 err(1, "%s: failed to parse format", proc_stat);
2730
2731         while (1) {
2732                 retval = fscanf(fp, "cpu%u %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n", &cpu_num);
2733                 if (retval != 1)
2734                         break;
2735
2736                 retval = func(cpu_num);
2737                 if (retval) {
2738                         fclose(fp);
2739                         return(retval);
2740                 }
2741         }
2742         fclose(fp);
2743         return 0;
2744 }
2745
2746 void re_initialize(void)
2747 {
2748         free_all_buffers();
2749         setup_all_buffers();
2750         printf("turbostat: re-initialized with num_cpus %d\n", topo.num_cpus);
2751 }
2752
2753 void set_max_cpu_num(void)
2754 {
2755         FILE *filep;
2756         unsigned long dummy;
2757
2758         topo.max_cpu_num = 0;
2759         filep = fopen_or_die(
2760                         "/sys/devices/system/cpu/cpu0/topology/thread_siblings",
2761                         "r");
2762         while (fscanf(filep, "%lx,", &dummy) == 1)
2763                 topo.max_cpu_num += BITMASK_SIZE;
2764         fclose(filep);
2765         topo.max_cpu_num--; /* 0 based */
2766 }
2767
2768 /*
2769  * count_cpus()
2770  * remember the last one seen, it will be the max
2771  */
2772 int count_cpus(int cpu)
2773 {
2774         topo.num_cpus++;
2775         return 0;
2776 }
2777 int mark_cpu_present(int cpu)
2778 {
2779         CPU_SET_S(cpu, cpu_present_setsize, cpu_present_set);
2780         return 0;
2781 }
2782
2783 int init_thread_id(int cpu)
2784 {
2785         cpus[cpu].thread_id = -1;
2786         return 0;
2787 }
2788
2789 /*
2790  * snapshot_proc_interrupts()
2791  *
2792  * read and record summary of /proc/interrupts
2793  *
2794  * return 1 if config change requires a restart, else return 0
2795  */
2796 int snapshot_proc_interrupts(void)
2797 {
2798         static FILE *fp;
2799         int column, retval;
2800
2801         if (fp == NULL)
2802                 fp = fopen_or_die("/proc/interrupts", "r");
2803         else
2804                 rewind(fp);
2805
2806         /* read 1st line of /proc/interrupts to get cpu* name for each column */
2807         for (column = 0; column < topo.num_cpus; ++column) {
2808                 int cpu_number;
2809
2810                 retval = fscanf(fp, " CPU%d", &cpu_number);
2811                 if (retval != 1)
2812                         break;
2813
2814                 if (cpu_number > topo.max_cpu_num) {
2815                         warn("/proc/interrupts: cpu%d: > %d", cpu_number, topo.max_cpu_num);
2816                         return 1;
2817                 }
2818
2819                 irq_column_2_cpu[column] = cpu_number;
2820                 irqs_per_cpu[cpu_number] = 0;
2821         }
2822
2823         /* read /proc/interrupt count lines and sum up irqs per cpu */
2824         while (1) {
2825                 int column;
2826                 char buf[64];
2827
2828                 retval = fscanf(fp, " %s:", buf);       /* flush irq# "N:" */
2829                 if (retval != 1)
2830                         break;
2831
2832                 /* read the count per cpu */
2833                 for (column = 0; column < topo.num_cpus; ++column) {
2834
2835                         int cpu_number, irq_count;
2836
2837                         retval = fscanf(fp, " %d", &irq_count);
2838                         if (retval != 1)
2839                                 break;
2840
2841                         cpu_number = irq_column_2_cpu[column];
2842                         irqs_per_cpu[cpu_number] += irq_count;
2843
2844                 }
2845
2846                 while (getc(fp) != '\n')
2847                         ;       /* flush interrupt description */
2848
2849         }
2850         return 0;
2851 }
2852 /*
2853  * snapshot_gfx_rc6_ms()
2854  *
2855  * record snapshot of
2856  * /sys/class/drm/card0/power/rc6_residency_ms
2857  *
2858  * return 1 if config change requires a restart, else return 0
2859  */
2860 int snapshot_gfx_rc6_ms(void)
2861 {
2862         FILE *fp;
2863         int retval;
2864
2865         fp = fopen_or_die("/sys/class/drm/card0/power/rc6_residency_ms", "r");
2866
2867         retval = fscanf(fp, "%lld", &gfx_cur_rc6_ms);
2868         if (retval != 1)
2869                 err(1, "GFX rc6");
2870
2871         fclose(fp);
2872
2873         return 0;
2874 }
2875 /*
2876  * snapshot_gfx_mhz()
2877  *
2878  * record snapshot of
2879  * /sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz
2880  *
2881  * return 1 if config change requires a restart, else return 0
2882  */
2883 int snapshot_gfx_mhz(void)
2884 {
2885         static FILE *fp;
2886         int retval;
2887
2888         if (fp == NULL)
2889                 fp = fopen_or_die("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", "r");
2890         else {
2891                 rewind(fp);
2892                 fflush(fp);
2893         }
2894
2895         retval = fscanf(fp, "%d", &gfx_cur_mhz);
2896         if (retval != 1)
2897                 err(1, "GFX MHz");
2898
2899         return 0;
2900 }
2901
2902 /*
2903  * snapshot_cpu_lpi()
2904  *
2905  * record snapshot of
2906  * /sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us
2907  *
2908  * return 1 if config change requires a restart, else return 0
2909  */
2910 int snapshot_cpu_lpi_us(void)
2911 {
2912         FILE *fp;
2913         int retval;
2914
2915         fp = fopen_or_die("/sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us", "r");
2916
2917         retval = fscanf(fp, "%lld", &cpuidle_cur_cpu_lpi_us);
2918         if (retval != 1) {
2919                 fprintf(stderr, "Disabling Low Power Idle CPU output\n");
2920                 BIC_NOT_PRESENT(BIC_CPU_LPI);
2921                 fclose(fp);
2922                 return -1;
2923         }
2924
2925         fclose(fp);
2926
2927         return 0;
2928 }
2929 /*
2930  * snapshot_sys_lpi()
2931  *
2932  * record snapshot of
2933  * /sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us
2934  *
2935  * return 1 if config change requires a restart, else return 0
2936  */
2937 int snapshot_sys_lpi_us(void)
2938 {
2939         FILE *fp;
2940         int retval;
2941
2942         fp = fopen_or_die("/sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us", "r");
2943
2944         retval = fscanf(fp, "%lld", &cpuidle_cur_sys_lpi_us);
2945         if (retval != 1) {
2946                 fprintf(stderr, "Disabling Low Power Idle System output\n");
2947                 BIC_NOT_PRESENT(BIC_SYS_LPI);
2948                 fclose(fp);
2949                 return -1;
2950         }
2951         fclose(fp);
2952
2953         return 0;
2954 }
2955 /*
2956  * snapshot /proc and /sys files
2957  *
2958  * return 1 if configuration restart needed, else return 0
2959  */
2960 int snapshot_proc_sysfs_files(void)
2961 {
2962         if (DO_BIC(BIC_IRQ))
2963                 if (snapshot_proc_interrupts())
2964                         return 1;
2965
2966         if (DO_BIC(BIC_GFX_rc6))
2967                 snapshot_gfx_rc6_ms();
2968
2969         if (DO_BIC(BIC_GFXMHz))
2970                 snapshot_gfx_mhz();
2971
2972         if (DO_BIC(BIC_CPU_LPI))
2973                 snapshot_cpu_lpi_us();
2974
2975         if (DO_BIC(BIC_SYS_LPI))
2976                 snapshot_sys_lpi_us();
2977
2978         return 0;
2979 }
2980
2981 int exit_requested;
2982
2983 static void signal_handler (int signal)
2984 {
2985         switch (signal) {
2986         case SIGINT:
2987                 exit_requested = 1;
2988                 if (debug)
2989                         fprintf(stderr, " SIGINT\n");
2990                 break;
2991         case SIGUSR1:
2992                 if (debug > 1)
2993                         fprintf(stderr, "SIGUSR1\n");
2994                 break;
2995         }
2996         /* make sure this manually-invoked interval is at least 1ms long */
2997         nanosleep(&one_msec, NULL);
2998 }
2999
3000 void setup_signal_handler(void)
3001 {
3002         struct sigaction sa;
3003
3004         memset(&sa, 0, sizeof(sa));
3005
3006         sa.sa_handler = &signal_handler;
3007
3008         if (sigaction(SIGINT, &sa, NULL) < 0)
3009                 err(1, "sigaction SIGINT");
3010         if (sigaction(SIGUSR1, &sa, NULL) < 0)
3011                 err(1, "sigaction SIGUSR1");
3012 }
3013
3014 void do_sleep(void)
3015 {
3016         struct timeval select_timeout;
3017         fd_set readfds;
3018         int retval;
3019
3020         FD_ZERO(&readfds);
3021         FD_SET(0, &readfds);
3022
3023         if (!isatty(fileno(stdin))) {
3024                 nanosleep(&interval_ts, NULL);
3025                 return;
3026         }
3027
3028         select_timeout = interval_tv;
3029         retval = select(1, &readfds, NULL, NULL, &select_timeout);
3030
3031         if (retval == 1) {
3032                 switch (getc(stdin)) {
3033                 case 'q':
3034                         exit_requested = 1;
3035                         break;
3036                 }
3037                 /* make sure this manually-invoked interval is at least 1ms long */
3038                 nanosleep(&one_msec, NULL);
3039         }
3040 }
3041
3042
3043 void turbostat_loop()
3044 {
3045         int retval;
3046         int restarted = 0;
3047         int done_iters = 0;
3048
3049         setup_signal_handler();
3050
3051 restart:
3052         restarted++;
3053
3054         snapshot_proc_sysfs_files();
3055         retval = for_all_cpus(get_counters, EVEN_COUNTERS);
3056         first_counter_read = 0;
3057         if (retval < -1) {
3058                 exit(retval);
3059         } else if (retval == -1) {
3060                 if (restarted > 1) {
3061                         exit(retval);
3062                 }
3063                 re_initialize();
3064                 goto restart;
3065         }
3066         restarted = 0;
3067         done_iters = 0;
3068         gettimeofday(&tv_even, (struct timezone *)NULL);
3069
3070         while (1) {
3071                 if (for_all_proc_cpus(cpu_is_not_present)) {
3072                         re_initialize();
3073                         goto restart;
3074                 }
3075                 do_sleep();
3076                 if (snapshot_proc_sysfs_files())
3077                         goto restart;
3078                 retval = for_all_cpus(get_counters, ODD_COUNTERS);
3079                 if (retval < -1) {
3080                         exit(retval);
3081                 } else if (retval == -1) {
3082                         re_initialize();
3083                         goto restart;
3084                 }
3085                 gettimeofday(&tv_odd, (struct timezone *)NULL);
3086                 timersub(&tv_odd, &tv_even, &tv_delta);
3087                 if (for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS)) {
3088                         re_initialize();
3089                         goto restart;
3090                 }
3091                 compute_average(EVEN_COUNTERS);
3092                 format_all_counters(EVEN_COUNTERS);
3093                 flush_output_stdout();
3094                 if (exit_requested)
3095                         break;
3096                 if (num_iterations && ++done_iters >= num_iterations)
3097                         break;
3098                 do_sleep();
3099                 if (snapshot_proc_sysfs_files())
3100                         goto restart;
3101                 retval = for_all_cpus(get_counters, EVEN_COUNTERS);
3102                 if (retval < -1) {
3103                         exit(retval);
3104                 } else if (retval == -1) {
3105                         re_initialize();
3106                         goto restart;
3107                 }
3108                 gettimeofday(&tv_even, (struct timezone *)NULL);
3109                 timersub(&tv_even, &tv_odd, &tv_delta);
3110                 if (for_all_cpus_2(delta_cpu, EVEN_COUNTERS, ODD_COUNTERS)) {
3111                         re_initialize();
3112                         goto restart;
3113                 }
3114                 compute_average(ODD_COUNTERS);
3115                 format_all_counters(ODD_COUNTERS);
3116                 flush_output_stdout();
3117                 if (exit_requested)
3118                         break;
3119                 if (num_iterations && ++done_iters >= num_iterations)
3120                         break;
3121         }
3122 }
3123
3124 void check_dev_msr()
3125 {
3126         struct stat sb;
3127         char pathname[32];
3128
3129         sprintf(pathname, "/dev/cpu/%d/msr", base_cpu);
3130         if (stat(pathname, &sb))
3131                 if (system("/sbin/modprobe msr > /dev/null 2>&1"))
3132                         err(-5, "no /dev/cpu/0/msr, Try \"# modprobe msr\" ");
3133 }
3134
3135 void check_permissions()
3136 {
3137         struct __user_cap_header_struct cap_header_data;
3138         cap_user_header_t cap_header = &cap_header_data;
3139         struct __user_cap_data_struct cap_data_data;
3140         cap_user_data_t cap_data = &cap_data_data;
3141         extern int capget(cap_user_header_t hdrp, cap_user_data_t datap);
3142         int do_exit = 0;
3143         char pathname[32];
3144
3145         /* check for CAP_SYS_RAWIO */
3146         cap_header->pid = getpid();
3147         cap_header->version = _LINUX_CAPABILITY_VERSION;
3148         if (capget(cap_header, cap_data) < 0)
3149                 err(-6, "capget(2) failed");
3150
3151         if ((cap_data->effective & (1 << CAP_SYS_RAWIO)) == 0) {
3152                 do_exit++;
3153                 warnx("capget(CAP_SYS_RAWIO) failed,"
3154                         " try \"# setcap cap_sys_rawio=ep %s\"", progname);
3155         }
3156
3157         /* test file permissions */
3158         sprintf(pathname, "/dev/cpu/%d/msr", base_cpu);
3159         if (euidaccess(pathname, R_OK)) {
3160                 do_exit++;
3161                 warn("/dev/cpu/0/msr open failed, try chown or chmod +r /dev/cpu/*/msr");
3162         }
3163
3164         /* if all else fails, thell them to be root */
3165         if (do_exit)
3166                 if (getuid() != 0)
3167                         warnx("... or simply run as root");
3168
3169         if (do_exit)
3170                 exit(-6);
3171 }
3172
3173 /*
3174  * NHM adds support for additional MSRs:
3175  *
3176  * MSR_SMI_COUNT                   0x00000034
3177  *
3178  * MSR_PLATFORM_INFO               0x000000ce
3179  * MSR_PKG_CST_CONFIG_CONTROL     0x000000e2
3180  *
3181  * MSR_MISC_PWR_MGMT               0x000001aa
3182  *
3183  * MSR_PKG_C3_RESIDENCY            0x000003f8
3184  * MSR_PKG_C6_RESIDENCY            0x000003f9
3185  * MSR_CORE_C3_RESIDENCY           0x000003fc
3186  * MSR_CORE_C6_RESIDENCY           0x000003fd
3187  *
3188  * Side effect:
3189  * sets global pkg_cstate_limit to decode MSR_PKG_CST_CONFIG_CONTROL
3190  * sets has_misc_feature_control
3191  */
3192 int probe_nhm_msrs(unsigned int family, unsigned int model)
3193 {
3194         unsigned long long msr;
3195         unsigned int base_ratio;
3196         int *pkg_cstate_limits;
3197
3198         if (!genuine_intel)
3199                 return 0;
3200
3201         if (family != 6)
3202                 return 0;
3203
3204         bclk = discover_bclk(family, model);
3205
3206         switch (model) {
3207         case INTEL_FAM6_NEHALEM:        /* Core i7 and i5 Processor - Clarksfield, Lynnfield, Jasper Forest */
3208         case INTEL_FAM6_NEHALEM_EX:     /* Nehalem-EX Xeon - Beckton */
3209                 pkg_cstate_limits = nhm_pkg_cstate_limits;
3210                 break;
3211         case INTEL_FAM6_SANDYBRIDGE:    /* SNB */
3212         case INTEL_FAM6_SANDYBRIDGE_X:  /* SNB Xeon */
3213         case INTEL_FAM6_IVYBRIDGE:      /* IVB */
3214         case INTEL_FAM6_IVYBRIDGE_X:    /* IVB Xeon */
3215                 pkg_cstate_limits = snb_pkg_cstate_limits;
3216                 has_misc_feature_control = 1;
3217                 break;
3218         case INTEL_FAM6_HASWELL_CORE:   /* HSW */
3219         case INTEL_FAM6_HASWELL_X:      /* HSX */
3220         case INTEL_FAM6_HASWELL_ULT:    /* HSW */
3221         case INTEL_FAM6_HASWELL_GT3E:   /* HSW */
3222         case INTEL_FAM6_BROADWELL_CORE: /* BDW */
3223         case INTEL_FAM6_BROADWELL_GT3E: /* BDW */
3224         case INTEL_FAM6_BROADWELL_X:    /* BDX */
3225         case INTEL_FAM6_SKYLAKE_MOBILE: /* SKL */
3226         case INTEL_FAM6_CANNONLAKE_MOBILE:      /* CNL */
3227                 pkg_cstate_limits = hsw_pkg_cstate_limits;
3228                 has_misc_feature_control = 1;
3229                 break;
3230         case INTEL_FAM6_SKYLAKE_X:      /* SKX */
3231                 pkg_cstate_limits = skx_pkg_cstate_limits;
3232                 has_misc_feature_control = 1;
3233                 break;
3234         case INTEL_FAM6_ATOM_SILVERMONT:        /* BYT */
3235                 no_MSR_MISC_PWR_MGMT = 1;
3236         case INTEL_FAM6_ATOM_SILVERMONT_X:      /* AVN */
3237                 pkg_cstate_limits = slv_pkg_cstate_limits;
3238                 break;
3239         case INTEL_FAM6_ATOM_AIRMONT:   /* AMT */
3240                 pkg_cstate_limits = amt_pkg_cstate_limits;
3241                 no_MSR_MISC_PWR_MGMT = 1;
3242                 break;
3243         case INTEL_FAM6_XEON_PHI_KNL:   /* PHI */
3244                 pkg_cstate_limits = phi_pkg_cstate_limits;
3245                 break;
3246         case INTEL_FAM6_ATOM_GOLDMONT:  /* BXT */
3247         case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
3248         case INTEL_FAM6_ATOM_GOLDMONT_X:        /* DNV */
3249                 pkg_cstate_limits = glm_pkg_cstate_limits;
3250                 break;
3251         default:
3252                 return 0;
3253         }
3254         get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr);
3255         pkg_cstate_limit = pkg_cstate_limits[msr & 0xF];
3256
3257         get_msr(base_cpu, MSR_PLATFORM_INFO, &msr);
3258         base_ratio = (msr >> 8) & 0xFF;
3259
3260         base_hz = base_ratio * bclk * 1000000;
3261         has_base_hz = 1;
3262         return 1;
3263 }
3264 /*
3265  * SLV client has support for unique MSRs:
3266  *
3267  * MSR_CC6_DEMOTION_POLICY_CONFIG
3268  * MSR_MC6_DEMOTION_POLICY_CONFIG
3269  */
3270
3271 int has_slv_msrs(unsigned int family, unsigned int model)
3272 {
3273         if (!genuine_intel)
3274                 return 0;
3275
3276         switch (model) {
3277         case INTEL_FAM6_ATOM_SILVERMONT:
3278         case INTEL_FAM6_ATOM_SILVERMONT_MID:
3279         case INTEL_FAM6_ATOM_AIRMONT_MID:
3280                 return 1;
3281         }
3282         return 0;
3283 }
3284 int is_dnv(unsigned int family, unsigned int model)
3285 {
3286
3287         if (!genuine_intel)
3288                 return 0;
3289
3290         switch (model) {
3291         case INTEL_FAM6_ATOM_GOLDMONT_X:
3292                 return 1;
3293         }
3294         return 0;
3295 }
3296 int is_bdx(unsigned int family, unsigned int model)
3297 {
3298
3299         if (!genuine_intel)
3300                 return 0;
3301
3302         switch (model) {
3303         case INTEL_FAM6_BROADWELL_X:
3304                 return 1;
3305         }
3306         return 0;
3307 }
3308 int is_skx(unsigned int family, unsigned int model)
3309 {
3310
3311         if (!genuine_intel)
3312                 return 0;
3313
3314         switch (model) {
3315         case INTEL_FAM6_SKYLAKE_X:
3316                 return 1;
3317         }
3318         return 0;
3319 }
3320
3321 int has_turbo_ratio_limit(unsigned int family, unsigned int model)
3322 {
3323         if (has_slv_msrs(family, model))
3324                 return 0;
3325
3326         switch (model) {
3327         /* Nehalem compatible, but do not include turbo-ratio limit support */
3328         case INTEL_FAM6_NEHALEM_EX:     /* Nehalem-EX Xeon - Beckton */
3329         case INTEL_FAM6_XEON_PHI_KNL:   /* PHI - Knights Landing (different MSR definition) */
3330                 return 0;
3331         default:
3332                 return 1;
3333         }
3334 }
/*
 * has_atom_turbo_ratio_limit()
 * Returns 1 exactly when has_slv_msrs() reports the model, otherwise 0.
 */
int has_atom_turbo_ratio_limit(unsigned int family, unsigned int model)
{
	return has_slv_msrs(family, model) ? 1 : 0;
}
3342 int has_ivt_turbo_ratio_limit(unsigned int family, unsigned int model)
3343 {
3344         if (!genuine_intel)
3345                 return 0;
3346
3347         if (family != 6)
3348                 return 0;
3349
3350         switch (model) {
3351         case INTEL_FAM6_IVYBRIDGE_X:    /* IVB Xeon */
3352         case INTEL_FAM6_HASWELL_X:      /* HSW Xeon */
3353                 return 1;
3354         default:
3355                 return 0;
3356         }
3357 }
3358 int has_hsw_turbo_ratio_limit(unsigned int family, unsigned int model)
3359 {
3360         if (!genuine_intel)
3361                 return 0;
3362
3363         if (family != 6)
3364                 return 0;
3365
3366         switch (model) {
3367         case INTEL_FAM6_HASWELL_X:      /* HSW Xeon */
3368                 return 1;
3369         default:
3370                 return 0;
3371         }
3372 }
3373
3374 int has_knl_turbo_ratio_limit(unsigned int family, unsigned int model)
3375 {
3376         if (!genuine_intel)
3377                 return 0;
3378
3379         if (family != 6)
3380                 return 0;
3381
3382         switch (model) {
3383         case INTEL_FAM6_XEON_PHI_KNL:   /* Knights Landing */
3384                 return 1;
3385         default:
3386                 return 0;
3387         }
3388 }
3389 int has_glm_turbo_ratio_limit(unsigned int family, unsigned int model)
3390 {
3391         if (!genuine_intel)
3392                 return 0;
3393
3394         if (family != 6)
3395                 return 0;
3396
3397         switch (model) {
3398         case INTEL_FAM6_ATOM_GOLDMONT:
3399         case INTEL_FAM6_SKYLAKE_X:
3400                 return 1;
3401         default:
3402                 return 0;
3403         }
3404 }
3405 int has_config_tdp(unsigned int family, unsigned int model)
3406 {
3407         if (!genuine_intel)
3408                 return 0;
3409
3410         if (family != 6)
3411                 return 0;
3412
3413         switch (model) {
3414         case INTEL_FAM6_IVYBRIDGE:      /* IVB */
3415         case INTEL_FAM6_HASWELL_CORE:   /* HSW */
3416         case INTEL_FAM6_HASWELL_X:      /* HSX */
3417         case INTEL_FAM6_HASWELL_ULT:    /* HSW */
3418         case INTEL_FAM6_HASWELL_GT3E:   /* HSW */
3419         case INTEL_FAM6_BROADWELL_CORE: /* BDW */
3420         case INTEL_FAM6_BROADWELL_GT3E: /* BDW */
3421         case INTEL_FAM6_BROADWELL_X:    /* BDX */
3422         case INTEL_FAM6_SKYLAKE_MOBILE: /* SKL */
3423         case INTEL_FAM6_CANNONLAKE_MOBILE:      /* CNL */
3424         case INTEL_FAM6_SKYLAKE_X:      /* SKX */
3425
3426         case INTEL_FAM6_XEON_PHI_KNL:   /* Knights Landing */
3427                 return 1;
3428         default:
3429                 return 0;
3430         }
3431 }
3432
3433 static void
3434 dump_cstate_pstate_config_info(unsigned int family, unsigned int model)
3435 {
3436         if (!do_nhm_platform_info)
3437                 return;
3438
3439         dump_nhm_platform_info();
3440
3441         if (has_hsw_turbo_ratio_limit(family, model))
3442                 dump_hsw_turbo_ratio_limits();
3443
3444         if (has_ivt_turbo_ratio_limit(family, model))
3445                 dump_ivt_turbo_ratio_limits();
3446
3447         if (has_turbo_ratio_limit(family, model))
3448                 dump_turbo_ratio_limits(family, model);
3449
3450         if (has_atom_turbo_ratio_limit(family, model))
3451                 dump_atom_turbo_ratio_limits();
3452
3453         if (has_knl_turbo_ratio_limit(family, model))
3454                 dump_knl_turbo_ratio_limits();
3455
3456         if (has_config_tdp(family, model))
3457                 dump_config_tdp();
3458
3459         dump_nhm_cst_cfg();
3460 }
3461
3462 static void
3463 dump_sysfs_cstate_config(void)
3464 {
3465         char path[64];
3466         char name_buf[16];
3467         char desc[64];
3468         FILE *input;
3469         int state;
3470         char *sp;
3471
3472         if (!DO_BIC(BIC_sysfs))
3473                 return;
3474
3475         for (state = 0; state < 10; ++state) {
3476
3477                 sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name",
3478                         base_cpu, state);
3479                 input = fopen(path, "r");
3480                 if (input == NULL)
3481                         continue;
3482                 if (!fgets(name_buf, sizeof(name_buf), input))
3483                         err(1, "%s: failed to read file", path);
3484
3485                  /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */
3486                 sp = strchr(name_buf, '-');
3487                 if (!sp)
3488                         sp = strchrnul(name_buf, '\n');
3489                 *sp = '\0';
3490                 fclose(input);
3491
3492                 sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/desc",
3493                         base_cpu, state);
3494                 input = fopen(path, "r");
3495                 if (input == NULL)
3496                         continue;
3497                 if (!fgets(desc, sizeof(desc), input))
3498                         err(1, "%s: failed to read file", path);
3499
3500                 fprintf(outf, "cpu%d: %s: %s", base_cpu, name_buf, desc);
3501                 fclose(input);
3502         }
3503 }
3504 static void
3505 dump_sysfs_pstate_config(void)
3506 {
3507         char path[64];
3508         char driver_buf[64];
3509         char governor_buf[64];
3510         FILE *input;
3511         int turbo;
3512
3513         sprintf(path, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_driver",
3514                         base_cpu);
3515         input = fopen(path, "r");
3516         if (input == NULL) {
3517                 fprintf(outf, "NSFOD %s\n", path);
3518                 return;
3519         }
3520         if (!fgets(driver_buf, sizeof(driver_buf), input))
3521                 err(1, "%s: failed to read file", path);
3522         fclose(input);
3523
3524         sprintf(path, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_governor",
3525                         base_cpu);
3526         input = fopen(path, "r");
3527         if (input == NULL) {
3528                 fprintf(outf, "NSFOD %s\n", path);
3529                 return;
3530         }
3531         if (!fgets(governor_buf, sizeof(governor_buf), input))
3532                 err(1, "%s: failed to read file", path);
3533         fclose(input);
3534
3535         fprintf(outf, "cpu%d: cpufreq driver: %s", base_cpu, driver_buf);
3536         fprintf(outf, "cpu%d: cpufreq governor: %s", base_cpu, governor_buf);
3537
3538         sprintf(path, "/sys/devices/system/cpu/cpufreq/boost");
3539         input = fopen(path, "r");
3540         if (input != NULL) {
3541                 if (fscanf(input, "%d", &turbo) != 1)
3542                         err(1, "%s: failed to parse number from file", path);
3543                 fprintf(outf, "cpufreq boost: %d\n", turbo);
3544                 fclose(input);
3545         }
3546
3547         sprintf(path, "/sys/devices/system/cpu/intel_pstate/no_turbo");
3548         input = fopen(path, "r");
3549         if (input != NULL) {
3550                 if (fscanf(input, "%d", &turbo) != 1)
3551                         err(1, "%s: failed to parse number from file", path);
3552                 fprintf(outf, "cpufreq intel_pstate no_turbo: %d\n", turbo);
3553                 fclose(input);
3554         }
3555 }
3556
3557
3558 /*
3559  * print_epb()
3560  * Decode the ENERGY_PERF_BIAS MSR
3561  */
3562 int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p)
3563 {
3564         unsigned long long msr;
3565         char *epb_string;
3566         int cpu;
3567
3568         if (!has_epb)
3569                 return 0;
3570
3571         cpu = t->cpu_id;
3572
3573         /* EPB is per-package */
3574         if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
3575                 return 0;
3576
3577         if (cpu_migrate(cpu)) {
3578                 fprintf(outf, "Could not migrate to CPU %d\n", cpu);
3579                 return -1;
3580         }
3581
3582         if (get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, &msr))
3583                 return 0;
3584
3585         switch (msr & 0xF) {
3586         case ENERGY_PERF_BIAS_PERFORMANCE:
3587                 epb_string = "performance";
3588                 break;
3589         case ENERGY_PERF_BIAS_NORMAL:
3590                 epb_string = "balanced";
3591                 break;
3592         case ENERGY_PERF_BIAS_POWERSAVE:
3593                 epb_string = "powersave";
3594                 break;
3595         default:
3596                 epb_string = "custom";
3597                 break;
3598         }
3599         fprintf(outf, "cpu%d: MSR_IA32_ENERGY_PERF_BIAS: 0x%08llx (%s)\n", cpu, msr, epb_string);
3600
3601         return 0;
3602 }
3603 /*
3604  * print_hwp()
3605  * Decode the MSR_HWP_CAPABILITIES
3606  */
3607 int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p)
3608 {
3609         unsigned long long msr;
3610         int cpu;
3611
3612         if (!has_hwp)
3613                 return 0;
3614
3615         cpu = t->cpu_id;
3616
3617         /* MSR_HWP_CAPABILITIES is per-package */
3618         if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
3619                 return 0;
3620
3621         if (cpu_migrate(cpu)) {
3622                 fprintf(outf, "Could not migrate to CPU %d\n", cpu);
3623                 return -1;
3624         }
3625
3626         if (get_msr(cpu, MSR_PM_ENABLE, &msr))
3627                 return 0;
3628
3629         fprintf(outf, "cpu%d: MSR_PM_ENABLE: 0x%08llx (%sHWP)\n",
3630                 cpu, msr, (msr & (1 << 0)) ? "" : "No-");
3631
3632         /* MSR_PM_ENABLE[1] == 1 if HWP is enabled and MSRs visible */
3633         if ((msr & (1 << 0)) == 0)
3634                 return 0;
3635
3636         if (get_msr(cpu, MSR_HWP_CAPABILITIES, &msr))
3637                 return 0;
3638
3639         fprintf(outf, "cpu%d: MSR_HWP_CAPABILITIES: 0x%08llx "
3640                         "(high %d guar %d eff %d low %d)\n",
3641                         cpu, msr,
3642                         (unsigned int)HWP_HIGHEST_PERF(msr),
3643                         (unsigned int)HWP_GUARANTEED_PERF(msr),
3644                         (unsigned int)HWP_MOSTEFFICIENT_PERF(msr),
3645                         (unsigned int)HWP_LOWEST_PERF(msr));
3646
3647         if (get_msr(cpu, MSR_HWP_REQUEST, &msr))
3648                 return 0;
3649
3650         fprintf(outf, "cpu%d: MSR_HWP_REQUEST: 0x%08llx "
3651                         "(min %d max %d des %d epp 0x%x window 0x%x pkg 0x%x)\n",
3652                         cpu, msr,
3653                         (unsigned int)(((msr) >> 0) & 0xff),
3654                         (unsigned int)(((msr) >> 8) & 0xff),
3655                         (unsigned int)(((msr) >> 16) & 0xff),
3656                         (unsigned int)(((msr) >> 24) & 0xff),
3657                         (unsigned int)(((msr) >> 32) & 0xff3),
3658                         (unsigned int)(((msr) >> 42) & 0x1));
3659
3660         if (has_hwp_pkg) {
3661                 if (get_msr(cpu, MSR_HWP_REQUEST_PKG, &msr))
3662                         return 0;
3663
3664                 fprintf(outf, "cpu%d: MSR_HWP_REQUEST_PKG: 0x%08llx "
3665                         "(min %d max %d des %d epp 0x%x window 0x%x)\n",
3666                         cpu, msr,
3667                         (unsigned int)(((msr) >> 0) & 0xff),
3668                         (unsigned int)(((msr) >> 8) & 0xff),
3669                         (unsigned int)(((msr) >> 16) & 0xff),
3670                         (unsigned int)(((msr) >> 24) & 0xff),
3671                         (unsigned int)(((msr) >> 32) & 0xff3));
3672         }
3673         if (has_hwp_notify) {
3674                 if (get_msr(cpu, MSR_HWP_INTERRUPT, &msr))
3675                         return 0;
3676
3677                 fprintf(outf, "cpu%d: MSR_HWP_INTERRUPT: 0x%08llx "
3678                         "(%s_Guaranteed_Perf_Change, %s_Excursion_Min)\n",
3679                         cpu, msr,
3680                         ((msr) & 0x1) ? "EN" : "Dis",
3681                         ((msr) & 0x2) ? "EN" : "Dis");
3682         }
3683         if (get_msr(cpu, MSR_HWP_STATUS, &msr))
3684                 return 0;
3685
3686         fprintf(outf, "cpu%d: MSR_HWP_STATUS: 0x%08llx "
3687                         "(%sGuaranteed_Perf_Change, %sExcursion_Min)\n",
3688                         cpu, msr,
3689                         ((msr) & 0x1) ? "" : "No-",
3690                         ((msr) & 0x2) ? "" : "No-");
3691
3692         return 0;
3693 }
3694
3695 /*
3696  * print_perf_limit()
3697  */
3698 int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data *p)
3699 {
3700         unsigned long long msr;
3701         int cpu;
3702
3703         cpu = t->cpu_id;
3704
3705         /* per-package */
3706         if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
3707                 return 0;
3708
3709         if (cpu_migrate(cpu)) {
3710                 fprintf(outf, "Could not migrate to CPU %d\n", cpu);
3711                 return -1;
3712         }
3713
3714         if (do_core_perf_limit_reasons) {
3715                 get_msr(cpu, MSR_CORE_PERF_LIMIT_REASONS, &msr);
3716                 fprintf(outf, "cpu%d: MSR_CORE_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
3717                 fprintf(outf, " (Active: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)",
3718                         (msr & 1 << 15) ? "bit15, " : "",
3719                         (msr & 1 << 14) ? "bit14, " : "",
3720                         (msr & 1 << 13) ? "Transitions, " : "",
3721                         (msr & 1 << 12) ? "MultiCoreTurbo, " : "",
3722                         (msr & 1 << 11) ? "PkgPwrL2, " : "",
3723                         (msr & 1 << 10) ? "PkgPwrL1, " : "",
3724                         (msr & 1 << 9) ? "CorePwr, " : "",
3725                         (msr & 1 << 8) ? "Amps, " : "",
3726                         (msr & 1 << 6) ? "VR-Therm, " : "",
3727                         (msr & 1 << 5) ? "Auto-HWP, " : "",
3728                         (msr & 1 << 4) ? "Graphics, " : "",
3729                         (msr & 1 << 2) ? "bit2, " : "",
3730                         (msr & 1 << 1) ? "ThermStatus, " : "",
3731                         (msr & 1 << 0) ? "PROCHOT, " : "");
3732                 fprintf(outf, " (Logged: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)\n",
3733                         (msr & 1 << 31) ? "bit31, " : "",
3734                         (msr & 1 << 30) ? "bit30, " : "",
3735                         (msr & 1 << 29) ? "Transitions, " : "",
3736                         (msr & 1 << 28) ? "MultiCoreTurbo, " : "",
3737                         (msr & 1 << 27) ? "PkgPwrL2, " : "",
3738                         (msr & 1 << 26) ? "PkgPwrL1, " : "",
3739                         (msr & 1 << 25) ? "CorePwr, " : "",
3740                         (msr & 1 << 24) ? "Amps, " : "",
3741                         (msr & 1 << 22) ? "VR-Therm, " : "",
3742                         (msr & 1 << 21) ? "Auto-HWP, " : "",
3743                         (msr & 1 << 20) ? "Graphics, " : "",
3744                         (msr & 1 << 18) ? "bit18, " : "",
3745                         (msr & 1 << 17) ? "ThermStatus, " : "",
3746                         (msr & 1 << 16) ? "PROCHOT, " : "");
3747
3748         }
3749         if (do_gfx_perf_limit_reasons) {
3750                 get_msr(cpu, MSR_GFX_PERF_LIMIT_REASONS, &msr);
3751                 fprintf(outf, "cpu%d: MSR_GFX_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
3752                 fprintf(outf, " (Active: %s%s%s%s%s%s%s%s)",
3753                         (msr & 1 << 0) ? "PROCHOT, " : "",
3754                         (msr & 1 << 1) ? "ThermStatus, " : "",
3755                         (msr & 1 << 4) ? "Graphics, " : "",
3756                         (msr & 1 << 6) ? "VR-Therm, " : "",
3757                         (msr & 1 << 8) ? "Amps, " : "",
3758                         (msr & 1 << 9) ? "GFXPwr, " : "",
3759                         (msr & 1 << 10) ? "PkgPwrL1, " : "",
3760                         (msr & 1 << 11) ? "PkgPwrL2, " : "");
3761                 fprintf(outf, " (Logged: %s%s%s%s%s%s%s%s)\n",
3762                         (msr & 1 << 16) ? "PROCHOT, " : "",
3763                         (msr & 1 << 17) ? "ThermStatus, " : "",
3764                         (msr & 1 << 20) ? "Graphics, " : "",
3765                         (msr & 1 << 22) ? "VR-Therm, " : "",
3766                         (msr & 1 << 24) ? "Amps, " : "",
3767                         (msr & 1 << 25) ? "GFXPwr, " : "",
3768                         (msr & 1 << 26) ? "PkgPwrL1, " : "",
3769                         (msr & 1 << 27) ? "PkgPwrL2, " : "");
3770         }
3771         if (do_ring_perf_limit_reasons) {
3772                 get_msr(cpu, MSR_RING_PERF_LIMIT_REASONS, &msr);
3773                 fprintf(outf, "cpu%d: MSR_RING_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
3774                 fprintf(outf, " (Active: %s%s%s%s%s%s)",
3775                         (msr & 1 << 0) ? "PROCHOT, " : "",
3776                         (msr & 1 << 1) ? "ThermStatus, " : "",
3777                         (msr & 1 << 6) ? "VR-Therm, " : "",
3778                         (msr & 1 << 8) ? "Amps, " : "",
3779                         (msr & 1 << 10) ? "PkgPwrL1, " : "",
3780                         (msr & 1 << 11) ? "PkgPwrL2, " : "");
3781                 fprintf(outf, " (Logged: %s%s%s%s%s%s)\n",
3782                         (msr & 1 << 16) ? "PROCHOT, " : "",
3783                         (msr & 1 << 17) ? "ThermStatus, " : "",
3784                         (msr & 1 << 22) ? "VR-Therm, " : "",
3785                         (msr & 1 << 24) ? "Amps, " : "",
3786                         (msr & 1 << 26) ? "PkgPwrL1, " : "",
3787                         (msr & 1 << 27) ? "PkgPwrL2, " : "");
3788         }
3789         return 0;
3790 }
3791
3792 #define RAPL_POWER_GRANULARITY  0x7FFF  /* 15 bit power granularity */
3793 #define RAPL_TIME_GRANULARITY   0x3F /* 6 bit time granularity */
3794
3795 double get_tdp_intel(unsigned int model)
3796 {
3797         unsigned long long msr;
3798
3799         if (do_rapl & RAPL_PKG_POWER_INFO)
3800                 if (!get_msr(base_cpu, MSR_PKG_POWER_INFO, &msr))
3801                         return ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units;
3802
3803         switch (model) {
3804         case INTEL_FAM6_ATOM_SILVERMONT:
3805         case INTEL_FAM6_ATOM_SILVERMONT_X:
3806                 return 30.0;
3807         default:
3808                 return 135.0;
3809         }
3810 }
3811
/*
 * get_tdp_amd()
 * Returns the TDP in Watts assumed for AMD processors.  Family 0x17 and
 * every other family currently share the same value.
 */
double get_tdp_amd(unsigned int family)
{
	/* This is the max stock TDP of HEDT/Server Fam17h chips */
	const double fam17h_max_tdp = 250.0;

	if (family == 0x17)
		return fam17h_max_tdp;

	/* all other families use the same figure */
	return fam17h_max_tdp;
}
3821
3822 /*
3823  * rapl_dram_energy_units_probe()
3824  * Energy units are either hard-coded, or come from RAPL Energy Unit MSR.
3825  */
3826 static double
3827 rapl_dram_energy_units_probe(int  model, double rapl_energy_units)
3828 {
3829         /* only called for genuine_intel, family 6 */
3830
3831         switch (model) {
3832         case INTEL_FAM6_HASWELL_X:      /* HSX */
3833         case INTEL_FAM6_BROADWELL_X:    /* BDX */
3834         case INTEL_FAM6_XEON_PHI_KNL:   /* KNL */
3835                 return (rapl_dram_energy_units = 15.3 / 1000000);
3836         default:
3837                 return (rapl_energy_units);
3838         }
3839 }
3840
3841 void rapl_probe_intel(unsigned int family, unsigned int model)
3842 {
3843         unsigned long long msr;
3844         unsigned int time_unit;
3845         double tdp;
3846
3847         if (family != 6)
3848                 return;
3849
3850         switch (model) {
3851         case INTEL_FAM6_SANDYBRIDGE:
3852         case INTEL_FAM6_IVYBRIDGE:
3853         case INTEL_FAM6_HASWELL_CORE:   /* HSW */
3854         case INTEL_FAM6_HASWELL_ULT:    /* HSW */
3855         case INTEL_FAM6_HASWELL_GT3E:   /* HSW */
3856         case INTEL_FAM6_BROADWELL_CORE: /* BDW */
3857         case INTEL_FAM6_BROADWELL_GT3E: /* BDW */
3858                 do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_GFX | RAPL_PKG_POWER_INFO;
3859                 if (rapl_joules) {
3860                         BIC_PRESENT(BIC_Pkg_J);
3861                         BIC_PRESENT(BIC_Cor_J);
3862                         BIC_PRESENT(BIC_GFX_J);
3863                 } else {
3864                         BIC_PRESENT(BIC_PkgWatt);
3865                         BIC_PRESENT(BIC_CorWatt);
3866                         BIC_PRESENT(BIC_GFXWatt);
3867                 }
3868                 break;
3869         case INTEL_FAM6_ATOM_GOLDMONT:  /* BXT */
3870         case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
3871                 do_rapl = RAPL_PKG | RAPL_PKG_POWER_INFO;
3872                 if (rapl_joules)
3873                         BIC_PRESENT(BIC_Pkg_J);
3874                 else
3875                         BIC_PRESENT(BIC_PkgWatt);
3876                 break;
3877         case INTEL_FAM6_SKYLAKE_MOBILE: /* SKL */
3878         case INTEL_FAM6_CANNONLAKE_MOBILE:      /* CNL */
3879                 do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_GFX | RAPL_PKG_POWER_INFO;
3880                 BIC_PRESENT(BIC_PKG__);
3881                 BIC_PRESENT(BIC_RAM__);
3882                 if (rapl_joules) {
3883                         BIC_PRESENT(BIC_Pkg_J);
3884                         BIC_PRESENT(BIC_Cor_J);
3885                         BIC_PRESENT(BIC_RAM_J);
3886                         BIC_PRESENT(BIC_GFX_J);
3887                 } else {
3888                         BIC_PRESENT(BIC_PkgWatt);
3889                         BIC_PRESENT(BIC_CorWatt);
3890                         BIC_PRESENT(BIC_RAMWatt);
3891                         BIC_PRESENT(BIC_GFXWatt);
3892                 }
3893                 break;
3894         case INTEL_FAM6_HASWELL_X:      /* HSX */
3895         case INTEL_FAM6_BROADWELL_X:    /* BDX */
3896         case INTEL_FAM6_SKYLAKE_X:      /* SKX */
3897         case INTEL_FAM6_XEON_PHI_KNL:   /* KNL */
3898                 do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO;
3899                 BIC_PRESENT(BIC_PKG__);
3900                 BIC_PRESENT(BIC_RAM__);
3901                 if (rapl_joules) {
3902                         BIC_PRESENT(BIC_Pkg_J);
3903                         BIC_PRESENT(BIC_RAM_J);
3904                 } else {
3905                         BIC_PRESENT(BIC_PkgWatt);
3906                         BIC_PRESENT(BIC_RAMWatt);
3907                 }
3908                 break;
3909         case INTEL_FAM6_SANDYBRIDGE_X:
3910         case INTEL_FAM6_IVYBRIDGE_X:
3911                 do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_PKG_PERF_STATUS | RAPL_DRAM_PERF_STATUS | RAPL_PKG_POWER_INFO;
3912                 BIC_PRESENT(BIC_PKG__);
3913                 BIC_PRESENT(BIC_RAM__);
3914                 if (rapl_joules) {
3915                         BIC_PRESENT(BIC_Pkg_J);
3916                         BIC_PRESENT(BIC_Cor_J);
3917                         BIC_PRESENT(BIC_RAM_J);
3918                 } else {
3919                         BIC_PRESENT(BIC_PkgWatt);
3920                         BIC_PRESENT(BIC_CorWatt);
3921                         BIC_PRESENT(BIC_RAMWatt);
3922                 }
3923                 break;
3924         case INTEL_FAM6_ATOM_SILVERMONT:        /* BYT */
3925         case INTEL_FAM6_ATOM_SILVERMONT_X:      /* AVN */
3926                 do_rapl = RAPL_PKG | RAPL_CORES;
3927                 if (rapl_joules) {
3928                         BIC_PRESENT(BIC_Pkg_J);
3929                         BIC_PRESENT(BIC_Cor_J);
3930                 } else {
3931                         BIC_PRESENT(BIC_PkgWatt);
3932                         BIC_PRESENT(BIC_CorWatt);
3933                 }
3934                 break;
3935         case INTEL_FAM6_ATOM_GOLDMONT_X:        /* DNV */
3936                 do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO | RAPL_CORES_ENERGY_STATUS;
3937                 BIC_PRESENT(BIC_PKG__);
3938                 BIC_PRESENT(BIC_RAM__);
3939                 if (rapl_joules) {
3940                         BIC_PRESENT(BIC_Pkg_J);
3941                         BIC_PRESENT(BIC_Cor_J);
3942                         BIC_PRESENT(BIC_RAM_J);
3943                 } else {
3944                         BIC_PRESENT(BIC_PkgWatt);
3945                         BIC_PRESENT(BIC_CorWatt);
3946                         BIC_PRESENT(BIC_RAMWatt);
3947                 }
3948                 break;
3949         default:
3950                 return;
3951         }
3952
3953         /* units on package 0, verify later other packages match */
3954         if (get_msr(base_cpu, MSR_RAPL_POWER_UNIT, &msr))
3955                 return;
3956
3957         rapl_power_units = 1.0 / (1 << (msr & 0xF));
3958         if (model == INTEL_FAM6_ATOM_SILVERMONT)
3959                 rapl_energy_units = 1.0 * (1 << (msr >> 8 & 0x1F)) / 1000000;
3960         else
3961                 rapl_energy_units = 1.0 / (1 << (msr >> 8 & 0x1F));
3962
3963         rapl_dram_energy_units = rapl_dram_energy_units_probe(model, rapl_energy_units);
3964
3965         time_unit = msr >> 16 & 0xF;
3966         if (time_unit == 0)
3967                 time_unit = 0xA;
3968
3969         rapl_time_units = 1.0 / (1 << (time_unit));
3970
3971         tdp = get_tdp_intel(model);
3972
3973         rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp;
3974         if (!quiet)
3975                 fprintf(outf, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp);
3976 }
3977
3978 void rapl_probe_amd(unsigned int family, unsigned int model)
3979 {
3980         unsigned long long msr;
3981         unsigned int eax, ebx, ecx, edx;
3982         unsigned int has_rapl = 0;
3983         double tdp;
3984
3985         if (max_extended_level >= 0x80000007) {
3986                 __cpuid(0x80000007, eax, ebx, ecx, edx);
3987                 /* RAPL (Fam 17h) */
3988                 has_rapl = edx & (1 << 14);
3989         }
3990
3991         if (!has_rapl)
3992                 return;
3993
3994         switch (family) {
3995         case 0x17: /* Zen, Zen+ */
3996                 do_rapl = RAPL_AMD_F17H | RAPL_PER_CORE_ENERGY;
3997                 if (rapl_joules) {
3998                         BIC_PRESENT(BIC_Pkg_J);
3999                         BIC_PRESENT(BIC_Cor_J);
4000                 } else {
4001                         BIC_PRESENT(BIC_PkgWatt);
4002                         BIC_PRESENT(BIC_CorWatt);
4003                 }
4004                 break;
4005         default:
4006                 return;
4007         }
4008
4009         if (get_msr(base_cpu, MSR_RAPL_PWR_UNIT, &msr))
4010                 return;
4011
4012         rapl_time_units = ldexp(1.0, -(msr >> 16 & 0xf));
4013         rapl_energy_units = ldexp(1.0, -(msr >> 8 & 0x1f));
4014         rapl_power_units = ldexp(1.0, -(msr & 0xf));
4015
4016         tdp = get_tdp_amd(model);
4017
4018         rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp;
4019         if (!quiet)
4020                 fprintf(outf, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp);
4021 }
4022
4023 /*
4024  * rapl_probe()
4025  *
4026  * sets do_rapl, rapl_power_units, rapl_energy_units, rapl_time_units
4027  */
4028 void rapl_probe(unsigned int family, unsigned int model)
4029 {
4030         if (genuine_intel)
4031                 rapl_probe_intel(family, model);
4032         if (authentic_amd)
4033                 rapl_probe_amd(family, model);
4034 }
4035
4036 void perf_limit_reasons_probe(unsigned int family, unsigned int model)
4037 {
4038         if (!genuine_intel)
4039                 return;
4040
4041         if (family != 6)
4042                 return;
4043
4044         switch (model) {
4045         case INTEL_FAM6_HASWELL_CORE:   /* HSW */
4046         case INTEL_FAM6_HASWELL_ULT:    /* HSW */
4047         case INTEL_FAM6_HASWELL_GT3E:   /* HSW */
4048                 do_gfx_perf_limit_reasons = 1;
4049         case INTEL_FAM6_HASWELL_X:      /* HSX */
4050                 do_core_perf_limit_reasons = 1;
4051                 do_ring_perf_limit_reasons = 1;
4052         default:
4053                 return;
4054         }
4055 }
4056
4057 void automatic_cstate_conversion_probe(unsigned int family, unsigned int model)
4058 {
4059         if (is_skx(family, model) || is_bdx(family, model))
4060                 has_automatic_cstate_conversion = 1;
4061 }
4062
4063 int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p)
4064 {
4065         unsigned long long msr;
4066         unsigned int dts, dts2;
4067         int cpu;
4068
4069         if (!(do_dts || do_ptm))
4070                 return 0;
4071
4072         cpu = t->cpu_id;
4073
4074         /* DTS is per-core, no need to print for each thread */
4075         if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
4076                 return 0;
4077
4078         if (cpu_migrate(cpu)) {
4079                 fprintf(outf, "Could not migrate to CPU %d\n", cpu);
4080                 return -1;
4081         }
4082
4083         if (do_ptm && (t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) {
4084                 if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr))
4085                         return 0;
4086
4087                 dts = (msr >> 16) & 0x7F;
4088                 fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_STATUS: 0x%08llx (%d C)\n",
4089                         cpu, msr, tcc_activation_temp - dts);
4090
4091                 if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, &msr))
4092                         return 0;
4093
4094                 dts = (msr >> 16) & 0x7F;
4095                 dts2 = (msr >> 8) & 0x7F;
4096                 fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
4097                         cpu, msr, tcc_activation_temp - dts, tcc_activation_temp - dts2);
4098         }
4099
4100
4101         if (do_dts && debug) {
4102                 unsigned int resolution;
4103
4104                 if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr))
4105                         return 0;
4106
4107                 dts = (msr >> 16) & 0x7F;
4108                 resolution = (msr >> 27) & 0xF;
4109                 fprintf(outf, "cpu%d: MSR_IA32_THERM_STATUS: 0x%08llx (%d C +/- %d)\n",
4110                         cpu, msr, tcc_activation_temp - dts, resolution);
4111
4112                 if (get_msr(cpu, MSR_IA32_THERM_INTERRUPT, &msr))
4113                         return 0;
4114
4115                 dts = (msr >> 16) & 0x7F;
4116                 dts2 = (msr >> 8) & 0x7F;
4117                 fprintf(outf, "cpu%d: MSR_IA32_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
4118                         cpu, msr, tcc_activation_temp - dts, tcc_activation_temp - dts2);
4119         }
4120
4121         return 0;
4122 }
4123
4124 void print_power_limit_msr(int cpu, unsigned long long msr, char *label)
4125 {
4126         fprintf(outf, "cpu%d: %s: %sabled (%f Watts, %f sec, clamp %sabled)\n",
4127                 cpu, label,
4128                 ((msr >> 15) & 1) ? "EN" : "DIS",
4129                 ((msr >> 0) & 0x7FFF) * rapl_power_units,
4130                 (1.0 + (((msr >> 22) & 0x3)/4.0)) * (1 << ((msr >> 17) & 0x1F)) * rapl_time_units,
4131                 (((msr >> 16) & 1) ? "EN" : "DIS"));
4132
4133         return;
4134 }
4135
4136 int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
4137 {
4138         unsigned long long msr;
4139         const char *msr_name;
4140         int cpu;
4141
4142         if (!do_rapl)
4143                 return 0;
4144
4145         /* RAPL counters are per package, so print only for 1st thread/package */
4146         if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
4147                 return 0;
4148
4149         cpu = t->cpu_id;
4150         if (cpu_migrate(cpu)) {
4151                 fprintf(outf, "Could not migrate to CPU %d\n", cpu);
4152                 return -1;
4153         }
4154
4155         if (do_rapl & RAPL_AMD_F17H) {
4156                 msr_name = "MSR_RAPL_PWR_UNIT";
4157                 if (get_msr(cpu, MSR_RAPL_PWR_UNIT, &msr))
4158                         return -1;
4159         } else {
4160                 msr_name = "MSR_RAPL_POWER_UNIT";
4161                 if (get_msr(cpu, MSR_RAPL_POWER_UNIT, &msr))
4162                         return -1;
4163         }
4164
4165         fprintf(outf, "cpu%d: %s: 0x%08llx (%f Watts, %f Joules, %f sec.)\n", cpu, msr_name, msr,
4166                 rapl_power_units, rapl_energy_units, rapl_time_units);
4167
4168         if (do_rapl & RAPL_PKG_POWER_INFO) {
4169
4170                 if (get_msr(cpu, MSR_PKG_POWER_INFO, &msr))
4171                         return -5;
4172
4173
4174                 fprintf(outf, "cpu%d: MSR_PKG_POWER_INFO: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n",
4175                         cpu, msr,
4176                         ((msr >>  0) & RAPL_POWER_GRANULARITY) * rapl_power_units,
4177                         ((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units,
4178                         ((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units,
4179                         ((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units);
4180
4181         }
4182         if (do_rapl & RAPL_PKG) {
4183
4184                 if (get_msr(cpu, MSR_PKG_POWER_LIMIT, &msr))
4185                         return -9;
4186
4187                 fprintf(outf, "cpu%d: MSR_PKG_POWER_LIMIT: 0x%08llx (%slocked)\n",
4188                         cpu, msr, (msr >> 63) & 1 ? "" : "UN");
4189
4190                 print_power_limit_msr(cpu, msr, "PKG Limit #1");
4191                 fprintf(outf, "cpu%d: PKG Limit #2: %sabled (%f Watts, %f* sec, clamp %sabled)\n",
4192                         cpu,
4193                         ((msr >> 47) & 1) ? "EN" : "DIS",
4194                         ((msr >> 32) & 0x7FFF) * rapl_power_units,
4195                         (1.0 + (((msr >> 54) & 0x3)/4.0)) * (1 << ((msr >> 49) & 0x1F)) * rapl_time_units,
4196                         ((msr >> 48) & 1) ? "EN" : "DIS");
4197         }
4198
4199         if (do_rapl & RAPL_DRAM_POWER_INFO) {
4200                 if (get_msr(cpu, MSR_DRAM_POWER_INFO, &msr))
4201                         return -6;
4202
4203                 fprintf(outf, "cpu%d: MSR_DRAM_POWER_INFO,: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n",
4204                         cpu, msr,
4205                         ((msr >>  0) & RAPL_POWER_GRANULARITY) * rapl_power_units,
4206                         ((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units,
4207                         ((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units,
4208                         ((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units);
4209         }
4210         if (do_rapl & RAPL_DRAM) {
4211                 if (get_msr(cpu, MSR_DRAM_POWER_LIMIT, &msr))
4212                         return -9;
4213                 fprintf(outf, "cpu%d: MSR_DRAM_POWER_LIMIT: 0x%08llx (%slocked)\n",
4214                                 cpu, msr, (msr >> 31) & 1 ? "" : "UN");
4215
4216                 print_power_limit_msr(cpu, msr, "DRAM Limit");
4217         }
4218         if (do_rapl & RAPL_CORE_POLICY) {
4219                 if (get_msr(cpu, MSR_PP0_POLICY, &msr))
4220                         return -7;
4221
4222                 fprintf(outf, "cpu%d: MSR_PP0_POLICY: %lld\n", cpu, msr & 0xF);
4223         }
4224         if (do_rapl & RAPL_CORES_POWER_LIMIT) {
4225                 if (get_msr(cpu, MSR_PP0_POWER_LIMIT, &msr))
4226                         return -9;
4227                 fprintf(outf, "cpu%d: MSR_PP0_POWER_LIMIT: 0x%08llx (%slocked)\n",
4228                                 cpu, msr, (msr >> 31) & 1 ? "" : "UN");
4229                 print_power_limit_msr(cpu, msr, "Cores Limit");
4230         }
4231         if (do_rapl & RAPL_GFX) {
4232                 if (get_msr(cpu, MSR_PP1_POLICY, &msr))
4233                         return -8;
4234
4235                 fprintf(outf, "cpu%d: MSR_PP1_POLICY: %lld\n", cpu, msr & 0xF);
4236
4237                 if (get_msr(cpu, MSR_PP1_POWER_LIMIT, &msr))
4238                         return -9;
4239                 fprintf(outf, "cpu%d: MSR_PP1_POWER_LIMIT: 0x%08llx (%slocked)\n",
4240                                 cpu, msr, (msr >> 31) & 1 ? "" : "UN");
4241                 print_power_limit_msr(cpu, msr, "GFX Limit");
4242         }
4243         return 0;
4244 }
4245
4246 /*
4247  * SNB adds support for additional MSRs:
4248  *
4249  * MSR_PKG_C7_RESIDENCY            0x000003fa
4250  * MSR_CORE_C7_RESIDENCY           0x000003fe
4251  * MSR_PKG_C2_RESIDENCY            0x0000060d
4252  */
4253
4254 int has_snb_msrs(unsigned int family, unsigned int model)
4255 {
4256         if (!genuine_intel)
4257                 return 0;
4258
4259         switch (model) {
4260         case INTEL_FAM6_SANDYBRIDGE:
4261         case INTEL_FAM6_SANDYBRIDGE_X:
4262         case INTEL_FAM6_IVYBRIDGE:      /* IVB */
4263         case INTEL_FAM6_IVYBRIDGE_X:    /* IVB Xeon */
4264         case INTEL_FAM6_HASWELL_CORE:   /* HSW */
4265         case INTEL_FAM6_HASWELL_X:      /* HSW */
4266         case INTEL_FAM6_HASWELL_ULT:    /* HSW */
4267         case INTEL_FAM6_HASWELL_GT3E:   /* HSW */
4268         case INTEL_FAM6_BROADWELL_CORE: /* BDW */
4269         case INTEL_FAM6_BROADWELL_GT3E: /* BDW */
4270         case INTEL_FAM6_BROADWELL_X:    /* BDX */
4271         case INTEL_FAM6_SKYLAKE_MOBILE: /* SKL */
4272         case INTEL_FAM6_CANNONLAKE_MOBILE:      /* CNL */
4273         case INTEL_FAM6_SKYLAKE_X:      /* SKX */
4274         case INTEL_FAM6_ATOM_GOLDMONT:  /* BXT */
4275         case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
4276         case INTEL_FAM6_ATOM_GOLDMONT_X:        /* DNV */
4277                 return 1;
4278         }
4279         return 0;
4280 }
4281
4282 /*
4283  * HSW ULT added support for C8/C9/C10 MSRs:
4284  *
4285  * MSR_PKG_C8_RESIDENCY         0x00000630
4286  * MSR_PKG_C9_RESIDENCY         0x00000631
4287  * MSR_PKG_C10_RESIDENCY        0x00000632
4288  *
4289  * MSR_PKGC8_IRTL               0x00000633
4290  * MSR_PKGC9_IRTL               0x00000634
4291  * MSR_PKGC10_IRTL              0x00000635
4292  *
4293  */
4294 int has_c8910_msrs(unsigned int family, unsigned int model)
4295 {
4296         if (!genuine_intel)
4297                 return 0;
4298
4299         switch (model) {
4300         case INTEL_FAM6_HASWELL_ULT:    /* HSW */
4301         case INTEL_FAM6_BROADWELL_CORE: /* BDW */
4302         case INTEL_FAM6_SKYLAKE_MOBILE: /* SKL */
4303         case INTEL_FAM6_CANNONLAKE_MOBILE:      /* CNL */
4304         case INTEL_FAM6_ATOM_GOLDMONT:  /* BXT */
4305         case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
4306                 return 1;
4307         }
4308         return 0;
4309 }
4310
4311 /*
4312  * SKL adds support for additional MSRS:
4313  *
4314  * MSR_PKG_WEIGHTED_CORE_C0_RES    0x00000658
4315  * MSR_PKG_ANY_CORE_C0_RES         0x00000659
4316  * MSR_PKG_ANY_GFXE_C0_RES         0x0000065A
4317  * MSR_PKG_BOTH_CORE_GFXE_C0_RES   0x0000065B
4318  */
4319 int has_skl_msrs(unsigned int family, unsigned int model)
4320 {
4321         if (!genuine_intel)
4322                 return 0;
4323
4324         switch (model) {
4325         case INTEL_FAM6_SKYLAKE_MOBILE: /* SKL */
4326         case INTEL_FAM6_CANNONLAKE_MOBILE:      /* CNL */
4327                 return 1;
4328         }
4329         return 0;
4330 }
4331
4332 int is_slm(unsigned int family, unsigned int model)
4333 {
4334         if (!genuine_intel)
4335                 return 0;
4336         switch (model) {
4337         case INTEL_FAM6_ATOM_SILVERMONT:        /* BYT */
4338         case INTEL_FAM6_ATOM_SILVERMONT_X:      /* AVN */
4339                 return 1;
4340         }
4341         return 0;
4342 }
4343
4344 int is_knl(unsigned int family, unsigned int model)
4345 {
4346         if (!genuine_intel)
4347                 return 0;
4348         switch (model) {
4349         case INTEL_FAM6_XEON_PHI_KNL:   /* KNL */
4350                 return 1;
4351         }
4352         return 0;
4353 }
4354
4355 int is_cnl(unsigned int family, unsigned int model)
4356 {
4357         if (!genuine_intel)
4358                 return 0;
4359
4360         switch (model) {
4361         case INTEL_FAM6_CANNONLAKE_MOBILE: /* CNL */
4362                 return 1;
4363         }
4364
4365         return 0;
4366 }
4367
/*
 * Returns the APERF/MPERF multiplier for this processor:
 * 1024 when is_knl() matches, 1 for everything else.
 */
unsigned int get_aperf_mperf_multiplier(unsigned int family, unsigned int model)
{
	return is_knl(family, model) ? 1024 : 1;
}
4374
4375 #define SLM_BCLK_FREQS 5
4376 double slm_freq_table[SLM_BCLK_FREQS] = { 83.3, 100.0, 133.3, 116.7, 80.0};
4377
4378 double slm_bclk(void)
4379 {
4380         unsigned long long msr = 3;
4381         unsigned int i;
4382         double freq;
4383
4384         if (get_msr(base_cpu, MSR_FSB_FREQ, &msr))
4385                 fprintf(outf, "SLM BCLK: unknown\n");
4386
4387         i = msr & 0xf;
4388         if (i >= SLM_BCLK_FREQS) {
4389                 fprintf(outf, "SLM BCLK[%d] invalid\n", i);
4390                 i = 3;
4391         }
4392         freq = slm_freq_table[i];
4393
4394         if (!quiet)
4395                 fprintf(outf, "SLM BCLK: %.1f Mhz\n", freq);
4396
4397         return freq;
4398 }
4399
/*
 * discover_bclk()
 *
 * Returns the bus clock in MHz: 100.00 when has_snb_msrs() or is_knl()
 * matches, the value from slm_bclk() when is_slm() matches, and 133.33
 * for everything else.
 */
double discover_bclk(unsigned int family, unsigned int model)
{
	if (has_snb_msrs(family, model) || is_knl(family, model))
		return 100.00;

	if (is_slm(family, model))
		return slm_bclk();

	return 133.33;
}
4409
4410 /*
4411  * MSR_IA32_TEMPERATURE_TARGET indicates the temperature where
4412  * the Thermal Control Circuit (TCC) activates.
4413  * This is usually equal to tjMax.
4414  *
4415  * Older processors do not have this MSR, so there we guess,
4416  * but also allow cmdline over-ride with -T.
4417  *
4418  * Several MSR temperature values are in units of degrees-C
4419  * below this value, including the Digital Thermal Sensor (DTS),
4420  * Package Thermal Management Sensor (PTM), and thermal event thresholds.
4421  */
4422 int set_temperature_target(struct thread_data *t, struct core_data *c, struct pkg_data *p)
4423 {
4424         unsigned long long msr;
4425         unsigned int target_c_local;
4426         int cpu;
4427
4428         /* tcc_activation_temp is used only for dts or ptm */
4429         if (!(do_dts || do_ptm))
4430                 return 0;
4431
4432         /* this is a per-package concept */
4433         if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
4434                 return 0;
4435
4436         cpu = t->cpu_id;
4437         if (cpu_migrate(cpu)) {
4438                 fprintf(outf, "Could not migrate to CPU %d\n", cpu);
4439                 return -1;
4440         }
4441
4442         if (tcc_activation_temp_override != 0) {
4443                 tcc_activation_temp = tcc_activation_temp_override;
4444                 fprintf(outf, "cpu%d: Using cmdline TCC Target (%d C)\n",
4445                         cpu, tcc_activation_temp);
4446                 return 0;
4447         }
4448
4449         /* Temperature Target MSR is Nehalem and newer only */
4450         if (!do_nhm_platform_info)
4451                 goto guess;
4452
4453         if (get_msr(base_cpu, MSR_IA32_TEMPERATURE_TARGET, &msr))
4454                 goto guess;
4455
4456         target_c_local = (msr >> 16) & 0xFF;
4457
4458         if (!quiet)
4459                 fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C)\n",
4460                         cpu, msr, target_c_local);
4461
4462         if (!target_c_local)
4463                 goto guess;
4464
4465         tcc_activation_temp = target_c_local;
4466
4467         return 0;
4468
4469 guess:
4470         tcc_activation_temp = TJMAX_DEFAULT;
4471         fprintf(outf, "cpu%d: Guessing tjMax %d C, Please use -T to specify\n",
4472                 cpu, tcc_activation_temp);
4473
4474         return 0;
4475 }
4476
4477 void decode_feature_control_msr(void)
4478 {
4479         unsigned long long msr;
4480
4481         if (!get_msr(base_cpu, MSR_IA32_FEATURE_CONTROL, &msr))
4482                 fprintf(outf, "cpu%d: MSR_IA32_FEATURE_CONTROL: 0x%08llx (%sLocked %s)\n",
4483                         base_cpu, msr,
4484                         msr & FEATURE_CONTROL_LOCKED ? "" : "UN-",
4485                         msr & (1 << 18) ? "SGX" : "");
4486 }
4487
4488 void decode_misc_enable_msr(void)
4489 {
4490         unsigned long long msr;
4491
4492         if (!genuine_intel)
4493                 return;
4494
4495         if (!get_msr(base_cpu, MSR_IA32_MISC_ENABLE, &msr))
4496                 fprintf(outf, "cpu%d: MSR_IA32_MISC_ENABLE: 0x%08llx (%sTCC %sEIST %sMWAIT %sPREFETCH %sTURBO)\n",
4497                         base_cpu, msr,
4498                         msr & MSR_IA32_MISC_ENABLE_TM1 ? "" : "No-",
4499                         msr & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP ? "" : "No-",
4500                         msr & MSR_IA32_MISC_ENABLE_MWAIT ? "" : "No-",
4501                         msr & MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE ? "No-" : "",
4502                         msr & MSR_IA32_MISC_ENABLE_TURBO_DISABLE ? "No-" : "");
4503 }
4504
4505 void decode_misc_feature_control(void)
4506 {
4507         unsigned long long msr;
4508
4509         if (!has_misc_feature_control)
4510                 return;
4511
4512         if (!get_msr(base_cpu, MSR_MISC_FEATURE_CONTROL, &msr))
4513                 fprintf(outf, "cpu%d: MSR_MISC_FEATURE_CONTROL: 0x%08llx (%sL2-Prefetch %sL2-Prefetch-pair %sL1-Prefetch %sL1-IP-Prefetch)\n",
4514                         base_cpu, msr,
4515                         msr & (0 << 0) ? "No-" : "",
4516                         msr & (1 << 0) ? "No-" : "",
4517                         msr & (2 << 0) ? "No-" : "",
4518                         msr & (3 << 0) ? "No-" : "");
4519 }
4520 /*
4521  * Decode MSR_MISC_PWR_MGMT
4522  *
4523  * Decode the bits according to the Nehalem documentation
4524  * bit[0] seems to continue to have same meaning going forward
4525  * bit[1] less so...
4526  */
4527 void decode_misc_pwr_mgmt_msr(void)
4528 {
4529         unsigned long long msr;
4530
4531         if (!do_nhm_platform_info)
4532                 return;
4533
4534         if (no_MSR_MISC_PWR_MGMT)
4535                 return;
4536
4537         if (!get_msr(base_cpu, MSR_MISC_PWR_MGMT, &msr))
4538                 fprintf(outf, "cpu%d: MSR_MISC_PWR_MGMT: 0x%08llx (%sable-EIST_Coordination %sable-EPB %sable-OOB)\n",
4539                         base_cpu, msr,
4540                         msr & (1 << 0) ? "DIS" : "EN",
4541                         msr & (1 << 1) ? "EN" : "DIS",
4542                         msr & (1 << 8) ? "EN" : "DIS");
4543 }
4544 /*
4545  * Decode MSR_CC6_DEMOTION_POLICY_CONFIG, MSR_MC6_DEMOTION_POLICY_CONFIG
4546  *
4547  * This MSRs are present on Silvermont processors,
4548  * Intel Atom processor E3000 series (Baytrail), and friends.
4549  */
4550 void decode_c6_demotion_policy_msr(void)
4551 {
4552         unsigned long long msr;
4553
4554         if (!get_msr(base_cpu, MSR_CC6_DEMOTION_POLICY_CONFIG, &msr))
4555                 fprintf(outf, "cpu%d: MSR_CC6_DEMOTION_POLICY_CONFIG: 0x%08llx (%sable-CC6-Demotion)\n",
4556                         base_cpu, msr, msr & (1 << 0) ? "EN" : "DIS");
4557
4558         if (!get_msr(base_cpu, MSR_MC6_DEMOTION_POLICY_CONFIG, &msr))
4559                 fprintf(outf, "cpu%d: MSR_MC6_DEMOTION_POLICY_CONFIG: 0x%08llx (%sable-MC6-Demotion)\n",
4560                         base_cpu, msr, msr & (1 << 0) ? "EN" : "DIS");
4561 }
4562
4563 /*
4564  * When models are the same, for the purpose of turbostat, reuse
4565  */
4566 unsigned int intel_model_duplicates(unsigned int model)
4567 {
4568
4569         switch(model) {
4570         case INTEL_FAM6_NEHALEM_EP:     /* Core i7, Xeon 5500 series - Bloomfield, Gainstown NHM-EP */
4571         case INTEL_FAM6_NEHALEM:        /* Core i7 and i5 Processor - Clarksfield, Lynnfield, Jasper Forest */
4572         case 0x1F:      /* Core i7 and i5 Processor - Nehalem */
4573         case INTEL_FAM6_WESTMERE:       /* Westmere Client - Clarkdale, Arrandale */
4574         case INTEL_FAM6_WESTMERE_EP:    /* Westmere EP - Gulftown */
4575                 return INTEL_FAM6_NEHALEM;
4576
4577         case INTEL_FAM6_NEHALEM_EX:     /* Nehalem-EX Xeon - Beckton */
4578         case INTEL_FAM6_WESTMERE_EX:    /* Westmere-EX Xeon - Eagleton */
4579                 return INTEL_FAM6_NEHALEM_EX;
4580
4581         case INTEL_FAM6_XEON_PHI_KNM:
4582                 return INTEL_FAM6_XEON_PHI_KNL;
4583
4584         case INTEL_FAM6_BROADWELL_X:
4585         case INTEL_FAM6_BROADWELL_XEON_D:       /* BDX-DE */
4586                 return INTEL_FAM6_BROADWELL_X;
4587
4588         case INTEL_FAM6_SKYLAKE_MOBILE:
4589         case INTEL_FAM6_SKYLAKE_DESKTOP:
4590         case INTEL_FAM6_KABYLAKE_MOBILE:
4591         case INTEL_FAM6_KABYLAKE_DESKTOP:
4592                 return INTEL_FAM6_SKYLAKE_MOBILE;
4593
4594         case INTEL_FAM6_ICELAKE_MOBILE:
4595         case INTEL_FAM6_ICELAKE_NNPI:
4596                 return INTEL_FAM6_CANNONLAKE_MOBILE;
4597
4598         case INTEL_FAM6_ATOM_TREMONT_X:
4599                 return INTEL_FAM6_ATOM_GOLDMONT_X;
4600         }
4601         return model;
4602 }
4603 void process_cpuid()
4604 {
4605         unsigned int eax, ebx, ecx, edx;
4606         unsigned int fms, family, model, stepping, ecx_flags, edx_flags;
4607         unsigned int has_turbo;
4608
4609         eax = ebx = ecx = edx = 0;
4610
4611         __cpuid(0, max_level, ebx, ecx, edx);
4612
4613         if (ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69)
4614                 genuine_intel = 1;
4615         else if (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65)
4616                 authentic_amd = 1;
4617
4618         if (!quiet)
4619                 fprintf(outf, "CPUID(0): %.4s%.4s%.4s ",
4620                         (char *)&ebx, (char *)&edx, (char *)&ecx);
4621
4622         __cpuid(1, fms, ebx, ecx, edx);
4623         family = (fms >> 8) & 0xf;
4624         model = (fms >> 4) & 0xf;
4625         stepping = fms & 0xf;
4626         if (family == 0xf)
4627                 family += (fms >> 20) & 0xff;
4628         if (family >= 6)
4629                 model += ((fms >> 16) & 0xf) << 4;
4630         ecx_flags = ecx;
4631         edx_flags = edx;
4632
4633         /*
4634          * check max extended function levels of CPUID.
4635          * This is needed to check for invariant TSC.
4636          * This check is valid for both Intel and AMD.
4637          */
4638         ebx = ecx = edx = 0;
4639         __cpuid(0x80000000, max_extended_level, ebx, ecx, edx);
4640
4641         if (!quiet) {
4642                 fprintf(outf, "0x%x CPUID levels; 0x%x xlevels; family:model:stepping 0x%x:%x:%x (%d:%d:%d)\n",
4643                         max_level, max_extended_level, family, model, stepping, family, model, stepping);
4644                 fprintf(outf, "CPUID(1): %s %s %s %s %s %s %s %s %s %s\n",
4645                         ecx_flags & (1 << 0) ? "SSE3" : "-",
4646                         ecx_flags & (1 << 3) ? "MONITOR" : "-",
4647                         ecx_flags & (1 << 6) ? "SMX" : "-",
4648                         ecx_flags & (1 << 7) ? "EIST" : "-",
4649                         ecx_flags & (1 << 8) ? "TM2" : "-",
4650                         edx_flags & (1 << 4) ? "TSC" : "-",
4651                         edx_flags & (1 << 5) ? "MSR" : "-",
4652                         edx_flags & (1 << 22) ? "ACPI-TM" : "-",
4653                         edx_flags & (1 << 28) ? "HT" : "-",
4654                         edx_flags & (1 << 29) ? "TM" : "-");
4655         }
4656         if (genuine_intel)
4657                 model = intel_model_duplicates(model);
4658
4659         if (!(edx_flags & (1 << 5)))
4660                 errx(1, "CPUID: no MSR");
4661
4662         if (max_extended_level >= 0x80000007) {
4663
4664                 /*
4665                  * Non-Stop TSC is advertised by CPUID.EAX=0x80000007: EDX.bit8
4666                  * this check is valid for both Intel and AMD
4667                  */
4668                 __cpuid(0x80000007, eax, ebx, ecx, edx);
4669                 has_invariant_tsc = edx & (1 << 8);
4670         }
4671
4672         /*
4673          * APERF/MPERF is advertised by CPUID.EAX=0x6: ECX.bit0
4674          * this check is valid for both Intel and AMD
4675          */
4676
4677         __cpuid(0x6, eax, ebx, ecx, edx);
4678         has_aperf = ecx & (1 << 0);
4679         if (has_aperf) {
4680                 BIC_PRESENT(BIC_Avg_MHz);
4681                 BIC_PRESENT(BIC_Busy);
4682                 BIC_PRESENT(BIC_Bzy_MHz);
4683         }
4684         do_dts = eax & (1 << 0);
4685         if (do_dts)
4686                 BIC_PRESENT(BIC_CoreTmp);
4687         has_turbo = eax & (1 << 1);
4688         do_ptm = eax & (1 << 6);
4689         if (do_ptm)
4690                 BIC_PRESENT(BIC_PkgTmp);
4691         has_hwp = eax & (1 << 7);
4692         has_hwp_notify = eax & (1 << 8);
4693         has_hwp_activity_window = eax & (1 << 9);
4694         has_hwp_epp = eax & (1 << 10);
4695         has_hwp_pkg = eax & (1 << 11);
4696         has_epb = ecx & (1 << 3);
4697
4698         if (!quiet)
4699                 fprintf(outf, "CPUID(6): %sAPERF, %sTURBO, %sDTS, %sPTM, %sHWP, "
4700                         "%sHWPnotify, %sHWPwindow, %sHWPepp, %sHWPpkg, %sEPB\n",
4701                         has_aperf ? "" : "No-",
4702                         has_turbo ? "" : "No-",
4703                         do_dts ? "" : "No-",
4704                         do_ptm ? "" : "No-",
4705                         has_hwp ? "" : "No-",
4706                         has_hwp_notify ? "" : "No-",
4707                         has_hwp_activity_window ? "" : "No-",
4708                         has_hwp_epp ? "" : "No-",
4709                         has_hwp_pkg ? "" : "No-",
4710                         has_epb ? "" : "No-");
4711
4712         if (!quiet)
4713                 decode_misc_enable_msr();
4714
4715
4716         if (max_level >= 0x7 && !quiet) {
4717                 int has_sgx;
4718
4719                 ecx = 0;
4720
4721                 __cpuid_count(0x7, 0, eax, ebx, ecx, edx);
4722
4723                 has_sgx = ebx & (1 << 2);
4724                 fprintf(outf, "CPUID(7): %sSGX\n", has_sgx ? "" : "No-");
4725
4726                 if (has_sgx)
4727                         decode_feature_control_msr();
4728         }
4729
4730         if (max_level >= 0x15) {
4731                 unsigned int eax_crystal;
4732                 unsigned int ebx_tsc;
4733
4734                 /*
4735                  * CPUID 15H TSC/Crystal ratio, possibly Crystal Hz
4736                  */
4737                 eax_crystal = ebx_tsc = crystal_hz = edx = 0;
4738                 __cpuid(0x15, eax_crystal, ebx_tsc, crystal_hz, edx);
4739
4740                 if (ebx_tsc != 0) {
4741
4742                         if (!quiet && (ebx != 0))
4743                                 fprintf(outf, "CPUID(0x15): eax_crystal: %d ebx_tsc: %d ecx_crystal_hz: %d\n",
4744                                         eax_crystal, ebx_tsc, crystal_hz);
4745
4746                         if (crystal_hz == 0)
4747                                 switch(model) {
4748                                 case INTEL_FAM6_SKYLAKE_MOBILE: /* SKL */
4749                                         crystal_hz = 24000000;  /* 24.0 MHz */
4750                                         break;
4751                                 case INTEL_FAM6_ATOM_GOLDMONT_X:        /* DNV */
4752                                         crystal_hz = 25000000;  /* 25.0 MHz */
4753                                         break;
4754                                 case INTEL_FAM6_ATOM_GOLDMONT:  /* BXT */
4755                                 case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
4756                                         crystal_hz = 19200000;  /* 19.2 MHz */
4757                                         break;
4758                                 default:
4759                                         crystal_hz = 0;
4760                         }
4761
4762                         if (crystal_hz) {
4763                                 tsc_hz =  (unsigned long long) crystal_hz * ebx_tsc / eax_crystal;
4764                                 if (!quiet)
4765                                         fprintf(outf, "TSC: %lld MHz (%d Hz * %d / %d / 1000000)\n",
4766                                                 tsc_hz / 1000000, crystal_hz, ebx_tsc,  eax_crystal);
4767                         }
4768                 }
4769         }
4770         if (max_level >= 0x16) {
4771                 unsigned int base_mhz, max_mhz, bus_mhz, edx;
4772
4773                 /*
4774                  * CPUID 16H Base MHz, Max MHz, Bus MHz
4775                  */
4776                 base_mhz = max_mhz = bus_mhz = edx = 0;
4777
4778                 __cpuid(0x16, base_mhz, max_mhz, bus_mhz, edx);
4779                 if (!quiet)
4780                         fprintf(outf, "CPUID(0x16): base_mhz: %d max_mhz: %d bus_mhz: %d\n",
4781                                 base_mhz, max_mhz, bus_mhz);
4782         }
4783
4784         if (has_aperf)
4785                 aperf_mperf_multiplier = get_aperf_mperf_multiplier(family, model);
4786
4787         BIC_PRESENT(BIC_IRQ);
4788         BIC_PRESENT(BIC_TSC_MHz);
4789
4790         if (probe_nhm_msrs(family, model)) {
4791                 do_nhm_platform_info = 1;
4792                 BIC_PRESENT(BIC_CPU_c1);
4793                 BIC_PRESENT(BIC_CPU_c3);
4794                 BIC_PRESENT(BIC_CPU_c6);
4795                 BIC_PRESENT(BIC_SMI);
4796         }
4797         do_snb_cstates = has_snb_msrs(family, model);
4798
4799         if (do_snb_cstates)
4800                 BIC_PRESENT(BIC_CPU_c7);
4801
4802         do_irtl_snb = has_snb_msrs(family, model);
4803         if (do_snb_cstates && (pkg_cstate_limit >= PCL__2))
4804                 BIC_PRESENT(BIC_Pkgpc2);
4805         if (pkg_cstate_limit >= PCL__3)
4806                 BIC_PRESENT(BIC_Pkgpc3);
4807         if (pkg_cstate_limit >= PCL__6)
4808                 BIC_PRESENT(BIC_Pkgpc6);
4809         if (do_snb_cstates && (pkg_cstate_limit >= PCL__7))
4810                 BIC_PRESENT(BIC_Pkgpc7);
4811         if (has_slv_msrs(family, model)) {
4812                 BIC_NOT_PRESENT(BIC_Pkgpc2);
4813                 BIC_NOT_PRESENT(BIC_Pkgpc3);
4814                 BIC_PRESENT(BIC_Pkgpc6);
4815                 BIC_NOT_PRESENT(BIC_Pkgpc7);
4816                 BIC_PRESENT(BIC_Mod_c6);
4817                 use_c1_residency_msr = 1;
4818         }
4819         if (is_dnv(family, model)) {
4820                 BIC_PRESENT(BIC_CPU_c1);
4821                 BIC_NOT_PRESENT(BIC_CPU_c3);
4822                 BIC_NOT_PRESENT(BIC_Pkgpc3);
4823                 BIC_NOT_PRESENT(BIC_CPU_c7);
4824                 BIC_NOT_PRESENT(BIC_Pkgpc7);
4825                 use_c1_residency_msr = 1;
4826         }
4827         if (is_skx(family, model)) {
4828                 BIC_NOT_PRESENT(BIC_CPU_c3);
4829                 BIC_NOT_PRESENT(BIC_Pkgpc3);
4830                 BIC_NOT_PRESENT(BIC_CPU_c7);
4831                 BIC_NOT_PRESENT(BIC_Pkgpc7);
4832         }
4833         if (is_bdx(family, model)) {
4834                 BIC_NOT_PRESENT(BIC_CPU_c7);
4835                 BIC_NOT_PRESENT(BIC_Pkgpc7);
4836         }
4837         if (has_c8910_msrs(family, model)) {
4838                 BIC_PRESENT(BIC_Pkgpc8);
4839                 BIC_PRESENT(BIC_Pkgpc9);
4840                 BIC_PRESENT(BIC_Pkgpc10);
4841         }
4842         do_irtl_hsw = has_c8910_msrs(family, model);
4843         if (has_skl_msrs(family, model)) {
4844                 BIC_PRESENT(BIC_Totl_c0);
4845                 BIC_PRESENT(BIC_Any_c0);
4846                 BIC_PRESENT(BIC_GFX_c0);
4847                 BIC_PRESENT(BIC_CPUGFX);
4848         }
4849         do_slm_cstates = is_slm(family, model);
4850         do_knl_cstates  = is_knl(family, model);
4851
4852         if (do_slm_cstates || do_knl_cstates || is_cnl(family, model))
4853                 BIC_NOT_PRESENT(BIC_CPU_c3);
4854
4855         if (!quiet)
4856                 decode_misc_pwr_mgmt_msr();
4857
4858         if (!quiet && has_slv_msrs(family, model))
4859                 decode_c6_demotion_policy_msr();
4860
4861         rapl_probe(family, model);
4862         perf_limit_reasons_probe(family, model);
4863         automatic_cstate_conversion_probe(family, model);
4864
4865         if (!quiet)
4866                 dump_cstate_pstate_config_info(family, model);
4867
4868         if (!quiet)
4869                 dump_sysfs_cstate_config();
4870         if (!quiet)
4871                 dump_sysfs_pstate_config();
4872
4873         if (has_skl_msrs(family, model))
4874                 calculate_tsc_tweak();
4875
4876         if (!access("/sys/class/drm/card0/power/rc6_residency_ms", R_OK))
4877                 BIC_PRESENT(BIC_GFX_rc6);
4878
4879         if (!access("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", R_OK))
4880                 BIC_PRESENT(BIC_GFXMHz);
4881
4882         if (!access("/sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us", R_OK))
4883                 BIC_PRESENT(BIC_CPU_LPI);
4884         else
4885                 BIC_NOT_PRESENT(BIC_CPU_LPI);
4886
4887         if (!access("/sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us", R_OK))
4888                 BIC_PRESENT(BIC_SYS_LPI);
4889         else
4890                 BIC_NOT_PRESENT(BIC_SYS_LPI);
4891
4892         if (!quiet)
4893                 decode_misc_feature_control();
4894
4895         return;
4896 }
4897
/*
 * in /dev/cpu/ return success for names that are numbers
 * ie. filter out ".", "..", "microcode".
 *
 * Only the first character of the entry name is examined.
 * Returns 1 when it is a decimal digit, 0 otherwise.
 */
int dir_filter(const struct dirent *dirp)
{
	/*
	 * The <ctype.h> classifiers are only defined for values
	 * representable as unsigned char (or EOF), so convert before
	 * the call; plain char may be negative for bytes >= 0x80.
	 */
	return isdigit((unsigned char)dirp->d_name[0]) ? 1 : 0;
}
4909
/*
 * Placeholder: the argument is ignored and 0 is always returned.
 */
int open_dev_cpu_msr(int dummy1)
{
	(void)dummy1;	/* deliberately unused */

	return 0;
}
4914
4915 void topology_probe()
4916 {
4917         int i;
4918         int max_core_id = 0;
4919         int max_package_id = 0;
4920         int max_die_id = 0;
4921         int max_siblings = 0;
4922
4923         /* Initialize num_cpus, max_cpu_num */
4924         set_max_cpu_num();
4925         topo.num_cpus = 0;
4926         for_all_proc_cpus(count_cpus);
4927         if (!summary_only && topo.num_cpus > 1)
4928                 BIC_PRESENT(BIC_CPU);
4929
4930         if (debug > 1)
4931                 fprintf(outf, "num_cpus %d max_cpu_num %d\n", topo.num_cpus, topo.max_cpu_num);
4932
4933         cpus = calloc(1, (topo.max_cpu_num  + 1) * sizeof(struct cpu_topology));
4934         if (cpus == NULL)
4935                 err(1, "calloc cpus");
4936
4937         /*
4938          * Allocate and initialize cpu_present_set
4939          */
4940         cpu_present_set = CPU_ALLOC((topo.max_cpu_num + 1));
4941         if (cpu_present_set == NULL)
4942                 err(3, "CPU_ALLOC");
4943         cpu_present_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
4944         CPU_ZERO_S(cpu_present_setsize, cpu_present_set);
4945         for_all_proc_cpus(mark_cpu_present);
4946
4947         /*
4948          * Validate that all cpus in cpu_subset are also in cpu_present_set
4949          */
4950         for (i = 0; i < CPU_SUBSET_MAXCPUS; ++i) {
4951                 if (CPU_ISSET_S(i, cpu_subset_size, cpu_subset))
4952                         if (!CPU_ISSET_S(i, cpu_present_setsize, cpu_present_set))
4953                                 err(1, "cpu%d not present", i);
4954         }
4955
4956         /*
4957          * Allocate and initialize cpu_affinity_set
4958          */
4959         cpu_affinity_set = CPU_ALLOC((topo.max_cpu_num + 1));
4960         if (cpu_affinity_set == NULL)
4961                 err(3, "CPU_ALLOC");
4962         cpu_affinity_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
4963         CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set);
4964
4965         for_all_proc_cpus(init_thread_id);
4966
4967         /*
4968          * For online cpus
4969          * find max_core_id, max_package_id
4970          */
4971         for (i = 0; i <= topo.max_cpu_num; ++i) {
4972                 int siblings;
4973
4974                 if (cpu_is_not_present(i)) {
4975                         if (debug > 1)
4976                                 fprintf(outf, "cpu%d NOT PRESENT\n", i);
4977                         continue;
4978                 }
4979
4980                 cpus[i].logical_cpu_id = i;
4981
4982                 /* get package information */
4983                 cpus[i].physical_package_id = get_physical_package_id(i);
4984                 if (cpus[i].physical_package_id > max_package_id)
4985                         max_package_id = cpus[i].physical_package_id;
4986
4987                 /* get die information */
4988                 cpus[i].die_id = get_die_id(i);
4989                 if (cpus[i].die_id > max_die_id)
4990                         max_die_id = cpus[i].die_id;
4991
4992                 /* get numa node information */
4993                 cpus[i].physical_node_id = get_physical_node_id(&cpus[i]);
4994                 if (cpus[i].physical_node_id > topo.max_node_num)
4995                         topo.max_node_num = cpus[i].physical_node_id;
4996
4997                 /* get core information */
4998                 cpus[i].physical_core_id = get_core_id(i);
4999                 if (cpus[i].physical_core_id > max_core_id)
5000                         max_core_id = cpus[i].physical_core_id;
5001
5002                 /* get thread information */
5003                 siblings = get_thread_siblings(&cpus[i]);
5004                 if (siblings > max_siblings)
5005                         max_siblings = siblings;
5006                 if (cpus[i].thread_id == 0)
5007                         topo.num_cores++;
5008         }
5009
5010         topo.cores_per_node = max_core_id + 1;
5011         if (debug > 1)
5012                 fprintf(outf, "max_core_id %d, sizing for %d cores per package\n",
5013                         max_core_id, topo.cores_per_node);
5014         if (!summary_only && topo.cores_per_node > 1)
5015                 BIC_PRESENT(BIC_Core);
5016
5017         topo.num_die = max_die_id + 1;
5018         if (debug > 1)
5019                 fprintf(outf, "max_die_id %d, sizing for %d die\n",
5020                                 max_die_id, topo.num_die);
5021         if (!summary_only && topo.num_die > 1)
5022                 BIC_PRESENT(BIC_Die);
5023
5024         topo.num_packages = max_package_id + 1;
5025         if (debug > 1)
5026                 fprintf(outf, "max_package_id %d, sizing for %d packages\n",
5027                         max_package_id, topo.num_packages);
5028         if (!summary_only && topo.num_packages > 1)
5029                 BIC_PRESENT(BIC_Package);
5030
5031         set_node_data();
5032         if (debug > 1)
5033                 fprintf(outf, "nodes_per_pkg %d\n", topo.nodes_per_pkg);
5034         if (!summary_only && topo.nodes_per_pkg > 1)
5035                 BIC_PRESENT(BIC_Node);
5036
5037         topo.threads_per_core = max_siblings;
5038         if (debug > 1)
5039                 fprintf(outf, "max_siblings %d\n", max_siblings);
5040
5041         if (debug < 1)
5042                 return;
5043
5044         for (i = 0; i <= topo.max_cpu_num; ++i) {
5045                 if (cpu_is_not_present(i))
5046                         continue;
5047                 fprintf(outf,
5048                         "cpu %d pkg %d die %d node %d lnode %d core %d thread %d\n",
5049                         i, cpus[i].physical_package_id, cpus[i].die_id,
5050                         cpus[i].physical_node_id,
5051                         cpus[i].logical_node_id,
5052                         cpus[i].physical_core_id,
5053                         cpus[i].thread_id);
5054         }
5055
5056 }
5057
5058 void
5059 allocate_counters(struct thread_data **t, struct core_data **c,
5060                   struct pkg_data **p)
5061 {
5062         int i;
5063         int num_cores = topo.cores_per_node * topo.nodes_per_pkg *
5064                         topo.num_packages;
5065         int num_threads = topo.threads_per_core * num_cores;
5066
5067         *t = calloc(num_threads, sizeof(struct thread_data));
5068         if (*t == NULL)
5069                 goto error;
5070
5071         for (i = 0; i < num_threads; i++)
5072                 (*t)[i].cpu_id = -1;
5073
5074         *c = calloc(num_cores, sizeof(struct core_data));
5075         if (*c == NULL)
5076                 goto error;
5077
5078         for (i = 0; i < num_cores; i++)
5079                 (*c)[i].core_id = -1;
5080
5081         *p = calloc(topo.num_packages, sizeof(struct pkg_data));
5082         if (*p == NULL)
5083                 goto error;
5084
5085         for (i = 0; i < topo.num_packages; i++)
5086                 (*p)[i].package_id = i;
5087
5088         return;
5089 error:
5090         err(1, "calloc counters");
5091 }
5092 /*
5093  * init_counter()
5094  *
5095  * set FIRST_THREAD_IN_CORE and FIRST_CORE_IN_PACKAGE
5096  */
5097 void init_counter(struct thread_data *thread_base, struct core_data *core_base,
5098         struct pkg_data *pkg_base, int cpu_id)
5099 {
5100         int pkg_id = cpus[cpu_id].physical_package_id;
5101         int node_id = cpus[cpu_id].logical_node_id;
5102         int core_id = cpus[cpu_id].physical_core_id;
5103         int thread_id = cpus[cpu_id].thread_id;
5104         struct thread_data *t;
5105         struct core_data *c;
5106         struct pkg_data *p;
5107
5108
5109         /* Workaround for systems where physical_node_id==-1
5110          * and logical_node_id==(-1 - topo.num_cpus)
5111          */
5112         if (node_id < 0)
5113                 node_id = 0;
5114
5115         t = GET_THREAD(thread_base, thread_id, core_id, node_id, pkg_id);
5116         c = GET_CORE(core_base, core_id, node_id, pkg_id);
5117         p = GET_PKG(pkg_base, pkg_id);
5118
5119         t->cpu_id = cpu_id;
5120         if (thread_id == 0) {
5121                 t->flags |= CPU_IS_FIRST_THREAD_IN_CORE;
5122                 if (cpu_is_first_core_in_package(cpu_id))
5123                         t->flags |= CPU_IS_FIRST_CORE_IN_PACKAGE;
5124         }
5125
5126         c->core_id = core_id;
5127         p->package_id = pkg_id;
5128 }
5129
5130
5131 int initialize_counters(int cpu_id)
5132 {
5133         init_counter(EVEN_COUNTERS, cpu_id);
5134         init_counter(ODD_COUNTERS, cpu_id);
5135         return 0;
5136 }
5137
5138 void allocate_output_buffer()
5139 {
5140         output_buffer = calloc(1, (1 + topo.num_cpus) * 2048);
5141         outp = output_buffer;
5142         if (outp == NULL)
5143                 err(-1, "calloc output buffer");
5144 }
5145 void allocate_fd_percpu(void)
5146 {
5147         fd_percpu = calloc(topo.max_cpu_num + 1, sizeof(int));
5148         if (fd_percpu == NULL)
5149                 err(-1, "calloc fd_percpu");
5150 }
5151 void allocate_irq_buffers(void)
5152 {
5153         irq_column_2_cpu = calloc(topo.num_cpus, sizeof(int));
5154         if (irq_column_2_cpu == NULL)
5155                 err(-1, "calloc %d", topo.num_cpus);
5156
5157         irqs_per_cpu = calloc(topo.max_cpu_num + 1, sizeof(int));
5158         if (irqs_per_cpu == NULL)
5159                 err(-1, "calloc %d", topo.max_cpu_num + 1);
5160 }
5161 void setup_all_buffers(void)
5162 {
5163         topology_probe();
5164         allocate_irq_buffers();
5165         allocate_fd_percpu();
5166         allocate_counters(&thread_even, &core_even, &package_even);
5167         allocate_counters(&thread_odd, &core_odd, &package_odd);
5168         allocate_output_buffer();
5169         for_all_proc_cpus(initialize_counters);
5170 }
5171
5172 void set_base_cpu(void)
5173 {
5174         base_cpu = sched_getcpu();
5175         if (base_cpu < 0)
5176                 err(-ENODEV, "No valid cpus found");
5177
5178         if (debug > 1)
5179                 fprintf(outf, "base_cpu = %d\n", base_cpu);
5180 }
5181
5182 void turbostat_init()
5183 {
5184         setup_all_buffers();
5185         set_base_cpu();
5186         check_dev_msr();
5187         check_permissions();
5188         process_cpuid();
5189
5190
5191         if (!quiet)
5192                 for_all_cpus(print_hwp, ODD_COUNTERS);
5193
5194         if (!quiet)
5195                 for_all_cpus(print_epb, ODD_COUNTERS);
5196
5197         if (!quiet)
5198                 for_all_cpus(print_perf_limit, ODD_COUNTERS);
5199
5200         if (!quiet)
5201                 for_all_cpus(print_rapl, ODD_COUNTERS);
5202
5203         for_all_cpus(set_temperature_target, ODD_COUNTERS);
5204
5205         if (!quiet)
5206                 for_all_cpus(print_thermal, ODD_COUNTERS);
5207
5208         if (!quiet && do_irtl_snb)
5209                 print_irtl();
5210 }
5211
5212 int fork_it(char **argv)
5213 {
5214         pid_t child_pid;
5215         int status;
5216
5217         snapshot_proc_sysfs_files();
5218         status = for_all_cpus(get_counters, EVEN_COUNTERS);
5219         first_counter_read = 0;
5220         if (status)
5221                 exit(status);
5222         /* clear affinity side-effect of get_counters() */
5223         sched_setaffinity(0, cpu_present_setsize, cpu_present_set);
5224         gettimeofday(&tv_even, (struct timezone *)NULL);
5225
5226         child_pid = fork();
5227         if (!child_pid) {
5228                 /* child */
5229                 execvp(argv[0], argv);
5230                 err(errno, "exec %s", argv[0]);
5231         } else {
5232
5233                 /* parent */
5234                 if (child_pid == -1)
5235                         err(1, "fork");
5236
5237                 signal(SIGINT, SIG_IGN);
5238                 signal(SIGQUIT, SIG_IGN);
5239                 if (waitpid(child_pid, &status, 0) == -1)
5240                         err(status, "waitpid");
5241
5242                 if (WIFEXITED(status))
5243                         status = WEXITSTATUS(status);
5244         }
5245         /*
5246          * n.b. fork_it() does not check for errors from for_all_cpus()
5247          * because re-starting is problematic when forking
5248          */
5249         snapshot_proc_sysfs_files();
5250         for_all_cpus(get_counters, ODD_COUNTERS);
5251         gettimeofday(&tv_odd, (struct timezone *)NULL);
5252         timersub(&tv_odd, &tv_even, &tv_delta);
5253         if (for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS))
5254                 fprintf(outf, "%s: Counter reset detected\n", progname);
5255         else {
5256                 compute_average(EVEN_COUNTERS);
5257                 format_all_counters(EVEN_COUNTERS);
5258         }
5259
5260         fprintf(outf, "%.6f sec\n", tv_delta.tv_sec + tv_delta.tv_usec/1000000.0);
5261
5262         flush_output_stderr();
5263
5264         return status;
5265 }
5266
5267 int get_and_dump_counters(void)
5268 {
5269         int status;
5270
5271         snapshot_proc_sysfs_files();
5272         status = for_all_cpus(get_counters, ODD_COUNTERS);
5273         if (status)
5274                 return status;
5275
5276         status = for_all_cpus(dump_counters, ODD_COUNTERS);
5277         if (status)
5278                 return status;
5279
5280         flush_output_stdout();
5281
5282         return status;
5283 }
5284
5285 void print_version() {
5286         fprintf(outf, "turbostat version 19.03.20"
5287                 " - Len Brown <lenb@kernel.org>\n");
5288 }
5289
5290 int add_counter(unsigned int msr_num, char *path, char *name,
5291         unsigned int width, enum counter_scope scope,
5292         enum counter_type type, enum counter_format format, int flags)
5293 {
5294         struct msr_counter *msrp;
5295
5296         msrp = calloc(1, sizeof(struct msr_counter));
5297         if (msrp == NULL) {
5298                 perror("calloc");
5299                 exit(1);
5300         }
5301
5302         msrp->msr_num = msr_num;
5303         strncpy(msrp->name, name, NAME_BYTES);
5304         if (path)
5305                 strncpy(msrp->path, path, PATH_BYTES);
5306         msrp->width = width;
5307         msrp->type = type;
5308         msrp->format = format;
5309         msrp->flags = flags;
5310
5311         switch (scope) {
5312
5313         case SCOPE_CPU:
5314                 msrp->next = sys.tp;
5315                 sys.tp = msrp;
5316                 sys.added_thread_counters++;
5317                 if (sys.added_thread_counters > MAX_ADDED_THREAD_COUNTERS) {
5318                         fprintf(stderr, "exceeded max %d added thread counters\n",
5319                                 MAX_ADDED_COUNTERS);
5320                         exit(-1);
5321                 }
5322                 break;
5323
5324         case SCOPE_CORE:
5325                 msrp->next = sys.cp;
5326                 sys.cp = msrp;
5327                 sys.added_core_counters++;
5328                 if (sys.added_core_counters > MAX_ADDED_COUNTERS) {
5329                         fprintf(stderr, "exceeded max %d added core counters\n",
5330                                 MAX_ADDED_COUNTERS);
5331                         exit(-1);
5332                 }
5333                 break;
5334
5335         case SCOPE_PACKAGE:
5336                 msrp->next = sys.pp;
5337                 sys.pp = msrp;
5338                 sys.added_package_counters++;
5339                 if (sys.added_package_counters > MAX_ADDED_COUNTERS) {
5340                         fprintf(stderr, "exceeded max %d added package counters\n",
5341                                 MAX_ADDED_COUNTERS);
5342                         exit(-1);
5343                 }
5344                 break;
5345         }
5346
5347         return 0;
5348 }
5349
5350 void parse_add_command(char *add_command)
5351 {
5352         int msr_num = 0;
5353         char *path = NULL;
5354         char name_buffer[NAME_BYTES] = "";
5355         int width = 64;
5356         int fail = 0;
5357         enum counter_scope scope = SCOPE_CPU;
5358         enum counter_type type = COUNTER_CYCLES;
5359         enum counter_format format = FORMAT_DELTA;
5360
5361         while (add_command) {
5362
5363                 if (sscanf(add_command, "msr0x%x", &msr_num) == 1)
5364                         goto next;
5365
5366                 if (sscanf(add_command, "msr%d", &msr_num) == 1)
5367                         goto next;
5368
5369                 if (*add_command == '/') {
5370                         path = add_command;
5371                         goto next;
5372                 }
5373
5374                 if (sscanf(add_command, "u%d", &width) == 1) {
5375                         if ((width == 32) || (width == 64))
5376                                 goto next;
5377                         width = 64;
5378                 }
5379                 if (!strncmp(add_command, "cpu", strlen("cpu"))) {
5380                         scope = SCOPE_CPU;
5381                         goto next;
5382                 }
5383                 if (!strncmp(add_command, "core", strlen("core"))) {
5384                         scope = SCOPE_CORE;
5385                         goto next;
5386                 }
5387                 if (!strncmp(add_command, "package", strlen("package"))) {
5388                         scope = SCOPE_PACKAGE;
5389                         goto next;
5390                 }
5391                 if (!strncmp(add_command, "cycles", strlen("cycles"))) {
5392                         type = COUNTER_CYCLES;
5393                         goto next;
5394                 }
5395                 if (!strncmp(add_command, "seconds", strlen("seconds"))) {
5396                         type = COUNTER_SECONDS;
5397                         goto next;
5398                 }
5399                 if (!strncmp(add_command, "usec", strlen("usec"))) {
5400                         type = COUNTER_USEC;
5401                         goto next;
5402                 }
5403                 if (!strncmp(add_command, "raw", strlen("raw"))) {
5404                         format = FORMAT_RAW;
5405                         goto next;
5406                 }
5407                 if (!strncmp(add_command, "delta", strlen("delta"))) {
5408                         format = FORMAT_DELTA;
5409                         goto next;
5410                 }
5411                 if (!strncmp(add_command, "percent", strlen("percent"))) {
5412                         format = FORMAT_PERCENT;
5413                         goto next;
5414                 }
5415
5416                 if (sscanf(add_command, "%18s,%*s", name_buffer) == 1) {        /* 18 < NAME_BYTES */
5417                         char *eos;
5418
5419                         eos = strchr(name_buffer, ',');
5420                         if (eos)
5421                                 *eos = '\0';
5422                         goto next;
5423                 }
5424
5425 next:
5426                 add_command = strchr(add_command, ',');
5427                 if (add_command) {
5428                         *add_command = '\0';
5429                         add_command++;
5430                 }
5431
5432         }
5433         if ((msr_num == 0) && (path == NULL)) {
5434                 fprintf(stderr, "--add: (msrDDD | msr0xXXX | /path_to_counter ) required\n");
5435                 fail++;
5436         }
5437
5438         /* generate default column header */
5439         if (*name_buffer == '\0') {
5440                 if (width == 32)
5441                         sprintf(name_buffer, "M0x%x%s", msr_num, format == FORMAT_PERCENT ? "%" : "");
5442                 else
5443                         sprintf(name_buffer, "M0X%x%s", msr_num, format == FORMAT_PERCENT ? "%" : "");
5444         }
5445
5446         if (add_counter(msr_num, path, name_buffer, width, scope, type, format, 0))
5447                 fail++;
5448
5449         if (fail) {
5450                 help();
5451                 exit(1);
5452         }
5453 }
5454
5455 int is_deferred_skip(char *name)
5456 {
5457         int i;
5458
5459         for (i = 0; i < deferred_skip_index; ++i)
5460                 if (!strcmp(name, deferred_skip_names[i]))
5461                         return 1;
5462         return 0;
5463 }
5464
5465 void probe_sysfs(void)
5466 {
5467         char path[64];
5468         char name_buf[16];
5469         FILE *input;
5470         int state;
5471         char *sp;
5472
5473         if (!DO_BIC(BIC_sysfs))
5474                 return;
5475
5476         for (state = 10; state >= 0; --state) {
5477
5478                 sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name",
5479                         base_cpu, state);
5480                 input = fopen(path, "r");
5481                 if (input == NULL)
5482                         continue;
5483                 if (!fgets(name_buf, sizeof(name_buf), input))
5484                         err(1, "%s: failed to read file", path);
5485
5486                  /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */
5487                 sp = strchr(name_buf, '-');
5488                 if (!sp)
5489                         sp = strchrnul(name_buf, '\n');
5490                 *sp = '%';
5491                 *(sp + 1) = '\0';
5492
5493                 fclose(input);
5494
5495                 sprintf(path, "cpuidle/state%d/time", state);
5496
5497                 if (is_deferred_skip(name_buf))
5498                         continue;
5499
5500                 add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_USEC,
5501                                 FORMAT_PERCENT, SYSFS_PERCPU);
5502         }
5503
5504         for (state = 10; state >= 0; --state) {
5505
5506                 sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name",
5507                         base_cpu, state);
5508                 input = fopen(path, "r");
5509                 if (input == NULL)
5510                         continue;
5511                 if (!fgets(name_buf, sizeof(name_buf), input))
5512                         err(1, "%s: failed to read file", path);
5513                  /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */
5514                 sp = strchr(name_buf, '-');
5515                 if (!sp)
5516                         sp = strchrnul(name_buf, '\n');
5517                 *sp = '\0';
5518                 fclose(input);
5519
5520                 sprintf(path, "cpuidle/state%d/usage", state);
5521
5522                 if (is_deferred_skip(name_buf))
5523                         continue;
5524
5525                 add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_ITEMS,
5526                                 FORMAT_DELTA, SYSFS_PERCPU);
5527         }
5528
5529 }
5530
5531
5532 /*
5533  * parse cpuset with following syntax
5534  * 1,2,4..6,8-10 and set bits in cpu_subset
5535  */
5536 void parse_cpu_command(char *optarg)
5537 {
5538         unsigned int start, end;
5539         char *next;
5540
5541         if (!strcmp(optarg, "core")) {
5542                 if (cpu_subset)
5543                         goto error;
5544                 show_core_only++;
5545                 return;
5546         }
5547         if (!strcmp(optarg, "package")) {
5548                 if (cpu_subset)
5549                         goto error;
5550                 show_pkg_only++;
5551                 return;
5552         }
5553         if (show_core_only || show_pkg_only)
5554                 goto error;
5555
5556         cpu_subset = CPU_ALLOC(CPU_SUBSET_MAXCPUS);
5557         if (cpu_subset == NULL)
5558                 err(3, "CPU_ALLOC");
5559         cpu_subset_size = CPU_ALLOC_SIZE(CPU_SUBSET_MAXCPUS);
5560
5561         CPU_ZERO_S(cpu_subset_size, cpu_subset);
5562
5563         next = optarg;
5564
5565         while (next && *next) {
5566
5567                 if (*next == '-')       /* no negative cpu numbers */
5568                         goto error;
5569
5570                 start = strtoul(next, &next, 10);
5571
5572                 if (start >= CPU_SUBSET_MAXCPUS)
5573                         goto error;
5574                 CPU_SET_S(start, cpu_subset_size, cpu_subset);
5575
5576                 if (*next == '\0')
5577                         break;
5578
5579                 if (*next == ',') {
5580                         next += 1;
5581                         continue;
5582                 }
5583
5584                 if (*next == '-') {
5585                         next += 1;      /* start range */
5586                 } else if (*next == '.') {
5587                         next += 1;
5588                         if (*next == '.')
5589                                 next += 1;      /* start range */
5590                         else
5591                                 goto error;
5592                 }
5593
5594                 end = strtoul(next, &next, 10);
5595                 if (end <= start)
5596                         goto error;
5597
5598                 while (++start <= end) {
5599                         if (start >= CPU_SUBSET_MAXCPUS)
5600                                 goto error;
5601                         CPU_SET_S(start, cpu_subset_size, cpu_subset);
5602                 }
5603
5604                 if (*next == ',')
5605                         next += 1;
5606                 else if (*next != '\0')
5607                         goto error;
5608         }
5609
5610         return;
5611
5612 error:
5613         fprintf(stderr, "\"--cpu %s\" malformed\n", optarg);
5614         help();
5615         exit(-1);
5616 }
5617
5618
5619 void cmdline(int argc, char **argv)
5620 {
5621         int opt;
5622         int option_index = 0;
5623         static struct option long_options[] = {
5624                 {"add",         required_argument,      0, 'a'},
5625                 {"cpu",         required_argument,      0, 'c'},
5626                 {"Dump",        no_argument,            0, 'D'},
5627                 {"debug",       no_argument,            0, 'd'},        /* internal, not documented */
5628                 {"enable",      required_argument,      0, 'e'},
5629                 {"interval",    required_argument,      0, 'i'},
5630                 {"num_iterations",      required_argument,      0, 'n'},
5631                 {"help",        no_argument,            0, 'h'},
5632                 {"hide",        required_argument,      0, 'H'},        // meh, -h taken by --help
5633                 {"Joules",      no_argument,            0, 'J'},
5634                 {"list",        no_argument,            0, 'l'},
5635                 {"out",         required_argument,      0, 'o'},
5636                 {"quiet",       no_argument,            0, 'q'},
5637                 {"show",        required_argument,      0, 's'},
5638                 {"Summary",     no_argument,            0, 'S'},
5639                 {"TCC",         required_argument,      0, 'T'},
5640                 {"version",     no_argument,            0, 'v' },
5641                 {0,             0,                      0,  0 }
5642         };
5643
5644         progname = argv[0];
5645
5646         while ((opt = getopt_long_only(argc, argv, "+C:c:Dde:hi:Jn:o:qST:v",
5647                                 long_options, &option_index)) != -1) {
5648                 switch (opt) {
5649                 case 'a':
5650                         parse_add_command(optarg);
5651                         break;
5652                 case 'c':
5653                         parse_cpu_command(optarg);
5654                         break;
5655                 case 'D':
5656                         dump_only++;
5657                         break;
5658                 case 'e':
5659                         /* --enable specified counter */
5660                         bic_enabled = bic_enabled | bic_lookup(optarg, SHOW_LIST);
5661                         break;
5662                 case 'd':
5663                         debug++;
5664                         ENABLE_BIC(BIC_DISABLED_BY_DEFAULT);
5665                         break;
5666                 case 'H':
5667                         /*
5668                          * --hide: do not show those specified
5669                          *  multiple invocations simply clear more bits in enabled mask
5670                          */
5671                         bic_enabled &= ~bic_lookup(optarg, HIDE_LIST);
5672                         break;
5673                 case 'h':
5674                 default:
5675                         help();
5676                         exit(1);
5677                 case 'i':
5678                         {
5679                                 double interval = strtod(optarg, NULL);
5680
5681                                 if (interval < 0.001) {
5682                                         fprintf(outf, "interval %f seconds is too small\n",
5683                                                 interval);
5684                                         exit(2);
5685                                 }
5686
5687                                 interval_tv.tv_sec = interval_ts.tv_sec = interval;
5688                                 interval_tv.tv_usec = (interval - interval_tv.tv_sec) * 1000000;
5689                                 interval_ts.tv_nsec = (interval - interval_ts.tv_sec) * 1000000000;
5690                         }
5691                         break;
5692                 case 'J':
5693                         rapl_joules++;
5694                         break;
5695                 case 'l':
5696                         ENABLE_BIC(BIC_DISABLED_BY_DEFAULT);
5697                         list_header_only++;
5698                         quiet++;
5699                         break;
5700                 case 'o':
5701                         outf = fopen_or_die(optarg, "w");
5702                         break;
5703                 case 'q':
5704                         quiet = 1;
5705                         break;
5706                 case 'n':
5707                         num_iterations = strtod(optarg, NULL);
5708
5709                         if (num_iterations <= 0) {
5710                                 fprintf(outf, "iterations %d should be positive number\n",
5711                                         num_iterations);
5712                                 exit(2);
5713                         }
5714                         break;
5715                 case 's':
5716                         /*
5717                          * --show: show only those specified
5718                          *  The 1st invocation will clear and replace the enabled mask
5719                          *  subsequent invocations can add to it.
5720                          */
5721                         if (shown == 0)
5722                                 bic_enabled = bic_lookup(optarg, SHOW_LIST);
5723                         else
5724                                 bic_enabled |= bic_lookup(optarg, SHOW_LIST);
5725                         shown = 1;
5726                         break;
5727                 case 'S':
5728                         summary_only++;
5729                         break;
5730                 case 'T':
5731                         tcc_activation_temp_override = atoi(optarg);
5732                         break;
5733                 case 'v':
5734                         print_version();
5735                         exit(0);
5736                         break;
5737                 }
5738         }
5739 }
5740
5741 int main(int argc, char **argv)
5742 {
5743         outf = stderr;
5744         cmdline(argc, argv);
5745
5746         if (!quiet)
5747                 print_version();
5748
5749         probe_sysfs();
5750
5751         turbostat_init();
5752
5753         /* dump counters and exit */
5754         if (dump_only)
5755                 return get_and_dump_counters();
5756
5757         /* list header and exit */
5758         if (list_header_only) {
5759                 print_header(",");
5760                 flush_output_stdout();
5761                 return 0;
5762         }
5763
5764         /*
5765          * if any params left, it must be a command to fork
5766          */
5767         if (argc - optind)
5768                 return fork_it(argv + optind);
5769         else
5770                 turbostat_loop();
5771
5772         return 0;
5773 }