Merge tag 'backlight-next-5.7' of git://git.kernel.org/pub/scm/linux/kernel/git/lee...
[linux-2.6-microblaze.git] / tools / power / x86 / turbostat / turbostat.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * turbostat -- show CPU frequency and C-state residency
4  * on modern Intel and AMD processors.
5  *
6  * Copyright (c) 2013 Intel Corporation.
7  * Len Brown <len.brown@intel.com>
8  */
9
10 #define _GNU_SOURCE
11 #include MSRHEADER
12 #include INTEL_FAMILY_HEADER
13 #include <stdarg.h>
14 #include <stdio.h>
15 #include <err.h>
16 #include <unistd.h>
17 #include <sys/types.h>
18 #include <sys/wait.h>
19 #include <sys/stat.h>
20 #include <sys/select.h>
21 #include <sys/resource.h>
22 #include <fcntl.h>
23 #include <signal.h>
24 #include <sys/time.h>
25 #include <stdlib.h>
26 #include <getopt.h>
27 #include <dirent.h>
28 #include <string.h>
29 #include <ctype.h>
30 #include <sched.h>
31 #include <time.h>
32 #include <cpuid.h>
33 #include <sys/capability.h>
34 #include <errno.h>
35 #include <math.h>
36
/*
 * File-scope state shared by the functions in this file.  Only variables
 * whose use is visible near their declaration are annotated here; the
 * rest are set and consumed in parts of the file not shown alongside.
 */
37 char *proc_stat = "/proc/stat";
/* stream that help() writes its usage text to */
38 FILE *outf;
/*
 * Per-CPU table of open /dev/cpu/N/msr descriptors, filled on demand by
 * get_msr_fd(); a slot holding 0 is treated there as "not opened yet".
 */
39 int *fd_percpu;
/* both forms default to 5 seconds, the interval quoted in the help() text */
40 struct timeval interval_tv = {5, 0};
41 struct timespec interval_ts = {5, 0};
42 unsigned int num_iterations;
/* when set, bic_lookup() reports each deferred (unrecognized) column name */
43 unsigned int debug;
44 unsigned int quiet;
45 unsigned int shown;
/* when set, print_header() pads the IRQ and added item-counter column names */
46 unsigned int sums_need_wide_columns;
/* selects the *_J column names instead of the *Watt names in print_header() */
47 unsigned int rapl_joules;
48 unsigned int summary_only;
49 unsigned int list_header_only;
50 unsigned int dump_only;
51 unsigned int do_snb_cstates;
52 unsigned int do_knl_cstates;
53 unsigned int do_slm_cstates;
54 unsigned int use_c1_residency_msr;
55 unsigned int has_aperf;
56 unsigned int has_epb;
57 unsigned int do_irtl_snb;
58 unsigned int do_irtl_hsw;
59 unsigned int units = 1000000;   /* MHz etc */
60 unsigned int genuine_intel;
61 unsigned int authentic_amd;
62 unsigned int hygon_genuine;
63 unsigned int max_level, max_extended_level;
64 unsigned int has_invariant_tsc;
65 unsigned int do_nhm_platform_info;
66 unsigned int no_MSR_MISC_PWR_MGMT;
67 unsigned int aperf_mperf_multiplier = 1;
68 double bclk;
69 double base_hz;
70 unsigned int has_base_hz;
71 double tsc_tweak = 1.0;
72 unsigned int show_pkg_only;
73 unsigned int show_core_only;
/* outp is the write cursor that print_header() advances after every sprintf() */
74 char *output_buffer, *outp;
/* tested against the RAPL_* bits (e.g. RAPL_PER_CORE_ENERGY) in print_header() */
75 unsigned int do_rapl;
76 unsigned int do_dts;
77 unsigned int do_ptm;
78 unsigned long long  gfx_cur_rc6_ms;
79 unsigned long long cpuidle_cur_cpu_lpi_us;
80 unsigned long long cpuidle_cur_sys_lpi_us;
81 unsigned int gfx_cur_mhz;
82 unsigned int tcc_activation_temp;
83 unsigned int tcc_activation_temp_override;
84 double rapl_power_units, rapl_time_units;
85 double rapl_dram_energy_units, rapl_energy_units;
86 double rapl_joule_counter_range;
87 unsigned int do_core_perf_limit_reasons;
88 unsigned int has_automatic_cstate_conversion;
89 unsigned int do_gfx_perf_limit_reasons;
90 unsigned int do_ring_perf_limit_reasons;
91 unsigned int crystal_hz;
92 unsigned long long tsc_hz;
93 int base_cpu;
94 double discover_bclk(unsigned int family, unsigned int model);
95 unsigned int has_hwp;   /* IA32_PM_ENABLE, IA32_HWP_CAPABILITIES */
96                         /* IA32_HWP_REQUEST, IA32_HWP_STATUS */
97 unsigned int has_hwp_notify;            /* IA32_HWP_INTERRUPT */
98 unsigned int has_hwp_activity_window;   /* IA32_HWP_REQUEST[bits 41:32] */
99 unsigned int has_hwp_epp;               /* IA32_HWP_REQUEST[bits 31:24] */
100 unsigned int has_hwp_pkg;               /* IA32_HWP_REQUEST_PKG */
101 unsigned int has_misc_feature_control;
102 unsigned int first_counter_read = 1;
103 int ignore_stdin;
104
/*
 * RAPL feature bits.  print_header() tests do_rapl against
 * RAPL_PER_CORE_ENERGY, so these are presumably OR-ed into do_rapl; the
 * code that sets them is not visible here -- confirm against the probe code.
 * The comment under each bit names the MSR(s) that the bit stands for.
 */
105 #define RAPL_PKG                (1 << 0)
106                                         /* 0x610 MSR_PKG_POWER_LIMIT */
107                                         /* 0x611 MSR_PKG_ENERGY_STATUS */
108 #define RAPL_PKG_PERF_STATUS    (1 << 1)
109                                         /* 0x613 MSR_PKG_PERF_STATUS */
110 #define RAPL_PKG_POWER_INFO     (1 << 2)
111                                         /* 0x614 MSR_PKG_POWER_INFO */
112
113 #define RAPL_DRAM               (1 << 3)
114                                         /* 0x618 MSR_DRAM_POWER_LIMIT */
115                                         /* 0x619 MSR_DRAM_ENERGY_STATUS */
116 #define RAPL_DRAM_PERF_STATUS   (1 << 4)
117                                         /* 0x61b MSR_DRAM_PERF_STATUS */
118 #define RAPL_DRAM_POWER_INFO    (1 << 5)
119                                         /* 0x61c MSR_DRAM_POWER_INFO */
120
121 #define RAPL_CORES_POWER_LIMIT  (1 << 6)
122                                         /* 0x638 MSR_PP0_POWER_LIMIT */
123 #define RAPL_CORE_POLICY        (1 << 7)
124                                         /* 0x63a MSR_PP0_POLICY */
125
126 #define RAPL_GFX                (1 << 8)
127                                         /* 0x640 MSR_PP1_POWER_LIMIT */
128                                         /* 0x641 MSR_PP1_ENERGY_STATUS */
129                                         /* 0x642 MSR_PP1_POLICY */
130
131 #define RAPL_CORES_ENERGY_STATUS        (1 << 9)
132                                         /* 0x639 MSR_PP0_ENERGY_STATUS */
133 #define RAPL_PER_CORE_ENERGY    (1 << 10)
134                                         /* Indicates cores energy collection is per-core,
135                                          * not per-package. */
136 #define RAPL_AMD_F17H           (1 << 11)
137                                         /* 0xc0010299 MSR_RAPL_PWR_UNIT */
138                                         /* 0xc001029a MSR_CORE_ENERGY_STAT */
139                                         /* 0xc001029b MSR_PKG_ENERGY_STAT */
/* convenience mask: both core-domain bits (energy status and power limit) */
140 #define RAPL_CORES (RAPL_CORES_ENERGY_STATUS | RAPL_CORES_POWER_LIMIT)
141 #define TJMAX_DEFAULT   100
142
143 /* MSRs that are not yet in the kernel-provided header. */
144 #define MSR_RAPL_PWR_UNIT       0xc0010299
145 #define MSR_CORE_ENERGY_STAT    0xc001029a
146 #define MSR_PKG_ENERGY_STAT     0xc001029b
147
/*
 * NOTE: MAX() evaluates each argument twice; do not pass expressions with
 * side effects (e.g. i++).
 */
148 #define MAX(a, b) ((a) > (b) ? (a) : (b))
149
150 /*
151  * buffer size used by sscanf() for added column names
152  * Usually truncated to 7 characters, but also handles 18 columns for raw 64-bit counters
153  */
/*
 * NAME_BYTES sizes msr_counter.name.  The longest built-in name in bic[],
 * "Time_Of_Day_Seconds", is 19 characters and so fills it exactly once the
 * terminating NUL is counted -- a longer built-in name needs a larger value.
 * PATH_BYTES sizes msr_counter.path.
 */
154 #define NAME_BYTES 20
155 #define PATH_BYTES 128
156
157 int backwards_count;
158 char *progname;
159
160 #define CPU_SUBSET_MAXCPUS      1024    /* need to use before probe... */
/*
 * Dynamically sized CPU sets and their byte sizes:
 *   cpu_present_set  - queried by cpu_is_not_present()
 *   cpu_affinity_set - scratch set rebuilt by cpu_migrate() on every call
 *   cpu_subset       - not used in the code visible here
 */
161 cpu_set_t *cpu_present_set, *cpu_affinity_set, *cpu_subset;
162 size_t cpu_present_setsize, cpu_affinity_setsize, cpu_subset_size;
/* capacities of the counter[] arrays in core_data/pkg_data and thread_data */
163 #define MAX_ADDED_COUNTERS 8
164 #define MAX_ADDED_THREAD_COUNTERS 24
165 #define BITMASK_SIZE 32
166
/*
 * Counter values kept per hardware thread (logical CPU).  Two base pointers,
 * thread_even and thread_odd, are declared with the type; for_all_cpus()
 * locates an element from such a base with GET_THREAD().
 */
167 struct thread_data {
168         struct timeval tv_begin;
169         struct timeval tv_end;
170         struct timeval tv_delta;
171         unsigned long long tsc;
172         unsigned long long aperf;
173         unsigned long long mperf;
174         unsigned long long c1;
175         unsigned long long  irq_count;
176         unsigned int smi_count;
/* cpu_id is the value for_all_cpus() hands to cpu_is_not_present() */
177         unsigned int cpu_id;
178         unsigned int apic_id;
179         unsigned int x2apic_id;
180         unsigned int flags;
181 #define CPU_IS_FIRST_THREAD_IN_CORE     0x2
182 #define CPU_IS_FIRST_CORE_IN_PACKAGE    0x4
/* storage for user-added counters; presumably one slot per entry on sys.tp */
183         unsigned long long counter[MAX_ADDED_THREAD_COUNTERS];
184 } *thread_even, *thread_odd;
185
/*
 * Counter values kept per core.  for_all_cpus() locates an element from a
 * base pointer (core_even / core_odd) with GET_CORE().
 */
186 struct core_data {
187         unsigned long long c3;
188         unsigned long long c6;
189         unsigned long long c7;
190         unsigned long long mc6_us;      /* duplicate as per-core for now, even though per module */
191         unsigned int core_temp_c;
192         unsigned int core_energy;       /* MSR_CORE_ENERGY_STAT */
193         unsigned int core_id;
/* storage for user-added counters; presumably one slot per entry on sys.cp */
194         unsigned long long counter[MAX_ADDED_COUNTERS];
195 } *core_even, *core_odd;
196
/*
 * Counter values kept per package.  for_all_cpus() locates an element from a
 * base pointer (package_even / package_odd) with GET_PKG().
 */
197 struct pkg_data {
198         unsigned long long pc2;
199         unsigned long long pc3;
200         unsigned long long pc6;
201         unsigned long long pc7;
202         unsigned long long pc8;
203         unsigned long long pc9;
204         unsigned long long pc10;
205         unsigned long long cpu_lpi;
206         unsigned long long sys_lpi;
207         unsigned long long pkg_wtd_core_c0;
208         unsigned long long pkg_any_core_c0;
209         unsigned long long pkg_any_gfxe_c0;
210         unsigned long long pkg_both_core_gfxe_c0;
/* NOTE(review): signed, unlike the neighbouring fields -- reason not visible here */
211         long long gfx_rc6_ms;
212         unsigned int gfx_mhz;
213         unsigned int package_id;
214         unsigned int energy_pkg;        /* MSR_PKG_ENERGY_STATUS */
215         unsigned int energy_dram;       /* MSR_DRAM_ENERGY_STATUS */
216         unsigned int energy_cores;      /* MSR_PP0_ENERGY_STATUS */
217         unsigned int energy_gfx;        /* MSR_PP1_ENERGY_STATUS */
218         unsigned int rapl_pkg_perf_status;      /* MSR_PKG_PERF_STATUS */
219         unsigned int rapl_dram_perf_status;     /* MSR_DRAM_PERF_STATUS */
220         unsigned int pkg_temp_c;
/* storage for user-added counters; presumably one slot per entry on sys.pp */
221         unsigned long long counter[MAX_ADDED_COUNTERS];
222 } *package_even, *package_odd;
223
/*
 * Argument-list shorthands: each expands to the three base pointers
 * (thread, core, package) of one counter set, in the order for_all_cpus()
 * takes them after its callback.
 */
224 #define ODD_COUNTERS thread_odd, core_odd, package_odd
225 #define EVEN_COUNTERS thread_even, core_even, package_even
226
/*
 * Address of one thread_data element in a flat array laid out as
 * [package][node][core][thread], with the dimensions taken from topo:
 *   index = pkg_no  * nodes_per_pkg * cores_per_node * threads_per_core
 *         + node_no * cores_per_node * threads_per_core
 *         + core_no * threads_per_core
 *         + thread_no
 */
227 #define GET_THREAD(thread_base, thread_no, core_no, node_no, pkg_no)          \
228         ((thread_base) +                                                      \
229          ((pkg_no) *                                                          \
230           topo.nodes_per_pkg * topo.cores_per_node * topo.threads_per_core) + \
231          ((node_no) * topo.cores_per_node * topo.threads_per_core) +          \
232          ((core_no) * topo.threads_per_core) +                                \
233          (thread_no))
234
/*
 * Address of one core_data element in a flat array laid out as
 * [package][node][core]:
 *   index = pkg_no * nodes_per_pkg * cores_per_node
 *         + node_no * cores_per_node
 *         + core_no
 */
235 #define GET_CORE(core_base, core_no, node_no, pkg_no)                   \
236         ((core_base) +                                                  \
237          ((pkg_no) *  topo.nodes_per_pkg * topo.cores_per_node) +       \
238          ((node_no) * topo.cores_per_node) +                            \
239          (core_no))
240
241
/*
 * Address of element pkg_no in the pkg_data array starting at pkg_base.
 * Both arguments are parenthesized, like in GET_THREAD()/GET_CORE(), so that
 * an expression argument (e.g. a conditional) is not torn apart by '+'.
 */
#define GET_PKG(pkg_base, pkg_no) ((pkg_base) + (pkg_no))
243
/*
 * Attributes of a counter.  print_header() uses two of these values:
 * FORMAT_RAW selects a fixed-width column name, and COUNTER_ITEMS together
 * with sums_need_wide_columns selects an 8-character-wide name.
 */
244 enum counter_scope {SCOPE_CPU, SCOPE_CORE, SCOPE_PACKAGE};
245 enum counter_type {COUNTER_ITEMS, COUNTER_CYCLES, COUNTER_SECONDS, COUNTER_USEC};
246 enum counter_format {FORMAT_RAW, FORMAT_DELTA, FORMAT_PERCENT};
247
248 struct msr_counter {
249         unsigned int msr_num;
250         char name[NAME_BYTES];
251         char path[PATH_BYTES];
252         unsigned int width;
253         enum counter_type type;
254         enum counter_format format;
255         struct msr_counter *next;
256         unsigned int flags;
257 #define FLAGS_HIDE      (1 << 0)
258 #define FLAGS_SHOW      (1 << 1)
259 #define SYSFS_PERCPU    (1 << 1)
260 };
261
/*
 * Registry of run-time added counters.  tp, cp and pp are the heads of three
 * msr_counter lists that print_header() walks in that order; judging by the
 * member names they hold thread-, core- and package-scope counters, with the
 * added_*_counters members presumably holding the list lengths -- confirm
 * against the code that adds counters.
 */
262 struct sys_counters {
263         unsigned int added_thread_counters;
264         unsigned int added_core_counters;
265         unsigned int added_package_counters;
266         struct msr_counter *tp;
267         struct msr_counter *cp;
268         struct msr_counter *pp;
269 } sys;
270
/*
 * One thread_data, one core_data and one pkg_data record bundled together;
 * the single instance is named "average" (presumably the system-wide summary
 * row -- the code that fills it is not visible here).
 */
271 struct system_summary {
272         struct thread_data threads;
273         struct core_data cores;
274         struct pkg_data packages;
275 } average;
276
/*
 * Topology identifiers recorded for one CPU; "cpus" points at the table of
 * these (its indexing and allocation are not visible here).
 */
277 struct cpu_topology {
278         int physical_package_id;
279         int die_id;
280         int logical_cpu_id;
281         int physical_node_id;
282         int logical_node_id;    /* 0-based count within the package */
283         int physical_core_id;
284         int thread_id;
285         cpu_set_t *put_ids; /* Processing Unit/Thread IDs */
286 } *cpus;
287
/*
 * System-wide topology dimensions.  num_packages, nodes_per_pkg,
 * cores_per_node and threads_per_core bound the nested loops in
 * for_all_cpus() and are the strides used by GET_THREAD()/GET_CORE().
 */
288 struct topo_params {
289         int num_packages;
290         int num_die;
291         int num_cpus;
292         int num_cores;
293         int max_cpu_num;
294         int max_node_num;
295         int nodes_per_pkg;
296         int cores_per_node;
297         int threads_per_core;
298 } topo;
299
300 struct timeval tv_even, tv_odd, tv_delta;
301
302 int *irq_column_2_cpu;  /* /proc/interrupts column numbers */
303 int *irqs_per_cpu;              /* indexed by cpu_num */
304
305 void setup_all_buffers(void);
306
/*
 * Two candidate locations for the system low-power-idle residency counter,
 * one under sysfs and one under debugfs; sys_lpi_file presumably ends up
 * pointing at whichever one is usable (selection code not visible here).
 */
307 char *sys_lpi_file;
308 char *sys_lpi_file_sysfs = "/sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us";
309 char *sys_lpi_file_debugfs = "/sys/kernel/debug/pmc_core/slp_s0_residency_usec";
310
311 int cpu_is_not_present(int cpu)
312 {
313         return !CPU_ISSET_S(cpu, cpu_present_setsize, cpu_present_set);
314 }
315 /*
316  * run func(thread, core, package) in topology order
317  * skip non-present cpus
318  */
319
320 int for_all_cpus(int (func)(struct thread_data *, struct core_data *, struct pkg_data *),
321         struct thread_data *thread_base, struct core_data *core_base, struct pkg_data *pkg_base)
322 {
323         int retval, pkg_no, core_no, thread_no, node_no;
324
325         for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) {
326                 for (node_no = 0; node_no < topo.nodes_per_pkg; node_no++) {
327                         for (core_no = 0; core_no < topo.cores_per_node; ++core_no) {
328                                 for (thread_no = 0; thread_no <
329                                         topo.threads_per_core; ++thread_no) {
330                                         struct thread_data *t;
331                                         struct core_data *c;
332                                         struct pkg_data *p;
333
334                                         t = GET_THREAD(thread_base, thread_no,
335                                                        core_no, node_no,
336                                                        pkg_no);
337
338                                         if (cpu_is_not_present(t->cpu_id))
339                                                 continue;
340
341                                         c = GET_CORE(core_base, core_no,
342                                                      node_no, pkg_no);
343                                         p = GET_PKG(pkg_base, pkg_no);
344
345                                         retval = func(t, c, p);
346                                         if (retval)
347                                                 return retval;
348                                 }
349                         }
350                 }
351         }
352         return 0;
353 }
354
355 int cpu_migrate(int cpu)
356 {
357         CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set);
358         CPU_SET_S(cpu, cpu_affinity_setsize, cpu_affinity_set);
359         if (sched_setaffinity(0, cpu_affinity_setsize, cpu_affinity_set) == -1)
360                 return -1;
361         else
362                 return 0;
363 }
364 int get_msr_fd(int cpu)
365 {
366         char pathname[32];
367         int fd;
368
369         fd = fd_percpu[cpu];
370
371         if (fd)
372                 return fd;
373
374         sprintf(pathname, "/dev/cpu/%d/msr", cpu);
375         fd = open(pathname, O_RDONLY);
376         if (fd < 0)
377                 err(-1, "%s open failed, try chown or chmod +r /dev/cpu/*/msr, or run as root", pathname);
378
379         fd_percpu[cpu] = fd;
380
381         return fd;
382 }
383
/*
 * Read the 8-byte MSR at @offset on @cpu into *@msr, using the descriptor
 * supplied by get_msr_fd().
 * Always returns 0: a short or failed read terminates the program via err().
 */
int get_msr(int cpu, off_t offset, unsigned long long *msr)
{
        int fd = get_msr_fd(cpu);
        ssize_t nread = pread(fd, msr, sizeof(*msr), offset);

        if (nread != sizeof(*msr))
                err(-1, "cpu%d: msr offset 0x%llx read failed", cpu, (unsigned long long)offset);

        return 0;
}
395
396 /*
397  * This list matches the column headers, except
398  * 1. built-in only, the sysfs counters are not here -- we learn of those at run-time
399  * 2. Core and CPU are moved to the end, we can't have strings that contain them
400  *    matching on them for --show and --hide.
401  */
/*
 * The position of an entry is significant: bic_lookup() maps a name found
 * at index i to bit (1ULL << i), and the BIC_* masks are defined by those
 * same bit positions.  Insert or reorder entries only together with the
 * BIC_* definitions.
 * Entries use positional initializers in struct msr_counter member order;
 * members that are not listed are zero-initialized.
 */
402 struct msr_counter bic[] = {
403         { 0x0, "usec" },
404         { 0x0, "Time_Of_Day_Seconds" },
405         { 0x0, "Package" },
406         { 0x0, "Node" },
407         { 0x0, "Avg_MHz" },
408         { 0x0, "Busy%" },
409         { 0x0, "Bzy_MHz" },
410         { 0x0, "TSC_MHz" },
411         { 0x0, "IRQ" },
412         { 0x0, "SMI", "", 32, 0, FORMAT_DELTA, NULL},
413         { 0x0, "sysfs" },
414         { 0x0, "CPU%c1" },
415         { 0x0, "CPU%c3" },
416         { 0x0, "CPU%c6" },
417         { 0x0, "CPU%c7" },
418         { 0x0, "ThreadC" },
419         { 0x0, "CoreTmp" },
420         { 0x0, "CoreCnt" },
421         { 0x0, "PkgTmp" },
422         { 0x0, "GFX%rc6" },
423         { 0x0, "GFXMHz" },
424         { 0x0, "Pkg%pc2" },
425         { 0x0, "Pkg%pc3" },
426         { 0x0, "Pkg%pc6" },
427         { 0x0, "Pkg%pc7" },
428         { 0x0, "Pkg%pc8" },
429         { 0x0, "Pkg%pc9" },
430         { 0x0, "Pk%pc10" },
431         { 0x0, "CPU%LPI" },
432         { 0x0, "SYS%LPI" },
433         { 0x0, "PkgWatt" },
434         { 0x0, "CorWatt" },
435         { 0x0, "GFXWatt" },
436         { 0x0, "PkgCnt" },
437         { 0x0, "RAMWatt" },
438         { 0x0, "PKG_%" },
439         { 0x0, "RAM_%" },
440         { 0x0, "Pkg_J" },
441         { 0x0, "Cor_J" },
442         { 0x0, "GFX_J" },
443         { 0x0, "RAM_J" },
444         { 0x0, "Mod%c6" },
445         { 0x0, "Totl%C0" },
446         { 0x0, "Any%C0" },
447         { 0x0, "GFX%C0" },
448         { 0x0, "CPUGFX%" },
449         { 0x0, "Core" },
450         { 0x0, "CPU" },
451         { 0x0, "APIC" },
452         { 0x0, "X2APIC" },
453         { 0x0, "Die" },
454 };
455
/* number of entries in the built-in counter table bic[] */
#define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter))

/*
 * One mask bit per built-in column.  Bit n belongs to bic[n]: bic_lookup()
 * maps a name found at index n to (1ULL << n), so these values and the order
 * of bic[] must be changed together.
 */
#define BIC_USEC        (1ULL << 0)
#define BIC_TOD         (1ULL << 1)
#define BIC_Package     (1ULL << 2)
#define BIC_Node        (1ULL << 3)
#define BIC_Avg_MHz     (1ULL << 4)
#define BIC_Busy        (1ULL << 5)
#define BIC_Bzy_MHz     (1ULL << 6)
#define BIC_TSC_MHz     (1ULL << 7)
#define BIC_IRQ         (1ULL << 8)
#define BIC_SMI         (1ULL << 9)
#define BIC_sysfs       (1ULL << 10)
#define BIC_CPU_c1      (1ULL << 11)
#define BIC_CPU_c3      (1ULL << 12)
#define BIC_CPU_c6      (1ULL << 13)
#define BIC_CPU_c7      (1ULL << 14)
#define BIC_ThreadC     (1ULL << 15)
#define BIC_CoreTmp     (1ULL << 16)
#define BIC_CoreCnt     (1ULL << 17)
#define BIC_PkgTmp      (1ULL << 18)
#define BIC_GFX_rc6     (1ULL << 19)
#define BIC_GFXMHz      (1ULL << 20)
#define BIC_Pkgpc2      (1ULL << 21)
#define BIC_Pkgpc3      (1ULL << 22)
#define BIC_Pkgpc6      (1ULL << 23)
#define BIC_Pkgpc7      (1ULL << 24)
#define BIC_Pkgpc8      (1ULL << 25)
#define BIC_Pkgpc9      (1ULL << 26)
#define BIC_Pkgpc10     (1ULL << 27)
#define BIC_CPU_LPI     (1ULL << 28)
#define BIC_SYS_LPI     (1ULL << 29)
#define BIC_PkgWatt     (1ULL << 30)
#define BIC_CorWatt     (1ULL << 31)
#define BIC_GFXWatt     (1ULL << 32)
#define BIC_PkgCnt      (1ULL << 33)
#define BIC_RAMWatt     (1ULL << 34)
#define BIC_PKG__       (1ULL << 35)
#define BIC_RAM__       (1ULL << 36)
#define BIC_Pkg_J       (1ULL << 37)
#define BIC_Cor_J       (1ULL << 38)
#define BIC_GFX_J       (1ULL << 39)
#define BIC_RAM_J       (1ULL << 40)
#define BIC_Mod_c6      (1ULL << 41)
#define BIC_Totl_c0     (1ULL << 42)
#define BIC_Any_c0      (1ULL << 43)
#define BIC_GFX_c0      (1ULL << 44)
#define BIC_CPUGFX      (1ULL << 45)
#define BIC_Core        (1ULL << 46)
#define BIC_CPU         (1ULL << 47)
#define BIC_APIC        (1ULL << 48)
#define BIC_X2APIC      (1ULL << 49)
#define BIC_Die         (1ULL << 50)

#define BIC_DISABLED_BY_DEFAULT (BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC)

/*
 * bic_enabled: columns that are allowed to be shown (everything except the
 *              disabled-by-default set initially).
 * bic_present: columns that are available; starts with the handful listed
 *              here and is extended/reduced with BIC_PRESENT()/BIC_NOT_PRESENT().
 */
unsigned long long bic_enabled = (0xFFFFFFFFFFFFFFFFULL & ~BIC_DISABLED_BY_DEFAULT);
unsigned long long bic_present = BIC_USEC | BIC_TOD | BIC_sysfs | BIC_APIC | BIC_X2APIC;

/*
 * Mask helpers.  Every argument is parenthesized so that a combined mask
 * such as (A | B) works as expected: without the parentheses
 * BIC_NOT_PRESENT(A | B) expanded to "bic_present &= ~A | B" and
 * DO_BIC(A | B) to "bic_enabled & bic_present & A | B".
 *
 * DO_BIC          - non-zero when the column is both enabled and present
 * DO_BIC_READ     - non-zero when the column is present
 * ENABLE_BIC      - add the column(s) to bic_enabled
 * BIC_PRESENT     - add the column(s) to bic_present
 * BIC_NOT_PRESENT - remove the column(s) from bic_present
 */
#define DO_BIC(COUNTER_NAME) (bic_enabled & bic_present & (COUNTER_NAME))
#define DO_BIC_READ(COUNTER_NAME) (bic_present & (COUNTER_NAME))
#define ENABLE_BIC(COUNTER_NAME) (bic_enabled |= (COUNTER_NAME))
#define BIC_PRESENT(COUNTER_BIT) (bic_present |= (COUNTER_BIT))
#define BIC_NOT_PRESENT(COUNTER_BIT) (bic_present &= ~(COUNTER_BIT))

519
520
/*
 * Column names that bic_lookup() did not find in bic[] while not in
 * SHOW_LIST mode are parked here (pointers into the caller's string).
 * bic_lookup() exits once deferred_skip_index reaches MAX_DEFERRED.
 */
521 #define MAX_DEFERRED 16
522 char *deferred_skip_names[MAX_DEFERRED];
523 int deferred_skip_index;
524
525 /*
526  * HIDE_LIST - hide this list of counters, show the rest [default]
527  * SHOW_LIST - show this list of counters, hide the rest
528  */
529 enum show_hide_mode { SHOW_LIST, HIDE_LIST } global_show_hide_mode = HIDE_LIST;
530
531 void help(void)
532 {
533         fprintf(outf,
534         "Usage: turbostat [OPTIONS][(--interval seconds) | COMMAND ...]\n"
535         "\n"
536         "Turbostat forks the specified COMMAND and prints statistics\n"
537         "when COMMAND completes.\n"
538         "If no COMMAND is specified, turbostat wakes every 5-seconds\n"
539         "to print statistics, until interrupted.\n"
540         "  -a, --add    add a counter\n"
541         "                 eg. --add msr0x10,u64,cpu,delta,MY_TSC\n"
542         "  -c, --cpu    cpu-set limit output to summary plus cpu-set:\n"
543         "                 {core | package | j,k,l..m,n-p }\n"
544         "  -d, --debug  displays usec, Time_Of_Day_Seconds and more debugging\n"
545         "  -D, --Dump   displays the raw counter values\n"
546         "  -e, --enable [all | column]\n"
547         "               shows all or the specified disabled column\n"
548         "  -H, --hide [column|column,column,...]\n"
549         "               hide the specified column(s)\n"
550         "  -i, --interval sec.subsec\n"
551         "               Override default 5-second measurement interval\n"
552         "  -J, --Joules displays energy in Joules instead of Watts\n"
553         "  -l, --list   list column headers only\n"
554         "  -n, --num_iterations num\n"
555         "               number of the measurement iterations\n"
556         "  -o, --out file\n"
557         "               create or truncate \"file\" for all output\n"
558         "  -q, --quiet  skip decoding system configuration header\n"
559         "  -s, --show [column|column,column,...]\n"
560         "               show only the specified column(s)\n"
561         "  -S, --Summary\n"
562         "               limits output to 1-line system summary per interval\n"
563         "  -T, --TCC temperature\n"
564         "               sets the Thermal Control Circuit temperature in\n"
565         "                 degrees Celsius\n"
566         "  -h, --help   print this help message\n"
567         "  -v, --version        print version information\n"
568         "\n"
569         "For more help, run \"man turbostat\"\n");
570 }
571
572 /*
573  * bic_lookup
574  * for all the strings in comma separate name_list,
575  * set the approprate bit in return value.
576  */
577 unsigned long long bic_lookup(char *name_list, enum show_hide_mode mode)
578 {
579         int i;
580         unsigned long long retval = 0;
581
582         while (name_list) {
583                 char *comma;
584
585                 comma = strchr(name_list, ',');
586
587                 if (comma)
588                         *comma = '\0';
589
590                 if (!strcmp(name_list, "all"))
591                         return ~0;
592
593                 for (i = 0; i < MAX_BIC; ++i) {
594                         if (!strcmp(name_list, bic[i].name)) {
595                                 retval |= (1ULL << i);
596                                 break;
597                         }
598                 }
599                 if (i == MAX_BIC) {
600                         if (mode == SHOW_LIST) {
601                                 fprintf(stderr, "Invalid counter name: %s\n", name_list);
602                                 exit(-1);
603                         }
604                         deferred_skip_names[deferred_skip_index++] = name_list;
605                         if (debug)
606                                 fprintf(stderr, "deferred \"%s\"\n", name_list);
607                         if (deferred_skip_index >= MAX_DEFERRED) {
608                                 fprintf(stderr, "More than max %d un-recognized --skip options '%s'\n",
609                                         MAX_DEFERRED, name_list);
610                                 help();
611                                 exit(1);
612                         }
613                 }
614
615                 name_list = comma;
616                 if (name_list)
617                         name_list++;
618
619         }
620         return retval;
621 }
622
623
624 void print_header(char *delim)
625 {
626         struct msr_counter *mp;
627         int printed = 0;
628
629         if (DO_BIC(BIC_USEC))
630                 outp += sprintf(outp, "%susec", (printed++ ? delim : ""));
631         if (DO_BIC(BIC_TOD))
632                 outp += sprintf(outp, "%sTime_Of_Day_Seconds", (printed++ ? delim : ""));
633         if (DO_BIC(BIC_Package))
634                 outp += sprintf(outp, "%sPackage", (printed++ ? delim : ""));
635         if (DO_BIC(BIC_Die))
636                 outp += sprintf(outp, "%sDie", (printed++ ? delim : ""));
637         if (DO_BIC(BIC_Node))
638                 outp += sprintf(outp, "%sNode", (printed++ ? delim : ""));
639         if (DO_BIC(BIC_Core))
640                 outp += sprintf(outp, "%sCore", (printed++ ? delim : ""));
641         if (DO_BIC(BIC_CPU))
642                 outp += sprintf(outp, "%sCPU", (printed++ ? delim : ""));
643         if (DO_BIC(BIC_APIC))
644                 outp += sprintf(outp, "%sAPIC", (printed++ ? delim : ""));
645         if (DO_BIC(BIC_X2APIC))
646                 outp += sprintf(outp, "%sX2APIC", (printed++ ? delim : ""));
647         if (DO_BIC(BIC_Avg_MHz))
648                 outp += sprintf(outp, "%sAvg_MHz", (printed++ ? delim : ""));
649         if (DO_BIC(BIC_Busy))
650                 outp += sprintf(outp, "%sBusy%%", (printed++ ? delim : ""));
651         if (DO_BIC(BIC_Bzy_MHz))
652                 outp += sprintf(outp, "%sBzy_MHz", (printed++ ? delim : ""));
653         if (DO_BIC(BIC_TSC_MHz))
654                 outp += sprintf(outp, "%sTSC_MHz", (printed++ ? delim : ""));
655
656         if (DO_BIC(BIC_IRQ)) {
657                 if (sums_need_wide_columns)
658                         outp += sprintf(outp, "%s     IRQ", (printed++ ? delim : ""));
659                 else
660                         outp += sprintf(outp, "%sIRQ", (printed++ ? delim : ""));
661         }
662
663         if (DO_BIC(BIC_SMI))
664                 outp += sprintf(outp, "%sSMI", (printed++ ? delim : ""));
665
666         for (mp = sys.tp; mp; mp = mp->next) {
667
668                 if (mp->format == FORMAT_RAW) {
669                         if (mp->width == 64)
670                                 outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), mp->name);
671                         else
672                                 outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), mp->name);
673                 } else {
674                         if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
675                                 outp += sprintf(outp, "%s%8s", (printed++ ? delim : ""), mp->name);
676                         else
677                                 outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), mp->name);
678                 }
679         }
680
681         if (DO_BIC(BIC_CPU_c1))
682                 outp += sprintf(outp, "%sCPU%%c1", (printed++ ? delim : ""));
683         if (DO_BIC(BIC_CPU_c3))
684                 outp += sprintf(outp, "%sCPU%%c3", (printed++ ? delim : ""));
685         if (DO_BIC(BIC_CPU_c6))
686                 outp += sprintf(outp, "%sCPU%%c6", (printed++ ? delim : ""));
687         if (DO_BIC(BIC_CPU_c7))
688                 outp += sprintf(outp, "%sCPU%%c7", (printed++ ? delim : ""));
689
690         if (DO_BIC(BIC_Mod_c6))
691                 outp += sprintf(outp, "%sMod%%c6", (printed++ ? delim : ""));
692
693         if (DO_BIC(BIC_CoreTmp))
694                 outp += sprintf(outp, "%sCoreTmp", (printed++ ? delim : ""));
695
696         if (do_rapl && !rapl_joules) {
697                 if (DO_BIC(BIC_CorWatt) && (do_rapl & RAPL_PER_CORE_ENERGY))
698                         outp += sprintf(outp, "%sCorWatt", (printed++ ? delim : ""));
699         } else if (do_rapl && rapl_joules) {
700                 if (DO_BIC(BIC_Cor_J) && (do_rapl & RAPL_PER_CORE_ENERGY))
701                         outp += sprintf(outp, "%sCor_J", (printed++ ? delim : ""));
702         }
703
704         for (mp = sys.cp; mp; mp = mp->next) {
705                 if (mp->format == FORMAT_RAW) {
706                         if (mp->width == 64)
707                                 outp += sprintf(outp, "%s%18.18s", delim, mp->name);
708                         else
709                                 outp += sprintf(outp, "%s%10.10s", delim, mp->name);
710                 } else {
711                         if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
712                                 outp += sprintf(outp, "%s%8s", delim, mp->name);
713                         else
714                                 outp += sprintf(outp, "%s%s", delim, mp->name);
715                 }
716         }
717
718         if (DO_BIC(BIC_PkgTmp))
719                 outp += sprintf(outp, "%sPkgTmp", (printed++ ? delim : ""));
720
721         if (DO_BIC(BIC_GFX_rc6))
722                 outp += sprintf(outp, "%sGFX%%rc6", (printed++ ? delim : ""));
723
724         if (DO_BIC(BIC_GFXMHz))
725                 outp += sprintf(outp, "%sGFXMHz", (printed++ ? delim : ""));
726
727         if (DO_BIC(BIC_Totl_c0))
728                 outp += sprintf(outp, "%sTotl%%C0", (printed++ ? delim : ""));
729         if (DO_BIC(BIC_Any_c0))
730                 outp += sprintf(outp, "%sAny%%C0", (printed++ ? delim : ""));
731         if (DO_BIC(BIC_GFX_c0))
732                 outp += sprintf(outp, "%sGFX%%C0", (printed++ ? delim : ""));
733         if (DO_BIC(BIC_CPUGFX))
734                 outp += sprintf(outp, "%sCPUGFX%%", (printed++ ? delim : ""));
735
736         if (DO_BIC(BIC_Pkgpc2))
737                 outp += sprintf(outp, "%sPkg%%pc2", (printed++ ? delim : ""));
738         if (DO_BIC(BIC_Pkgpc3))
739                 outp += sprintf(outp, "%sPkg%%pc3", (printed++ ? delim : ""));
740         if (DO_BIC(BIC_Pkgpc6))
741                 outp += sprintf(outp, "%sPkg%%pc6", (printed++ ? delim : ""));
742         if (DO_BIC(BIC_Pkgpc7))
743                 outp += sprintf(outp, "%sPkg%%pc7", (printed++ ? delim : ""));
744         if (DO_BIC(BIC_Pkgpc8))
745                 outp += sprintf(outp, "%sPkg%%pc8", (printed++ ? delim : ""));
746         if (DO_BIC(BIC_Pkgpc9))
747                 outp += sprintf(outp, "%sPkg%%pc9", (printed++ ? delim : ""));
748         if (DO_BIC(BIC_Pkgpc10))
749                 outp += sprintf(outp, "%sPk%%pc10", (printed++ ? delim : ""));
750         if (DO_BIC(BIC_CPU_LPI))
751                 outp += sprintf(outp, "%sCPU%%LPI", (printed++ ? delim : ""));
752         if (DO_BIC(BIC_SYS_LPI))
753                 outp += sprintf(outp, "%sSYS%%LPI", (printed++ ? delim : ""));
754
755         if (do_rapl && !rapl_joules) {
756                 if (DO_BIC(BIC_PkgWatt))
757                         outp += sprintf(outp, "%sPkgWatt", (printed++ ? delim : ""));
758                 if (DO_BIC(BIC_CorWatt) && !(do_rapl & RAPL_PER_CORE_ENERGY))
759                         outp += sprintf(outp, "%sCorWatt", (printed++ ? delim : ""));
760                 if (DO_BIC(BIC_GFXWatt))
761                         outp += sprintf(outp, "%sGFXWatt", (printed++ ? delim : ""));
762                 if (DO_BIC(BIC_RAMWatt))
763                         outp += sprintf(outp, "%sRAMWatt", (printed++ ? delim : ""));
764                 if (DO_BIC(BIC_PKG__))
765                         outp += sprintf(outp, "%sPKG_%%", (printed++ ? delim : ""));
766                 if (DO_BIC(BIC_RAM__))
767                         outp += sprintf(outp, "%sRAM_%%", (printed++ ? delim : ""));
768         } else if (do_rapl && rapl_joules) {
769                 if (DO_BIC(BIC_Pkg_J))
770                         outp += sprintf(outp, "%sPkg_J", (printed++ ? delim : ""));
771                 if (DO_BIC(BIC_Cor_J) && !(do_rapl & RAPL_PER_CORE_ENERGY))
772                         outp += sprintf(outp, "%sCor_J", (printed++ ? delim : ""));
773                 if (DO_BIC(BIC_GFX_J))
774                         outp += sprintf(outp, "%sGFX_J", (printed++ ? delim : ""));
775                 if (DO_BIC(BIC_RAM_J))
776                         outp += sprintf(outp, "%sRAM_J", (printed++ ? delim : ""));
777                 if (DO_BIC(BIC_PKG__))
778                         outp += sprintf(outp, "%sPKG_%%", (printed++ ? delim : ""));
779                 if (DO_BIC(BIC_RAM__))
780                         outp += sprintf(outp, "%sRAM_%%", (printed++ ? delim : ""));
781         }
782         for (mp = sys.pp; mp; mp = mp->next) {
783                 if (mp->format == FORMAT_RAW) {
784                         if (mp->width == 64)
785                                 outp += sprintf(outp, "%s%18.18s", delim, mp->name);
786                         else
787                                 outp += sprintf(outp, "%s%10.10s", delim, mp->name);
788                 } else {
789                         if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
790                                 outp += sprintf(outp, "%s%8s", delim, mp->name);
791                         else
792                                 outp += sprintf(outp, "%s%s", delim, mp->name);
793                 }
794         }
795
796         outp += sprintf(outp, "\n");
797 }
798
799 int dump_counters(struct thread_data *t, struct core_data *c,
800         struct pkg_data *p)
801 {
802         int i;
803         struct msr_counter *mp;
804
805         outp += sprintf(outp, "t %p, c %p, p %p\n", t, c, p);
806
807         if (t) {
808                 outp += sprintf(outp, "CPU: %d flags 0x%x\n",
809                         t->cpu_id, t->flags);
810                 outp += sprintf(outp, "TSC: %016llX\n", t->tsc);
811                 outp += sprintf(outp, "aperf: %016llX\n", t->aperf);
812                 outp += sprintf(outp, "mperf: %016llX\n", t->mperf);
813                 outp += sprintf(outp, "c1: %016llX\n", t->c1);
814
815                 if (DO_BIC(BIC_IRQ))
816                         outp += sprintf(outp, "IRQ: %lld\n", t->irq_count);
817                 if (DO_BIC(BIC_SMI))
818                         outp += sprintf(outp, "SMI: %d\n", t->smi_count);
819
820                 for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
821                         outp += sprintf(outp, "tADDED [%d] msr0x%x: %08llX\n",
822                                 i, mp->msr_num, t->counter[i]);
823                 }
824         }
825
826         if (c) {
827                 outp += sprintf(outp, "core: %d\n", c->core_id);
828                 outp += sprintf(outp, "c3: %016llX\n", c->c3);
829                 outp += sprintf(outp, "c6: %016llX\n", c->c6);
830                 outp += sprintf(outp, "c7: %016llX\n", c->c7);
831                 outp += sprintf(outp, "DTS: %dC\n", c->core_temp_c);
832                 outp += sprintf(outp, "Joules: %0X\n", c->core_energy);
833
834                 for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
835                         outp += sprintf(outp, "cADDED [%d] msr0x%x: %08llX\n",
836                                 i, mp->msr_num, c->counter[i]);
837                 }
838                 outp += sprintf(outp, "mc6_us: %016llX\n", c->mc6_us);
839         }
840
841         if (p) {
842                 outp += sprintf(outp, "package: %d\n", p->package_id);
843
844                 outp += sprintf(outp, "Weighted cores: %016llX\n", p->pkg_wtd_core_c0);
845                 outp += sprintf(outp, "Any cores: %016llX\n", p->pkg_any_core_c0);
846                 outp += sprintf(outp, "Any GFX: %016llX\n", p->pkg_any_gfxe_c0);
847                 outp += sprintf(outp, "CPU + GFX: %016llX\n", p->pkg_both_core_gfxe_c0);
848
849                 outp += sprintf(outp, "pc2: %016llX\n", p->pc2);
850                 if (DO_BIC(BIC_Pkgpc3))
851                         outp += sprintf(outp, "pc3: %016llX\n", p->pc3);
852                 if (DO_BIC(BIC_Pkgpc6))
853                         outp += sprintf(outp, "pc6: %016llX\n", p->pc6);
854                 if (DO_BIC(BIC_Pkgpc7))
855                         outp += sprintf(outp, "pc7: %016llX\n", p->pc7);
856                 outp += sprintf(outp, "pc8: %016llX\n", p->pc8);
857                 outp += sprintf(outp, "pc9: %016llX\n", p->pc9);
858                 outp += sprintf(outp, "pc10: %016llX\n", p->pc10);
859                 outp += sprintf(outp, "cpu_lpi: %016llX\n", p->cpu_lpi);
860                 outp += sprintf(outp, "sys_lpi: %016llX\n", p->sys_lpi);
861                 outp += sprintf(outp, "Joules PKG: %0X\n", p->energy_pkg);
862                 outp += sprintf(outp, "Joules COR: %0X\n", p->energy_cores);
863                 outp += sprintf(outp, "Joules GFX: %0X\n", p->energy_gfx);
864                 outp += sprintf(outp, "Joules RAM: %0X\n", p->energy_dram);
865                 outp += sprintf(outp, "Throttle PKG: %0X\n",
866                         p->rapl_pkg_perf_status);
867                 outp += sprintf(outp, "Throttle RAM: %0X\n",
868                         p->rapl_dram_perf_status);
869                 outp += sprintf(outp, "PTM: %dC\n", p->pkg_temp_c);
870
871                 for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
872                         outp += sprintf(outp, "pADDED [%d] msr0x%x: %08llX\n",
873                                 i, mp->msr_num, p->counter[i]);
874                 }
875         }
876
877         outp += sprintf(outp, "\n");
878
879         return 0;
880 }
881
882 /*
883  * column formatting convention & formats
884  */
885 int format_counters(struct thread_data *t, struct core_data *c,
886         struct pkg_data *p)
887 {
888         double interval_float, tsc;
889         char *fmt8;
890         int i;
891         struct msr_counter *mp;
892         char *delim = "\t";
893         int printed = 0;
894
895          /* if showing only 1st thread in core and this isn't one, bail out */
896         if (show_core_only && !(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
897                 return 0;
898
899          /* if showing only 1st thread in pkg and this isn't one, bail out */
900         if (show_pkg_only && !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
901                 return 0;
902
903         /*if not summary line and --cpu is used */
904         if ((t != &average.threads) &&
905                 (cpu_subset && !CPU_ISSET_S(t->cpu_id, cpu_subset_size, cpu_subset)))
906                 return 0;
907
908         if (DO_BIC(BIC_USEC)) {
909                 /* on each row, print how many usec each timestamp took to gather */
910                 struct timeval tv;
911
912                 timersub(&t->tv_end, &t->tv_begin, &tv);
913                 outp += sprintf(outp, "%5ld\t", tv.tv_sec * 1000000 + tv.tv_usec);
914         }
915
916         /* Time_Of_Day_Seconds: on each row, print sec.usec last timestamp taken */
917         if (DO_BIC(BIC_TOD))
918                 outp += sprintf(outp, "%10ld.%06ld\t", t->tv_end.tv_sec, t->tv_end.tv_usec);
919
920         interval_float = t->tv_delta.tv_sec + t->tv_delta.tv_usec/1000000.0;
921
922         tsc = t->tsc * tsc_tweak;
923
924         /* topo columns, print blanks on 1st (average) line */
925         if (t == &average.threads) {
926                 if (DO_BIC(BIC_Package))
927                         outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
928                 if (DO_BIC(BIC_Die))
929                         outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
930                 if (DO_BIC(BIC_Node))
931                         outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
932                 if (DO_BIC(BIC_Core))
933                         outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
934                 if (DO_BIC(BIC_CPU))
935                         outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
936                 if (DO_BIC(BIC_APIC))
937                         outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
938                 if (DO_BIC(BIC_X2APIC))
939                         outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
940         } else {
941                 if (DO_BIC(BIC_Package)) {
942                         if (p)
943                                 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->package_id);
944                         else
945                                 outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
946                 }
947                 if (DO_BIC(BIC_Die)) {
948                         if (c)
949                                 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), cpus[t->cpu_id].die_id);
950                         else
951                                 outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
952                 }
953                 if (DO_BIC(BIC_Node)) {
954                         if (t)
955                                 outp += sprintf(outp, "%s%d",
956                                                 (printed++ ? delim : ""),
957                                               cpus[t->cpu_id].physical_node_id);
958                         else
959                                 outp += sprintf(outp, "%s-",
960                                                 (printed++ ? delim : ""));
961                 }
962                 if (DO_BIC(BIC_Core)) {
963                         if (c)
964                                 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), c->core_id);
965                         else
966                                 outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
967                 }
968                 if (DO_BIC(BIC_CPU))
969                         outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->cpu_id);
970                 if (DO_BIC(BIC_APIC))
971                         outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->apic_id);
972                 if (DO_BIC(BIC_X2APIC))
973                         outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->x2apic_id);
974         }
975
976         if (DO_BIC(BIC_Avg_MHz))
977                 outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""),
978                         1.0 / units * t->aperf / interval_float);
979
980         if (DO_BIC(BIC_Busy))
981                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->mperf/tsc);
982
983         if (DO_BIC(BIC_Bzy_MHz)) {
984                 if (has_base_hz)
985                         outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), base_hz / units * t->aperf / t->mperf);
986                 else
987                         outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""),
988                                 tsc / units * t->aperf / t->mperf / interval_float);
989         }
990
991         if (DO_BIC(BIC_TSC_MHz))
992                 outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), 1.0 * t->tsc/units/interval_float);
993
994         /* IRQ */
995         if (DO_BIC(BIC_IRQ)) {
996                 if (sums_need_wide_columns)
997                         outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), t->irq_count);
998                 else
999                         outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), t->irq_count);
1000         }
1001
1002         /* SMI */
1003         if (DO_BIC(BIC_SMI))
1004                 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->smi_count);
1005
1006         /* Added counters */
1007         for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
1008                 if (mp->format == FORMAT_RAW) {
1009                         if (mp->width == 32)
1010                                 outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int) t->counter[i]);
1011                         else
1012                                 outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), t->counter[i]);
1013                 } else if (mp->format == FORMAT_DELTA) {
1014                         if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
1015                                 outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), t->counter[i]);
1016                         else
1017                                 outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), t->counter[i]);
1018                 } else if (mp->format == FORMAT_PERCENT) {
1019                         if (mp->type == COUNTER_USEC)
1020                                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), t->counter[i]/interval_float/10000);
1021                         else
1022                                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->counter[i]/tsc);
1023                 }
1024         }
1025
1026         /* C1 */
1027         if (DO_BIC(BIC_CPU_c1))
1028                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->c1/tsc);
1029
1030
1031         /* print per-core data only for 1st thread in core */
1032         if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
1033                 goto done;
1034
1035         if (DO_BIC(BIC_CPU_c3))
1036                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c3/tsc);
1037         if (DO_BIC(BIC_CPU_c6))
1038                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c6/tsc);
1039         if (DO_BIC(BIC_CPU_c7))
1040                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c7/tsc);
1041
1042         /* Mod%c6 */
1043         if (DO_BIC(BIC_Mod_c6))
1044                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->mc6_us / tsc);
1045
1046         if (DO_BIC(BIC_CoreTmp))
1047                 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), c->core_temp_c);
1048
1049         for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
1050                 if (mp->format == FORMAT_RAW) {
1051                         if (mp->width == 32)
1052                                 outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int) c->counter[i]);
1053                         else
1054                                 outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), c->counter[i]);
1055                 } else if (mp->format == FORMAT_DELTA) {
1056                         if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
1057                                 outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), c->counter[i]);
1058                         else
1059                                 outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), c->counter[i]);
1060                 } else if (mp->format == FORMAT_PERCENT) {
1061                         outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->counter[i]/tsc);
1062                 }
1063         }
1064
1065         /*
1066          * If measurement interval exceeds minimum RAPL Joule Counter range,
1067          * indicate that results are suspect by printing "**" in fraction place.
1068          */
1069         if (interval_float < rapl_joule_counter_range)
1070                 fmt8 = "%s%.2f";
1071         else
1072                 fmt8 = "%6.0f**";
1073
1074         if (DO_BIC(BIC_CorWatt) && (do_rapl & RAPL_PER_CORE_ENERGY))
1075                 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), c->core_energy * rapl_energy_units / interval_float);
1076         if (DO_BIC(BIC_Cor_J) && (do_rapl & RAPL_PER_CORE_ENERGY))
1077                 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), c->core_energy * rapl_energy_units);
1078
1079         /* print per-package data only for 1st core in package */
1080         if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
1081                 goto done;
1082
1083         /* PkgTmp */
1084         if (DO_BIC(BIC_PkgTmp))
1085                 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->pkg_temp_c);
1086
1087         /* GFXrc6 */
1088         if (DO_BIC(BIC_GFX_rc6)) {
1089                 if (p->gfx_rc6_ms == -1) {      /* detect GFX counter reset */
1090                         outp += sprintf(outp, "%s**.**", (printed++ ? delim : ""));
1091                 } else {
1092                         outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""),
1093                                 p->gfx_rc6_ms / 10.0 / interval_float);
1094                 }
1095         }
1096
1097         /* GFXMHz */
1098         if (DO_BIC(BIC_GFXMHz))
1099                 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->gfx_mhz);
1100
1101         /* Totl%C0, Any%C0 GFX%C0 CPUGFX% */
1102         if (DO_BIC(BIC_Totl_c0))
1103                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_wtd_core_c0/tsc);
1104         if (DO_BIC(BIC_Any_c0))
1105                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_any_core_c0/tsc);
1106         if (DO_BIC(BIC_GFX_c0))
1107                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_any_gfxe_c0/tsc);
1108         if (DO_BIC(BIC_CPUGFX))
1109                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_both_core_gfxe_c0/tsc);
1110
1111         if (DO_BIC(BIC_Pkgpc2))
1112                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc2/tsc);
1113         if (DO_BIC(BIC_Pkgpc3))
1114                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc3/tsc);
1115         if (DO_BIC(BIC_Pkgpc6))
1116                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc6/tsc);
1117         if (DO_BIC(BIC_Pkgpc7))
1118                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc7/tsc);
1119         if (DO_BIC(BIC_Pkgpc8))
1120                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc8/tsc);
1121         if (DO_BIC(BIC_Pkgpc9))
1122                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc9/tsc);
1123         if (DO_BIC(BIC_Pkgpc10))
1124                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc10/tsc);
1125
1126         if (DO_BIC(BIC_CPU_LPI))
1127                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->cpu_lpi / 1000000.0 / interval_float);
1128         if (DO_BIC(BIC_SYS_LPI))
1129                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->sys_lpi / 1000000.0 / interval_float);
1130
1131         if (DO_BIC(BIC_PkgWatt))
1132                 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_pkg * rapl_energy_units / interval_float);
1133         if (DO_BIC(BIC_CorWatt) && !(do_rapl & RAPL_PER_CORE_ENERGY))
1134                 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_cores * rapl_energy_units / interval_float);
1135         if (DO_BIC(BIC_GFXWatt))
1136                 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_gfx * rapl_energy_units / interval_float);
1137         if (DO_BIC(BIC_RAMWatt))
1138                 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_dram * rapl_dram_energy_units / interval_float);
1139         if (DO_BIC(BIC_Pkg_J))
1140                 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_pkg * rapl_energy_units);
1141         if (DO_BIC(BIC_Cor_J) && !(do_rapl & RAPL_PER_CORE_ENERGY))
1142                 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_cores * rapl_energy_units);
1143         if (DO_BIC(BIC_GFX_J))
1144                 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_gfx * rapl_energy_units);
1145         if (DO_BIC(BIC_RAM_J))
1146                 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_dram * rapl_dram_energy_units);
1147         if (DO_BIC(BIC_PKG__))
1148                 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float);
1149         if (DO_BIC(BIC_RAM__))
1150                 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), 100.0 * p->rapl_dram_perf_status * rapl_time_units / interval_float);
1151
1152         for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
1153                 if (mp->format == FORMAT_RAW) {
1154                         if (mp->width == 32)
1155                                 outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int) p->counter[i]);
1156                         else
1157                                 outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), p->counter[i]);
1158                 } else if (mp->format == FORMAT_DELTA) {
1159                         if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
1160                                 outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), p->counter[i]);
1161                         else
1162                                 outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), p->counter[i]);
1163                 } else if (mp->format == FORMAT_PERCENT) {
1164                         outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->counter[i]/tsc);
1165                 }
1166         }
1167
1168 done:
1169         if (*(outp - 1) != '\n')
1170                 outp += sprintf(outp, "\n");
1171
1172         return 0;
1173 }
1174
1175 void flush_output_stdout(void)
1176 {
1177         FILE *filep;
1178
1179         if (outf == stderr)
1180                 filep = stdout;
1181         else
1182                 filep = outf;
1183
1184         fputs(output_buffer, filep);
1185         fflush(filep);
1186
1187         outp = output_buffer;
1188 }
1189 void flush_output_stderr(void)
1190 {
1191         fputs(output_buffer, outf);
1192         fflush(outf);
1193         outp = output_buffer;
1194 }
1195 void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
1196 {
1197         static int printed;
1198
1199         if (!printed || !summary_only)
1200                 print_header("\t");
1201
1202         format_counters(&average.threads, &average.cores, &average.packages);
1203
1204         printed = 1;
1205
1206         if (summary_only)
1207                 return;
1208
1209         for_all_cpus(format_counters, t, c, p);
1210 }
1211
/*
 * DELTA_WRAP32(new, old)
 * Overwrite "old" with the distance from "old" to "new".  When "new" is
 * not greater than "old" the counter is treated as having wrapped, and
 * 2^32 is added before subtracting.
 *
 * Both arguments are evaluated more than once, so pass plain lvalues
 * without side effects.  The do/while(0) wrapper makes the expansion a
 * single statement; the invocation must be followed by a semicolon.
 */
#define DELTA_WRAP32(new, old)					\
	do {							\
		if ((new) > (old)) {				\
			(old) = (new) - (old);			\
		} else {					\
			(old) = 0x100000000 + (new) - (old);	\
		}						\
	} while (0)
1218
1219 int
1220 delta_package(struct pkg_data *new, struct pkg_data *old)
1221 {
1222         int i;
1223         struct msr_counter *mp;
1224
1225
1226         if (DO_BIC(BIC_Totl_c0))
1227                 old->pkg_wtd_core_c0 = new->pkg_wtd_core_c0 - old->pkg_wtd_core_c0;
1228         if (DO_BIC(BIC_Any_c0))
1229                 old->pkg_any_core_c0 = new->pkg_any_core_c0 - old->pkg_any_core_c0;
1230         if (DO_BIC(BIC_GFX_c0))
1231                 old->pkg_any_gfxe_c0 = new->pkg_any_gfxe_c0 - old->pkg_any_gfxe_c0;
1232         if (DO_BIC(BIC_CPUGFX))
1233                 old->pkg_both_core_gfxe_c0 = new->pkg_both_core_gfxe_c0 - old->pkg_both_core_gfxe_c0;
1234
1235         old->pc2 = new->pc2 - old->pc2;
1236         if (DO_BIC(BIC_Pkgpc3))
1237                 old->pc3 = new->pc3 - old->pc3;
1238         if (DO_BIC(BIC_Pkgpc6))
1239                 old->pc6 = new->pc6 - old->pc6;
1240         if (DO_BIC(BIC_Pkgpc7))
1241                 old->pc7 = new->pc7 - old->pc7;
1242         old->pc8 = new->pc8 - old->pc8;
1243         old->pc9 = new->pc9 - old->pc9;
1244         old->pc10 = new->pc10 - old->pc10;
1245         old->cpu_lpi = new->cpu_lpi - old->cpu_lpi;
1246         old->sys_lpi = new->sys_lpi - old->sys_lpi;
1247         old->pkg_temp_c = new->pkg_temp_c;
1248
1249         /* flag an error when rc6 counter resets/wraps */
1250         if (old->gfx_rc6_ms >  new->gfx_rc6_ms)
1251                 old->gfx_rc6_ms = -1;
1252         else
1253                 old->gfx_rc6_ms = new->gfx_rc6_ms - old->gfx_rc6_ms;
1254
1255         old->gfx_mhz = new->gfx_mhz;
1256
1257         DELTA_WRAP32(new->energy_pkg, old->energy_pkg);
1258         DELTA_WRAP32(new->energy_cores, old->energy_cores);
1259         DELTA_WRAP32(new->energy_gfx, old->energy_gfx);
1260         DELTA_WRAP32(new->energy_dram, old->energy_dram);
1261         DELTA_WRAP32(new->rapl_pkg_perf_status, old->rapl_pkg_perf_status);
1262         DELTA_WRAP32(new->rapl_dram_perf_status, old->rapl_dram_perf_status);
1263
1264         for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
1265                 if (mp->format == FORMAT_RAW)
1266                         old->counter[i] = new->counter[i];
1267                 else
1268                         old->counter[i] = new->counter[i] - old->counter[i];
1269         }
1270
1271         return 0;
1272 }
1273
1274 void
1275 delta_core(struct core_data *new, struct core_data *old)
1276 {
1277         int i;
1278         struct msr_counter *mp;
1279
1280         old->c3 = new->c3 - old->c3;
1281         old->c6 = new->c6 - old->c6;
1282         old->c7 = new->c7 - old->c7;
1283         old->core_temp_c = new->core_temp_c;
1284         old->mc6_us = new->mc6_us - old->mc6_us;
1285
1286         DELTA_WRAP32(new->core_energy, old->core_energy);
1287
1288         for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
1289                 if (mp->format == FORMAT_RAW)
1290                         old->counter[i] = new->counter[i];
1291                 else
1292                         old->counter[i] = new->counter[i] - old->counter[i];
1293         }
1294 }
1295
1296 int soft_c1_residency_display(int bic)
1297 {
1298         if (!DO_BIC(BIC_CPU_c1) || use_c1_residency_msr)
1299                 return 0;
1300
1301         return DO_BIC_READ(bic);
1302 }
1303
1304 /*
1305  * old = new - old
1306  */
1307 int
1308 delta_thread(struct thread_data *new, struct thread_data *old,
1309         struct core_data *core_delta)
1310 {
1311         int i;
1312         struct msr_counter *mp;
1313
1314         /* we run cpuid just the 1st time, copy the results */
1315         if (DO_BIC(BIC_APIC))
1316                 new->apic_id = old->apic_id;
1317         if (DO_BIC(BIC_X2APIC))
1318                 new->x2apic_id = old->x2apic_id;
1319
1320         /*
1321          * the timestamps from start of measurement interval are in "old"
1322          * the timestamp from end of measurement interval are in "new"
1323          * over-write old w/ new so we can print end of interval values
1324          */
1325
1326         timersub(&new->tv_begin, &old->tv_begin, &old->tv_delta);
1327         old->tv_begin = new->tv_begin;
1328         old->tv_end = new->tv_end;
1329
1330         old->tsc = new->tsc - old->tsc;
1331
1332         /* check for TSC < 1 Mcycles over interval */
1333         if (old->tsc < (1000 * 1000))
1334                 errx(-3, "Insanely slow TSC rate, TSC stops in idle?\n"
1335                      "You can disable all c-states by booting with \"idle=poll\"\n"
1336                      "or just the deep ones with \"processor.max_cstate=1\"");
1337
1338         old->c1 = new->c1 - old->c1;
1339
1340         if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz) ||
1341             soft_c1_residency_display(BIC_Avg_MHz)) {
1342                 if ((new->aperf > old->aperf) && (new->mperf > old->mperf)) {
1343                         old->aperf = new->aperf - old->aperf;
1344                         old->mperf = new->mperf - old->mperf;
1345                 } else {
1346                         return -1;
1347                 }
1348         }
1349
1350
1351         if (use_c1_residency_msr) {
1352                 /*
1353                  * Some models have a dedicated C1 residency MSR,
1354                  * which should be more accurate than the derivation below.
1355                  */
1356         } else {
1357                 /*
1358                  * As counter collection is not atomic,
1359                  * it is possible for mperf's non-halted cycles + idle states
1360                  * to exceed TSC's all cycles: show c1 = 0% in that case.
1361                  */
1362                 if ((old->mperf + core_delta->c3 + core_delta->c6 + core_delta->c7) > (old->tsc * tsc_tweak))
1363                         old->c1 = 0;
1364                 else {
1365                         /* normal case, derive c1 */
1366                         old->c1 = (old->tsc * tsc_tweak) - old->mperf - core_delta->c3
1367                                 - core_delta->c6 - core_delta->c7;
1368                 }
1369         }
1370
1371         if (old->mperf == 0) {
1372                 if (debug > 1)
1373                         fprintf(outf, "cpu%d MPERF 0!\n", old->cpu_id);
1374                 old->mperf = 1; /* divide by 0 protection */
1375         }
1376
1377         if (DO_BIC(BIC_IRQ))
1378                 old->irq_count = new->irq_count - old->irq_count;
1379
1380         if (DO_BIC(BIC_SMI))
1381                 old->smi_count = new->smi_count - old->smi_count;
1382
1383         for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
1384                 if (mp->format == FORMAT_RAW)
1385                         old->counter[i] = new->counter[i];
1386                 else
1387                         old->counter[i] = new->counter[i] - old->counter[i];
1388         }
1389         return 0;
1390 }
1391
1392 int delta_cpu(struct thread_data *t, struct core_data *c,
1393         struct pkg_data *p, struct thread_data *t2,
1394         struct core_data *c2, struct pkg_data *p2)
1395 {
1396         int retval = 0;
1397
1398         /* calculate core delta only for 1st thread in core */
1399         if (t->flags & CPU_IS_FIRST_THREAD_IN_CORE)
1400                 delta_core(c, c2);
1401
1402         /* always calculate thread delta */
1403         retval = delta_thread(t, t2, c2);       /* c2 is core delta */
1404         if (retval)
1405                 return retval;
1406
1407         /* calculate package delta only for 1st core in package */
1408         if (t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)
1409                 retval = delta_package(p, p2);
1410
1411         return retval;
1412 }
1413
1414 void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
1415 {
1416         int i;
1417         struct msr_counter  *mp;
1418
1419         t->tv_begin.tv_sec = 0;
1420         t->tv_begin.tv_usec = 0;
1421         t->tv_end.tv_sec = 0;
1422         t->tv_end.tv_usec = 0;
1423         t->tv_delta.tv_sec = 0;
1424         t->tv_delta.tv_usec = 0;
1425
1426         t->tsc = 0;
1427         t->aperf = 0;
1428         t->mperf = 0;
1429         t->c1 = 0;
1430
1431         t->irq_count = 0;
1432         t->smi_count = 0;
1433
1434         /* tells format_counters to dump all fields from this set */
1435         t->flags = CPU_IS_FIRST_THREAD_IN_CORE | CPU_IS_FIRST_CORE_IN_PACKAGE;
1436
1437         c->c3 = 0;
1438         c->c6 = 0;
1439         c->c7 = 0;
1440         c->mc6_us = 0;
1441         c->core_temp_c = 0;
1442         c->core_energy = 0;
1443
1444         p->pkg_wtd_core_c0 = 0;
1445         p->pkg_any_core_c0 = 0;
1446         p->pkg_any_gfxe_c0 = 0;
1447         p->pkg_both_core_gfxe_c0 = 0;
1448
1449         p->pc2 = 0;
1450         if (DO_BIC(BIC_Pkgpc3))
1451                 p->pc3 = 0;
1452         if (DO_BIC(BIC_Pkgpc6))
1453                 p->pc6 = 0;
1454         if (DO_BIC(BIC_Pkgpc7))
1455                 p->pc7 = 0;
1456         p->pc8 = 0;
1457         p->pc9 = 0;
1458         p->pc10 = 0;
1459         p->cpu_lpi = 0;
1460         p->sys_lpi = 0;
1461
1462         p->energy_pkg = 0;
1463         p->energy_dram = 0;
1464         p->energy_cores = 0;
1465         p->energy_gfx = 0;
1466         p->rapl_pkg_perf_status = 0;
1467         p->rapl_dram_perf_status = 0;
1468         p->pkg_temp_c = 0;
1469
1470         p->gfx_rc6_ms = 0;
1471         p->gfx_mhz = 0;
1472         for (i = 0, mp = sys.tp; mp; i++, mp = mp->next)
1473                 t->counter[i] = 0;
1474
1475         for (i = 0, mp = sys.cp; mp; i++, mp = mp->next)
1476                 c->counter[i] = 0;
1477
1478         for (i = 0, mp = sys.pp; mp; i++, mp = mp->next)
1479                 p->counter[i] = 0;
1480 }
1481 int sum_counters(struct thread_data *t, struct core_data *c,
1482         struct pkg_data *p)
1483 {
1484         int i;
1485         struct msr_counter *mp;
1486
1487         /* copy un-changing apic_id's */
1488         if (DO_BIC(BIC_APIC))
1489                 average.threads.apic_id = t->apic_id;
1490         if (DO_BIC(BIC_X2APIC))
1491                 average.threads.x2apic_id = t->x2apic_id;
1492
1493         /* remember first tv_begin */
1494         if (average.threads.tv_begin.tv_sec == 0)
1495                 average.threads.tv_begin = t->tv_begin;
1496
1497         /* remember last tv_end */
1498         average.threads.tv_end = t->tv_end;
1499
1500         average.threads.tsc += t->tsc;
1501         average.threads.aperf += t->aperf;
1502         average.threads.mperf += t->mperf;
1503         average.threads.c1 += t->c1;
1504
1505         average.threads.irq_count += t->irq_count;
1506         average.threads.smi_count += t->smi_count;
1507
1508         for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
1509                 if (mp->format == FORMAT_RAW)
1510                         continue;
1511                 average.threads.counter[i] += t->counter[i];
1512         }
1513
1514         /* sum per-core values only for 1st thread in core */
1515         if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
1516                 return 0;
1517
1518         average.cores.c3 += c->c3;
1519         average.cores.c6 += c->c6;
1520         average.cores.c7 += c->c7;
1521         average.cores.mc6_us += c->mc6_us;
1522
1523         average.cores.core_temp_c = MAX(average.cores.core_temp_c, c->core_temp_c);
1524
1525         average.cores.core_energy += c->core_energy;
1526
1527         for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
1528                 if (mp->format == FORMAT_RAW)
1529                         continue;
1530                 average.cores.counter[i] += c->counter[i];
1531         }
1532
1533         /* sum per-pkg values only for 1st core in pkg */
1534         if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
1535                 return 0;
1536
1537         if (DO_BIC(BIC_Totl_c0))
1538                 average.packages.pkg_wtd_core_c0 += p->pkg_wtd_core_c0;
1539         if (DO_BIC(BIC_Any_c0))
1540                 average.packages.pkg_any_core_c0 += p->pkg_any_core_c0;
1541         if (DO_BIC(BIC_GFX_c0))
1542                 average.packages.pkg_any_gfxe_c0 += p->pkg_any_gfxe_c0;
1543         if (DO_BIC(BIC_CPUGFX))
1544                 average.packages.pkg_both_core_gfxe_c0 += p->pkg_both_core_gfxe_c0;
1545
1546         average.packages.pc2 += p->pc2;
1547         if (DO_BIC(BIC_Pkgpc3))
1548                 average.packages.pc3 += p->pc3;
1549         if (DO_BIC(BIC_Pkgpc6))
1550                 average.packages.pc6 += p->pc6;
1551         if (DO_BIC(BIC_Pkgpc7))
1552                 average.packages.pc7 += p->pc7;
1553         average.packages.pc8 += p->pc8;
1554         average.packages.pc9 += p->pc9;
1555         average.packages.pc10 += p->pc10;
1556
1557         average.packages.cpu_lpi = p->cpu_lpi;
1558         average.packages.sys_lpi = p->sys_lpi;
1559
1560         average.packages.energy_pkg += p->energy_pkg;
1561         average.packages.energy_dram += p->energy_dram;
1562         average.packages.energy_cores += p->energy_cores;
1563         average.packages.energy_gfx += p->energy_gfx;
1564
1565         average.packages.gfx_rc6_ms = p->gfx_rc6_ms;
1566         average.packages.gfx_mhz = p->gfx_mhz;
1567
1568         average.packages.pkg_temp_c = MAX(average.packages.pkg_temp_c, p->pkg_temp_c);
1569
1570         average.packages.rapl_pkg_perf_status += p->rapl_pkg_perf_status;
1571         average.packages.rapl_dram_perf_status += p->rapl_dram_perf_status;
1572
1573         for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
1574                 if (mp->format == FORMAT_RAW)
1575                         continue;
1576                 average.packages.counter[i] += p->counter[i];
1577         }
1578         return 0;
1579 }
1580 /*
1581  * sum the counters for all cpus in the system
1582  * compute the weighted average
1583  */
1584 void compute_average(struct thread_data *t, struct core_data *c,
1585         struct pkg_data *p)
1586 {
1587         int i;
1588         struct msr_counter *mp;
1589
1590         clear_counters(&average.threads, &average.cores, &average.packages);
1591
1592         for_all_cpus(sum_counters, t, c, p);
1593
1594         /* Use the global time delta for the average. */
1595         average.threads.tv_delta = tv_delta;
1596
1597         average.threads.tsc /= topo.num_cpus;
1598         average.threads.aperf /= topo.num_cpus;
1599         average.threads.mperf /= topo.num_cpus;
1600         average.threads.c1 /= topo.num_cpus;
1601
1602         if (average.threads.irq_count > 9999999)
1603                 sums_need_wide_columns = 1;
1604
1605         average.cores.c3 /= topo.num_cores;
1606         average.cores.c6 /= topo.num_cores;
1607         average.cores.c7 /= topo.num_cores;
1608         average.cores.mc6_us /= topo.num_cores;
1609
1610         if (DO_BIC(BIC_Totl_c0))
1611                 average.packages.pkg_wtd_core_c0 /= topo.num_packages;
1612         if (DO_BIC(BIC_Any_c0))
1613                 average.packages.pkg_any_core_c0 /= topo.num_packages;
1614         if (DO_BIC(BIC_GFX_c0))
1615                 average.packages.pkg_any_gfxe_c0 /= topo.num_packages;
1616         if (DO_BIC(BIC_CPUGFX))
1617                 average.packages.pkg_both_core_gfxe_c0 /= topo.num_packages;
1618
1619         average.packages.pc2 /= topo.num_packages;
1620         if (DO_BIC(BIC_Pkgpc3))
1621                 average.packages.pc3 /= topo.num_packages;
1622         if (DO_BIC(BIC_Pkgpc6))
1623                 average.packages.pc6 /= topo.num_packages;
1624         if (DO_BIC(BIC_Pkgpc7))
1625                 average.packages.pc7 /= topo.num_packages;
1626
1627         average.packages.pc8 /= topo.num_packages;
1628         average.packages.pc9 /= topo.num_packages;
1629         average.packages.pc10 /= topo.num_packages;
1630
1631         for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
1632                 if (mp->format == FORMAT_RAW)
1633                         continue;
1634                 if (mp->type == COUNTER_ITEMS) {
1635                         if (average.threads.counter[i] > 9999999)
1636                                 sums_need_wide_columns = 1;
1637                         continue;
1638                 }
1639                 average.threads.counter[i] /= topo.num_cpus;
1640         }
1641         for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
1642                 if (mp->format == FORMAT_RAW)
1643                         continue;
1644                 if (mp->type == COUNTER_ITEMS) {
1645                         if (average.cores.counter[i] > 9999999)
1646                                 sums_need_wide_columns = 1;
1647                 }
1648                 average.cores.counter[i] /= topo.num_cores;
1649         }
1650         for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
1651                 if (mp->format == FORMAT_RAW)
1652                         continue;
1653                 if (mp->type == COUNTER_ITEMS) {
1654                         if (average.packages.counter[i] > 9999999)
1655                                 sums_need_wide_columns = 1;
1656                 }
1657                 average.packages.counter[i] /= topo.num_packages;
1658         }
1659 }
1660
/*
 * rdtsc()
 *
 * Execute the x86 "rdtsc" instruction and return its EDX:EAX result
 * as a single 64-bit value.
 */
static unsigned long long rdtsc(void)
{
	unsigned int lo, hi;

	__asm__ __volatile__("rdtsc" : "=a" (lo), "=d" (hi));

	return ((unsigned long long)hi << 32) | lo;
}
1669
/*
 * fopen_or_die()
 *
 * fopen() wrapper that never returns NULL: when the open fails the
 * program exits with status 1 through err().
 */
FILE *fopen_or_die(const char *path, const char *mode)
{
	FILE *stream;

	stream = fopen(path, mode);
	if (stream == NULL)
		err(1, "%s: open failed", path);

	return stream;
}
/*
 * snapshot_sysfs_counter()
 *
 * Open the file at path, parse one unsigned decimal integer from it
 * and return that value.
 *
 * Exits the program (status 1) when the file cannot be opened or does
 * not start with a number.
 */
unsigned long long snapshot_sysfs_counter(char *path)
{
	FILE *fp;
	int retval;
	unsigned long long counter;

	fp = fopen_or_die(path, "r");

	/*
	 * counter is unsigned, so convert with %llu: it matches the
	 * argument type and accepts values above LLONG_MAX.
	 */
	retval = fscanf(fp, "%llu", &counter);
	if (retval != 1)
		err(1, "snapshot_sysfs_counter(%s)", path);

	fclose(fp);

	return counter;
}
1702
1703 int get_mp(int cpu, struct msr_counter *mp, unsigned long long *counterp)
1704 {
1705         if (mp->msr_num != 0) {
1706                 if (get_msr(cpu, mp->msr_num, counterp))
1707                         return -1;
1708         } else {
1709                 char path[128 + PATH_BYTES];
1710
1711                 if (mp->flags & SYSFS_PERCPU) {
1712                         sprintf(path, "/sys/devices/system/cpu/cpu%d/%s",
1713                                  cpu, mp->path);
1714
1715                         *counterp = snapshot_sysfs_counter(path);
1716                 } else {
1717                         *counterp = snapshot_sysfs_counter(mp->path);
1718                 }
1719         }
1720
1721         return 0;
1722 }
1723
1724 void get_apic_id(struct thread_data *t)
1725 {
1726         unsigned int eax, ebx, ecx, edx;
1727
1728         if (DO_BIC(BIC_APIC)) {
1729                 eax = ebx = ecx = edx = 0;
1730                 __cpuid(1, eax, ebx, ecx, edx);
1731
1732                 t->apic_id = (ebx >> 24) & 0xff;
1733         }
1734
1735         if (!DO_BIC(BIC_X2APIC))
1736                 return;
1737
1738         if (authentic_amd || hygon_genuine) {
1739                 unsigned int topology_extensions;
1740
1741                 if (max_extended_level < 0x8000001e)
1742                         return;
1743
1744                 eax = ebx = ecx = edx = 0;
1745                 __cpuid(0x80000001, eax, ebx, ecx, edx);
1746                         topology_extensions = ecx & (1 << 22);
1747
1748                 if (topology_extensions == 0)
1749                         return;
1750
1751                 eax = ebx = ecx = edx = 0;
1752                 __cpuid(0x8000001e, eax, ebx, ecx, edx);
1753
1754                 t->x2apic_id = eax;
1755                 return;
1756         }
1757
1758         if (!genuine_intel)
1759                 return;
1760
1761         if (max_level < 0xb)
1762                 return;
1763
1764         ecx = 0;
1765         __cpuid(0xb, eax, ebx, ecx, edx);
1766         t->x2apic_id = edx;
1767
1768         if (debug && (t->apic_id != (t->x2apic_id & 0xff)))
1769                 fprintf(outf, "cpu%d: BIOS BUG: apic 0x%x x2apic 0x%x\n",
1770                                 t->cpu_id, t->apic_id, t->x2apic_id);
1771 }
1772
1773 /*
1774  * get_counters(...)
1775  * migrate to cpu
1776  * acquire and record local counters for that cpu
1777  */
1778 int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
1779 {
1780         int cpu = t->cpu_id;
1781         unsigned long long msr;
1782         int aperf_mperf_retry_count = 0;
1783         struct msr_counter *mp;
1784         int i;
1785
1786         if (cpu_migrate(cpu)) {
1787                 fprintf(outf, "Could not migrate to CPU %d\n", cpu);
1788                 return -1;
1789         }
1790
1791         gettimeofday(&t->tv_begin, (struct timezone *)NULL);
1792
1793         if (first_counter_read)
1794                 get_apic_id(t);
1795 retry:
1796         t->tsc = rdtsc();       /* we are running on local CPU of interest */
1797
1798         if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz) ||
1799             soft_c1_residency_display(BIC_Avg_MHz)) {
1800                 unsigned long long tsc_before, tsc_between, tsc_after, aperf_time, mperf_time;
1801
1802                 /*
1803                  * The TSC, APERF and MPERF must be read together for
1804                  * APERF/MPERF and MPERF/TSC to give accurate results.
1805                  *
1806                  * Unfortunately, APERF and MPERF are read by
1807                  * individual system call, so delays may occur
1808                  * between them.  If the time to read them
1809                  * varies by a large amount, we re-read them.
1810                  */
1811
1812                 /*
1813                  * This initial dummy APERF read has been seen to
1814                  * reduce jitter in the subsequent reads.
1815                  */
1816
1817                 if (get_msr(cpu, MSR_IA32_APERF, &t->aperf))
1818                         return -3;
1819
1820                 t->tsc = rdtsc();       /* re-read close to APERF */
1821
1822                 tsc_before = t->tsc;
1823
1824                 if (get_msr(cpu, MSR_IA32_APERF, &t->aperf))
1825                         return -3;
1826
1827                 tsc_between = rdtsc();
1828
1829                 if (get_msr(cpu, MSR_IA32_MPERF, &t->mperf))
1830                         return -4;
1831
1832                 tsc_after = rdtsc();
1833
1834                 aperf_time = tsc_between - tsc_before;
1835                 mperf_time = tsc_after - tsc_between;
1836
1837                 /*
1838                  * If the system call latency to read APERF and MPERF
1839                  * differ by more than 2x, then try again.
1840                  */
1841                 if ((aperf_time > (2 * mperf_time)) || (mperf_time > (2 * aperf_time))) {
1842                         aperf_mperf_retry_count++;
1843                         if (aperf_mperf_retry_count < 5)
1844                                 goto retry;
1845                         else
1846                                 warnx("cpu%d jitter %lld %lld",
1847                                         cpu, aperf_time, mperf_time);
1848                 }
1849                 aperf_mperf_retry_count = 0;
1850
1851                 t->aperf = t->aperf * aperf_mperf_multiplier;
1852                 t->mperf = t->mperf * aperf_mperf_multiplier;
1853         }
1854
1855         if (DO_BIC(BIC_IRQ))
1856                 t->irq_count = irqs_per_cpu[cpu];
1857         if (DO_BIC(BIC_SMI)) {
1858                 if (get_msr(cpu, MSR_SMI_COUNT, &msr))
1859                         return -5;
1860                 t->smi_count = msr & 0xFFFFFFFF;
1861         }
1862         if (DO_BIC(BIC_CPU_c1) && use_c1_residency_msr) {
1863                 if (get_msr(cpu, MSR_CORE_C1_RES, &t->c1))
1864                         return -6;
1865         }
1866
1867         for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
1868                 if (get_mp(cpu, mp, &t->counter[i]))
1869                         return -10;
1870         }
1871
1872         /* collect core counters only for 1st thread in core */
1873         if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
1874                 goto done;
1875
1876         if (DO_BIC(BIC_CPU_c3) || soft_c1_residency_display(BIC_CPU_c3)) {
1877                 if (get_msr(cpu, MSR_CORE_C3_RESIDENCY, &c->c3))
1878                         return -6;
1879         }
1880
1881         if ((DO_BIC(BIC_CPU_c6) || soft_c1_residency_display(BIC_CPU_c6)) && !do_knl_cstates) {
1882                 if (get_msr(cpu, MSR_CORE_C6_RESIDENCY, &c->c6))
1883                         return -7;
1884         } else if (do_knl_cstates || soft_c1_residency_display(BIC_CPU_c6)) {
1885                 if (get_msr(cpu, MSR_KNL_CORE_C6_RESIDENCY, &c->c6))
1886                         return -7;
1887         }
1888
1889         if (DO_BIC(BIC_CPU_c7) || soft_c1_residency_display(BIC_CPU_c7))
1890                 if (get_msr(cpu, MSR_CORE_C7_RESIDENCY, &c->c7))
1891                         return -8;
1892
1893         if (DO_BIC(BIC_Mod_c6))
1894                 if (get_msr(cpu, MSR_MODULE_C6_RES_MS, &c->mc6_us))
1895                         return -8;
1896
1897         if (DO_BIC(BIC_CoreTmp)) {
1898                 if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr))
1899                         return -9;
1900                 c->core_temp_c = tcc_activation_temp - ((msr >> 16) & 0x7F);
1901         }
1902
1903         if (do_rapl & RAPL_AMD_F17H) {
1904                 if (get_msr(cpu, MSR_CORE_ENERGY_STAT, &msr))
1905                         return -14;
1906                 c->core_energy = msr & 0xFFFFFFFF;
1907         }
1908
1909         for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
1910                 if (get_mp(cpu, mp, &c->counter[i]))
1911                         return -10;
1912         }
1913
1914         /* collect package counters only for 1st core in package */
1915         if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
1916                 goto done;
1917
1918         if (DO_BIC(BIC_Totl_c0)) {
1919                 if (get_msr(cpu, MSR_PKG_WEIGHTED_CORE_C0_RES, &p->pkg_wtd_core_c0))
1920                         return -10;
1921         }
1922         if (DO_BIC(BIC_Any_c0)) {
1923                 if (get_msr(cpu, MSR_PKG_ANY_CORE_C0_RES, &p->pkg_any_core_c0))
1924                         return -11;
1925         }
1926         if (DO_BIC(BIC_GFX_c0)) {
1927                 if (get_msr(cpu, MSR_PKG_ANY_GFXE_C0_RES, &p->pkg_any_gfxe_c0))
1928                         return -12;
1929         }
1930         if (DO_BIC(BIC_CPUGFX)) {
1931                 if (get_msr(cpu, MSR_PKG_BOTH_CORE_GFXE_C0_RES, &p->pkg_both_core_gfxe_c0))
1932                         return -13;
1933         }
1934         if (DO_BIC(BIC_Pkgpc3))
1935                 if (get_msr(cpu, MSR_PKG_C3_RESIDENCY, &p->pc3))
1936                         return -9;
1937         if (DO_BIC(BIC_Pkgpc6)) {
1938                 if (do_slm_cstates) {
1939                         if (get_msr(cpu, MSR_ATOM_PKG_C6_RESIDENCY, &p->pc6))
1940                                 return -10;
1941                 } else {
1942                         if (get_msr(cpu, MSR_PKG_C6_RESIDENCY, &p->pc6))
1943                                 return -10;
1944                 }
1945         }
1946
1947         if (DO_BIC(BIC_Pkgpc2))
1948                 if (get_msr(cpu, MSR_PKG_C2_RESIDENCY, &p->pc2))
1949                         return -11;
1950         if (DO_BIC(BIC_Pkgpc7))
1951                 if (get_msr(cpu, MSR_PKG_C7_RESIDENCY, &p->pc7))
1952                         return -12;
1953         if (DO_BIC(BIC_Pkgpc8))
1954                 if (get_msr(cpu, MSR_PKG_C8_RESIDENCY, &p->pc8))
1955                         return -13;
1956         if (DO_BIC(BIC_Pkgpc9))
1957                 if (get_msr(cpu, MSR_PKG_C9_RESIDENCY, &p->pc9))
1958                         return -13;
1959         if (DO_BIC(BIC_Pkgpc10))
1960                 if (get_msr(cpu, MSR_PKG_C10_RESIDENCY, &p->pc10))
1961                         return -13;
1962
1963         if (DO_BIC(BIC_CPU_LPI))
1964                 p->cpu_lpi = cpuidle_cur_cpu_lpi_us;
1965         if (DO_BIC(BIC_SYS_LPI))
1966                 p->sys_lpi = cpuidle_cur_sys_lpi_us;
1967
1968         if (do_rapl & RAPL_PKG) {
1969                 if (get_msr(cpu, MSR_PKG_ENERGY_STATUS, &msr))
1970                         return -13;
1971                 p->energy_pkg = msr & 0xFFFFFFFF;
1972         }
1973         if (do_rapl & RAPL_CORES_ENERGY_STATUS) {
1974                 if (get_msr(cpu, MSR_PP0_ENERGY_STATUS, &msr))
1975                         return -14;
1976                 p->energy_cores = msr & 0xFFFFFFFF;
1977         }
1978         if (do_rapl & RAPL_DRAM) {
1979                 if (get_msr(cpu, MSR_DRAM_ENERGY_STATUS, &msr))
1980                         return -15;
1981                 p->energy_dram = msr & 0xFFFFFFFF;
1982         }
1983         if (do_rapl & RAPL_GFX) {
1984                 if (get_msr(cpu, MSR_PP1_ENERGY_STATUS, &msr))
1985                         return -16;
1986                 p->energy_gfx = msr & 0xFFFFFFFF;
1987         }
1988         if (do_rapl & RAPL_PKG_PERF_STATUS) {
1989                 if (get_msr(cpu, MSR_PKG_PERF_STATUS, &msr))
1990                         return -16;
1991                 p->rapl_pkg_perf_status = msr & 0xFFFFFFFF;
1992         }
1993         if (do_rapl & RAPL_DRAM_PERF_STATUS) {
1994                 if (get_msr(cpu, MSR_DRAM_PERF_STATUS, &msr))
1995                         return -16;
1996                 p->rapl_dram_perf_status = msr & 0xFFFFFFFF;
1997         }
1998         if (do_rapl & RAPL_AMD_F17H) {
1999                 if (get_msr(cpu, MSR_PKG_ENERGY_STAT, &msr))
2000                         return -13;
2001                 p->energy_pkg = msr & 0xFFFFFFFF;
2002         }
2003         if (DO_BIC(BIC_PkgTmp)) {
2004                 if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr))
2005                         return -17;
2006                 p->pkg_temp_c = tcc_activation_temp - ((msr >> 16) & 0x7F);
2007         }
2008
2009         if (DO_BIC(BIC_GFX_rc6))
2010                 p->gfx_rc6_ms = gfx_cur_rc6_ms;
2011
2012         if (DO_BIC(BIC_GFXMHz))
2013                 p->gfx_mhz = gfx_cur_mhz;
2014
2015         for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
2016                 if (get_mp(cpu, mp, &p->counter[i]))
2017                         return -10;
2018         }
2019 done:
2020         gettimeofday(&t->tv_end, (struct timezone *)NULL);
2021
2022         return 0;
2023 }
2024
/*
 * MSR_PKG_CST_CONFIG_CONTROL decoding for pkg_cstate_limit:
 * If you change the values, note they are used both in comparisons
 * (>= PCL__7) and to index pkg_cstate_limit_strings[].
 */

#define PCLUKN 0 /* Unknown */
#define PCLRSV 1 /* Reserved */
#define PCL__0 2 /* PC0 */
#define PCL__1 3 /* PC1 */
#define PCL__2 4 /* PC2 */
#define PCL__3 5 /* PC3 */
#define PCL__4 6 /* PC4 */
#define PCL__6 7 /* PC6 */
#define PCL_6N 8 /* PC6 No Retention */
#define PCL_6R 9 /* PC6 Retention */
#define PCL__7 10 /* PC7 */
#define PCL_7S 11 /* PC7 Shrink */
#define PCL__8 12 /* PC8 */
#define PCL__9 13 /* PC9 */
#define PCL_10 14 /* PC10 */
#define PCLUNL 15 /* Unlimited */

int pkg_cstate_limit = PCLUKN;

/*
 * Printable name of each PCL* value.  Every entry is keyed by its
 * constant so a name cannot drift away from the value it describes
 * (PCLUKN and PCLRSV used to be listed in the opposite order).
 */
char *pkg_cstate_limit_strings[] = {
	[PCLUKN] = "unknown",
	[PCLRSV] = "reserved",
	[PCL__0] = "pc0",
	[PCL__1] = "pc1",
	[PCL__2] = "pc2",
	[PCL__3] = "pc3",
	[PCL__4] = "pc4",
	[PCL__6] = "pc6",
	[PCL_6N] = "pc6n",
	[PCL_6R] = "pc6r",
	[PCL__7] = "pc7",
	[PCL_7S] = "pc7s",
	[PCL__8] = "pc8",
	[PCL__9] = "pc9",
	[PCL_10] = "pc10",
	[PCLUNL] = "unlimited",
};

/* Per-platform tables: each has 16 entries holding PCL* values. */
int nhm_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCL__3, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
int snb_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCL__7, PCL_7S, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
int hsw_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL__3, PCL__6, PCL__7, PCL_7S, PCL__8, PCL__9, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
int slv_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCLRSV, PCLRSV, PCL__4, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7};
int amt_pkg_cstate_limits[16] = {PCLUNL, PCL__1, PCL__2, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
int phi_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
int glm_pkg_cstate_limits[16] = {PCLUNL, PCL__1, PCL__3, PCL__6, PCL__7, PCL_7S, PCL__8, PCL__9, PCL_10, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
int skx_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
2060
2061
2062 static void
2063 calculate_tsc_tweak()
2064 {
2065         tsc_tweak = base_hz / tsc_hz;
2066 }
2067
2068 static void
2069 dump_nhm_platform_info(void)
2070 {
2071         unsigned long long msr;
2072         unsigned int ratio;
2073
2074         get_msr(base_cpu, MSR_PLATFORM_INFO, &msr);
2075
2076         fprintf(outf, "cpu%d: MSR_PLATFORM_INFO: 0x%08llx\n", base_cpu, msr);
2077
2078         ratio = (msr >> 40) & 0xFF;
2079         fprintf(outf, "%d * %.1f = %.1f MHz max efficiency frequency\n",
2080                 ratio, bclk, ratio * bclk);
2081
2082         ratio = (msr >> 8) & 0xFF;
2083         fprintf(outf, "%d * %.1f = %.1f MHz base frequency\n",
2084                 ratio, bclk, ratio * bclk);
2085
2086         get_msr(base_cpu, MSR_IA32_POWER_CTL, &msr);
2087         fprintf(outf, "cpu%d: MSR_IA32_POWER_CTL: 0x%08llx (C1E auto-promotion: %sabled)\n",
2088                 base_cpu, msr, msr & 0x2 ? "EN" : "DIS");
2089
2090         return;
2091 }
2092
2093 static void
2094 dump_hsw_turbo_ratio_limits(void)
2095 {
2096         unsigned long long msr;
2097         unsigned int ratio;
2098
2099         get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT2, &msr);
2100
2101         fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT2: 0x%08llx\n", base_cpu, msr);
2102
2103         ratio = (msr >> 8) & 0xFF;
2104         if (ratio)
2105                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 18 active cores\n",
2106                         ratio, bclk, ratio * bclk);
2107
2108         ratio = (msr >> 0) & 0xFF;
2109         if (ratio)
2110                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 17 active cores\n",
2111                         ratio, bclk, ratio * bclk);
2112         return;
2113 }
2114
2115 static void
2116 dump_ivt_turbo_ratio_limits(void)
2117 {
2118         unsigned long long msr;
2119         unsigned int ratio;
2120
2121         get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT1, &msr);
2122
2123         fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", base_cpu, msr);
2124
2125         ratio = (msr >> 56) & 0xFF;
2126         if (ratio)
2127                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 16 active cores\n",
2128                         ratio, bclk, ratio * bclk);
2129
2130         ratio = (msr >> 48) & 0xFF;
2131         if (ratio)
2132                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 15 active cores\n",
2133                         ratio, bclk, ratio * bclk);
2134
2135         ratio = (msr >> 40) & 0xFF;
2136         if (ratio)
2137                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 14 active cores\n",
2138                         ratio, bclk, ratio * bclk);
2139
2140         ratio = (msr >> 32) & 0xFF;
2141         if (ratio)
2142                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 13 active cores\n",
2143                         ratio, bclk, ratio * bclk);
2144
2145         ratio = (msr >> 24) & 0xFF;
2146         if (ratio)
2147                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 12 active cores\n",
2148                         ratio, bclk, ratio * bclk);
2149
2150         ratio = (msr >> 16) & 0xFF;
2151         if (ratio)
2152                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 11 active cores\n",
2153                         ratio, bclk, ratio * bclk);
2154
2155         ratio = (msr >> 8) & 0xFF;
2156         if (ratio)
2157                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 10 active cores\n",
2158                         ratio, bclk, ratio * bclk);
2159
2160         ratio = (msr >> 0) & 0xFF;
2161         if (ratio)
2162                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 9 active cores\n",
2163                         ratio, bclk, ratio * bclk);
2164         return;
2165 }
2166 int has_turbo_ratio_group_limits(int family, int model)
2167 {
2168
2169         if (!genuine_intel)
2170                 return 0;
2171
2172         switch (model) {
2173         case INTEL_FAM6_ATOM_GOLDMONT:
2174         case INTEL_FAM6_SKYLAKE_X:
2175         case INTEL_FAM6_ATOM_GOLDMONT_D:
2176                 return 1;
2177         }
2178         return 0;
2179 }
2180
2181 static void
2182 dump_turbo_ratio_limits(int family, int model)
2183 {
2184         unsigned long long msr, core_counts;
2185         unsigned int ratio, group_size;
2186
2187         get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT, &msr);
2188         fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n", base_cpu, msr);
2189
2190         if (has_turbo_ratio_group_limits(family, model)) {
2191                 get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT1, &core_counts);
2192                 fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", base_cpu, core_counts);
2193         } else {
2194                 core_counts = 0x0807060504030201;
2195         }
2196
2197         ratio = (msr >> 56) & 0xFF;
2198         group_size = (core_counts >> 56) & 0xFF;
2199         if (ratio)
2200                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
2201                         ratio, bclk, ratio * bclk, group_size);
2202
2203         ratio = (msr >> 48) & 0xFF;
2204         group_size = (core_counts >> 48) & 0xFF;
2205         if (ratio)
2206                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
2207                         ratio, bclk, ratio * bclk, group_size);
2208
2209         ratio = (msr >> 40) & 0xFF;
2210         group_size = (core_counts >> 40) & 0xFF;
2211         if (ratio)
2212                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
2213                         ratio, bclk, ratio * bclk, group_size);
2214
2215         ratio = (msr >> 32) & 0xFF;
2216         group_size = (core_counts >> 32) & 0xFF;
2217         if (ratio)
2218                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
2219                         ratio, bclk, ratio * bclk, group_size);
2220
2221         ratio = (msr >> 24) & 0xFF;
2222         group_size = (core_counts >> 24) & 0xFF;
2223         if (ratio)
2224                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
2225                         ratio, bclk, ratio * bclk, group_size);
2226
2227         ratio = (msr >> 16) & 0xFF;
2228         group_size = (core_counts >> 16) & 0xFF;
2229         if (ratio)
2230                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
2231                         ratio, bclk, ratio * bclk, group_size);
2232
2233         ratio = (msr >> 8) & 0xFF;
2234         group_size = (core_counts >> 8) & 0xFF;
2235         if (ratio)
2236                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
2237                         ratio, bclk, ratio * bclk, group_size);
2238
2239         ratio = (msr >> 0) & 0xFF;
2240         group_size = (core_counts >> 0) & 0xFF;
2241         if (ratio)
2242                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
2243                         ratio, bclk, ratio * bclk, group_size);
2244         return;
2245 }
2246
2247 static void
2248 dump_atom_turbo_ratio_limits(void)
2249 {
2250         unsigned long long msr;
2251         unsigned int ratio;
2252
2253         get_msr(base_cpu, MSR_ATOM_CORE_RATIOS, &msr);
2254         fprintf(outf, "cpu%d: MSR_ATOM_CORE_RATIOS: 0x%08llx\n", base_cpu, msr & 0xFFFFFFFF);
2255
2256         ratio = (msr >> 0) & 0x3F;
2257         if (ratio)
2258                 fprintf(outf, "%d * %.1f = %.1f MHz minimum operating frequency\n",
2259                         ratio, bclk, ratio * bclk);
2260
2261         ratio = (msr >> 8) & 0x3F;
2262         if (ratio)
2263                 fprintf(outf, "%d * %.1f = %.1f MHz low frequency mode (LFM)\n",
2264                         ratio, bclk, ratio * bclk);
2265
2266         ratio = (msr >> 16) & 0x3F;
2267         if (ratio)
2268                 fprintf(outf, "%d * %.1f = %.1f MHz base frequency\n",
2269                         ratio, bclk, ratio * bclk);
2270
2271         get_msr(base_cpu, MSR_ATOM_CORE_TURBO_RATIOS, &msr);
2272         fprintf(outf, "cpu%d: MSR_ATOM_CORE_TURBO_RATIOS: 0x%08llx\n", base_cpu, msr & 0xFFFFFFFF);
2273
2274         ratio = (msr >> 24) & 0x3F;
2275         if (ratio)
2276                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 4 active cores\n",
2277                         ratio, bclk, ratio * bclk);
2278
2279         ratio = (msr >> 16) & 0x3F;
2280         if (ratio)
2281                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 3 active cores\n",
2282                         ratio, bclk, ratio * bclk);
2283
2284         ratio = (msr >> 8) & 0x3F;
2285         if (ratio)
2286                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 2 active cores\n",
2287                         ratio, bclk, ratio * bclk);
2288
2289         ratio = (msr >> 0) & 0x3F;
2290         if (ratio)
2291                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 1 active core\n",
2292                         ratio, bclk, ratio * bclk);
2293 }
2294
2295 static void
2296 dump_knl_turbo_ratio_limits(void)
2297 {
2298         const unsigned int buckets_no = 7;
2299
2300         unsigned long long msr;
2301         int delta_cores, delta_ratio;
2302         int i, b_nr;
2303         unsigned int cores[buckets_no];
2304         unsigned int ratio[buckets_no];
2305
2306         get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT, &msr);
2307
2308         fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n",
2309                 base_cpu, msr);
2310
2311         /**
2312          * Turbo encoding in KNL is as follows:
2313          * [0] -- Reserved
2314          * [7:1] -- Base value of number of active cores of bucket 1.
2315          * [15:8] -- Base value of freq ratio of bucket 1.
2316          * [20:16] -- +ve delta of number of active cores of bucket 2.
2317          * i.e. active cores of bucket 2 =
2318          * active cores of bucket 1 + delta
2319          * [23:21] -- Negative delta of freq ratio of bucket 2.
2320          * i.e. freq ratio of bucket 2 =
2321          * freq ratio of bucket 1 - delta
2322          * [28:24]-- +ve delta of number of active cores of bucket 3.
2323          * [31:29]-- -ve delta of freq ratio of bucket 3.
2324          * [36:32]-- +ve delta of number of active cores of bucket 4.
2325          * [39:37]-- -ve delta of freq ratio of bucket 4.
2326          * [44:40]-- +ve delta of number of active cores of bucket 5.
2327          * [47:45]-- -ve delta of freq ratio of bucket 5.
2328          * [52:48]-- +ve delta of number of active cores of bucket 6.
2329          * [55:53]-- -ve delta of freq ratio of bucket 6.
2330          * [60:56]-- +ve delta of number of active cores of bucket 7.
2331          * [63:61]-- -ve delta of freq ratio of bucket 7.
2332          */
2333
2334         b_nr = 0;
2335         cores[b_nr] = (msr & 0xFF) >> 1;
2336         ratio[b_nr] = (msr >> 8) & 0xFF;
2337
2338         for (i = 16; i < 64; i += 8) {
2339                 delta_cores = (msr >> i) & 0x1F;
2340                 delta_ratio = (msr >> (i + 5)) & 0x7;
2341
2342                 cores[b_nr + 1] = cores[b_nr] + delta_cores;
2343                 ratio[b_nr + 1] = ratio[b_nr] - delta_ratio;
2344                 b_nr++;
2345         }
2346
2347         for (i = buckets_no - 1; i >= 0; i--)
2348                 if (i > 0 ? ratio[i] != ratio[i - 1] : 1)
2349                         fprintf(outf,
2350                                 "%d * %.1f = %.1f MHz max turbo %d active cores\n",
2351                                 ratio[i], bclk, ratio[i] * bclk, cores[i]);
2352 }
2353
2354 static void
2355 dump_nhm_cst_cfg(void)
2356 {
2357         unsigned long long msr;
2358
2359         get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr);
2360
2361         fprintf(outf, "cpu%d: MSR_PKG_CST_CONFIG_CONTROL: 0x%08llx", base_cpu, msr);
2362
2363         fprintf(outf, " (%s%s%s%s%slocked, pkg-cstate-limit=%d (%s)",
2364                 (msr & SNB_C3_AUTO_UNDEMOTE) ? "UNdemote-C3, " : "",
2365                 (msr & SNB_C1_AUTO_UNDEMOTE) ? "UNdemote-C1, " : "",
2366                 (msr & NHM_C3_AUTO_DEMOTE) ? "demote-C3, " : "",
2367                 (msr & NHM_C1_AUTO_DEMOTE) ? "demote-C1, " : "",
2368                 (msr & (1 << 15)) ? "" : "UN",
2369                 (unsigned int)msr & 0xF,
2370                 pkg_cstate_limit_strings[pkg_cstate_limit]);
2371
2372 #define AUTOMATIC_CSTATE_CONVERSION             (1UL << 16)
2373         if (has_automatic_cstate_conversion) {
2374                 fprintf(outf, ", automatic c-state conversion=%s",
2375                         (msr & AUTOMATIC_CSTATE_CONVERSION) ? "on" : "off");
2376         }
2377
2378         fprintf(outf, ")\n");
2379
2380         return;
2381 }
2382
2383 static void
2384 dump_config_tdp(void)
2385 {
2386         unsigned long long msr;
2387
2388         get_msr(base_cpu, MSR_CONFIG_TDP_NOMINAL, &msr);
2389         fprintf(outf, "cpu%d: MSR_CONFIG_TDP_NOMINAL: 0x%08llx", base_cpu, msr);
2390         fprintf(outf, " (base_ratio=%d)\n", (unsigned int)msr & 0xFF);
2391
2392         get_msr(base_cpu, MSR_CONFIG_TDP_LEVEL_1, &msr);
2393         fprintf(outf, "cpu%d: MSR_CONFIG_TDP_LEVEL_1: 0x%08llx (", base_cpu, msr);
2394         if (msr) {
2395                 fprintf(outf, "PKG_MIN_PWR_LVL1=%d ", (unsigned int)(msr >> 48) & 0x7FFF);
2396                 fprintf(outf, "PKG_MAX_PWR_LVL1=%d ", (unsigned int)(msr >> 32) & 0x7FFF);
2397                 fprintf(outf, "LVL1_RATIO=%d ", (unsigned int)(msr >> 16) & 0xFF);
2398                 fprintf(outf, "PKG_TDP_LVL1=%d", (unsigned int)(msr) & 0x7FFF);
2399         }
2400         fprintf(outf, ")\n");
2401
2402         get_msr(base_cpu, MSR_CONFIG_TDP_LEVEL_2, &msr);
2403         fprintf(outf, "cpu%d: MSR_CONFIG_TDP_LEVEL_2: 0x%08llx (", base_cpu, msr);
2404         if (msr) {
2405                 fprintf(outf, "PKG_MIN_PWR_LVL2=%d ", (unsigned int)(msr >> 48) & 0x7FFF);
2406                 fprintf(outf, "PKG_MAX_PWR_LVL2=%d ", (unsigned int)(msr >> 32) & 0x7FFF);
2407                 fprintf(outf, "LVL2_RATIO=%d ", (unsigned int)(msr >> 16) & 0xFF);
2408                 fprintf(outf, "PKG_TDP_LVL2=%d", (unsigned int)(msr) & 0x7FFF);
2409         }
2410         fprintf(outf, ")\n");
2411
2412         get_msr(base_cpu, MSR_CONFIG_TDP_CONTROL, &msr);
2413         fprintf(outf, "cpu%d: MSR_CONFIG_TDP_CONTROL: 0x%08llx (", base_cpu, msr);
2414         if ((msr) & 0x3)
2415                 fprintf(outf, "TDP_LEVEL=%d ", (unsigned int)(msr) & 0x3);
2416         fprintf(outf, " lock=%d", (unsigned int)(msr >> 31) & 1);
2417         fprintf(outf, ")\n");
2418
2419         get_msr(base_cpu, MSR_TURBO_ACTIVATION_RATIO, &msr);
2420         fprintf(outf, "cpu%d: MSR_TURBO_ACTIVATION_RATIO: 0x%08llx (", base_cpu, msr);
2421         fprintf(outf, "MAX_NON_TURBO_RATIO=%d", (unsigned int)(msr) & 0xFF);
2422         fprintf(outf, " lock=%d", (unsigned int)(msr >> 31) & 1);
2423         fprintf(outf, ")\n");
2424 }
2425
2426 unsigned int irtl_time_units[] = {1, 32, 1024, 32768, 1048576, 33554432, 0, 0 };
2427
2428 void print_irtl(void)
2429 {
2430         unsigned long long msr;
2431
2432         get_msr(base_cpu, MSR_PKGC3_IRTL, &msr);
2433         fprintf(outf, "cpu%d: MSR_PKGC3_IRTL: 0x%08llx (", base_cpu, msr);
2434         fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
2435                 (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
2436
2437         get_msr(base_cpu, MSR_PKGC6_IRTL, &msr);
2438         fprintf(outf, "cpu%d: MSR_PKGC6_IRTL: 0x%08llx (", base_cpu, msr);
2439         fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
2440                 (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
2441
2442         get_msr(base_cpu, MSR_PKGC7_IRTL, &msr);
2443         fprintf(outf, "cpu%d: MSR_PKGC7_IRTL: 0x%08llx (", base_cpu, msr);
2444         fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
2445                 (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
2446
2447         if (!do_irtl_hsw)
2448                 return;
2449
2450         get_msr(base_cpu, MSR_PKGC8_IRTL, &msr);
2451         fprintf(outf, "cpu%d: MSR_PKGC8_IRTL: 0x%08llx (", base_cpu, msr);
2452         fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
2453                 (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
2454
2455         get_msr(base_cpu, MSR_PKGC9_IRTL, &msr);
2456         fprintf(outf, "cpu%d: MSR_PKGC9_IRTL: 0x%08llx (", base_cpu, msr);
2457         fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
2458                 (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
2459
2460         get_msr(base_cpu, MSR_PKGC10_IRTL, &msr);
2461         fprintf(outf, "cpu%d: MSR_PKGC10_IRTL: 0x%08llx (", base_cpu, msr);
2462         fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
2463                 (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
2464
2465 }
2466 void free_fd_percpu(void)
2467 {
2468         int i;
2469
2470         for (i = 0; i < topo.max_cpu_num + 1; ++i) {
2471                 if (fd_percpu[i] != 0)
2472                         close(fd_percpu[i]);
2473         }
2474
2475         free(fd_percpu);
2476 }
2477
2478 void free_all_buffers(void)
2479 {
2480         int i;
2481
2482         CPU_FREE(cpu_present_set);
2483         cpu_present_set = NULL;
2484         cpu_present_setsize = 0;
2485
2486         CPU_FREE(cpu_affinity_set);
2487         cpu_affinity_set = NULL;
2488         cpu_affinity_setsize = 0;
2489
2490         free(thread_even);
2491         free(core_even);
2492         free(package_even);
2493
2494         thread_even = NULL;
2495         core_even = NULL;
2496         package_even = NULL;
2497
2498         free(thread_odd);
2499         free(core_odd);
2500         free(package_odd);
2501
2502         thread_odd = NULL;
2503         core_odd = NULL;
2504         package_odd = NULL;
2505
2506         free(output_buffer);
2507         output_buffer = NULL;
2508         outp = NULL;
2509
2510         free_fd_percpu();
2511
2512         free(irq_column_2_cpu);
2513         free(irqs_per_cpu);
2514
2515         for (i = 0; i <= topo.max_cpu_num; ++i) {
2516                 if (cpus[i].put_ids)
2517                         CPU_FREE(cpus[i].put_ids);
2518         }
2519         free(cpus);
2520 }
2521
2522
/*
 * parse_int_file(fmt, ...)
 *
 * Build a path from the printf-style arguments and read one decimal
 * int from the start of that file.
 *
 * Returns the parsed value, or 0 when the file cannot be opened.
 * Exits via err() when the file opens but no int can be parsed.
 */
int parse_int_file(const char *fmt, ...)
{
        char path[PATH_MAX];
        va_list ap;
        FILE *fp;
        int parsed;

        va_start(ap, fmt);
        vsnprintf(path, sizeof(path), fmt, ap);
        va_end(ap);

        fp = fopen(path, "r");
        if (fp == NULL)
                return 0;

        if (fscanf(fp, "%d", &parsed) != 1)
                err(1, "%s: failed to parse number from file", path);

        fclose(fp);

        return parsed;
}
2546
/*
 * cpu_is_first_core_in_package(cpu)
 *
 * Returns 1 when the leading integer of this cpu's
 * topology/core_siblings_list equals cpu, otherwise 0.
 */
int cpu_is_first_core_in_package(int cpu)
{
        const int first_sibling =
                parse_int_file("/sys/devices/system/cpu/cpu%d/topology/core_siblings_list", cpu);

        return first_sibling == cpu;
}
2555
/*
 * get_physical_package_id(cpu)
 *
 * Returns the integer stored in this cpu's
 * topology/physical_package_id file, as read by parse_int_file().
 */
int get_physical_package_id(int cpu)
{
        const int package_id =
                parse_int_file("/sys/devices/system/cpu/cpu%d/topology/physical_package_id", cpu);

        return package_id;
}
2560
/*
 * get_die_id(cpu)
 *
 * Returns the integer stored in this cpu's topology/die_id file,
 * as read by parse_int_file().
 */
int get_die_id(int cpu)
{
        const int die_id =
                parse_int_file("/sys/devices/system/cpu/cpu%d/topology/die_id", cpu);

        return die_id;
}
2565
/*
 * get_core_id(cpu)
 *
 * Returns the integer stored in this cpu's topology/core_id file,
 * as read by parse_int_file().
 */
int get_core_id(int cpu)
{
        const int core_id =
                parse_int_file("/sys/devices/system/cpu/cpu%d/topology/core_id", cpu);

        return core_id;
}
2570
2571 void set_node_data(void)
2572 {
2573         int pkg, node, lnode, cpu, cpux;
2574         int cpu_count;
2575
2576         /* initialize logical_node_id */
2577         for (cpu = 0; cpu <= topo.max_cpu_num; ++cpu)
2578                 cpus[cpu].logical_node_id = -1;
2579
2580         cpu_count = 0;
2581         for (pkg = 0; pkg < topo.num_packages; pkg++) {
2582                 lnode = 0;
2583                 for (cpu = 0; cpu <= topo.max_cpu_num; ++cpu) {
2584                         if (cpus[cpu].physical_package_id != pkg)
2585                                 continue;
2586                         /* find a cpu with an unset logical_node_id */
2587                         if (cpus[cpu].logical_node_id != -1)
2588                                 continue;
2589                         cpus[cpu].logical_node_id = lnode;
2590                         node = cpus[cpu].physical_node_id;
2591                         cpu_count++;
2592                         /*
2593                          * find all matching cpus on this pkg and set
2594                          * the logical_node_id
2595                          */
2596                         for (cpux = cpu; cpux <= topo.max_cpu_num; cpux++) {
2597                                 if ((cpus[cpux].physical_package_id == pkg) &&
2598                                    (cpus[cpux].physical_node_id == node)) {
2599                                         cpus[cpux].logical_node_id = lnode;
2600                                         cpu_count++;
2601                                 }
2602                         }
2603                         lnode++;
2604                         if (lnode > topo.nodes_per_pkg)
2605                                 topo.nodes_per_pkg = lnode;
2606                 }
2607                 if (cpu_count >= topo.max_cpu_num)
2608                         break;
2609         }
2610 }
2611
2612 int get_physical_node_id(struct cpu_topology *thiscpu)
2613 {
2614         char path[80];
2615         FILE *filep;
2616         int i;
2617         int cpu = thiscpu->logical_cpu_id;
2618
2619         for (i = 0; i <= topo.max_cpu_num; i++) {
2620                 sprintf(path, "/sys/devices/system/cpu/cpu%d/node%i/cpulist",
2621                         cpu, i);
2622                 filep = fopen(path, "r");
2623                 if (!filep)
2624                         continue;
2625                 fclose(filep);
2626                 return i;
2627         }
2628         return -1;
2629 }
2630
2631 int get_thread_siblings(struct cpu_topology *thiscpu)
2632 {
2633         char path[80], character;
2634         FILE *filep;
2635         unsigned long map;
2636         int so, shift, sib_core;
2637         int cpu = thiscpu->logical_cpu_id;
2638         int offset = topo.max_cpu_num + 1;
2639         size_t size;
2640         int thread_id = 0;
2641
2642         thiscpu->put_ids = CPU_ALLOC((topo.max_cpu_num + 1));
2643         if (thiscpu->thread_id < 0)
2644                 thiscpu->thread_id = thread_id++;
2645         if (!thiscpu->put_ids)
2646                 return -1;
2647
2648         size = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
2649         CPU_ZERO_S(size, thiscpu->put_ids);
2650
2651         sprintf(path,
2652                 "/sys/devices/system/cpu/cpu%d/topology/thread_siblings", cpu);
2653         filep = fopen_or_die(path, "r");
2654         do {
2655                 offset -= BITMASK_SIZE;
2656                 if (fscanf(filep, "%lx%c", &map, &character) != 2)
2657                         err(1, "%s: failed to parse file", path);
2658                 for (shift = 0; shift < BITMASK_SIZE; shift++) {
2659                         if ((map >> shift) & 0x1) {
2660                                 so = shift + offset;
2661                                 sib_core = get_core_id(so);
2662                                 if (sib_core == thiscpu->physical_core_id) {
2663                                         CPU_SET_S(so, size, thiscpu->put_ids);
2664                                         if ((so != cpu) &&
2665                                             (cpus[so].thread_id < 0))
2666                                                 cpus[so].thread_id =
2667                                                                     thread_id++;
2668                                 }
2669                         }
2670                 }
2671         } while (!strncmp(&character, ",", 1));
2672         fclose(filep);
2673
2674         return CPU_COUNT_S(size, thiscpu->put_ids);
2675 }
2676
2677 /*
2678  * run func(thread, core, package) in topology order
2679  * skip non-present cpus
2680  */
2681
2682 int for_all_cpus_2(int (func)(struct thread_data *, struct core_data *,
2683         struct pkg_data *, struct thread_data *, struct core_data *,
2684         struct pkg_data *), struct thread_data *thread_base,
2685         struct core_data *core_base, struct pkg_data *pkg_base,
2686         struct thread_data *thread_base2, struct core_data *core_base2,
2687         struct pkg_data *pkg_base2)
2688 {
2689         int retval, pkg_no, node_no, core_no, thread_no;
2690
2691         for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) {
2692                 for (node_no = 0; node_no < topo.nodes_per_pkg; ++node_no) {
2693                         for (core_no = 0; core_no < topo.cores_per_node;
2694                              ++core_no) {
2695                                 for (thread_no = 0; thread_no <
2696                                         topo.threads_per_core; ++thread_no) {
2697                                         struct thread_data *t, *t2;
2698                                         struct core_data *c, *c2;
2699                                         struct pkg_data *p, *p2;
2700
2701                                         t = GET_THREAD(thread_base, thread_no,
2702                                                        core_no, node_no,
2703                                                        pkg_no);
2704
2705                                         if (cpu_is_not_present(t->cpu_id))
2706                                                 continue;
2707
2708                                         t2 = GET_THREAD(thread_base2, thread_no,
2709                                                         core_no, node_no,
2710                                                         pkg_no);
2711
2712                                         c = GET_CORE(core_base, core_no,
2713                                                      node_no, pkg_no);
2714                                         c2 = GET_CORE(core_base2, core_no,
2715                                                       node_no,
2716                                                       pkg_no);
2717
2718                                         p = GET_PKG(pkg_base, pkg_no);
2719                                         p2 = GET_PKG(pkg_base2, pkg_no);
2720
2721                                         retval = func(t, c, p, t2, c2, p2);
2722                                         if (retval)
2723                                                 return retval;
2724                                 }
2725                         }
2726                 }
2727         }
2728         return 0;
2729 }
2730
2731 /*
2732  * run func(cpu) on every cpu in /proc/stat
2733  * return max_cpu number
2734  */
2735 int for_all_proc_cpus(int (func)(int))
2736 {
2737         FILE *fp;
2738         int cpu_num;
2739         int retval;
2740
2741         fp = fopen_or_die(proc_stat, "r");
2742
2743         retval = fscanf(fp, "cpu %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n");
2744         if (retval != 0)
2745                 err(1, "%s: failed to parse format", proc_stat);
2746
2747         while (1) {
2748                 retval = fscanf(fp, "cpu%u %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n", &cpu_num);
2749                 if (retval != 1)
2750                         break;
2751
2752                 retval = func(cpu_num);
2753                 if (retval) {
2754                         fclose(fp);
2755                         return(retval);
2756                 }
2757         }
2758         fclose(fp);
2759         return 0;
2760 }
2761
2762 void re_initialize(void)
2763 {
2764         free_all_buffers();
2765         setup_all_buffers();
2766         printf("turbostat: re-initialized with num_cpus %d\n", topo.num_cpus);
2767 }
2768
2769 void set_max_cpu_num(void)
2770 {
2771         FILE *filep;
2772         unsigned long dummy;
2773
2774         topo.max_cpu_num = 0;
2775         filep = fopen_or_die(
2776                         "/sys/devices/system/cpu/cpu0/topology/thread_siblings",
2777                         "r");
2778         while (fscanf(filep, "%lx,", &dummy) == 1)
2779                 topo.max_cpu_num += BITMASK_SIZE;
2780         fclose(filep);
2781         topo.max_cpu_num--; /* 0 based */
2782 }
2783
2784 /*
2785  * count_cpus()
2786  * remember the last one seen, it will be the max
2787  */
2788 int count_cpus(int cpu)
2789 {
2790         topo.num_cpus++;
2791         return 0;
2792 }
2793 int mark_cpu_present(int cpu)
2794 {
2795         CPU_SET_S(cpu, cpu_present_setsize, cpu_present_set);
2796         return 0;
2797 }
2798
2799 int init_thread_id(int cpu)
2800 {
2801         cpus[cpu].thread_id = -1;
2802         return 0;
2803 }
2804
2805 /*
2806  * snapshot_proc_interrupts()
2807  *
2808  * read and record summary of /proc/interrupts
2809  *
2810  * return 1 if config change requires a restart, else return 0
2811  */
2812 int snapshot_proc_interrupts(void)
2813 {
2814         static FILE *fp;
2815         int column, retval;
2816
2817         if (fp == NULL)
2818                 fp = fopen_or_die("/proc/interrupts", "r");
2819         else
2820                 rewind(fp);
2821
2822         /* read 1st line of /proc/interrupts to get cpu* name for each column */
2823         for (column = 0; column < topo.num_cpus; ++column) {
2824                 int cpu_number;
2825
2826                 retval = fscanf(fp, " CPU%d", &cpu_number);
2827                 if (retval != 1)
2828                         break;
2829
2830                 if (cpu_number > topo.max_cpu_num) {
2831                         warn("/proc/interrupts: cpu%d: > %d", cpu_number, topo.max_cpu_num);
2832                         return 1;
2833                 }
2834
2835                 irq_column_2_cpu[column] = cpu_number;
2836                 irqs_per_cpu[cpu_number] = 0;
2837         }
2838
2839         /* read /proc/interrupt count lines and sum up irqs per cpu */
2840         while (1) {
2841                 int column;
2842                 char buf[64];
2843
2844                 retval = fscanf(fp, " %s:", buf);       /* flush irq# "N:" */
2845                 if (retval != 1)
2846                         break;
2847
2848                 /* read the count per cpu */
2849                 for (column = 0; column < topo.num_cpus; ++column) {
2850
2851                         int cpu_number, irq_count;
2852
2853                         retval = fscanf(fp, " %d", &irq_count);
2854                         if (retval != 1)
2855                                 break;
2856
2857                         cpu_number = irq_column_2_cpu[column];
2858                         irqs_per_cpu[cpu_number] += irq_count;
2859
2860                 }
2861
2862                 while (getc(fp) != '\n')
2863                         ;       /* flush interrupt description */
2864
2865         }
2866         return 0;
2867 }
2868 /*
2869  * snapshot_gfx_rc6_ms()
2870  *
2871  * record snapshot of
2872  * /sys/class/drm/card0/power/rc6_residency_ms
2873  *
2874  * return 1 if config change requires a restart, else return 0
2875  */
2876 int snapshot_gfx_rc6_ms(void)
2877 {
2878         FILE *fp;
2879         int retval;
2880
2881         fp = fopen_or_die("/sys/class/drm/card0/power/rc6_residency_ms", "r");
2882
2883         retval = fscanf(fp, "%lld", &gfx_cur_rc6_ms);
2884         if (retval != 1)
2885                 err(1, "GFX rc6");
2886
2887         fclose(fp);
2888
2889         return 0;
2890 }
2891 /*
2892  * snapshot_gfx_mhz()
2893  *
2894  * record snapshot of
2895  * /sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz
2896  *
2897  * return 1 if config change requires a restart, else return 0
2898  */
2899 int snapshot_gfx_mhz(void)
2900 {
2901         static FILE *fp;
2902         int retval;
2903
2904         if (fp == NULL)
2905                 fp = fopen_or_die("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", "r");
2906         else {
2907                 rewind(fp);
2908                 fflush(fp);
2909         }
2910
2911         retval = fscanf(fp, "%d", &gfx_cur_mhz);
2912         if (retval != 1)
2913                 err(1, "GFX MHz");
2914
2915         return 0;
2916 }
2917
2918 /*
2919  * snapshot_cpu_lpi()
2920  *
2921  * record snapshot of
2922  * /sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us
2923  */
2924 int snapshot_cpu_lpi_us(void)
2925 {
2926         FILE *fp;
2927         int retval;
2928
2929         fp = fopen_or_die("/sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us", "r");
2930
2931         retval = fscanf(fp, "%lld", &cpuidle_cur_cpu_lpi_us);
2932         if (retval != 1) {
2933                 fprintf(stderr, "Disabling Low Power Idle CPU output\n");
2934                 BIC_NOT_PRESENT(BIC_CPU_LPI);
2935                 fclose(fp);
2936                 return -1;
2937         }
2938
2939         fclose(fp);
2940
2941         return 0;
2942 }
2943 /*
2944  * snapshot_sys_lpi()
2945  *
2946  * record snapshot of sys_lpi_file
2947  */
2948 int snapshot_sys_lpi_us(void)
2949 {
2950         FILE *fp;
2951         int retval;
2952
2953         fp = fopen_or_die(sys_lpi_file, "r");
2954
2955         retval = fscanf(fp, "%lld", &cpuidle_cur_sys_lpi_us);
2956         if (retval != 1) {
2957                 fprintf(stderr, "Disabling Low Power Idle System output\n");
2958                 BIC_NOT_PRESENT(BIC_SYS_LPI);
2959                 fclose(fp);
2960                 return -1;
2961         }
2962         fclose(fp);
2963
2964         return 0;
2965 }
2966 /*
2967  * snapshot /proc and /sys files
2968  *
2969  * return 1 if configuration restart needed, else return 0
2970  */
2971 int snapshot_proc_sysfs_files(void)
2972 {
2973         if (DO_BIC(BIC_IRQ))
2974                 if (snapshot_proc_interrupts())
2975                         return 1;
2976
2977         if (DO_BIC(BIC_GFX_rc6))
2978                 snapshot_gfx_rc6_ms();
2979
2980         if (DO_BIC(BIC_GFXMHz))
2981                 snapshot_gfx_mhz();
2982
2983         if (DO_BIC(BIC_CPU_LPI))
2984                 snapshot_cpu_lpi_us();
2985
2986         if (DO_BIC(BIC_SYS_LPI))
2987                 snapshot_sys_lpi_us();
2988
2989         return 0;
2990 }
2991
2992 int exit_requested;
2993
2994 static void signal_handler (int signal)
2995 {
2996         switch (signal) {
2997         case SIGINT:
2998                 exit_requested = 1;
2999                 if (debug)
3000                         fprintf(stderr, " SIGINT\n");
3001                 break;
3002         case SIGUSR1:
3003                 if (debug > 1)
3004                         fprintf(stderr, "SIGUSR1\n");
3005                 break;
3006         }
3007 }
3008
3009 void setup_signal_handler(void)
3010 {
3011         struct sigaction sa;
3012
3013         memset(&sa, 0, sizeof(sa));
3014
3015         sa.sa_handler = &signal_handler;
3016
3017         if (sigaction(SIGINT, &sa, NULL) < 0)
3018                 err(1, "sigaction SIGINT");
3019         if (sigaction(SIGUSR1, &sa, NULL) < 0)
3020                 err(1, "sigaction SIGUSR1");
3021 }
3022
3023 void do_sleep(void)
3024 {
3025         struct timeval tout;
3026         struct timespec rest;
3027         fd_set readfds;
3028         int retval;
3029
3030         FD_ZERO(&readfds);
3031         FD_SET(0, &readfds);
3032
3033         if (ignore_stdin) {
3034                 nanosleep(&interval_ts, NULL);
3035                 return;
3036         }
3037
3038         tout = interval_tv;
3039         retval = select(1, &readfds, NULL, NULL, &tout);
3040
3041         if (retval == 1) {
3042                 switch (getc(stdin)) {
3043                 case 'q':
3044                         exit_requested = 1;
3045                         break;
3046                 case EOF:
3047                         /*
3048                          * 'stdin' is a pipe closed on the other end. There
3049                          * won't be any further input.
3050                          */
3051                         ignore_stdin = 1;
3052                         /* Sleep the rest of the time */
3053                         rest.tv_sec = (tout.tv_sec + tout.tv_usec / 1000000);
3054                         rest.tv_nsec = (tout.tv_usec % 1000000) * 1000;
3055                         nanosleep(&rest, NULL);
3056                 }
3057         }
3058 }
3059
3060
/*
 * turbostat_loop()
 *
 * Main periodic measurement loop.  Counters are collected alternately
 * into the EVEN_COUNTERS and ODD_COUNTERS sets; after each collection
 * the difference against the other set is computed by delta_cpu(),
 * and the result is averaged, formatted and flushed.
 *
 * The loop ends when exit_requested is set or, if num_iterations is
 * non-zero, once that many outputs have been produced.  Several
 * failure paths jump back to the "restart" label, most of them after
 * calling re_initialize().
 */
void turbostat_loop()
{
        int retval;
        int restarted = 0;
        int done_iters = 0;

        setup_signal_handler();

restart:
        /* counts consecutive passes through this label; cleared below on success */
        restarted++;

        snapshot_proc_sysfs_files();
        retval = for_all_cpus(get_counters, EVEN_COUNTERS);
        first_counter_read = 0;
        if (retval < -1) {
                exit(retval);
        } else if (retval == -1) {
                /* a second -1 in a row is fatal; the first one triggers a re-init */
                if (restarted > 1) {
                        exit(retval);
                }
                re_initialize();
                goto restart;
        }
        restarted = 0;
        done_iters = 0;
        gettimeofday(&tv_even, (struct timezone *)NULL);

        while (1) {
                /* a non-zero result here leads to a re-init and restart */
                if (for_all_proc_cpus(cpu_is_not_present)) {
                        re_initialize();
                        goto restart;
                }
                do_sleep();
                /* note: this path restarts without calling re_initialize() */
                if (snapshot_proc_sysfs_files())
                        goto restart;
                retval = for_all_cpus(get_counters, ODD_COUNTERS);
                if (retval < -1) {
                        exit(retval);
                } else if (retval == -1) {
                        re_initialize();
                        goto restart;
                }
                gettimeofday(&tv_odd, (struct timezone *)NULL);
                /* tv_delta = tv_odd - tv_even */
                timersub(&tv_odd, &tv_even, &tv_delta);
                if (for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS)) {
                        re_initialize();
                        goto restart;
                }
                compute_average(EVEN_COUNTERS);
                format_all_counters(EVEN_COUNTERS);
                flush_output_stdout();
                if (exit_requested)
                        break;
                if (num_iterations && ++done_iters >= num_iterations)
                        break;
                /* second half: same sequence with the even/odd roles swapped */
                do_sleep();
                if (snapshot_proc_sysfs_files())
                        goto restart;
                retval = for_all_cpus(get_counters, EVEN_COUNTERS);
                if (retval < -1) {
                        exit(retval);
                } else if (retval == -1) {
                        re_initialize();
                        goto restart;
                }
                gettimeofday(&tv_even, (struct timezone *)NULL);
                /* tv_delta = tv_even - tv_odd */
                timersub(&tv_even, &tv_odd, &tv_delta);
                if (for_all_cpus_2(delta_cpu, EVEN_COUNTERS, ODD_COUNTERS)) {
                        re_initialize();
                        goto restart;
                }
                compute_average(ODD_COUNTERS);
                format_all_counters(ODD_COUNTERS);
                flush_output_stdout();
                if (exit_requested)
                        break;
                if (num_iterations && ++done_iters >= num_iterations)
                        break;
        }
}
3141
3142 void check_dev_msr()
3143 {
3144         struct stat sb;
3145         char pathname[32];
3146
3147         sprintf(pathname, "/dev/cpu/%d/msr", base_cpu);
3148         if (stat(pathname, &sb))
3149                 if (system("/sbin/modprobe msr > /dev/null 2>&1"))
3150                         err(-5, "no /dev/cpu/0/msr, Try \"# modprobe msr\" ");
3151 }
3152
3153 /*
3154  * check for CAP_SYS_RAWIO
3155  * return 0 on success
3156  * return 1 on fail
3157  */
3158 int check_for_cap_sys_rawio(void)
3159 {
3160         cap_t caps;
3161         cap_flag_value_t cap_flag_value;
3162
3163         caps = cap_get_proc();
3164         if (caps == NULL)
3165                 err(-6, "cap_get_proc\n");
3166
3167         if (cap_get_flag(caps, CAP_SYS_RAWIO, CAP_EFFECTIVE, &cap_flag_value))
3168                 err(-6, "cap_get\n");
3169
3170         if (cap_flag_value != CAP_SET) {
3171                 warnx("capget(CAP_SYS_RAWIO) failed,"
3172                         " try \"# setcap cap_sys_rawio=ep %s\"", progname);
3173                 return 1;
3174         }
3175
3176         if (cap_free(caps) == -1)
3177                 err(-6, "cap_free\n");
3178
3179         return 0;
3180 }
3181 void check_permissions(void)
3182 {
3183         int do_exit = 0;
3184         char pathname[32];
3185
3186         /* check for CAP_SYS_RAWIO */
3187         do_exit += check_for_cap_sys_rawio();
3188
3189         /* test file permissions */
3190         sprintf(pathname, "/dev/cpu/%d/msr", base_cpu);
3191         if (euidaccess(pathname, R_OK)) {
3192                 do_exit++;
3193                 warn("/dev/cpu/0/msr open failed, try chown or chmod +r /dev/cpu/*/msr");
3194         }
3195
3196         /* if all else fails, thell them to be root */
3197         if (do_exit)
3198                 if (getuid() != 0)
3199                         warnx("... or simply run as root");
3200
3201         if (do_exit)
3202                 exit(-6);
3203 }
3204
3205 /*
3206  * NHM adds support for additional MSRs:
3207  *
3208  * MSR_SMI_COUNT                   0x00000034
3209  *
3210  * MSR_PLATFORM_INFO               0x000000ce
3211  * MSR_PKG_CST_CONFIG_CONTROL     0x000000e2
3212  *
3213  * MSR_MISC_PWR_MGMT               0x000001aa
3214  *
3215  * MSR_PKG_C3_RESIDENCY            0x000003f8
3216  * MSR_PKG_C6_RESIDENCY            0x000003f9
3217  * MSR_CORE_C3_RESIDENCY           0x000003fc
3218  * MSR_CORE_C6_RESIDENCY           0x000003fd
3219  *
3220  * Side effect:
3221  * sets global pkg_cstate_limit to decode MSR_PKG_CST_CONFIG_CONTROL
3222  * sets has_misc_feature_control
3223  */
3224 int probe_nhm_msrs(unsigned int family, unsigned int model)
3225 {
3226         unsigned long long msr;
3227         unsigned int base_ratio;
3228         int *pkg_cstate_limits;
3229
3230         if (!genuine_intel)
3231                 return 0;
3232
3233         if (family != 6)
3234                 return 0;
3235
3236         bclk = discover_bclk(family, model);
3237
3238         switch (model) {
3239         case INTEL_FAM6_NEHALEM:        /* Core i7 and i5 Processor - Clarksfield, Lynnfield, Jasper Forest */
3240         case INTEL_FAM6_NEHALEM_EX:     /* Nehalem-EX Xeon - Beckton */
3241                 pkg_cstate_limits = nhm_pkg_cstate_limits;
3242                 break;
3243         case INTEL_FAM6_SANDYBRIDGE:    /* SNB */
3244         case INTEL_FAM6_SANDYBRIDGE_X:  /* SNB Xeon */
3245         case INTEL_FAM6_IVYBRIDGE:      /* IVB */
3246         case INTEL_FAM6_IVYBRIDGE_X:    /* IVB Xeon */
3247                 pkg_cstate_limits = snb_pkg_cstate_limits;
3248                 has_misc_feature_control = 1;
3249                 break;
3250         case INTEL_FAM6_HASWELL:        /* HSW */
3251         case INTEL_FAM6_HASWELL_G:      /* HSW */
3252         case INTEL_FAM6_HASWELL_X:      /* HSX */
3253         case INTEL_FAM6_HASWELL_L:      /* HSW */
3254         case INTEL_FAM6_BROADWELL:      /* BDW */
3255         case INTEL_FAM6_BROADWELL_G:    /* BDW */
3256         case INTEL_FAM6_BROADWELL_X:    /* BDX */
3257         case INTEL_FAM6_SKYLAKE_L:      /* SKL */
3258         case INTEL_FAM6_CANNONLAKE_L:   /* CNL */
3259                 pkg_cstate_limits = hsw_pkg_cstate_limits;
3260                 has_misc_feature_control = 1;
3261                 break;
3262         case INTEL_FAM6_SKYLAKE_X:      /* SKX */
3263                 pkg_cstate_limits = skx_pkg_cstate_limits;
3264                 has_misc_feature_control = 1;
3265                 break;
3266         case INTEL_FAM6_ATOM_SILVERMONT:        /* BYT */
3267                 no_MSR_MISC_PWR_MGMT = 1;
3268         case INTEL_FAM6_ATOM_SILVERMONT_D:      /* AVN */
3269                 pkg_cstate_limits = slv_pkg_cstate_limits;
3270                 break;
3271         case INTEL_FAM6_ATOM_AIRMONT:   /* AMT */
3272                 pkg_cstate_limits = amt_pkg_cstate_limits;
3273                 no_MSR_MISC_PWR_MGMT = 1;
3274                 break;
3275         case INTEL_FAM6_XEON_PHI_KNL:   /* PHI */
3276                 pkg_cstate_limits = phi_pkg_cstate_limits;
3277                 break;
3278         case INTEL_FAM6_ATOM_GOLDMONT:  /* BXT */
3279         case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
3280         case INTEL_FAM6_ATOM_GOLDMONT_D:        /* DNV */
3281         case INTEL_FAM6_ATOM_TREMONT:   /* EHL */
3282                 pkg_cstate_limits = glm_pkg_cstate_limits;
3283                 break;
3284         default:
3285                 return 0;
3286         }
3287         get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr);
3288         pkg_cstate_limit = pkg_cstate_limits[msr & 0xF];
3289
3290         get_msr(base_cpu, MSR_PLATFORM_INFO, &msr);
3291         base_ratio = (msr >> 8) & 0xFF;
3292
3293         base_hz = base_ratio * bclk * 1000000;
3294         has_base_hz = 1;
3295         return 1;
3296 }
3297 /*
3298  * SLV client has support for unique MSRs:
3299  *
3300  * MSR_CC6_DEMOTION_POLICY_CONFIG
3301  * MSR_MC6_DEMOTION_POLICY_CONFIG
3302  */
3303
3304 int has_slv_msrs(unsigned int family, unsigned int model)
3305 {
3306         if (!genuine_intel)
3307                 return 0;
3308
3309         switch (model) {
3310         case INTEL_FAM6_ATOM_SILVERMONT:
3311         case INTEL_FAM6_ATOM_SILVERMONT_MID:
3312         case INTEL_FAM6_ATOM_AIRMONT_MID:
3313                 return 1;
3314         }
3315         return 0;
3316 }
3317 int is_dnv(unsigned int family, unsigned int model)
3318 {
3319
3320         if (!genuine_intel)
3321                 return 0;
3322
3323         switch (model) {
3324         case INTEL_FAM6_ATOM_GOLDMONT_D:
3325                 return 1;
3326         }
3327         return 0;
3328 }
3329 int is_bdx(unsigned int family, unsigned int model)
3330 {
3331
3332         if (!genuine_intel)
3333                 return 0;
3334
3335         switch (model) {
3336         case INTEL_FAM6_BROADWELL_X:
3337                 return 1;
3338         }
3339         return 0;
3340 }
3341 int is_skx(unsigned int family, unsigned int model)
3342 {
3343
3344         if (!genuine_intel)
3345                 return 0;
3346
3347         switch (model) {
3348         case INTEL_FAM6_SKYLAKE_X:
3349                 return 1;
3350         }
3351         return 0;
3352 }
3353 int is_ehl(unsigned int family, unsigned int model)
3354 {
3355         if (!genuine_intel)
3356                 return 0;
3357
3358         switch (model) {
3359         case INTEL_FAM6_ATOM_TREMONT:
3360                 return 1;
3361         }
3362         return 0;
3363 }
3364
3365 int has_turbo_ratio_limit(unsigned int family, unsigned int model)
3366 {
3367         if (has_slv_msrs(family, model))
3368                 return 0;
3369
3370         switch (model) {
3371         /* Nehalem compatible, but do not include turbo-ratio limit support */
3372         case INTEL_FAM6_NEHALEM_EX:     /* Nehalem-EX Xeon - Beckton */
3373         case INTEL_FAM6_XEON_PHI_KNL:   /* PHI - Knights Landing (different MSR definition) */
3374                 return 0;
3375         default:
3376                 return 1;
3377         }
3378 }
/*
 * has_atom_turbo_ratio_limit()
 * Returns 1 exactly when has_slv_msrs() reports true, else 0.
 */
int has_atom_turbo_ratio_limit(unsigned int family, unsigned int model)
{
	return has_slv_msrs(family, model) ? 1 : 0;
}
3386 int has_ivt_turbo_ratio_limit(unsigned int family, unsigned int model)
3387 {
3388         if (!genuine_intel)
3389                 return 0;
3390
3391         if (family != 6)
3392                 return 0;
3393
3394         switch (model) {
3395         case INTEL_FAM6_IVYBRIDGE_X:    /* IVB Xeon */
3396         case INTEL_FAM6_HASWELL_X:      /* HSW Xeon */
3397                 return 1;
3398         default:
3399                 return 0;
3400         }
3401 }
3402 int has_hsw_turbo_ratio_limit(unsigned int family, unsigned int model)
3403 {
3404         if (!genuine_intel)
3405                 return 0;
3406
3407         if (family != 6)
3408                 return 0;
3409
3410         switch (model) {
3411         case INTEL_FAM6_HASWELL_X:      /* HSW Xeon */
3412                 return 1;
3413         default:
3414                 return 0;
3415         }
3416 }
3417
3418 int has_knl_turbo_ratio_limit(unsigned int family, unsigned int model)
3419 {
3420         if (!genuine_intel)
3421                 return 0;
3422
3423         if (family != 6)
3424                 return 0;
3425
3426         switch (model) {
3427         case INTEL_FAM6_XEON_PHI_KNL:   /* Knights Landing */
3428                 return 1;
3429         default:
3430                 return 0;
3431         }
3432 }
3433 int has_glm_turbo_ratio_limit(unsigned int family, unsigned int model)
3434 {
3435         if (!genuine_intel)
3436                 return 0;
3437
3438         if (family != 6)
3439                 return 0;
3440
3441         switch (model) {
3442         case INTEL_FAM6_ATOM_GOLDMONT:
3443         case INTEL_FAM6_SKYLAKE_X:
3444                 return 1;
3445         default:
3446                 return 0;
3447         }
3448 }
3449 int has_config_tdp(unsigned int family, unsigned int model)
3450 {
3451         if (!genuine_intel)
3452                 return 0;
3453
3454         if (family != 6)
3455                 return 0;
3456
3457         switch (model) {
3458         case INTEL_FAM6_IVYBRIDGE:      /* IVB */
3459         case INTEL_FAM6_HASWELL:        /* HSW */
3460         case INTEL_FAM6_HASWELL_X:      /* HSX */
3461         case INTEL_FAM6_HASWELL_L:      /* HSW */
3462         case INTEL_FAM6_HASWELL_G:      /* HSW */
3463         case INTEL_FAM6_BROADWELL:      /* BDW */
3464         case INTEL_FAM6_BROADWELL_G:    /* BDW */
3465         case INTEL_FAM6_BROADWELL_X:    /* BDX */
3466         case INTEL_FAM6_SKYLAKE_L:      /* SKL */
3467         case INTEL_FAM6_CANNONLAKE_L:   /* CNL */
3468         case INTEL_FAM6_SKYLAKE_X:      /* SKX */
3469
3470         case INTEL_FAM6_XEON_PHI_KNL:   /* Knights Landing */
3471                 return 1;
3472         default:
3473                 return 0;
3474         }
3475 }
3476
3477 static void
3478 dump_cstate_pstate_config_info(unsigned int family, unsigned int model)
3479 {
3480         if (!do_nhm_platform_info)
3481                 return;
3482
3483         dump_nhm_platform_info();
3484
3485         if (has_hsw_turbo_ratio_limit(family, model))
3486                 dump_hsw_turbo_ratio_limits();
3487
3488         if (has_ivt_turbo_ratio_limit(family, model))
3489                 dump_ivt_turbo_ratio_limits();
3490
3491         if (has_turbo_ratio_limit(family, model))
3492                 dump_turbo_ratio_limits(family, model);
3493
3494         if (has_atom_turbo_ratio_limit(family, model))
3495                 dump_atom_turbo_ratio_limits();
3496
3497         if (has_knl_turbo_ratio_limit(family, model))
3498                 dump_knl_turbo_ratio_limits();
3499
3500         if (has_config_tdp(family, model))
3501                 dump_config_tdp();
3502
3503         dump_nhm_cst_cfg();
3504 }
3505
3506 static void dump_sysfs_file(char *path)
3507 {
3508         FILE *input;
3509         char cpuidle_buf[64];
3510
3511         input = fopen(path, "r");
3512         if (input == NULL) {
3513                 if (debug)
3514                         fprintf(outf, "NSFOD %s\n", path);
3515                 return;
3516         }
3517         if (!fgets(cpuidle_buf, sizeof(cpuidle_buf), input))
3518                 err(1, "%s: failed to read file", path);
3519         fclose(input);
3520
3521         fprintf(outf, "%s: %s", strrchr(path, '/') + 1, cpuidle_buf);
3522 }
3523 static void
3524 dump_sysfs_cstate_config(void)
3525 {
3526         char path[64];
3527         char name_buf[16];
3528         char desc[64];
3529         FILE *input;
3530         int state;
3531         char *sp;
3532
3533         if (!DO_BIC(BIC_sysfs))
3534                 return;
3535
3536         if (access("/sys/devices/system/cpu/cpuidle", R_OK)) {
3537                 fprintf(outf, "cpuidle not loaded\n");
3538                 return;
3539         }
3540
3541         dump_sysfs_file("/sys/devices/system/cpu/cpuidle/current_driver");
3542         dump_sysfs_file("/sys/devices/system/cpu/cpuidle/current_governor");
3543         dump_sysfs_file("/sys/devices/system/cpu/cpuidle/current_governor_ro");
3544
3545         for (state = 0; state < 10; ++state) {
3546
3547                 sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name",
3548                         base_cpu, state);
3549                 input = fopen(path, "r");
3550                 if (input == NULL)
3551                         continue;
3552                 if (!fgets(name_buf, sizeof(name_buf), input))
3553                         err(1, "%s: failed to read file", path);
3554
3555                  /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */
3556                 sp = strchr(name_buf, '-');
3557                 if (!sp)
3558                         sp = strchrnul(name_buf, '\n');
3559                 *sp = '\0';
3560                 fclose(input);
3561
3562                 sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/desc",
3563                         base_cpu, state);
3564                 input = fopen(path, "r");
3565                 if (input == NULL)
3566                         continue;
3567                 if (!fgets(desc, sizeof(desc), input))
3568                         err(1, "%s: failed to read file", path);
3569
3570                 fprintf(outf, "cpu%d: %s: %s", base_cpu, name_buf, desc);
3571                 fclose(input);
3572         }
3573 }
3574 static void
3575 dump_sysfs_pstate_config(void)
3576 {
3577         char path[64];
3578         char driver_buf[64];
3579         char governor_buf[64];
3580         FILE *input;
3581         int turbo;
3582
3583         sprintf(path, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_driver",
3584                         base_cpu);
3585         input = fopen(path, "r");
3586         if (input == NULL) {
3587                 fprintf(outf, "NSFOD %s\n", path);
3588                 return;
3589         }
3590         if (!fgets(driver_buf, sizeof(driver_buf), input))
3591                 err(1, "%s: failed to read file", path);
3592         fclose(input);
3593
3594         sprintf(path, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_governor",
3595                         base_cpu);
3596         input = fopen(path, "r");
3597         if (input == NULL) {
3598                 fprintf(outf, "NSFOD %s\n", path);
3599                 return;
3600         }
3601         if (!fgets(governor_buf, sizeof(governor_buf), input))
3602                 err(1, "%s: failed to read file", path);
3603         fclose(input);
3604
3605         fprintf(outf, "cpu%d: cpufreq driver: %s", base_cpu, driver_buf);
3606         fprintf(outf, "cpu%d: cpufreq governor: %s", base_cpu, governor_buf);
3607
3608         sprintf(path, "/sys/devices/system/cpu/cpufreq/boost");
3609         input = fopen(path, "r");
3610         if (input != NULL) {
3611                 if (fscanf(input, "%d", &turbo) != 1)
3612                         err(1, "%s: failed to parse number from file", path);
3613                 fprintf(outf, "cpufreq boost: %d\n", turbo);
3614                 fclose(input);
3615         }
3616
3617         sprintf(path, "/sys/devices/system/cpu/intel_pstate/no_turbo");
3618         input = fopen(path, "r");
3619         if (input != NULL) {
3620                 if (fscanf(input, "%d", &turbo) != 1)
3621                         err(1, "%s: failed to parse number from file", path);
3622                 fprintf(outf, "cpufreq intel_pstate no_turbo: %d\n", turbo);
3623                 fclose(input);
3624         }
3625 }
3626
3627
3628 /*
3629  * print_epb()
3630  * Decode the ENERGY_PERF_BIAS MSR
3631  */
3632 int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p)
3633 {
3634         unsigned long long msr;
3635         char *epb_string;
3636         int cpu;
3637
3638         if (!has_epb)
3639                 return 0;
3640
3641         cpu = t->cpu_id;
3642
3643         /* EPB is per-package */
3644         if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
3645                 return 0;
3646
3647         if (cpu_migrate(cpu)) {
3648                 fprintf(outf, "Could not migrate to CPU %d\n", cpu);
3649                 return -1;
3650         }
3651
3652         if (get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, &msr))
3653                 return 0;
3654
3655         switch (msr & 0xF) {
3656         case ENERGY_PERF_BIAS_PERFORMANCE:
3657                 epb_string = "performance";
3658                 break;
3659         case ENERGY_PERF_BIAS_NORMAL:
3660                 epb_string = "balanced";
3661                 break;
3662         case ENERGY_PERF_BIAS_POWERSAVE:
3663                 epb_string = "powersave";
3664                 break;
3665         default:
3666                 epb_string = "custom";
3667                 break;
3668         }
3669         fprintf(outf, "cpu%d: MSR_IA32_ENERGY_PERF_BIAS: 0x%08llx (%s)\n", cpu, msr, epb_string);
3670
3671         return 0;
3672 }
3673 /*
3674  * print_hwp()
3675  * Decode the MSR_HWP_CAPABILITIES
3676  */
3677 int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p)
3678 {
3679         unsigned long long msr;
3680         int cpu;
3681
3682         if (!has_hwp)
3683                 return 0;
3684
3685         cpu = t->cpu_id;
3686
3687         /* MSR_HWP_CAPABILITIES is per-package */
3688         if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
3689                 return 0;
3690
3691         if (cpu_migrate(cpu)) {
3692                 fprintf(outf, "Could not migrate to CPU %d\n", cpu);
3693                 return -1;
3694         }
3695
3696         if (get_msr(cpu, MSR_PM_ENABLE, &msr))
3697                 return 0;
3698
3699         fprintf(outf, "cpu%d: MSR_PM_ENABLE: 0x%08llx (%sHWP)\n",
3700                 cpu, msr, (msr & (1 << 0)) ? "" : "No-");
3701
3702         /* MSR_PM_ENABLE[1] == 1 if HWP is enabled and MSRs visible */
3703         if ((msr & (1 << 0)) == 0)
3704                 return 0;
3705
3706         if (get_msr(cpu, MSR_HWP_CAPABILITIES, &msr))
3707                 return 0;
3708
3709         fprintf(outf, "cpu%d: MSR_HWP_CAPABILITIES: 0x%08llx "
3710                         "(high %d guar %d eff %d low %d)\n",
3711                         cpu, msr,
3712                         (unsigned int)HWP_HIGHEST_PERF(msr),
3713                         (unsigned int)HWP_GUARANTEED_PERF(msr),
3714                         (unsigned int)HWP_MOSTEFFICIENT_PERF(msr),
3715                         (unsigned int)HWP_LOWEST_PERF(msr));
3716
3717         if (get_msr(cpu, MSR_HWP_REQUEST, &msr))
3718                 return 0;
3719
3720         fprintf(outf, "cpu%d: MSR_HWP_REQUEST: 0x%08llx "
3721                         "(min %d max %d des %d epp 0x%x window 0x%x pkg 0x%x)\n",
3722                         cpu, msr,
3723                         (unsigned int)(((msr) >> 0) & 0xff),
3724                         (unsigned int)(((msr) >> 8) & 0xff),
3725                         (unsigned int)(((msr) >> 16) & 0xff),
3726                         (unsigned int)(((msr) >> 24) & 0xff),
3727                         (unsigned int)(((msr) >> 32) & 0xff3),
3728                         (unsigned int)(((msr) >> 42) & 0x1));
3729
3730         if (has_hwp_pkg) {
3731                 if (get_msr(cpu, MSR_HWP_REQUEST_PKG, &msr))
3732                         return 0;
3733
3734                 fprintf(outf, "cpu%d: MSR_HWP_REQUEST_PKG: 0x%08llx "
3735                         "(min %d max %d des %d epp 0x%x window 0x%x)\n",
3736                         cpu, msr,
3737                         (unsigned int)(((msr) >> 0) & 0xff),
3738                         (unsigned int)(((msr) >> 8) & 0xff),
3739                         (unsigned int)(((msr) >> 16) & 0xff),
3740                         (unsigned int)(((msr) >> 24) & 0xff),
3741                         (unsigned int)(((msr) >> 32) & 0xff3));
3742         }
3743         if (has_hwp_notify) {
3744                 if (get_msr(cpu, MSR_HWP_INTERRUPT, &msr))
3745                         return 0;
3746
3747                 fprintf(outf, "cpu%d: MSR_HWP_INTERRUPT: 0x%08llx "
3748                         "(%s_Guaranteed_Perf_Change, %s_Excursion_Min)\n",
3749                         cpu, msr,
3750                         ((msr) & 0x1) ? "EN" : "Dis",
3751                         ((msr) & 0x2) ? "EN" : "Dis");
3752         }
3753         if (get_msr(cpu, MSR_HWP_STATUS, &msr))
3754                 return 0;
3755
3756         fprintf(outf, "cpu%d: MSR_HWP_STATUS: 0x%08llx "
3757                         "(%sGuaranteed_Perf_Change, %sExcursion_Min)\n",
3758                         cpu, msr,
3759                         ((msr) & 0x1) ? "" : "No-",
3760                         ((msr) & 0x2) ? "" : "No-");
3761
3762         return 0;
3763 }
3764
3765 /*
3766  * print_perf_limit()
3767  */
3768 int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data *p)
3769 {
3770         unsigned long long msr;
3771         int cpu;
3772
3773         cpu = t->cpu_id;
3774
3775         /* per-package */
3776         if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
3777                 return 0;
3778
3779         if (cpu_migrate(cpu)) {
3780                 fprintf(outf, "Could not migrate to CPU %d\n", cpu);
3781                 return -1;
3782         }
3783
3784         if (do_core_perf_limit_reasons) {
3785                 get_msr(cpu, MSR_CORE_PERF_LIMIT_REASONS, &msr);
3786                 fprintf(outf, "cpu%d: MSR_CORE_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
3787                 fprintf(outf, " (Active: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)",
3788                         (msr & 1 << 15) ? "bit15, " : "",
3789                         (msr & 1 << 14) ? "bit14, " : "",
3790                         (msr & 1 << 13) ? "Transitions, " : "",
3791                         (msr & 1 << 12) ? "MultiCoreTurbo, " : "",
3792                         (msr & 1 << 11) ? "PkgPwrL2, " : "",
3793                         (msr & 1 << 10) ? "PkgPwrL1, " : "",
3794                         (msr & 1 << 9) ? "CorePwr, " : "",
3795                         (msr & 1 << 8) ? "Amps, " : "",
3796                         (msr & 1 << 6) ? "VR-Therm, " : "",
3797                         (msr & 1 << 5) ? "Auto-HWP, " : "",
3798                         (msr & 1 << 4) ? "Graphics, " : "",
3799                         (msr & 1 << 2) ? "bit2, " : "",
3800                         (msr & 1 << 1) ? "ThermStatus, " : "",
3801                         (msr & 1 << 0) ? "PROCHOT, " : "");
3802                 fprintf(outf, " (Logged: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)\n",
3803                         (msr & 1 << 31) ? "bit31, " : "",
3804                         (msr & 1 << 30) ? "bit30, " : "",
3805                         (msr & 1 << 29) ? "Transitions, " : "",
3806                         (msr & 1 << 28) ? "MultiCoreTurbo, " : "",
3807                         (msr & 1 << 27) ? "PkgPwrL2, " : "",
3808                         (msr & 1 << 26) ? "PkgPwrL1, " : "",
3809                         (msr & 1 << 25) ? "CorePwr, " : "",
3810                         (msr & 1 << 24) ? "Amps, " : "",
3811                         (msr & 1 << 22) ? "VR-Therm, " : "",
3812                         (msr & 1 << 21) ? "Auto-HWP, " : "",
3813                         (msr & 1 << 20) ? "Graphics, " : "",
3814                         (msr & 1 << 18) ? "bit18, " : "",
3815                         (msr & 1 << 17) ? "ThermStatus, " : "",
3816                         (msr & 1 << 16) ? "PROCHOT, " : "");
3817
3818         }
3819         if (do_gfx_perf_limit_reasons) {
3820                 get_msr(cpu, MSR_GFX_PERF_LIMIT_REASONS, &msr);
3821                 fprintf(outf, "cpu%d: MSR_GFX_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
3822                 fprintf(outf, " (Active: %s%s%s%s%s%s%s%s)",
3823                         (msr & 1 << 0) ? "PROCHOT, " : "",
3824                         (msr & 1 << 1) ? "ThermStatus, " : "",
3825                         (msr & 1 << 4) ? "Graphics, " : "",
3826                         (msr & 1 << 6) ? "VR-Therm, " : "",
3827                         (msr & 1 << 8) ? "Amps, " : "",
3828                         (msr & 1 << 9) ? "GFXPwr, " : "",
3829                         (msr & 1 << 10) ? "PkgPwrL1, " : "",
3830                         (msr & 1 << 11) ? "PkgPwrL2, " : "");
3831                 fprintf(outf, " (Logged: %s%s%s%s%s%s%s%s)\n",
3832                         (msr & 1 << 16) ? "PROCHOT, " : "",
3833                         (msr & 1 << 17) ? "ThermStatus, " : "",
3834                         (msr & 1 << 20) ? "Graphics, " : "",
3835                         (msr & 1 << 22) ? "VR-Therm, " : "",
3836                         (msr & 1 << 24) ? "Amps, " : "",
3837                         (msr & 1 << 25) ? "GFXPwr, " : "",
3838                         (msr & 1 << 26) ? "PkgPwrL1, " : "",
3839                         (msr & 1 << 27) ? "PkgPwrL2, " : "");
3840         }
3841         if (do_ring_perf_limit_reasons) {
3842                 get_msr(cpu, MSR_RING_PERF_LIMIT_REASONS, &msr);
3843                 fprintf(outf, "cpu%d: MSR_RING_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
3844                 fprintf(outf, " (Active: %s%s%s%s%s%s)",
3845                         (msr & 1 << 0) ? "PROCHOT, " : "",
3846                         (msr & 1 << 1) ? "ThermStatus, " : "",
3847                         (msr & 1 << 6) ? "VR-Therm, " : "",
3848                         (msr & 1 << 8) ? "Amps, " : "",
3849                         (msr & 1 << 10) ? "PkgPwrL1, " : "",
3850                         (msr & 1 << 11) ? "PkgPwrL2, " : "");
3851                 fprintf(outf, " (Logged: %s%s%s%s%s%s)\n",
3852                         (msr & 1 << 16) ? "PROCHOT, " : "",
3853                         (msr & 1 << 17) ? "ThermStatus, " : "",
3854                         (msr & 1 << 22) ? "VR-Therm, " : "",
3855                         (msr & 1 << 24) ? "Amps, " : "",
3856                         (msr & 1 << 26) ? "PkgPwrL1, " : "",
3857                         (msr & 1 << 27) ? "PkgPwrL2, " : "");
3858         }
3859         return 0;
3860 }
3861
3862 #define RAPL_POWER_GRANULARITY  0x7FFF  /* 15 bit power granularity */
3863 #define RAPL_TIME_GRANULARITY   0x3F /* 6 bit time granularity */
3864
3865 double get_tdp_intel(unsigned int model)
3866 {
3867         unsigned long long msr;
3868
3869         if (do_rapl & RAPL_PKG_POWER_INFO)
3870                 if (!get_msr(base_cpu, MSR_PKG_POWER_INFO, &msr))
3871                         return ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units;
3872
3873         switch (model) {
3874         case INTEL_FAM6_ATOM_SILVERMONT:
3875         case INTEL_FAM6_ATOM_SILVERMONT_D:
3876                 return 30.0;
3877         default:
3878                 return 135.0;
3879         }
3880 }
3881
/*
 * get_tdp_amd()
 * Return a TDP estimate for an AMD/Hygon CPU.
 * Families 0x17 and 0x18 are the ones named explicitly, but every
 * family currently receives the same value.
 */
double get_tdp_amd(unsigned int family)
{
	/* This is the max stock TDP of HEDT/Server Fam17h chips */
	const double max_stock_tdp = 250.0;

	if (family == 0x17 || family == 0x18)
		return max_stock_tdp;

	/* no per-family data for anything else: same estimate */
	return max_stock_tdp;
}
3892
3893 /*
3894  * rapl_dram_energy_units_probe()
3895  * Energy units are either hard-coded, or come from RAPL Energy Unit MSR.
3896  */
3897 static double
3898 rapl_dram_energy_units_probe(int  model, double rapl_energy_units)
3899 {
3900         /* only called for genuine_intel, family 6 */
3901
3902         switch (model) {
3903         case INTEL_FAM6_HASWELL_X:      /* HSX */
3904         case INTEL_FAM6_BROADWELL_X:    /* BDX */
3905         case INTEL_FAM6_XEON_PHI_KNL:   /* KNL */
3906                 return (rapl_dram_energy_units = 15.3 / 1000000);
3907         default:
3908                 return (rapl_energy_units);
3909         }
3910 }
3911
3912 void rapl_probe_intel(unsigned int family, unsigned int model)
3913 {
3914         unsigned long long msr;
3915         unsigned int time_unit;
3916         double tdp;
3917
3918         if (family != 6)
3919                 return;
3920
3921         switch (model) {
3922         case INTEL_FAM6_SANDYBRIDGE:
3923         case INTEL_FAM6_IVYBRIDGE:
3924         case INTEL_FAM6_HASWELL:        /* HSW */
3925         case INTEL_FAM6_HASWELL_L:      /* HSW */
3926         case INTEL_FAM6_HASWELL_G:      /* HSW */
3927         case INTEL_FAM6_BROADWELL:      /* BDW */
3928         case INTEL_FAM6_BROADWELL_G:    /* BDW */
3929                 do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_GFX | RAPL_PKG_POWER_INFO;
3930                 if (rapl_joules) {
3931                         BIC_PRESENT(BIC_Pkg_J);
3932                         BIC_PRESENT(BIC_Cor_J);
3933                         BIC_PRESENT(BIC_GFX_J);
3934                 } else {
3935                         BIC_PRESENT(BIC_PkgWatt);
3936                         BIC_PRESENT(BIC_CorWatt);
3937                         BIC_PRESENT(BIC_GFXWatt);
3938                 }
3939                 break;
3940         case INTEL_FAM6_ATOM_GOLDMONT:  /* BXT */
3941         case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
3942                 do_rapl = RAPL_PKG | RAPL_PKG_POWER_INFO;
3943                 if (rapl_joules)
3944                         BIC_PRESENT(BIC_Pkg_J);
3945                 else
3946                         BIC_PRESENT(BIC_PkgWatt);
3947                 break;
3948         case INTEL_FAM6_ATOM_TREMONT:   /* EHL */
3949                 do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_GFX | RAPL_PKG_POWER_INFO;
3950                 if (rapl_joules) {
3951                         BIC_PRESENT(BIC_Pkg_J);
3952                         BIC_PRESENT(BIC_Cor_J);
3953                         BIC_PRESENT(BIC_RAM_J);
3954                         BIC_PRESENT(BIC_GFX_J);
3955                 } else {
3956                         BIC_PRESENT(BIC_PkgWatt);
3957                         BIC_PRESENT(BIC_CorWatt);
3958                         BIC_PRESENT(BIC_RAMWatt);
3959                         BIC_PRESENT(BIC_GFXWatt);
3960                 }
3961                 break;
3962         case INTEL_FAM6_SKYLAKE_L:      /* SKL */
3963         case INTEL_FAM6_CANNONLAKE_L:   /* CNL */
3964                 do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_GFX | RAPL_PKG_POWER_INFO;
3965                 BIC_PRESENT(BIC_PKG__);
3966                 BIC_PRESENT(BIC_RAM__);
3967                 if (rapl_joules) {
3968                         BIC_PRESENT(BIC_Pkg_J);
3969                         BIC_PRESENT(BIC_Cor_J);
3970                         BIC_PRESENT(BIC_RAM_J);
3971                         BIC_PRESENT(BIC_GFX_J);
3972                 } else {
3973                         BIC_PRESENT(BIC_PkgWatt);
3974                         BIC_PRESENT(BIC_CorWatt);
3975                         BIC_PRESENT(BIC_RAMWatt);
3976                         BIC_PRESENT(BIC_GFXWatt);
3977                 }
3978                 break;
3979         case INTEL_FAM6_HASWELL_X:      /* HSX */
3980         case INTEL_FAM6_BROADWELL_X:    /* BDX */
3981         case INTEL_FAM6_SKYLAKE_X:      /* SKX */
3982         case INTEL_FAM6_XEON_PHI_KNL:   /* KNL */
3983                 do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO;
3984                 BIC_PRESENT(BIC_PKG__);
3985                 BIC_PRESENT(BIC_RAM__);
3986                 if (rapl_joules) {
3987                         BIC_PRESENT(BIC_Pkg_J);
3988                         BIC_PRESENT(BIC_RAM_J);
3989                 } else {
3990                         BIC_PRESENT(BIC_PkgWatt);
3991                         BIC_PRESENT(BIC_RAMWatt);
3992                 }
3993                 break;
3994         case INTEL_FAM6_SANDYBRIDGE_X:
3995         case INTEL_FAM6_IVYBRIDGE_X:
3996                 do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_PKG_PERF_STATUS | RAPL_DRAM_PERF_STATUS | RAPL_PKG_POWER_INFO;
3997                 BIC_PRESENT(BIC_PKG__);
3998                 BIC_PRESENT(BIC_RAM__);
3999                 if (rapl_joules) {
4000                         BIC_PRESENT(BIC_Pkg_J);
4001                         BIC_PRESENT(BIC_Cor_J);
4002                         BIC_PRESENT(BIC_RAM_J);
4003                 } else {
4004                         BIC_PRESENT(BIC_PkgWatt);
4005                         BIC_PRESENT(BIC_CorWatt);
4006                         BIC_PRESENT(BIC_RAMWatt);
4007                 }
4008                 break;
4009         case INTEL_FAM6_ATOM_SILVERMONT:        /* BYT */
4010         case INTEL_FAM6_ATOM_SILVERMONT_D:      /* AVN */
4011                 do_rapl = RAPL_PKG | RAPL_CORES;
4012                 if (rapl_joules) {
4013                         BIC_PRESENT(BIC_Pkg_J);
4014                         BIC_PRESENT(BIC_Cor_J);
4015                 } else {
4016                         BIC_PRESENT(BIC_PkgWatt);
4017                         BIC_PRESENT(BIC_CorWatt);
4018                 }
4019                 break;
4020         case INTEL_FAM6_ATOM_GOLDMONT_D:        /* DNV */
4021                 do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO | RAPL_CORES_ENERGY_STATUS;
4022                 BIC_PRESENT(BIC_PKG__);
4023                 BIC_PRESENT(BIC_RAM__);
4024                 if (rapl_joules) {
4025                         BIC_PRESENT(BIC_Pkg_J);
4026                         BIC_PRESENT(BIC_Cor_J);
4027                         BIC_PRESENT(BIC_RAM_J);
4028                 } else {
4029                         BIC_PRESENT(BIC_PkgWatt);
4030                         BIC_PRESENT(BIC_CorWatt);
4031                         BIC_PRESENT(BIC_RAMWatt);
4032                 }
4033                 break;
4034         default:
4035                 return;
4036         }
4037
4038         /* units on package 0, verify later other packages match */
4039         if (get_msr(base_cpu, MSR_RAPL_POWER_UNIT, &msr))
4040                 return;
4041
4042         rapl_power_units = 1.0 / (1 << (msr & 0xF));
4043         if (model == INTEL_FAM6_ATOM_SILVERMONT)
4044                 rapl_energy_units = 1.0 * (1 << (msr >> 8 & 0x1F)) / 1000000;
4045         else
4046                 rapl_energy_units = 1.0 / (1 << (msr >> 8 & 0x1F));
4047
4048         rapl_dram_energy_units = rapl_dram_energy_units_probe(model, rapl_energy_units);
4049
4050         time_unit = msr >> 16 & 0xF;
4051         if (time_unit == 0)
4052                 time_unit = 0xA;
4053
4054         rapl_time_units = 1.0 / (1 << (time_unit));
4055
4056         tdp = get_tdp_intel(model);
4057
4058         rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp;
4059         if (!quiet)
4060                 fprintf(outf, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp);
4061 }
4062
4063 void rapl_probe_amd(unsigned int family, unsigned int model)
4064 {
4065         unsigned long long msr;
4066         unsigned int eax, ebx, ecx, edx;
4067         unsigned int has_rapl = 0;
4068         double tdp;
4069
4070         if (max_extended_level >= 0x80000007) {
4071                 __cpuid(0x80000007, eax, ebx, ecx, edx);
4072                 /* RAPL (Fam 17h) */
4073                 has_rapl = edx & (1 << 14);
4074         }
4075
4076         if (!has_rapl)
4077                 return;
4078
4079         switch (family) {
4080         case 0x17: /* Zen, Zen+ */
4081         case 0x18: /* Hygon Dhyana */
4082                 do_rapl = RAPL_AMD_F17H | RAPL_PER_CORE_ENERGY;
4083                 if (rapl_joules) {
4084                         BIC_PRESENT(BIC_Pkg_J);
4085                         BIC_PRESENT(BIC_Cor_J);
4086                 } else {
4087                         BIC_PRESENT(BIC_PkgWatt);
4088                         BIC_PRESENT(BIC_CorWatt);
4089                 }
4090                 break;
4091         default:
4092                 return;
4093         }
4094
4095         if (get_msr(base_cpu, MSR_RAPL_PWR_UNIT, &msr))
4096                 return;
4097
4098         rapl_time_units = ldexp(1.0, -(msr >> 16 & 0xf));
4099         rapl_energy_units = ldexp(1.0, -(msr >> 8 & 0x1f));
4100         rapl_power_units = ldexp(1.0, -(msr & 0xf));
4101
4102         tdp = get_tdp_amd(family);
4103
4104         rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp;
4105         if (!quiet)
4106                 fprintf(outf, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp);
4107 }
4108
4109 /*
4110  * rapl_probe()
4111  *
4112  * sets do_rapl, rapl_power_units, rapl_energy_units, rapl_time_units
4113  */
4114 void rapl_probe(unsigned int family, unsigned int model)
4115 {
4116         if (genuine_intel)
4117                 rapl_probe_intel(family, model);
4118         if (authentic_amd || hygon_genuine)
4119                 rapl_probe_amd(family, model);
4120 }
4121
4122 void perf_limit_reasons_probe(unsigned int family, unsigned int model)
4123 {
4124         if (!genuine_intel)
4125                 return;
4126
4127         if (family != 6)
4128                 return;
4129
4130         switch (model) {
4131         case INTEL_FAM6_HASWELL:        /* HSW */
4132         case INTEL_FAM6_HASWELL_L:      /* HSW */
4133         case INTEL_FAM6_HASWELL_G:      /* HSW */
4134                 do_gfx_perf_limit_reasons = 1;
4135         case INTEL_FAM6_HASWELL_X:      /* HSX */
4136                 do_core_perf_limit_reasons = 1;
4137                 do_ring_perf_limit_reasons = 1;
4138         default:
4139                 return;
4140         }
4141 }
4142
4143 void automatic_cstate_conversion_probe(unsigned int family, unsigned int model)
4144 {
4145         if (is_skx(family, model) || is_bdx(family, model))
4146                 has_automatic_cstate_conversion = 1;
4147 }
4148
4149 int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p)
4150 {
4151         unsigned long long msr;
4152         unsigned int dts, dts2;
4153         int cpu;
4154
4155         if (!(do_dts || do_ptm))
4156                 return 0;
4157
4158         cpu = t->cpu_id;
4159
4160         /* DTS is per-core, no need to print for each thread */
4161         if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
4162                 return 0;
4163
4164         if (cpu_migrate(cpu)) {
4165                 fprintf(outf, "Could not migrate to CPU %d\n", cpu);
4166                 return -1;
4167         }
4168
4169         if (do_ptm && (t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) {
4170                 if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr))
4171                         return 0;
4172
4173                 dts = (msr >> 16) & 0x7F;
4174                 fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_STATUS: 0x%08llx (%d C)\n",
4175                         cpu, msr, tcc_activation_temp - dts);
4176
4177                 if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, &msr))
4178                         return 0;
4179
4180                 dts = (msr >> 16) & 0x7F;
4181                 dts2 = (msr >> 8) & 0x7F;
4182                 fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
4183                         cpu, msr, tcc_activation_temp - dts, tcc_activation_temp - dts2);
4184         }
4185
4186
4187         if (do_dts && debug) {
4188                 unsigned int resolution;
4189
4190                 if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr))
4191                         return 0;
4192
4193                 dts = (msr >> 16) & 0x7F;
4194                 resolution = (msr >> 27) & 0xF;
4195                 fprintf(outf, "cpu%d: MSR_IA32_THERM_STATUS: 0x%08llx (%d C +/- %d)\n",
4196                         cpu, msr, tcc_activation_temp - dts, resolution);
4197
4198                 if (get_msr(cpu, MSR_IA32_THERM_INTERRUPT, &msr))
4199                         return 0;
4200
4201                 dts = (msr >> 16) & 0x7F;
4202                 dts2 = (msr >> 8) & 0x7F;
4203                 fprintf(outf, "cpu%d: MSR_IA32_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
4204                         cpu, msr, tcc_activation_temp - dts, tcc_activation_temp - dts2);
4205         }
4206
4207         return 0;
4208 }
4209
4210 void print_power_limit_msr(int cpu, unsigned long long msr, char *label)
4211 {
4212         fprintf(outf, "cpu%d: %s: %sabled (%f Watts, %f sec, clamp %sabled)\n",
4213                 cpu, label,
4214                 ((msr >> 15) & 1) ? "EN" : "DIS",
4215                 ((msr >> 0) & 0x7FFF) * rapl_power_units,
4216                 (1.0 + (((msr >> 22) & 0x3)/4.0)) * (1 << ((msr >> 17) & 0x1F)) * rapl_time_units,
4217                 (((msr >> 16) & 1) ? "EN" : "DIS"));
4218
4219         return;
4220 }
4221
4222 int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
4223 {
4224         unsigned long long msr;
4225         const char *msr_name;
4226         int cpu;
4227
4228         if (!do_rapl)
4229                 return 0;
4230
4231         /* RAPL counters are per package, so print only for 1st thread/package */
4232         if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
4233                 return 0;
4234
4235         cpu = t->cpu_id;
4236         if (cpu_migrate(cpu)) {
4237                 fprintf(outf, "Could not migrate to CPU %d\n", cpu);
4238                 return -1;
4239         }
4240
4241         if (do_rapl & RAPL_AMD_F17H) {
4242                 msr_name = "MSR_RAPL_PWR_UNIT";
4243                 if (get_msr(cpu, MSR_RAPL_PWR_UNIT, &msr))
4244                         return -1;
4245         } else {
4246                 msr_name = "MSR_RAPL_POWER_UNIT";
4247                 if (get_msr(cpu, MSR_RAPL_POWER_UNIT, &msr))
4248                         return -1;
4249         }
4250
4251         fprintf(outf, "cpu%d: %s: 0x%08llx (%f Watts, %f Joules, %f sec.)\n", cpu, msr_name, msr,
4252                 rapl_power_units, rapl_energy_units, rapl_time_units);
4253
4254         if (do_rapl & RAPL_PKG_POWER_INFO) {
4255
4256                 if (get_msr(cpu, MSR_PKG_POWER_INFO, &msr))
4257                         return -5;
4258
4259
4260                 fprintf(outf, "cpu%d: MSR_PKG_POWER_INFO: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n",
4261                         cpu, msr,
4262                         ((msr >>  0) & RAPL_POWER_GRANULARITY) * rapl_power_units,
4263                         ((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units,
4264                         ((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units,
4265                         ((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units);
4266
4267         }
4268         if (do_rapl & RAPL_PKG) {
4269
4270                 if (get_msr(cpu, MSR_PKG_POWER_LIMIT, &msr))
4271                         return -9;
4272
4273                 fprintf(outf, "cpu%d: MSR_PKG_POWER_LIMIT: 0x%08llx (%slocked)\n",
4274                         cpu, msr, (msr >> 63) & 1 ? "" : "UN");
4275
4276                 print_power_limit_msr(cpu, msr, "PKG Limit #1");
4277                 fprintf(outf, "cpu%d: PKG Limit #2: %sabled (%f Watts, %f* sec, clamp %sabled)\n",
4278                         cpu,
4279                         ((msr >> 47) & 1) ? "EN" : "DIS",
4280                         ((msr >> 32) & 0x7FFF) * rapl_power_units,
4281                         (1.0 + (((msr >> 54) & 0x3)/4.0)) * (1 << ((msr >> 49) & 0x1F)) * rapl_time_units,
4282                         ((msr >> 48) & 1) ? "EN" : "DIS");
4283         }
4284
4285         if (do_rapl & RAPL_DRAM_POWER_INFO) {
4286                 if (get_msr(cpu, MSR_DRAM_POWER_INFO, &msr))
4287                         return -6;
4288
4289                 fprintf(outf, "cpu%d: MSR_DRAM_POWER_INFO,: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n",
4290                         cpu, msr,
4291                         ((msr >>  0) & RAPL_POWER_GRANULARITY) * rapl_power_units,
4292                         ((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units,
4293                         ((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units,
4294                         ((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units);
4295         }
4296         if (do_rapl & RAPL_DRAM) {
4297                 if (get_msr(cpu, MSR_DRAM_POWER_LIMIT, &msr))
4298                         return -9;
4299                 fprintf(outf, "cpu%d: MSR_DRAM_POWER_LIMIT: 0x%08llx (%slocked)\n",
4300                                 cpu, msr, (msr >> 31) & 1 ? "" : "UN");
4301
4302                 print_power_limit_msr(cpu, msr, "DRAM Limit");
4303         }
4304         if (do_rapl & RAPL_CORE_POLICY) {
4305                 if (get_msr(cpu, MSR_PP0_POLICY, &msr))
4306                         return -7;
4307
4308                 fprintf(outf, "cpu%d: MSR_PP0_POLICY: %lld\n", cpu, msr & 0xF);
4309         }
4310         if (do_rapl & RAPL_CORES_POWER_LIMIT) {
4311                 if (get_msr(cpu, MSR_PP0_POWER_LIMIT, &msr))
4312                         return -9;
4313                 fprintf(outf, "cpu%d: MSR_PP0_POWER_LIMIT: 0x%08llx (%slocked)\n",
4314                                 cpu, msr, (msr >> 31) & 1 ? "" : "UN");
4315                 print_power_limit_msr(cpu, msr, "Cores Limit");
4316         }
4317         if (do_rapl & RAPL_GFX) {
4318                 if (get_msr(cpu, MSR_PP1_POLICY, &msr))
4319                         return -8;
4320
4321                 fprintf(outf, "cpu%d: MSR_PP1_POLICY: %lld\n", cpu, msr & 0xF);
4322
4323                 if (get_msr(cpu, MSR_PP1_POWER_LIMIT, &msr))
4324                         return -9;
4325                 fprintf(outf, "cpu%d: MSR_PP1_POWER_LIMIT: 0x%08llx (%slocked)\n",
4326                                 cpu, msr, (msr >> 31) & 1 ? "" : "UN");
4327                 print_power_limit_msr(cpu, msr, "GFX Limit");
4328         }
4329         return 0;
4330 }
4331
4332 /*
4333  * SNB adds support for additional MSRs:
4334  *
4335  * MSR_PKG_C7_RESIDENCY            0x000003fa
4336  * MSR_CORE_C7_RESIDENCY           0x000003fe
4337  * MSR_PKG_C2_RESIDENCY            0x0000060d
4338  */
4339
4340 int has_snb_msrs(unsigned int family, unsigned int model)
4341 {
4342         if (!genuine_intel)
4343                 return 0;
4344
4345         switch (model) {
4346         case INTEL_FAM6_SANDYBRIDGE:
4347         case INTEL_FAM6_SANDYBRIDGE_X:
4348         case INTEL_FAM6_IVYBRIDGE:              /* IVB */
4349         case INTEL_FAM6_IVYBRIDGE_X:            /* IVB Xeon */
4350         case INTEL_FAM6_HASWELL:                /* HSW */
4351         case INTEL_FAM6_HASWELL_X:              /* HSW */
4352         case INTEL_FAM6_HASWELL_L:              /* HSW */
4353         case INTEL_FAM6_HASWELL_G:              /* HSW */
4354         case INTEL_FAM6_BROADWELL:              /* BDW */
4355         case INTEL_FAM6_BROADWELL_G:            /* BDW */
4356         case INTEL_FAM6_BROADWELL_X:            /* BDX */
4357         case INTEL_FAM6_SKYLAKE_L:              /* SKL */
4358         case INTEL_FAM6_CANNONLAKE_L:           /* CNL */
4359         case INTEL_FAM6_SKYLAKE_X:              /* SKX */
4360         case INTEL_FAM6_ATOM_GOLDMONT:          /* BXT */
4361         case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
4362         case INTEL_FAM6_ATOM_GOLDMONT_D:        /* DNV */
4363         case INTEL_FAM6_ATOM_TREMONT:           /* EHL */
4364                 return 1;
4365         }
4366         return 0;
4367 }
4368
4369 /*
4370  * HSW ULT added support for C8/C9/C10 MSRs:
4371  *
4372  * MSR_PKG_C8_RESIDENCY         0x00000630
4373  * MSR_PKG_C9_RESIDENCY         0x00000631
4374  * MSR_PKG_C10_RESIDENCY        0x00000632
4375  *
4376  * MSR_PKGC8_IRTL               0x00000633
4377  * MSR_PKGC9_IRTL               0x00000634
4378  * MSR_PKGC10_IRTL              0x00000635
4379  *
4380  */
4381 int has_c8910_msrs(unsigned int family, unsigned int model)
4382 {
4383         if (!genuine_intel)
4384                 return 0;
4385
4386         switch (model) {
4387         case INTEL_FAM6_HASWELL_L:      /* HSW */
4388         case INTEL_FAM6_BROADWELL:      /* BDW */
4389         case INTEL_FAM6_SKYLAKE_L:      /* SKL */
4390         case INTEL_FAM6_CANNONLAKE_L:   /* CNL */
4391         case INTEL_FAM6_ATOM_GOLDMONT:  /* BXT */
4392         case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
4393         case INTEL_FAM6_ATOM_TREMONT:   /* EHL */
4394                 return 1;
4395         }
4396         return 0;
4397 }
4398
4399 /*
4400  * SKL adds support for additional MSRS:
4401  *
4402  * MSR_PKG_WEIGHTED_CORE_C0_RES    0x00000658
4403  * MSR_PKG_ANY_CORE_C0_RES         0x00000659
4404  * MSR_PKG_ANY_GFXE_C0_RES         0x0000065A
4405  * MSR_PKG_BOTH_CORE_GFXE_C0_RES   0x0000065B
4406  */
4407 int has_skl_msrs(unsigned int family, unsigned int model)
4408 {
4409         if (!genuine_intel)
4410                 return 0;
4411
4412         switch (model) {
4413         case INTEL_FAM6_SKYLAKE_L:      /* SKL */
4414         case INTEL_FAM6_CANNONLAKE_L:   /* CNL */
4415                 return 1;
4416         }
4417         return 0;
4418 }
4419
4420 int is_slm(unsigned int family, unsigned int model)
4421 {
4422         if (!genuine_intel)
4423                 return 0;
4424         switch (model) {
4425         case INTEL_FAM6_ATOM_SILVERMONT:        /* BYT */
4426         case INTEL_FAM6_ATOM_SILVERMONT_D:      /* AVN */
4427                 return 1;
4428         }
4429         return 0;
4430 }
4431
4432 int is_knl(unsigned int family, unsigned int model)
4433 {
4434         if (!genuine_intel)
4435                 return 0;
4436         switch (model) {
4437         case INTEL_FAM6_XEON_PHI_KNL:   /* KNL */
4438                 return 1;
4439         }
4440         return 0;
4441 }
4442
4443 int is_cnl(unsigned int family, unsigned int model)
4444 {
4445         if (!genuine_intel)
4446                 return 0;
4447
4448         switch (model) {
4449         case INTEL_FAM6_CANNONLAKE_L: /* CNL */
4450                 return 1;
4451         }
4452
4453         return 0;
4454 }
4455
/*
 * get_aperf_mperf_multiplier() - 1024 when is_knl() accepts this
 * family/model, 1 for everything else.
 */
unsigned int get_aperf_mperf_multiplier(unsigned int family, unsigned int model)
{
	return is_knl(family, model) ? 1024 : 1;
}
4462
4463 #define SLM_BCLK_FREQS 5
4464 double slm_freq_table[SLM_BCLK_FREQS] = { 83.3, 100.0, 133.3, 116.7, 80.0};
4465
4466 double slm_bclk(void)
4467 {
4468         unsigned long long msr = 3;
4469         unsigned int i;
4470         double freq;
4471
4472         if (get_msr(base_cpu, MSR_FSB_FREQ, &msr))
4473                 fprintf(outf, "SLM BCLK: unknown\n");
4474
4475         i = msr & 0xf;
4476         if (i >= SLM_BCLK_FREQS) {
4477                 fprintf(outf, "SLM BCLK[%d] invalid\n", i);
4478                 i = 3;
4479         }
4480         freq = slm_freq_table[i];
4481
4482         if (!quiet)
4483                 fprintf(outf, "SLM BCLK: %.1f Mhz\n", freq);
4484
4485         return freq;
4486 }
4487
/*
 * discover_bclk()
 *
 * Return the bus clock value used for this family/model:
 *   100.00        when has_snb_msrs() or is_knl() accepts it
 *   slm_bclk()    when is_slm() accepts it
 *   133.33        for everything else
 */
double discover_bclk(unsigned int family, unsigned int model)
{
	if (has_snb_msrs(family, model))
		return 100.00;

	if (is_knl(family, model))
		return 100.00;

	if (is_slm(family, model))
		return slm_bclk();

	return 133.33;
}
4497
4498 /*
4499  * MSR_IA32_TEMPERATURE_TARGET indicates the temperature where
4500  * the Thermal Control Circuit (TCC) activates.
4501  * This is usually equal to tjMax.
4502  *
4503  * Older processors do not have this MSR, so there we guess,
4504  * but also allow cmdline over-ride with -T.
4505  *
4506  * Several MSR temperature values are in units of degrees-C
4507  * below this value, including the Digital Thermal Sensor (DTS),
4508  * Package Thermal Management Sensor (PTM), and thermal event thresholds.
4509  */
4510 int set_temperature_target(struct thread_data *t, struct core_data *c, struct pkg_data *p)
4511 {
4512         unsigned long long msr;
4513         unsigned int target_c_local;
4514         int cpu;
4515
4516         /* tcc_activation_temp is used only for dts or ptm */
4517         if (!(do_dts || do_ptm))
4518                 return 0;
4519
4520         /* this is a per-package concept */
4521         if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
4522                 return 0;
4523
4524         cpu = t->cpu_id;
4525         if (cpu_migrate(cpu)) {
4526                 fprintf(outf, "Could not migrate to CPU %d\n", cpu);
4527                 return -1;
4528         }
4529
4530         if (tcc_activation_temp_override != 0) {
4531                 tcc_activation_temp = tcc_activation_temp_override;
4532                 fprintf(outf, "cpu%d: Using cmdline TCC Target (%d C)\n",
4533                         cpu, tcc_activation_temp);
4534                 return 0;
4535         }
4536
4537         /* Temperature Target MSR is Nehalem and newer only */
4538         if (!do_nhm_platform_info)
4539                 goto guess;
4540
4541         if (get_msr(base_cpu, MSR_IA32_TEMPERATURE_TARGET, &msr))
4542                 goto guess;
4543
4544         target_c_local = (msr >> 16) & 0xFF;
4545
4546         if (!quiet)
4547                 fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C)\n",
4548                         cpu, msr, target_c_local);
4549
4550         if (!target_c_local)
4551                 goto guess;
4552
4553         tcc_activation_temp = target_c_local;
4554
4555         return 0;
4556
4557 guess:
4558         tcc_activation_temp = TJMAX_DEFAULT;
4559         fprintf(outf, "cpu%d: Guessing tjMax %d C, Please use -T to specify\n",
4560                 cpu, tcc_activation_temp);
4561
4562         return 0;
4563 }
4564
4565 void decode_feature_control_msr(void)
4566 {
4567         unsigned long long msr;
4568
4569         if (!get_msr(base_cpu, MSR_IA32_FEAT_CTL, &msr))
4570                 fprintf(outf, "cpu%d: MSR_IA32_FEATURE_CONTROL: 0x%08llx (%sLocked %s)\n",
4571                         base_cpu, msr,
4572                         msr & FEAT_CTL_LOCKED ? "" : "UN-",
4573                         msr & (1 << 18) ? "SGX" : "");
4574 }
4575
4576 void decode_misc_enable_msr(void)
4577 {
4578         unsigned long long msr;
4579
4580         if (!genuine_intel)
4581                 return;
4582
4583         if (!get_msr(base_cpu, MSR_IA32_MISC_ENABLE, &msr))
4584                 fprintf(outf, "cpu%d: MSR_IA32_MISC_ENABLE: 0x%08llx (%sTCC %sEIST %sMWAIT %sPREFETCH %sTURBO)\n",
4585                         base_cpu, msr,
4586                         msr & MSR_IA32_MISC_ENABLE_TM1 ? "" : "No-",
4587                         msr & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP ? "" : "No-",
4588                         msr & MSR_IA32_MISC_ENABLE_MWAIT ? "" : "No-",
4589                         msr & MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE ? "No-" : "",
4590                         msr & MSR_IA32_MISC_ENABLE_TURBO_DISABLE ? "No-" : "");
4591 }
4592
4593 void decode_misc_feature_control(void)
4594 {
4595         unsigned long long msr;
4596
4597         if (!has_misc_feature_control)
4598                 return;
4599
4600         if (!get_msr(base_cpu, MSR_MISC_FEATURE_CONTROL, &msr))
4601                 fprintf(outf, "cpu%d: MSR_MISC_FEATURE_CONTROL: 0x%08llx (%sL2-Prefetch %sL2-Prefetch-pair %sL1-Prefetch %sL1-IP-Prefetch)\n",
4602                         base_cpu, msr,
4603                         msr & (0 << 0) ? "No-" : "",
4604                         msr & (1 << 0) ? "No-" : "",
4605                         msr & (2 << 0) ? "No-" : "",
4606                         msr & (3 << 0) ? "No-" : "");
4607 }
4608 /*
4609  * Decode MSR_MISC_PWR_MGMT
4610  *
4611  * Decode the bits according to the Nehalem documentation
4612  * bit[0] seems to continue to have same meaning going forward
4613  * bit[1] less so...
4614  */
4615 void decode_misc_pwr_mgmt_msr(void)
4616 {
4617         unsigned long long msr;
4618
4619         if (!do_nhm_platform_info)
4620                 return;
4621
4622         if (no_MSR_MISC_PWR_MGMT)
4623                 return;
4624
4625         if (!get_msr(base_cpu, MSR_MISC_PWR_MGMT, &msr))
4626                 fprintf(outf, "cpu%d: MSR_MISC_PWR_MGMT: 0x%08llx (%sable-EIST_Coordination %sable-EPB %sable-OOB)\n",
4627                         base_cpu, msr,
4628                         msr & (1 << 0) ? "DIS" : "EN",
4629                         msr & (1 << 1) ? "EN" : "DIS",
4630                         msr & (1 << 8) ? "EN" : "DIS");
4631 }
4632 /*
4633  * Decode MSR_CC6_DEMOTION_POLICY_CONFIG, MSR_MC6_DEMOTION_POLICY_CONFIG
4634  *
4635  * This MSRs are present on Silvermont processors,
4636  * Intel Atom processor E3000 series (Baytrail), and friends.
4637  */
4638 void decode_c6_demotion_policy_msr(void)
4639 {
4640         unsigned long long msr;
4641
4642         if (!get_msr(base_cpu, MSR_CC6_DEMOTION_POLICY_CONFIG, &msr))
4643                 fprintf(outf, "cpu%d: MSR_CC6_DEMOTION_POLICY_CONFIG: 0x%08llx (%sable-CC6-Demotion)\n",
4644                         base_cpu, msr, msr & (1 << 0) ? "EN" : "DIS");
4645
4646         if (!get_msr(base_cpu, MSR_MC6_DEMOTION_POLICY_CONFIG, &msr))
4647                 fprintf(outf, "cpu%d: MSR_MC6_DEMOTION_POLICY_CONFIG: 0x%08llx (%sable-MC6-Demotion)\n",
4648                         base_cpu, msr, msr & (1 << 0) ? "EN" : "DIS");
4649 }
4650
4651 /*
4652  * When models are the same, for the purpose of turbostat, reuse
4653  */
4654 unsigned int intel_model_duplicates(unsigned int model)
4655 {
4656
4657         switch(model) {
4658         case INTEL_FAM6_NEHALEM_EP:     /* Core i7, Xeon 5500 series - Bloomfield, Gainstown NHM-EP */
4659         case INTEL_FAM6_NEHALEM:        /* Core i7 and i5 Processor - Clarksfield, Lynnfield, Jasper Forest */
4660         case 0x1F:      /* Core i7 and i5 Processor - Nehalem */
4661         case INTEL_FAM6_WESTMERE:       /* Westmere Client - Clarkdale, Arrandale */
4662         case INTEL_FAM6_WESTMERE_EP:    /* Westmere EP - Gulftown */
4663                 return INTEL_FAM6_NEHALEM;
4664
4665         case INTEL_FAM6_NEHALEM_EX:     /* Nehalem-EX Xeon - Beckton */
4666         case INTEL_FAM6_WESTMERE_EX:    /* Westmere-EX Xeon - Eagleton */
4667                 return INTEL_FAM6_NEHALEM_EX;
4668
4669         case INTEL_FAM6_XEON_PHI_KNM:
4670                 return INTEL_FAM6_XEON_PHI_KNL;
4671
4672         case INTEL_FAM6_BROADWELL_X:
4673         case INTEL_FAM6_BROADWELL_D:    /* BDX-DE */
4674                 return INTEL_FAM6_BROADWELL_X;
4675
4676         case INTEL_FAM6_SKYLAKE_L:
4677         case INTEL_FAM6_SKYLAKE:
4678         case INTEL_FAM6_KABYLAKE_L:
4679         case INTEL_FAM6_KABYLAKE:
4680         case INTEL_FAM6_COMETLAKE_L:
4681         case INTEL_FAM6_COMETLAKE:
4682                 return INTEL_FAM6_SKYLAKE_L;
4683
4684         case INTEL_FAM6_ICELAKE_L:
4685         case INTEL_FAM6_ICELAKE_NNPI:
4686         case INTEL_FAM6_TIGERLAKE_L:
4687         case INTEL_FAM6_TIGERLAKE:
4688                 return INTEL_FAM6_CANNONLAKE_L;
4689
4690         case INTEL_FAM6_ATOM_TREMONT_D:
4691                 return INTEL_FAM6_ATOM_GOLDMONT_D;
4692
4693         case INTEL_FAM6_ATOM_TREMONT_L:
4694                 return INTEL_FAM6_ATOM_TREMONT;
4695
4696         case INTEL_FAM6_ICELAKE_X:
4697                 return INTEL_FAM6_SKYLAKE_X;
4698         }
4699         return model;
4700 }
4701 void process_cpuid()
4702 {
4703         unsigned int eax, ebx, ecx, edx;
4704         unsigned int fms, family, model, stepping, ecx_flags, edx_flags;
4705         unsigned int has_turbo;
4706
4707         eax = ebx = ecx = edx = 0;
4708
4709         __cpuid(0, max_level, ebx, ecx, edx);
4710
4711         if (ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69)
4712                 genuine_intel = 1;
4713         else if (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65)
4714                 authentic_amd = 1;
4715         else if (ebx == 0x6f677948 && ecx == 0x656e6975 && edx == 0x6e65476e)
4716                 hygon_genuine = 1;
4717
4718         if (!quiet)
4719                 fprintf(outf, "CPUID(0): %.4s%.4s%.4s ",
4720                         (char *)&ebx, (char *)&edx, (char *)&ecx);
4721
4722         __cpuid(1, fms, ebx, ecx, edx);
4723         family = (fms >> 8) & 0xf;
4724         model = (fms >> 4) & 0xf;
4725         stepping = fms & 0xf;
4726         if (family == 0xf)
4727                 family += (fms >> 20) & 0xff;
4728         if (family >= 6)
4729                 model += ((fms >> 16) & 0xf) << 4;
4730         ecx_flags = ecx;
4731         edx_flags = edx;
4732
4733         /*
4734          * check max extended function levels of CPUID.
4735          * This is needed to check for invariant TSC.
4736          * This check is valid for both Intel and AMD.
4737          */
4738         ebx = ecx = edx = 0;
4739         __cpuid(0x80000000, max_extended_level, ebx, ecx, edx);
4740
4741         if (!quiet) {
4742                 fprintf(outf, "0x%x CPUID levels; 0x%x xlevels; family:model:stepping 0x%x:%x:%x (%d:%d:%d)\n",
4743                         max_level, max_extended_level, family, model, stepping, family, model, stepping);
4744                 fprintf(outf, "CPUID(1): %s %s %s %s %s %s %s %s %s %s\n",
4745                         ecx_flags & (1 << 0) ? "SSE3" : "-",
4746                         ecx_flags & (1 << 3) ? "MONITOR" : "-",
4747                         ecx_flags & (1 << 6) ? "SMX" : "-",
4748                         ecx_flags & (1 << 7) ? "EIST" : "-",
4749                         ecx_flags & (1 << 8) ? "TM2" : "-",
4750                         edx_flags & (1 << 4) ? "TSC" : "-",
4751                         edx_flags & (1 << 5) ? "MSR" : "-",
4752                         edx_flags & (1 << 22) ? "ACPI-TM" : "-",
4753                         edx_flags & (1 << 28) ? "HT" : "-",
4754                         edx_flags & (1 << 29) ? "TM" : "-");
4755         }
4756         if (genuine_intel)
4757                 model = intel_model_duplicates(model);
4758
4759         if (!(edx_flags & (1 << 5)))
4760                 errx(1, "CPUID: no MSR");
4761
4762         if (max_extended_level >= 0x80000007) {
4763
4764                 /*
4765                  * Non-Stop TSC is advertised by CPUID.EAX=0x80000007: EDX.bit8
4766                  * this check is valid for both Intel and AMD
4767                  */
4768                 __cpuid(0x80000007, eax, ebx, ecx, edx);
4769                 has_invariant_tsc = edx & (1 << 8);
4770         }
4771
4772         /*
4773          * APERF/MPERF is advertised by CPUID.EAX=0x6: ECX.bit0
4774          * this check is valid for both Intel and AMD
4775          */
4776
4777         __cpuid(0x6, eax, ebx, ecx, edx);
4778         has_aperf = ecx & (1 << 0);
4779         if (has_aperf) {
4780                 BIC_PRESENT(BIC_Avg_MHz);
4781                 BIC_PRESENT(BIC_Busy);
4782                 BIC_PRESENT(BIC_Bzy_MHz);
4783         }
4784         do_dts = eax & (1 << 0);
4785         if (do_dts)
4786                 BIC_PRESENT(BIC_CoreTmp);
4787         has_turbo = eax & (1 << 1);
4788         do_ptm = eax & (1 << 6);
4789         if (do_ptm)
4790                 BIC_PRESENT(BIC_PkgTmp);
4791         has_hwp = eax & (1 << 7);
4792         has_hwp_notify = eax & (1 << 8);
4793         has_hwp_activity_window = eax & (1 << 9);
4794         has_hwp_epp = eax & (1 << 10);
4795         has_hwp_pkg = eax & (1 << 11);
4796         has_epb = ecx & (1 << 3);
4797
4798         if (!quiet)
4799                 fprintf(outf, "CPUID(6): %sAPERF, %sTURBO, %sDTS, %sPTM, %sHWP, "
4800                         "%sHWPnotify, %sHWPwindow, %sHWPepp, %sHWPpkg, %sEPB\n",
4801                         has_aperf ? "" : "No-",
4802                         has_turbo ? "" : "No-",
4803                         do_dts ? "" : "No-",
4804                         do_ptm ? "" : "No-",
4805                         has_hwp ? "" : "No-",
4806                         has_hwp_notify ? "" : "No-",
4807                         has_hwp_activity_window ? "" : "No-",
4808                         has_hwp_epp ? "" : "No-",
4809                         has_hwp_pkg ? "" : "No-",
4810                         has_epb ? "" : "No-");
4811
4812         if (!quiet)
4813                 decode_misc_enable_msr();
4814
4815
4816         if (max_level >= 0x7 && !quiet) {
4817                 int has_sgx;
4818
4819                 ecx = 0;
4820
4821                 __cpuid_count(0x7, 0, eax, ebx, ecx, edx);
4822
4823                 has_sgx = ebx & (1 << 2);
4824                 fprintf(outf, "CPUID(7): %sSGX\n", has_sgx ? "" : "No-");
4825
4826                 if (has_sgx)
4827                         decode_feature_control_msr();
4828         }
4829
4830         if (max_level >= 0x15) {
4831                 unsigned int eax_crystal;
4832                 unsigned int ebx_tsc;
4833
4834                 /*
4835                  * CPUID 15H TSC/Crystal ratio, possibly Crystal Hz
4836                  */
4837                 eax_crystal = ebx_tsc = crystal_hz = edx = 0;
4838                 __cpuid(0x15, eax_crystal, ebx_tsc, crystal_hz, edx);
4839
4840                 if (ebx_tsc != 0) {
4841
4842                         if (!quiet && (ebx != 0))
4843                                 fprintf(outf, "CPUID(0x15): eax_crystal: %d ebx_tsc: %d ecx_crystal_hz: %d\n",
4844                                         eax_crystal, ebx_tsc, crystal_hz);
4845
4846                         if (crystal_hz == 0)
4847                                 switch(model) {
4848                                 case INTEL_FAM6_SKYLAKE_L:      /* SKL */
4849                                         crystal_hz = 24000000;  /* 24.0 MHz */
4850                                         break;
4851                                 case INTEL_FAM6_ATOM_GOLDMONT_D:        /* DNV */
4852                                         crystal_hz = 25000000;  /* 25.0 MHz */
4853                                         break;
4854                                 case INTEL_FAM6_ATOM_GOLDMONT:  /* BXT */
4855                                 case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
4856                                         crystal_hz = 19200000;  /* 19.2 MHz */
4857                                         break;
4858                                 default:
4859                                         crystal_hz = 0;
4860                         }
4861
4862                         if (crystal_hz) {
4863                                 tsc_hz =  (unsigned long long) crystal_hz * ebx_tsc / eax_crystal;
4864                                 if (!quiet)
4865                                         fprintf(outf, "TSC: %lld MHz (%d Hz * %d / %d / 1000000)\n",
4866                                                 tsc_hz / 1000000, crystal_hz, ebx_tsc,  eax_crystal);
4867                         }
4868                 }
4869         }
4870         if (max_level >= 0x16) {
4871                 unsigned int base_mhz, max_mhz, bus_mhz, edx;
4872
4873                 /*
4874                  * CPUID 16H Base MHz, Max MHz, Bus MHz
4875                  */
4876                 base_mhz = max_mhz = bus_mhz = edx = 0;
4877
4878                 __cpuid(0x16, base_mhz, max_mhz, bus_mhz, edx);
4879                 if (!quiet)
4880                         fprintf(outf, "CPUID(0x16): base_mhz: %d max_mhz: %d bus_mhz: %d\n",
4881                                 base_mhz, max_mhz, bus_mhz);
4882         }
4883
4884         if (has_aperf)
4885                 aperf_mperf_multiplier = get_aperf_mperf_multiplier(family, model);
4886
4887         BIC_PRESENT(BIC_IRQ);
4888         BIC_PRESENT(BIC_TSC_MHz);
4889
4890         if (probe_nhm_msrs(family, model)) {
4891                 do_nhm_platform_info = 1;
4892                 BIC_PRESENT(BIC_CPU_c1);
4893                 BIC_PRESENT(BIC_CPU_c3);
4894                 BIC_PRESENT(BIC_CPU_c6);
4895                 BIC_PRESENT(BIC_SMI);
4896         }
4897         do_snb_cstates = has_snb_msrs(family, model);
4898
4899         if (do_snb_cstates)
4900                 BIC_PRESENT(BIC_CPU_c7);
4901
4902         do_irtl_snb = has_snb_msrs(family, model);
4903         if (do_snb_cstates && (pkg_cstate_limit >= PCL__2))
4904                 BIC_PRESENT(BIC_Pkgpc2);
4905         if (pkg_cstate_limit >= PCL__3)
4906                 BIC_PRESENT(BIC_Pkgpc3);
4907         if (pkg_cstate_limit >= PCL__6)
4908                 BIC_PRESENT(BIC_Pkgpc6);
4909         if (do_snb_cstates && (pkg_cstate_limit >= PCL__7))
4910                 BIC_PRESENT(BIC_Pkgpc7);
4911         if (has_slv_msrs(family, model)) {
4912                 BIC_NOT_PRESENT(BIC_Pkgpc2);
4913                 BIC_NOT_PRESENT(BIC_Pkgpc3);
4914                 BIC_PRESENT(BIC_Pkgpc6);
4915                 BIC_NOT_PRESENT(BIC_Pkgpc7);
4916                 BIC_PRESENT(BIC_Mod_c6);
4917                 use_c1_residency_msr = 1;
4918         }
4919         if (is_dnv(family, model)) {
4920                 BIC_PRESENT(BIC_CPU_c1);
4921                 BIC_NOT_PRESENT(BIC_CPU_c3);
4922                 BIC_NOT_PRESENT(BIC_Pkgpc3);
4923                 BIC_NOT_PRESENT(BIC_CPU_c7);
4924                 BIC_NOT_PRESENT(BIC_Pkgpc7);
4925                 use_c1_residency_msr = 1;
4926         }
4927         if (is_skx(family, model)) {
4928                 BIC_NOT_PRESENT(BIC_CPU_c3);
4929                 BIC_NOT_PRESENT(BIC_Pkgpc3);
4930                 BIC_NOT_PRESENT(BIC_CPU_c7);
4931                 BIC_NOT_PRESENT(BIC_Pkgpc7);
4932         }
4933         if (is_bdx(family, model)) {
4934                 BIC_NOT_PRESENT(BIC_CPU_c7);
4935                 BIC_NOT_PRESENT(BIC_Pkgpc7);
4936         }
4937         if (has_c8910_msrs(family, model)) {
4938                 BIC_PRESENT(BIC_Pkgpc8);
4939                 BIC_PRESENT(BIC_Pkgpc9);
4940                 BIC_PRESENT(BIC_Pkgpc10);
4941         }
4942         do_irtl_hsw = has_c8910_msrs(family, model);
4943         if (has_skl_msrs(family, model)) {
4944                 BIC_PRESENT(BIC_Totl_c0);
4945                 BIC_PRESENT(BIC_Any_c0);
4946                 BIC_PRESENT(BIC_GFX_c0);
4947                 BIC_PRESENT(BIC_CPUGFX);
4948         }
4949         do_slm_cstates = is_slm(family, model);
4950         do_knl_cstates  = is_knl(family, model);
4951
4952         if (do_slm_cstates || do_knl_cstates || is_cnl(family, model) ||
4953             is_ehl(family, model))
4954                 BIC_NOT_PRESENT(BIC_CPU_c3);
4955
4956         if (!quiet)
4957                 decode_misc_pwr_mgmt_msr();
4958
4959         if (!quiet && has_slv_msrs(family, model))
4960                 decode_c6_demotion_policy_msr();
4961
4962         rapl_probe(family, model);
4963         perf_limit_reasons_probe(family, model);
4964         automatic_cstate_conversion_probe(family, model);
4965
4966         if (!quiet)
4967                 dump_cstate_pstate_config_info(family, model);
4968
4969         if (!quiet)
4970                 dump_sysfs_cstate_config();
4971         if (!quiet)
4972                 dump_sysfs_pstate_config();
4973
4974         if (has_skl_msrs(family, model))
4975                 calculate_tsc_tweak();
4976
4977         if (!access("/sys/class/drm/card0/power/rc6_residency_ms", R_OK))
4978                 BIC_PRESENT(BIC_GFX_rc6);
4979
4980         if (!access("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", R_OK))
4981                 BIC_PRESENT(BIC_GFXMHz);
4982
4983         if (!access("/sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us", R_OK))
4984                 BIC_PRESENT(BIC_CPU_LPI);
4985         else
4986                 BIC_NOT_PRESENT(BIC_CPU_LPI);
4987
4988         if (!access(sys_lpi_file_sysfs, R_OK)) {
4989                 sys_lpi_file = sys_lpi_file_sysfs;
4990                 BIC_PRESENT(BIC_SYS_LPI);
4991         } else if (!access(sys_lpi_file_debugfs, R_OK)) {
4992                 sys_lpi_file = sys_lpi_file_debugfs;
4993                 BIC_PRESENT(BIC_SYS_LPI);
4994         } else {
4995                 sys_lpi_file_sysfs = NULL;
4996                 BIC_NOT_PRESENT(BIC_SYS_LPI);
4997         }
4998
4999         if (!quiet)
5000                 decode_misc_feature_control();
5001
5002         return;
5003 }
5004
/*
 * Directory-entry filter for /dev/cpu/ (the signature matches the filter
 * callback accepted by scandir(3)).
 *
 * Returns 1 for names that start with a decimal digit and 0 for everything
 * else, i.e. it filters out ".", ".." and "microcode".
 */
int dir_filter(const struct dirent *dirp)
{
	/*
	 * d_name is plain char, which may be signed.  The <ctype.h> functions
	 * are only defined for values representable as unsigned char (or EOF),
	 * so convert before classifying.
	 */
	return isdigit((unsigned char)dirp->d_name[0]) ? 1 : 0;
}
5016
/*
 * Placeholder: ignores its argument and always returns 0.
 */
int open_dev_cpu_msr(int dummy1)
{
	(void)dummy1;	/* deliberately unused */

	return 0;
}
5021
5022 void topology_probe()
5023 {
5024         int i;
5025         int max_core_id = 0;
5026         int max_package_id = 0;
5027         int max_die_id = 0;
5028         int max_siblings = 0;
5029
5030         /* Initialize num_cpus, max_cpu_num */
5031         set_max_cpu_num();
5032         topo.num_cpus = 0;
5033         for_all_proc_cpus(count_cpus);
5034         if (!summary_only && topo.num_cpus > 1)
5035                 BIC_PRESENT(BIC_CPU);
5036
5037         if (debug > 1)
5038                 fprintf(outf, "num_cpus %d max_cpu_num %d\n", topo.num_cpus, topo.max_cpu_num);
5039
5040         cpus = calloc(1, (topo.max_cpu_num  + 1) * sizeof(struct cpu_topology));
5041         if (cpus == NULL)
5042                 err(1, "calloc cpus");
5043
5044         /*
5045          * Allocate and initialize cpu_present_set
5046          */
5047         cpu_present_set = CPU_ALLOC((topo.max_cpu_num + 1));
5048         if (cpu_present_set == NULL)
5049                 err(3, "CPU_ALLOC");
5050         cpu_present_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
5051         CPU_ZERO_S(cpu_present_setsize, cpu_present_set);
5052         for_all_proc_cpus(mark_cpu_present);
5053
5054         /*
5055          * Validate that all cpus in cpu_subset are also in cpu_present_set
5056          */
5057         for (i = 0; i < CPU_SUBSET_MAXCPUS; ++i) {
5058                 if (CPU_ISSET_S(i, cpu_subset_size, cpu_subset))
5059                         if (!CPU_ISSET_S(i, cpu_present_setsize, cpu_present_set))
5060                                 err(1, "cpu%d not present", i);
5061         }
5062
5063         /*
5064          * Allocate and initialize cpu_affinity_set
5065          */
5066         cpu_affinity_set = CPU_ALLOC((topo.max_cpu_num + 1));
5067         if (cpu_affinity_set == NULL)
5068                 err(3, "CPU_ALLOC");
5069         cpu_affinity_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
5070         CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set);
5071
5072         for_all_proc_cpus(init_thread_id);
5073
5074         /*
5075          * For online cpus
5076          * find max_core_id, max_package_id
5077          */
5078         for (i = 0; i <= topo.max_cpu_num; ++i) {
5079                 int siblings;
5080
5081                 if (cpu_is_not_present(i)) {
5082                         if (debug > 1)
5083                                 fprintf(outf, "cpu%d NOT PRESENT\n", i);
5084                         continue;
5085                 }
5086
5087                 cpus[i].logical_cpu_id = i;
5088
5089                 /* get package information */
5090                 cpus[i].physical_package_id = get_physical_package_id(i);
5091                 if (cpus[i].physical_package_id > max_package_id)
5092                         max_package_id = cpus[i].physical_package_id;
5093
5094                 /* get die information */
5095                 cpus[i].die_id = get_die_id(i);
5096                 if (cpus[i].die_id > max_die_id)
5097                         max_die_id = cpus[i].die_id;
5098
5099                 /* get numa node information */
5100                 cpus[i].physical_node_id = get_physical_node_id(&cpus[i]);
5101                 if (cpus[i].physical_node_id > topo.max_node_num)
5102                         topo.max_node_num = cpus[i].physical_node_id;
5103
5104                 /* get core information */
5105                 cpus[i].physical_core_id = get_core_id(i);
5106                 if (cpus[i].physical_core_id > max_core_id)
5107                         max_core_id = cpus[i].physical_core_id;
5108
5109                 /* get thread information */
5110                 siblings = get_thread_siblings(&cpus[i]);
5111                 if (siblings > max_siblings)
5112                         max_siblings = siblings;
5113                 if (cpus[i].thread_id == 0)
5114                         topo.num_cores++;
5115         }
5116
5117         topo.cores_per_node = max_core_id + 1;
5118         if (debug > 1)
5119                 fprintf(outf, "max_core_id %d, sizing for %d cores per package\n",
5120                         max_core_id, topo.cores_per_node);
5121         if (!summary_only && topo.cores_per_node > 1)
5122                 BIC_PRESENT(BIC_Core);
5123
5124         topo.num_die = max_die_id + 1;
5125         if (debug > 1)
5126                 fprintf(outf, "max_die_id %d, sizing for %d die\n",
5127                                 max_die_id, topo.num_die);
5128         if (!summary_only && topo.num_die > 1)
5129                 BIC_PRESENT(BIC_Die);
5130
5131         topo.num_packages = max_package_id + 1;
5132         if (debug > 1)
5133                 fprintf(outf, "max_package_id %d, sizing for %d packages\n",
5134                         max_package_id, topo.num_packages);
5135         if (!summary_only && topo.num_packages > 1)
5136                 BIC_PRESENT(BIC_Package);
5137
5138         set_node_data();
5139         if (debug > 1)
5140                 fprintf(outf, "nodes_per_pkg %d\n", topo.nodes_per_pkg);
5141         if (!summary_only && topo.nodes_per_pkg > 1)
5142                 BIC_PRESENT(BIC_Node);
5143
5144         topo.threads_per_core = max_siblings;
5145         if (debug > 1)
5146                 fprintf(outf, "max_siblings %d\n", max_siblings);
5147
5148         if (debug < 1)
5149                 return;
5150
5151         for (i = 0; i <= topo.max_cpu_num; ++i) {
5152                 if (cpu_is_not_present(i))
5153                         continue;
5154                 fprintf(outf,
5155                         "cpu %d pkg %d die %d node %d lnode %d core %d thread %d\n",
5156                         i, cpus[i].physical_package_id, cpus[i].die_id,
5157                         cpus[i].physical_node_id,
5158                         cpus[i].logical_node_id,
5159                         cpus[i].physical_core_id,
5160                         cpus[i].thread_id);
5161         }
5162
5163 }
5164
5165 void
5166 allocate_counters(struct thread_data **t, struct core_data **c,
5167                   struct pkg_data **p)
5168 {
5169         int i;
5170         int num_cores = topo.cores_per_node * topo.nodes_per_pkg *
5171                         topo.num_packages;
5172         int num_threads = topo.threads_per_core * num_cores;
5173
5174         *t = calloc(num_threads, sizeof(struct thread_data));
5175         if (*t == NULL)
5176                 goto error;
5177
5178         for (i = 0; i < num_threads; i++)
5179                 (*t)[i].cpu_id = -1;
5180
5181         *c = calloc(num_cores, sizeof(struct core_data));
5182         if (*c == NULL)
5183                 goto error;
5184
5185         for (i = 0; i < num_cores; i++)
5186                 (*c)[i].core_id = -1;
5187
5188         *p = calloc(topo.num_packages, sizeof(struct pkg_data));
5189         if (*p == NULL)
5190                 goto error;
5191
5192         for (i = 0; i < topo.num_packages; i++)
5193                 (*p)[i].package_id = i;
5194
5195         return;
5196 error:
5197         err(1, "calloc counters");
5198 }
5199 /*
5200  * init_counter()
5201  *
5202  * set FIRST_THREAD_IN_CORE and FIRST_CORE_IN_PACKAGE
5203  */
5204 void init_counter(struct thread_data *thread_base, struct core_data *core_base,
5205         struct pkg_data *pkg_base, int cpu_id)
5206 {
5207         int pkg_id = cpus[cpu_id].physical_package_id;
5208         int node_id = cpus[cpu_id].logical_node_id;
5209         int core_id = cpus[cpu_id].physical_core_id;
5210         int thread_id = cpus[cpu_id].thread_id;
5211         struct thread_data *t;
5212         struct core_data *c;
5213         struct pkg_data *p;
5214
5215
5216         /* Workaround for systems where physical_node_id==-1
5217          * and logical_node_id==(-1 - topo.num_cpus)
5218          */
5219         if (node_id < 0)
5220                 node_id = 0;
5221
5222         t = GET_THREAD(thread_base, thread_id, core_id, node_id, pkg_id);
5223         c = GET_CORE(core_base, core_id, node_id, pkg_id);
5224         p = GET_PKG(pkg_base, pkg_id);
5225
5226         t->cpu_id = cpu_id;
5227         if (thread_id == 0) {
5228                 t->flags |= CPU_IS_FIRST_THREAD_IN_CORE;
5229                 if (cpu_is_first_core_in_package(cpu_id))
5230                         t->flags |= CPU_IS_FIRST_CORE_IN_PACKAGE;
5231         }
5232
5233         c->core_id = core_id;
5234         p->package_id = pkg_id;
5235 }
5236
5237
5238 int initialize_counters(int cpu_id)
5239 {
5240         init_counter(EVEN_COUNTERS, cpu_id);
5241         init_counter(ODD_COUNTERS, cpu_id);
5242         return 0;
5243 }
5244
5245 void allocate_output_buffer()
5246 {
5247         output_buffer = calloc(1, (1 + topo.num_cpus) * 2048);
5248         outp = output_buffer;
5249         if (outp == NULL)
5250                 err(-1, "calloc output buffer");
5251 }
5252 void allocate_fd_percpu(void)
5253 {
5254         fd_percpu = calloc(topo.max_cpu_num + 1, sizeof(int));
5255         if (fd_percpu == NULL)
5256                 err(-1, "calloc fd_percpu");
5257 }
5258 void allocate_irq_buffers(void)
5259 {
5260         irq_column_2_cpu = calloc(topo.num_cpus, sizeof(int));
5261         if (irq_column_2_cpu == NULL)
5262                 err(-1, "calloc %d", topo.num_cpus);
5263
5264         irqs_per_cpu = calloc(topo.max_cpu_num + 1, sizeof(int));
5265         if (irqs_per_cpu == NULL)
5266                 err(-1, "calloc %d", topo.max_cpu_num + 1);
5267 }
5268 void setup_all_buffers(void)
5269 {
5270         topology_probe();
5271         allocate_irq_buffers();
5272         allocate_fd_percpu();
5273         allocate_counters(&thread_even, &core_even, &package_even);
5274         allocate_counters(&thread_odd, &core_odd, &package_odd);
5275         allocate_output_buffer();
5276         for_all_proc_cpus(initialize_counters);
5277 }
5278
5279 void set_base_cpu(void)
5280 {
5281         base_cpu = sched_getcpu();
5282         if (base_cpu < 0)
5283                 err(-ENODEV, "No valid cpus found");
5284
5285         if (debug > 1)
5286                 fprintf(outf, "base_cpu = %d\n", base_cpu);
5287 }
5288
5289 void turbostat_init()
5290 {
5291         setup_all_buffers();
5292         set_base_cpu();
5293         check_dev_msr();
5294         check_permissions();
5295         process_cpuid();
5296
5297
5298         if (!quiet)
5299                 for_all_cpus(print_hwp, ODD_COUNTERS);
5300
5301         if (!quiet)
5302                 for_all_cpus(print_epb, ODD_COUNTERS);
5303
5304         if (!quiet)
5305                 for_all_cpus(print_perf_limit, ODD_COUNTERS);
5306
5307         if (!quiet)
5308                 for_all_cpus(print_rapl, ODD_COUNTERS);
5309
5310         for_all_cpus(set_temperature_target, ODD_COUNTERS);
5311
5312         if (!quiet)
5313                 for_all_cpus(print_thermal, ODD_COUNTERS);
5314
5315         if (!quiet && do_irtl_snb)
5316                 print_irtl();
5317 }
5318
5319 int fork_it(char **argv)
5320 {
5321         pid_t child_pid;
5322         int status;
5323
5324         snapshot_proc_sysfs_files();
5325         status = for_all_cpus(get_counters, EVEN_COUNTERS);
5326         first_counter_read = 0;
5327         if (status)
5328                 exit(status);
5329         /* clear affinity side-effect of get_counters() */
5330         sched_setaffinity(0, cpu_present_setsize, cpu_present_set);
5331         gettimeofday(&tv_even, (struct timezone *)NULL);
5332
5333         child_pid = fork();
5334         if (!child_pid) {
5335                 /* child */
5336                 execvp(argv[0], argv);
5337                 err(errno, "exec %s", argv[0]);
5338         } else {
5339
5340                 /* parent */
5341                 if (child_pid == -1)
5342                         err(1, "fork");
5343
5344                 signal(SIGINT, SIG_IGN);
5345                 signal(SIGQUIT, SIG_IGN);
5346                 if (waitpid(child_pid, &status, 0) == -1)
5347                         err(status, "waitpid");
5348
5349                 if (WIFEXITED(status))
5350                         status = WEXITSTATUS(status);
5351         }
5352         /*
5353          * n.b. fork_it() does not check for errors from for_all_cpus()
5354          * because re-starting is problematic when forking
5355          */
5356         snapshot_proc_sysfs_files();
5357         for_all_cpus(get_counters, ODD_COUNTERS);
5358         gettimeofday(&tv_odd, (struct timezone *)NULL);
5359         timersub(&tv_odd, &tv_even, &tv_delta);
5360         if (for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS))
5361                 fprintf(outf, "%s: Counter reset detected\n", progname);
5362         else {
5363                 compute_average(EVEN_COUNTERS);
5364                 format_all_counters(EVEN_COUNTERS);
5365         }
5366
5367         fprintf(outf, "%.6f sec\n", tv_delta.tv_sec + tv_delta.tv_usec/1000000.0);
5368
5369         flush_output_stderr();
5370
5371         return status;
5372 }
5373
5374 int get_and_dump_counters(void)
5375 {
5376         int status;
5377
5378         snapshot_proc_sysfs_files();
5379         status = for_all_cpus(get_counters, ODD_COUNTERS);
5380         if (status)
5381                 return status;
5382
5383         status = for_all_cpus(dump_counters, ODD_COUNTERS);
5384         if (status)
5385                 return status;
5386
5387         flush_output_stdout();
5388
5389         return status;
5390 }
5391
5392 void print_version() {
5393         fprintf(outf, "turbostat version 20.03.20"
5394                 " - Len Brown <lenb@kernel.org>\n");
5395 }
5396
5397 int add_counter(unsigned int msr_num, char *path, char *name,
5398         unsigned int width, enum counter_scope scope,
5399         enum counter_type type, enum counter_format format, int flags)
5400 {
5401         struct msr_counter *msrp;
5402
5403         msrp = calloc(1, sizeof(struct msr_counter));
5404         if (msrp == NULL) {
5405                 perror("calloc");
5406                 exit(1);
5407         }
5408
5409         msrp->msr_num = msr_num;
5410         strncpy(msrp->name, name, NAME_BYTES - 1);
5411         if (path)
5412                 strncpy(msrp->path, path, PATH_BYTES - 1);
5413         msrp->width = width;
5414         msrp->type = type;
5415         msrp->format = format;
5416         msrp->flags = flags;
5417
5418         switch (scope) {
5419
5420         case SCOPE_CPU:
5421                 msrp->next = sys.tp;
5422                 sys.tp = msrp;
5423                 sys.added_thread_counters++;
5424                 if (sys.added_thread_counters > MAX_ADDED_THREAD_COUNTERS) {
5425                         fprintf(stderr, "exceeded max %d added thread counters\n",
5426                                 MAX_ADDED_COUNTERS);
5427                         exit(-1);
5428                 }
5429                 break;
5430
5431         case SCOPE_CORE:
5432                 msrp->next = sys.cp;
5433                 sys.cp = msrp;
5434                 sys.added_core_counters++;
5435                 if (sys.added_core_counters > MAX_ADDED_COUNTERS) {
5436                         fprintf(stderr, "exceeded max %d added core counters\n",
5437                                 MAX_ADDED_COUNTERS);
5438                         exit(-1);
5439                 }
5440                 break;
5441
5442         case SCOPE_PACKAGE:
5443                 msrp->next = sys.pp;
5444                 sys.pp = msrp;
5445                 sys.added_package_counters++;
5446                 if (sys.added_package_counters > MAX_ADDED_COUNTERS) {
5447                         fprintf(stderr, "exceeded max %d added package counters\n",
5448                                 MAX_ADDED_COUNTERS);
5449                         exit(-1);
5450                 }
5451                 break;
5452         }
5453
5454         return 0;
5455 }
5456
5457 void parse_add_command(char *add_command)
5458 {
5459         int msr_num = 0;
5460         char *path = NULL;
5461         char name_buffer[NAME_BYTES] = "";
5462         int width = 64;
5463         int fail = 0;
5464         enum counter_scope scope = SCOPE_CPU;
5465         enum counter_type type = COUNTER_CYCLES;
5466         enum counter_format format = FORMAT_DELTA;
5467
5468         while (add_command) {
5469
5470                 if (sscanf(add_command, "msr0x%x", &msr_num) == 1)
5471                         goto next;
5472
5473                 if (sscanf(add_command, "msr%d", &msr_num) == 1)
5474                         goto next;
5475
5476                 if (*add_command == '/') {
5477                         path = add_command;
5478                         goto next;
5479                 }
5480
5481                 if (sscanf(add_command, "u%d", &width) == 1) {
5482                         if ((width == 32) || (width == 64))
5483                                 goto next;
5484                         width = 64;
5485                 }
5486                 if (!strncmp(add_command, "cpu", strlen("cpu"))) {
5487                         scope = SCOPE_CPU;
5488                         goto next;
5489                 }
5490                 if (!strncmp(add_command, "core", strlen("core"))) {
5491                         scope = SCOPE_CORE;
5492                         goto next;
5493                 }
5494                 if (!strncmp(add_command, "package", strlen("package"))) {
5495                         scope = SCOPE_PACKAGE;
5496                         goto next;
5497                 }
5498                 if (!strncmp(add_command, "cycles", strlen("cycles"))) {
5499                         type = COUNTER_CYCLES;
5500                         goto next;
5501                 }
5502                 if (!strncmp(add_command, "seconds", strlen("seconds"))) {
5503                         type = COUNTER_SECONDS;
5504                         goto next;
5505                 }
5506                 if (!strncmp(add_command, "usec", strlen("usec"))) {
5507                         type = COUNTER_USEC;
5508                         goto next;
5509                 }
5510                 if (!strncmp(add_command, "raw", strlen("raw"))) {
5511                         format = FORMAT_RAW;
5512                         goto next;
5513                 }
5514                 if (!strncmp(add_command, "delta", strlen("delta"))) {
5515                         format = FORMAT_DELTA;
5516                         goto next;
5517                 }
5518                 if (!strncmp(add_command, "percent", strlen("percent"))) {
5519                         format = FORMAT_PERCENT;
5520                         goto next;
5521                 }
5522
5523                 if (sscanf(add_command, "%18s,%*s", name_buffer) == 1) {        /* 18 < NAME_BYTES */
5524                         char *eos;
5525
5526                         eos = strchr(name_buffer, ',');
5527                         if (eos)
5528                                 *eos = '\0';
5529                         goto next;
5530                 }
5531
5532 next:
5533                 add_command = strchr(add_command, ',');
5534                 if (add_command) {
5535                         *add_command = '\0';
5536                         add_command++;
5537                 }
5538
5539         }
5540         if ((msr_num == 0) && (path == NULL)) {
5541                 fprintf(stderr, "--add: (msrDDD | msr0xXXX | /path_to_counter ) required\n");
5542                 fail++;
5543         }
5544
5545         /* generate default column header */
5546         if (*name_buffer == '\0') {
5547                 if (width == 32)
5548                         sprintf(name_buffer, "M0x%x%s", msr_num, format == FORMAT_PERCENT ? "%" : "");
5549                 else
5550                         sprintf(name_buffer, "M0X%x%s", msr_num, format == FORMAT_PERCENT ? "%" : "");
5551         }
5552
5553         if (add_counter(msr_num, path, name_buffer, width, scope, type, format, 0))
5554                 fail++;
5555
5556         if (fail) {
5557                 help();
5558                 exit(1);
5559         }
5560 }
5561
5562 int is_deferred_skip(char *name)
5563 {
5564         int i;
5565
5566         for (i = 0; i < deferred_skip_index; ++i)
5567                 if (!strcmp(name, deferred_skip_names[i]))
5568                         return 1;
5569         return 0;
5570 }
5571
5572 void probe_sysfs(void)
5573 {
5574         char path[64];
5575         char name_buf[16];
5576         FILE *input;
5577         int state;
5578         char *sp;
5579
5580         if (!DO_BIC(BIC_sysfs))
5581                 return;
5582
5583         for (state = 10; state >= 0; --state) {
5584
5585                 sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name",
5586                         base_cpu, state);
5587                 input = fopen(path, "r");
5588                 if (input == NULL)
5589                         continue;
5590                 if (!fgets(name_buf, sizeof(name_buf), input))
5591                         err(1, "%s: failed to read file", path);
5592
5593                  /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */
5594                 sp = strchr(name_buf, '-');
5595                 if (!sp)
5596                         sp = strchrnul(name_buf, '\n');
5597                 *sp = '%';
5598                 *(sp + 1) = '\0';
5599
5600                 fclose(input);
5601
5602                 sprintf(path, "cpuidle/state%d/time", state);
5603
5604                 if (is_deferred_skip(name_buf))
5605                         continue;
5606
5607                 add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_USEC,
5608                                 FORMAT_PERCENT, SYSFS_PERCPU);
5609         }
5610
5611         for (state = 10; state >= 0; --state) {
5612
5613                 sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name",
5614                         base_cpu, state);
5615                 input = fopen(path, "r");
5616                 if (input == NULL)
5617                         continue;
5618                 if (!fgets(name_buf, sizeof(name_buf), input))
5619                         err(1, "%s: failed to read file", path);
5620                  /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */
5621                 sp = strchr(name_buf, '-');
5622                 if (!sp)
5623                         sp = strchrnul(name_buf, '\n');
5624                 *sp = '\0';
5625                 fclose(input);
5626
5627                 sprintf(path, "cpuidle/state%d/usage", state);
5628
5629                 if (is_deferred_skip(name_buf))
5630                         continue;
5631
5632                 add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_ITEMS,
5633                                 FORMAT_DELTA, SYSFS_PERCPU);
5634         }
5635
5636 }
5637
5638
5639 /*
5640  * parse cpuset with following syntax
5641  * 1,2,4..6,8-10 and set bits in cpu_subset
5642  */
5643 void parse_cpu_command(char *optarg)
5644 {
5645         unsigned int start, end;
5646         char *next;
5647
5648         if (!strcmp(optarg, "core")) {
5649                 if (cpu_subset)
5650                         goto error;
5651                 show_core_only++;
5652                 return;
5653         }
5654         if (!strcmp(optarg, "package")) {
5655                 if (cpu_subset)
5656                         goto error;
5657                 show_pkg_only++;
5658                 return;
5659         }
5660         if (show_core_only || show_pkg_only)
5661                 goto error;
5662
5663         cpu_subset = CPU_ALLOC(CPU_SUBSET_MAXCPUS);
5664         if (cpu_subset == NULL)
5665                 err(3, "CPU_ALLOC");
5666         cpu_subset_size = CPU_ALLOC_SIZE(CPU_SUBSET_MAXCPUS);
5667
5668         CPU_ZERO_S(cpu_subset_size, cpu_subset);
5669
5670         next = optarg;
5671
5672         while (next && *next) {
5673
5674                 if (*next == '-')       /* no negative cpu numbers */
5675                         goto error;
5676
5677                 start = strtoul(next, &next, 10);
5678
5679                 if (start >= CPU_SUBSET_MAXCPUS)
5680                         goto error;
5681                 CPU_SET_S(start, cpu_subset_size, cpu_subset);
5682
5683                 if (*next == '\0')
5684                         break;
5685
5686                 if (*next == ',') {
5687                         next += 1;
5688                         continue;
5689                 }
5690
5691                 if (*next == '-') {
5692                         next += 1;      /* start range */
5693                 } else if (*next == '.') {
5694                         next += 1;
5695                         if (*next == '.')
5696                                 next += 1;      /* start range */
5697                         else
5698                                 goto error;
5699                 }
5700
5701                 end = strtoul(next, &next, 10);
5702                 if (end <= start)
5703                         goto error;
5704
5705                 while (++start <= end) {
5706                         if (start >= CPU_SUBSET_MAXCPUS)
5707                                 goto error;
5708                         CPU_SET_S(start, cpu_subset_size, cpu_subset);
5709                 }
5710
5711                 if (*next == ',')
5712                         next += 1;
5713                 else if (*next != '\0')
5714                         goto error;
5715         }
5716
5717         return;
5718
5719 error:
5720         fprintf(stderr, "\"--cpu %s\" malformed\n", optarg);
5721         help();
5722         exit(-1);
5723 }
5724
5725
5726 void cmdline(int argc, char **argv)
5727 {
5728         int opt;
5729         int option_index = 0;
5730         static struct option long_options[] = {
5731                 {"add",         required_argument,      0, 'a'},
5732                 {"cpu",         required_argument,      0, 'c'},
5733                 {"Dump",        no_argument,            0, 'D'},
5734                 {"debug",       no_argument,            0, 'd'},        /* internal, not documented */
5735                 {"enable",      required_argument,      0, 'e'},
5736                 {"interval",    required_argument,      0, 'i'},
5737                 {"num_iterations",      required_argument,      0, 'n'},
5738                 {"help",        no_argument,            0, 'h'},
5739                 {"hide",        required_argument,      0, 'H'},        // meh, -h taken by --help
5740                 {"Joules",      no_argument,            0, 'J'},
5741                 {"list",        no_argument,            0, 'l'},
5742                 {"out",         required_argument,      0, 'o'},
5743                 {"quiet",       no_argument,            0, 'q'},
5744                 {"show",        required_argument,      0, 's'},
5745                 {"Summary",     no_argument,            0, 'S'},
5746                 {"TCC",         required_argument,      0, 'T'},
5747                 {"version",     no_argument,            0, 'v' },
5748                 {0,             0,                      0,  0 }
5749         };
5750
5751         progname = argv[0];
5752
5753         while ((opt = getopt_long_only(argc, argv, "+C:c:Dde:hi:Jn:o:qST:v",
5754                                 long_options, &option_index)) != -1) {
5755                 switch (opt) {
5756                 case 'a':
5757                         parse_add_command(optarg);
5758                         break;
5759                 case 'c':
5760                         parse_cpu_command(optarg);
5761                         break;
5762                 case 'D':
5763                         dump_only++;
5764                         break;
5765                 case 'e':
5766                         /* --enable specified counter */
5767                         bic_enabled = bic_enabled | bic_lookup(optarg, SHOW_LIST);
5768                         break;
5769                 case 'd':
5770                         debug++;
5771                         ENABLE_BIC(BIC_DISABLED_BY_DEFAULT);
5772                         break;
5773                 case 'H':
5774                         /*
5775                          * --hide: do not show those specified
5776                          *  multiple invocations simply clear more bits in enabled mask
5777                          */
5778                         bic_enabled &= ~bic_lookup(optarg, HIDE_LIST);
5779                         break;
5780                 case 'h':
5781                 default:
5782                         help();
5783                         exit(1);
5784                 case 'i':
5785                         {
5786                                 double interval = strtod(optarg, NULL);
5787
5788                                 if (interval < 0.001) {
5789                                         fprintf(outf, "interval %f seconds is too small\n",
5790                                                 interval);
5791                                         exit(2);
5792                                 }
5793
5794                                 interval_tv.tv_sec = interval_ts.tv_sec = interval;
5795                                 interval_tv.tv_usec = (interval - interval_tv.tv_sec) * 1000000;
5796                                 interval_ts.tv_nsec = (interval - interval_ts.tv_sec) * 1000000000;
5797                         }
5798                         break;
5799                 case 'J':
5800                         rapl_joules++;
5801                         break;
5802                 case 'l':
5803                         ENABLE_BIC(BIC_DISABLED_BY_DEFAULT);
5804                         list_header_only++;
5805                         quiet++;
5806                         break;
5807                 case 'o':
5808                         outf = fopen_or_die(optarg, "w");
5809                         break;
5810                 case 'q':
5811                         quiet = 1;
5812                         break;
5813                 case 'n':
5814                         num_iterations = strtod(optarg, NULL);
5815
5816                         if (num_iterations <= 0) {
5817                                 fprintf(outf, "iterations %d should be positive number\n",
5818                                         num_iterations);
5819                                 exit(2);
5820                         }
5821                         break;
5822                 case 's':
5823                         /*
5824                          * --show: show only those specified
5825                          *  The 1st invocation will clear and replace the enabled mask
5826                          *  subsequent invocations can add to it.
5827                          */
5828                         if (shown == 0)
5829                                 bic_enabled = bic_lookup(optarg, SHOW_LIST);
5830                         else
5831                                 bic_enabled |= bic_lookup(optarg, SHOW_LIST);
5832                         shown = 1;
5833                         break;
5834                 case 'S':
5835                         summary_only++;
5836                         break;
5837                 case 'T':
5838                         tcc_activation_temp_override = atoi(optarg);
5839                         break;
5840                 case 'v':
5841                         print_version();
5842                         exit(0);
5843                         break;
5844                 }
5845         }
5846 }
5847
5848 int main(int argc, char **argv)
5849 {
5850         outf = stderr;
5851         cmdline(argc, argv);
5852
5853         if (!quiet)
5854                 print_version();
5855
5856         probe_sysfs();
5857
5858         turbostat_init();
5859
5860         /* dump counters and exit */
5861         if (dump_only)
5862                 return get_and_dump_counters();
5863
5864         /* list header and exit */
5865         if (list_header_only) {
5866                 print_header(",");
5867                 flush_output_stdout();
5868                 return 0;
5869         }
5870
5871         /*
5872          * if any params left, it must be a command to fork
5873          */
5874         if (argc - optind)
5875                 return fork_it(argv + optind);
5876         else
5877                 turbostat_loop();
5878
5879         return 0;
5880 }