tools/power turbostat: fix show/hide issues resulting from mis-merge
[linux-2.6-microblaze.git] / tools / power / x86 / turbostat / turbostat.c
1 /*
2  * turbostat -- show CPU frequency and C-state residency
3  * on modern Intel turbo-capable processors.
4  *
5  * Copyright (c) 2013 Intel Corporation.
6  * Len Brown <len.brown@intel.com>
7  *
8  * This program is free software; you can redistribute it and/or modify it
9  * under the terms and conditions of the GNU General Public License,
10  * version 2, as published by the Free Software Foundation.
11  *
12  * This program is distributed in the hope it will be useful, but WITHOUT
13  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
15  * more details.
16  *
17  * You should have received a copy of the GNU General Public License along with
18  * this program; if not, write to the Free Software Foundation, Inc.,
19  * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
20  */
21
22 #define _GNU_SOURCE
23 #include MSRHEADER
24 #include INTEL_FAMILY_HEADER
25 #include <stdarg.h>
26 #include <stdio.h>
27 #include <err.h>
28 #include <unistd.h>
29 #include <sys/types.h>
30 #include <sys/wait.h>
31 #include <sys/stat.h>
32 #include <sys/select.h>
33 #include <sys/resource.h>
34 #include <fcntl.h>
35 #include <signal.h>
36 #include <sys/time.h>
37 #include <stdlib.h>
38 #include <getopt.h>
39 #include <dirent.h>
40 #include <string.h>
41 #include <ctype.h>
42 #include <sched.h>
43 #include <time.h>
44 #include <cpuid.h>
45 #include <linux/capability.h>
46 #include <errno.h>
47
48 char *proc_stat = "/proc/stat";
49 FILE *outf;
50 int *fd_percpu;
51 struct timeval interval_tv = {5, 0};
52 struct timespec interval_ts = {5, 0};
53 struct timespec one_msec = {0, 1000000};
54 unsigned int num_iterations;
55 unsigned int debug;
56 unsigned int quiet;
57 unsigned int shown;
58 unsigned int sums_need_wide_columns;
59 unsigned int rapl_joules;
60 unsigned int summary_only;
61 unsigned int list_header_only;
62 unsigned int dump_only;
63 unsigned int do_snb_cstates;
64 unsigned int do_knl_cstates;
65 unsigned int do_slm_cstates;
66 unsigned int do_cnl_cstates;
67 unsigned int use_c1_residency_msr;
68 unsigned int has_aperf;
69 unsigned int has_epb;
70 unsigned int do_irtl_snb;
71 unsigned int do_irtl_hsw;
72 unsigned int units = 1000000;   /* MHz etc */
73 unsigned int genuine_intel;
74 unsigned int has_invariant_tsc;
75 unsigned int do_nhm_platform_info;
76 unsigned int no_MSR_MISC_PWR_MGMT;
77 unsigned int aperf_mperf_multiplier = 1;
78 double bclk;
79 double base_hz;
80 unsigned int has_base_hz;
81 double tsc_tweak = 1.0;
82 unsigned int show_pkg_only;
83 unsigned int show_core_only;
84 char *output_buffer, *outp;
85 unsigned int do_rapl;
86 unsigned int do_dts;
87 unsigned int do_ptm;
88 unsigned long long  gfx_cur_rc6_ms;
89 unsigned long long cpuidle_cur_cpu_lpi_us;
90 unsigned long long cpuidle_cur_sys_lpi_us;
91 unsigned int gfx_cur_mhz;
92 unsigned int tcc_activation_temp;
93 unsigned int tcc_activation_temp_override;
94 double rapl_power_units, rapl_time_units;
95 double rapl_dram_energy_units, rapl_energy_units;
96 double rapl_joule_counter_range;
97 unsigned int do_core_perf_limit_reasons;
98 unsigned int has_automatic_cstate_conversion;
99 unsigned int do_gfx_perf_limit_reasons;
100 unsigned int do_ring_perf_limit_reasons;
101 unsigned int crystal_hz;
102 unsigned long long tsc_hz;
103 int base_cpu;
104 double discover_bclk(unsigned int family, unsigned int model);
105 unsigned int has_hwp;   /* IA32_PM_ENABLE, IA32_HWP_CAPABILITIES */
106                         /* IA32_HWP_REQUEST, IA32_HWP_STATUS */
107 unsigned int has_hwp_notify;            /* IA32_HWP_INTERRUPT */
108 unsigned int has_hwp_activity_window;   /* IA32_HWP_REQUEST[bits 41:32] */
109 unsigned int has_hwp_epp;               /* IA32_HWP_REQUEST[bits 31:24] */
110 unsigned int has_hwp_pkg;               /* IA32_HWP_REQUEST_PKG */
111 unsigned int has_misc_feature_control;
112
113 #define RAPL_PKG                (1 << 0)
114                                         /* 0x610 MSR_PKG_POWER_LIMIT */
115                                         /* 0x611 MSR_PKG_ENERGY_STATUS */
116 #define RAPL_PKG_PERF_STATUS    (1 << 1)
117                                         /* 0x613 MSR_PKG_PERF_STATUS */
118 #define RAPL_PKG_POWER_INFO     (1 << 2)
119                                         /* 0x614 MSR_PKG_POWER_INFO */
120
121 #define RAPL_DRAM               (1 << 3)
122                                         /* 0x618 MSR_DRAM_POWER_LIMIT */
123                                         /* 0x619 MSR_DRAM_ENERGY_STATUS */
124 #define RAPL_DRAM_PERF_STATUS   (1 << 4)
125                                         /* 0x61b MSR_DRAM_PERF_STATUS */
126 #define RAPL_DRAM_POWER_INFO    (1 << 5)
127                                         /* 0x61c MSR_DRAM_POWER_INFO */
128
129 #define RAPL_CORES_POWER_LIMIT  (1 << 6)
130                                         /* 0x638 MSR_PP0_POWER_LIMIT */
131 #define RAPL_CORE_POLICY        (1 << 7)
132                                         /* 0x63a MSR_PP0_POLICY */
133
134 #define RAPL_GFX                (1 << 8)
135                                         /* 0x640 MSR_PP1_POWER_LIMIT */
136                                         /* 0x641 MSR_PP1_ENERGY_STATUS */
137                                         /* 0x642 MSR_PP1_POLICY */
138
139 #define RAPL_CORES_ENERGY_STATUS        (1 << 9)
140                                         /* 0x639 MSR_PP0_ENERGY_STATUS */
141 #define RAPL_CORES (RAPL_CORES_ENERGY_STATUS | RAPL_CORES_POWER_LIMIT)
142 #define TJMAX_DEFAULT   100
143
144 #define MAX(a, b) ((a) > (b) ? (a) : (b))
145
146 /*
147  * buffer size used by sscanf() for added column names
148  * Usually truncated to 7 characters, but also handles 18 columns for raw 64-bit counters
149  */
150 #define NAME_BYTES 20
151 #define PATH_BYTES 128
152
153 int backwards_count;
154 char *progname;
155
156 #define CPU_SUBSET_MAXCPUS      1024    /* need to use before probe... */
157 cpu_set_t *cpu_present_set, *cpu_affinity_set, *cpu_subset;
158 size_t cpu_present_setsize, cpu_affinity_setsize, cpu_subset_size;
159 #define MAX_ADDED_COUNTERS 8
160 #define MAX_ADDED_THREAD_COUNTERS 24
161 #define BITMASK_SIZE 32
162
163 struct thread_data {
164         struct timeval tv_begin;
165         struct timeval tv_end;
166         unsigned long long tsc;
167         unsigned long long aperf;
168         unsigned long long mperf;
169         unsigned long long c1;
170         unsigned long long  irq_count;
171         unsigned int smi_count;
172         unsigned int cpu_id;
173         unsigned int flags;
174 #define CPU_IS_FIRST_THREAD_IN_CORE     0x2
175 #define CPU_IS_FIRST_CORE_IN_PACKAGE    0x4
176         unsigned long long counter[MAX_ADDED_THREAD_COUNTERS];
177 } *thread_even, *thread_odd;
178
179 struct core_data {
180         unsigned long long c3;
181         unsigned long long c6;
182         unsigned long long c7;
183         unsigned long long mc6_us;      /* duplicate as per-core for now, even though per module */
184         unsigned int core_temp_c;
185         unsigned int core_id;
186         unsigned long long counter[MAX_ADDED_COUNTERS];
187 } *core_even, *core_odd;
188
189 struct pkg_data {
190         unsigned long long pc2;
191         unsigned long long pc3;
192         unsigned long long pc6;
193         unsigned long long pc7;
194         unsigned long long pc8;
195         unsigned long long pc9;
196         unsigned long long pc10;
197         unsigned long long cpu_lpi;
198         unsigned long long sys_lpi;
199         unsigned long long pkg_wtd_core_c0;
200         unsigned long long pkg_any_core_c0;
201         unsigned long long pkg_any_gfxe_c0;
202         unsigned long long pkg_both_core_gfxe_c0;
203         long long gfx_rc6_ms;
204         unsigned int gfx_mhz;
205         unsigned int package_id;
206         unsigned int energy_pkg;        /* MSR_PKG_ENERGY_STATUS */
207         unsigned int energy_dram;       /* MSR_DRAM_ENERGY_STATUS */
208         unsigned int energy_cores;      /* MSR_PP0_ENERGY_STATUS */
209         unsigned int energy_gfx;        /* MSR_PP1_ENERGY_STATUS */
210         unsigned int rapl_pkg_perf_status;      /* MSR_PKG_PERF_STATUS */
211         unsigned int rapl_dram_perf_status;     /* MSR_DRAM_PERF_STATUS */
212         unsigned int pkg_temp_c;
213         unsigned long long counter[MAX_ADDED_COUNTERS];
214 } *package_even, *package_odd;
215
216 #define ODD_COUNTERS thread_odd, core_odd, package_odd
217 #define EVEN_COUNTERS thread_even, core_even, package_even
218
/*
 * Index helpers for the per-thread, per-core and per-package record arrays.
 *
 * Records are laid out package-major, then node, then core, then thread,
 * using the dimensions stored in the global "topo".  Each macro yields a
 * pointer to the selected element of the array starting at *_base.
 * Every argument is parenthesized so that expression arguments expand safely.
 */
#define GET_THREAD(thread_base, thread_no, core_no, node_no, pkg_no)	      \
	((thread_base) +						      \
	 ((pkg_no) *							      \
	  topo.nodes_per_pkg * topo.cores_per_node * topo.threads_per_core) + \
	 ((node_no) * topo.cores_per_node * topo.threads_per_core) +	      \
	 ((core_no) * topo.threads_per_core) +				      \
	 (thread_no))

#define GET_CORE(core_base, core_no, node_no, pkg_no)			\
	((core_base) +							\
	 ((pkg_no) *  topo.nodes_per_pkg * topo.cores_per_node) +	\
	 ((node_no) * topo.cores_per_node) +				\
	 (core_no))


#define GET_PKG(pkg_base, pkg_no) ((pkg_base) + (pkg_no))
235
/* Classification enums for counters; enumerator values are implicit, starting at 0. */
enum counter_scope {
	SCOPE_CPU,
	SCOPE_CORE,
	SCOPE_PACKAGE
};

enum counter_type {
	COUNTER_ITEMS,
	COUNTER_CYCLES,
	COUNTER_SECONDS,
	COUNTER_USEC
};

enum counter_format {
	FORMAT_RAW,
	FORMAT_DELTA,
	FORMAT_PERCENT
};
239
240 struct msr_counter {
241         unsigned int msr_num;
242         char name[NAME_BYTES];
243         char path[PATH_BYTES];
244         unsigned int width;
245         enum counter_type type;
246         enum counter_format format;
247         struct msr_counter *next;
248         unsigned int flags;
249 #define FLAGS_HIDE      (1 << 0)
250 #define FLAGS_SHOW      (1 << 1)
251 #define SYSFS_PERCPU    (1 << 1)
252 };
253
/*
 * Bookkeeping for added counters: a count and a list head for each of the
 * thread, core and package lists.
 */
struct sys_counters {
	unsigned int added_thread_counters;
	unsigned int added_core_counters;
	unsigned int added_package_counters;
	struct msr_counter *tp, *cp, *pp;	/* thread, core, package list heads */
} sys;
262
263 struct system_summary {
264         struct thread_data threads;
265         struct core_data cores;
266         struct pkg_data packages;
267 } average;
268
/* Topology identifiers recorded for a CPU; "cpus" points at such records. */
struct cpu_topology {
	int physical_package_id, logical_cpu_id;
	int physical_node_id;
	int logical_node_id;	/* 0-based count within the package */
	int physical_core_id, thread_id;
	cpu_set_t *put_ids;	/* Processing Unit/Thread IDs */
} *cpus;
278
/*
 * System-wide topology dimensions.  The *_per_* members are the strides
 * used by GET_THREAD() and GET_CORE() to index the record arrays.
 */
struct topo_params {
	int num_packages, num_cpus, num_cores;
	int max_cpu_num, max_node_num;
	int nodes_per_pkg, cores_per_node, threads_per_core;
} topo;
289
290 struct timeval tv_even, tv_odd, tv_delta;
291
292 int *irq_column_2_cpu;  /* /proc/interrupts column numbers */
293 int *irqs_per_cpu;              /* indexed by cpu_num */
294
295 void setup_all_buffers(void);
296
297 int cpu_is_not_present(int cpu)
298 {
299         return !CPU_ISSET_S(cpu, cpu_present_setsize, cpu_present_set);
300 }
301 /*
302  * run func(thread, core, package) in topology order
303  * skip non-present cpus
304  */
305
306 int for_all_cpus(int (func)(struct thread_data *, struct core_data *, struct pkg_data *),
307         struct thread_data *thread_base, struct core_data *core_base, struct pkg_data *pkg_base)
308 {
309         int retval, pkg_no, core_no, thread_no, node_no;
310
311         for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) {
312                 for (core_no = 0; core_no < topo.cores_per_node; ++core_no) {
313                         for (node_no = 0; node_no < topo.nodes_per_pkg;
314                              node_no++) {
315                                 for (thread_no = 0; thread_no <
316                                         topo.threads_per_core; ++thread_no) {
317                                         struct thread_data *t;
318                                         struct core_data *c;
319                                         struct pkg_data *p;
320
321                                         t = GET_THREAD(thread_base, thread_no,
322                                                        core_no, node_no,
323                                                        pkg_no);
324
325                                         if (cpu_is_not_present(t->cpu_id))
326                                                 continue;
327
328                                         c = GET_CORE(core_base, core_no,
329                                                      node_no, pkg_no);
330                                         p = GET_PKG(pkg_base, pkg_no);
331
332                                         retval = func(t, c, p);
333                                         if (retval)
334                                                 return retval;
335                                 }
336                         }
337                 }
338         }
339         return 0;
340 }
341
342 int cpu_migrate(int cpu)
343 {
344         CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set);
345         CPU_SET_S(cpu, cpu_affinity_setsize, cpu_affinity_set);
346         if (sched_setaffinity(0, cpu_affinity_setsize, cpu_affinity_set) == -1)
347                 return -1;
348         else
349                 return 0;
350 }
351 int get_msr_fd(int cpu)
352 {
353         char pathname[32];
354         int fd;
355
356         fd = fd_percpu[cpu];
357
358         if (fd)
359                 return fd;
360
361         sprintf(pathname, "/dev/cpu/%d/msr", cpu);
362         fd = open(pathname, O_RDONLY);
363         if (fd < 0)
364                 err(-1, "%s open failed, try chown or chmod +r /dev/cpu/*/msr, or run as root", pathname);
365
366         fd_percpu[cpu] = fd;
367
368         return fd;
369 }
370
/*
 * Read the 64-bit MSR at "offset" on "cpu" into *msr.
 *
 * Always returns 0: a short or failed pread() terminates the program
 * via err() instead of being reported to the caller.
 */
int get_msr(int cpu, off_t offset, unsigned long long *msr)
{
	int fd = get_msr_fd(cpu);
	ssize_t nread = pread(fd, msr, sizeof(*msr), offset);

	if (nread != sizeof(*msr))
		err(-1, "cpu%d: msr offset 0x%llx read failed", cpu, (unsigned long long)offset);

	return 0;
}
382
383 /*
384  * This list matches the column headers, except
385  * 1. built-in only, the sysfs counters are not here -- we learn of those at run-time
386  * 2. Core and CPU are moved to the end, we can't have strings that contain them
387  *    matching on them for --show and --hide.
388  */
389 struct msr_counter bic[] = {
390         { 0x0, "usec" },
391         { 0x0, "Time_Of_Day_Seconds" },
392         { 0x0, "Package" },
393         { 0x0, "Node" },
394         { 0x0, "Avg_MHz" },
395         { 0x0, "Busy%" },
396         { 0x0, "Bzy_MHz" },
397         { 0x0, "TSC_MHz" },
398         { 0x0, "IRQ" },
399         { 0x0, "SMI", "", 32, 0, FORMAT_DELTA, NULL},
400         { 0x0, "sysfs" },
401         { 0x0, "CPU%c1" },
402         { 0x0, "CPU%c3" },
403         { 0x0, "CPU%c6" },
404         { 0x0, "CPU%c7" },
405         { 0x0, "ThreadC" },
406         { 0x0, "CoreTmp" },
407         { 0x0, "CoreCnt" },
408         { 0x0, "PkgTmp" },
409         { 0x0, "GFX%rc6" },
410         { 0x0, "GFXMHz" },
411         { 0x0, "Pkg%pc2" },
412         { 0x0, "Pkg%pc3" },
413         { 0x0, "Pkg%pc6" },
414         { 0x0, "Pkg%pc7" },
415         { 0x0, "Pkg%pc8" },
416         { 0x0, "Pkg%pc9" },
417         { 0x0, "Pk%pc10" },
418         { 0x0, "CPU%LPI" },
419         { 0x0, "SYS%LPI" },
420         { 0x0, "PkgWatt" },
421         { 0x0, "CorWatt" },
422         { 0x0, "GFXWatt" },
423         { 0x0, "PkgCnt" },
424         { 0x0, "RAMWatt" },
425         { 0x0, "PKG_%" },
426         { 0x0, "RAM_%" },
427         { 0x0, "Pkg_J" },
428         { 0x0, "Cor_J" },
429         { 0x0, "GFX_J" },
430         { 0x0, "RAM_J" },
431         { 0x0, "Mod%c6" },
432         { 0x0, "Totl%C0" },
433         { 0x0, "Any%C0" },
434         { 0x0, "GFX%C0" },
435         { 0x0, "CPUGFX%" },
436         { 0x0, "Core" },
437         { 0x0, "CPU" },
438 };
439
440
441
442 #define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter))
/*
 * One bit per built-in column.  Bit N corresponds to entry N of the
 * built-in column table bic[].
 */
#define BIC_USEC	(1ULL << 0)
#define BIC_TOD		(1ULL << 1)
#define BIC_Package	(1ULL << 2)
#define BIC_Node	(1ULL << 3)
#define BIC_Avg_MHz	(1ULL << 4)
#define BIC_Busy	(1ULL << 5)
#define BIC_Bzy_MHz	(1ULL << 6)
#define BIC_TSC_MHz	(1ULL << 7)
#define BIC_IRQ		(1ULL << 8)
#define BIC_SMI		(1ULL << 9)
#define BIC_sysfs	(1ULL << 10)
#define BIC_CPU_c1	(1ULL << 11)
#define BIC_CPU_c3	(1ULL << 12)
#define BIC_CPU_c6	(1ULL << 13)
#define BIC_CPU_c7	(1ULL << 14)
#define BIC_ThreadC	(1ULL << 15)
#define BIC_CoreTmp	(1ULL << 16)
#define BIC_CoreCnt	(1ULL << 17)
#define BIC_PkgTmp	(1ULL << 18)
#define BIC_GFX_rc6	(1ULL << 19)
#define BIC_GFXMHz	(1ULL << 20)
#define BIC_Pkgpc2	(1ULL << 21)
#define BIC_Pkgpc3	(1ULL << 22)
#define BIC_Pkgpc6	(1ULL << 23)
#define BIC_Pkgpc7	(1ULL << 24)
#define BIC_Pkgpc8	(1ULL << 25)
#define BIC_Pkgpc9	(1ULL << 26)
#define BIC_Pkgpc10	(1ULL << 27)
#define BIC_CPU_LPI	(1ULL << 28)
#define BIC_SYS_LPI	(1ULL << 29)
#define BIC_PkgWatt	(1ULL << 30)
#define BIC_CorWatt	(1ULL << 31)
#define BIC_GFXWatt	(1ULL << 32)
#define BIC_PkgCnt	(1ULL << 33)
#define BIC_RAMWatt	(1ULL << 34)
#define BIC_PKG__	(1ULL << 35)
#define BIC_RAM__	(1ULL << 36)
#define BIC_Pkg_J	(1ULL << 37)
#define BIC_Cor_J	(1ULL << 38)
#define BIC_GFX_J	(1ULL << 39)
#define BIC_RAM_J	(1ULL << 40)
#define BIC_Mod_c6	(1ULL << 41)
#define BIC_Totl_c0	(1ULL << 42)
#define BIC_Any_c0	(1ULL << 43)
#define BIC_GFX_c0	(1ULL << 44)
#define BIC_CPUGFX	(1ULL << 45)
#define BIC_Core	(1ULL << 46)
#define BIC_CPU		(1ULL << 47)

#define BIC_DISABLED_BY_DEFAULT	(BIC_USEC | BIC_TOD)

/* columns the user has not disabled: everything except BIC_DISABLED_BY_DEFAULT */
unsigned long long bic_enabled = (0xFFFFFFFFFFFFFFFFULL & ~BIC_DISABLED_BY_DEFAULT);
/* columns marked present; starts with usec, Time_Of_Day_Seconds and sysfs */
unsigned long long bic_present = BIC_USEC | BIC_TOD | BIC_sysfs;

/*
 * DO_BIC        non-zero when any given bit is both enabled and present
 * ENABLE_BIC    set bits in bic_enabled
 * BIC_PRESENT   set bits in bic_present
 * BIC_NOT_PRESENT clear bits in bic_present
 *
 * Arguments are parenthesized so that a mask expression such as (A | B)
 * is applied as a whole; unparenthesized, DO_BIC(A | B) was always true
 * and BIC_NOT_PRESENT(A | B) cleared only A.
 */
#define DO_BIC(COUNTER_NAME) (bic_enabled & bic_present & (COUNTER_NAME))
#define ENABLE_BIC(COUNTER_NAME) (bic_enabled |= (COUNTER_NAME))
#define BIC_PRESENT(COUNTER_BIT) (bic_present |= (COUNTER_BIT))
#define BIC_NOT_PRESENT(COUNTER_BIT) (bic_present &= ~(COUNTER_BIT))
501
502
503 #define MAX_DEFERRED 16
504 char *deferred_skip_names[MAX_DEFERRED];
505 int deferred_skip_index;
506
/*
 * How a list of counter names is interpreted:
 *   SHOW_LIST - show this list of counters, hide the rest
 *   HIDE_LIST - hide this list of counters, show the rest [default]
 */
enum show_hide_mode {
	SHOW_LIST,
	HIDE_LIST
} global_show_hide_mode = HIDE_LIST;
512
513 void help(void)
514 {
515         fprintf(outf,
516         "Usage: turbostat [OPTIONS][(--interval seconds) | COMMAND ...]\n"
517         "\n"
518         "Turbostat forks the specified COMMAND and prints statistics\n"
519         "when COMMAND completes.\n"
520         "If no COMMAND is specified, turbostat wakes every 5-seconds\n"
521         "to print statistics, until interrupted.\n"
522         "--add          add a counter\n"
523         "               eg. --add msr0x10,u64,cpu,delta,MY_TSC\n"
524         "--cpu  cpu-set limit output to summary plus cpu-set:\n"
525         "               {core | package | j,k,l..m,n-p }\n"
526         "--quiet        skip decoding system configuration header\n"
527         "--interval sec.subsec  Override default 5-second measurement interval\n"
528         "--help         print this help message\n"
529         "--list         list column headers only\n"
530         "--num_iterations num   number of the measurement iterations\n"
531         "--out file     create or truncate \"file\" for all output\n"
532         "--version      print version information\n"
533         "\n"
534         "For more help, run \"man turbostat\"\n");
535 }
536
537 /*
538  * bic_lookup
539  * for all the strings in comma separate name_list,
540  * set the approprate bit in return value.
541  */
542 unsigned long long bic_lookup(char *name_list, enum show_hide_mode mode)
543 {
544         int i;
545         unsigned long long retval = 0;
546
547         while (name_list) {
548                 char *comma;
549
550                 comma = strchr(name_list, ',');
551
552                 if (comma)
553                         *comma = '\0';
554
555                 if (!strcmp(name_list, "all"))
556                         return ~0;
557
558                 for (i = 0; i < MAX_BIC; ++i) {
559                         if (!strcmp(name_list, bic[i].name)) {
560                                 retval |= (1ULL << i);
561                                 break;
562                         }
563                 }
564                 if (i == MAX_BIC) {
565                         if (mode == SHOW_LIST) {
566                                 fprintf(stderr, "Invalid counter name: %s\n", name_list);
567                                 exit(-1);
568                         }
569                         deferred_skip_names[deferred_skip_index++] = name_list;
570                         if (debug)
571                                 fprintf(stderr, "deferred \"%s\"\n", name_list);
572                         if (deferred_skip_index >= MAX_DEFERRED) {
573                                 fprintf(stderr, "More than max %d un-recognized --skip options '%s'\n",
574                                         MAX_DEFERRED, name_list);
575                                 help();
576                                 exit(1);
577                         }
578                 }
579
580                 name_list = comma;
581                 if (name_list)
582                         name_list++;
583
584         }
585         return retval;
586 }
587
588
589 void print_header(char *delim)
590 {
591         struct msr_counter *mp;
592         int printed = 0;
593
594         if (DO_BIC(BIC_USEC))
595                 outp += sprintf(outp, "%susec", (printed++ ? delim : ""));
596         if (DO_BIC(BIC_TOD))
597                 outp += sprintf(outp, "%sTime_Of_Day_Seconds", (printed++ ? delim : ""));
598         if (DO_BIC(BIC_Package))
599                 outp += sprintf(outp, "%sPackage", (printed++ ? delim : ""));
600         if (DO_BIC(BIC_Node))
601                 outp += sprintf(outp, "%sNode", (printed++ ? delim : ""));
602         if (DO_BIC(BIC_Core))
603                 outp += sprintf(outp, "%sCore", (printed++ ? delim : ""));
604         if (DO_BIC(BIC_CPU))
605                 outp += sprintf(outp, "%sCPU", (printed++ ? delim : ""));
606         if (DO_BIC(BIC_Avg_MHz))
607                 outp += sprintf(outp, "%sAvg_MHz", (printed++ ? delim : ""));
608         if (DO_BIC(BIC_Busy))
609                 outp += sprintf(outp, "%sBusy%%", (printed++ ? delim : ""));
610         if (DO_BIC(BIC_Bzy_MHz))
611                 outp += sprintf(outp, "%sBzy_MHz", (printed++ ? delim : ""));
612         if (DO_BIC(BIC_TSC_MHz))
613                 outp += sprintf(outp, "%sTSC_MHz", (printed++ ? delim : ""));
614
615         if (DO_BIC(BIC_IRQ)) {
616                 if (sums_need_wide_columns)
617                         outp += sprintf(outp, "%s     IRQ", (printed++ ? delim : ""));
618                 else
619                         outp += sprintf(outp, "%sIRQ", (printed++ ? delim : ""));
620         }
621
622         if (DO_BIC(BIC_SMI))
623                 outp += sprintf(outp, "%sSMI", (printed++ ? delim : ""));
624
625         for (mp = sys.tp; mp; mp = mp->next) {
626
627                 if (mp->format == FORMAT_RAW) {
628                         if (mp->width == 64)
629                                 outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), mp->name);
630                         else
631                                 outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), mp->name);
632                 } else {
633                         if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
634                                 outp += sprintf(outp, "%s%8s", (printed++ ? delim : ""), mp->name);
635                         else
636                                 outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), mp->name);
637                 }
638         }
639
640         if (DO_BIC(BIC_CPU_c1))
641                 outp += sprintf(outp, "%sCPU%%c1", (printed++ ? delim : ""));
642         if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates && !do_cnl_cstates)
643                 outp += sprintf(outp, "%sCPU%%c3", (printed++ ? delim : ""));
644         if (DO_BIC(BIC_CPU_c6))
645                 outp += sprintf(outp, "%sCPU%%c6", (printed++ ? delim : ""));
646         if (DO_BIC(BIC_CPU_c7))
647                 outp += sprintf(outp, "%sCPU%%c7", (printed++ ? delim : ""));
648
649         if (DO_BIC(BIC_Mod_c6))
650                 outp += sprintf(outp, "%sMod%%c6", (printed++ ? delim : ""));
651
652         if (DO_BIC(BIC_CoreTmp))
653                 outp += sprintf(outp, "%sCoreTmp", (printed++ ? delim : ""));
654
655         for (mp = sys.cp; mp; mp = mp->next) {
656                 if (mp->format == FORMAT_RAW) {
657                         if (mp->width == 64)
658                                 outp += sprintf(outp, "%s%18.18s", delim, mp->name);
659                         else
660                                 outp += sprintf(outp, "%s%10.10s", delim, mp->name);
661                 } else {
662                         if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
663                                 outp += sprintf(outp, "%s%8s", delim, mp->name);
664                         else
665                                 outp += sprintf(outp, "%s%s", delim, mp->name);
666                 }
667         }
668
669         if (DO_BIC(BIC_PkgTmp))
670                 outp += sprintf(outp, "%sPkgTmp", (printed++ ? delim : ""));
671
672         if (DO_BIC(BIC_GFX_rc6))
673                 outp += sprintf(outp, "%sGFX%%rc6", (printed++ ? delim : ""));
674
675         if (DO_BIC(BIC_GFXMHz))
676                 outp += sprintf(outp, "%sGFXMHz", (printed++ ? delim : ""));
677
678         if (DO_BIC(BIC_Totl_c0))
679                 outp += sprintf(outp, "%sTotl%%C0", (printed++ ? delim : ""));
680         if (DO_BIC(BIC_Any_c0))
681                 outp += sprintf(outp, "%sAny%%C0", (printed++ ? delim : ""));
682         if (DO_BIC(BIC_GFX_c0))
683                 outp += sprintf(outp, "%sGFX%%C0", (printed++ ? delim : ""));
684         if (DO_BIC(BIC_CPUGFX))
685                 outp += sprintf(outp, "%sCPUGFX%%", (printed++ ? delim : ""));
686
687         if (DO_BIC(BIC_Pkgpc2))
688                 outp += sprintf(outp, "%sPkg%%pc2", (printed++ ? delim : ""));
689         if (DO_BIC(BIC_Pkgpc3))
690                 outp += sprintf(outp, "%sPkg%%pc3", (printed++ ? delim : ""));
691         if (DO_BIC(BIC_Pkgpc6))
692                 outp += sprintf(outp, "%sPkg%%pc6", (printed++ ? delim : ""));
693         if (DO_BIC(BIC_Pkgpc7))
694                 outp += sprintf(outp, "%sPkg%%pc7", (printed++ ? delim : ""));
695         if (DO_BIC(BIC_Pkgpc8))
696                 outp += sprintf(outp, "%sPkg%%pc8", (printed++ ? delim : ""));
697         if (DO_BIC(BIC_Pkgpc9))
698                 outp += sprintf(outp, "%sPkg%%pc9", (printed++ ? delim : ""));
699         if (DO_BIC(BIC_Pkgpc10))
700                 outp += sprintf(outp, "%sPk%%pc10", (printed++ ? delim : ""));
701         if (DO_BIC(BIC_CPU_LPI))
702                 outp += sprintf(outp, "%sCPU%%LPI", (printed++ ? delim : ""));
703         if (DO_BIC(BIC_SYS_LPI))
704                 outp += sprintf(outp, "%sSYS%%LPI", (printed++ ? delim : ""));
705
706         if (do_rapl && !rapl_joules) {
707                 if (DO_BIC(BIC_PkgWatt))
708                         outp += sprintf(outp, "%sPkgWatt", (printed++ ? delim : ""));
709                 if (DO_BIC(BIC_CorWatt))
710                         outp += sprintf(outp, "%sCorWatt", (printed++ ? delim : ""));
711                 if (DO_BIC(BIC_GFXWatt))
712                         outp += sprintf(outp, "%sGFXWatt", (printed++ ? delim : ""));
713                 if (DO_BIC(BIC_RAMWatt))
714                         outp += sprintf(outp, "%sRAMWatt", (printed++ ? delim : ""));
715                 if (DO_BIC(BIC_PKG__))
716                         outp += sprintf(outp, "%sPKG_%%", (printed++ ? delim : ""));
717                 if (DO_BIC(BIC_RAM__))
718                         outp += sprintf(outp, "%sRAM_%%", (printed++ ? delim : ""));
719         } else if (do_rapl && rapl_joules) {
720                 if (DO_BIC(BIC_Pkg_J))
721                         outp += sprintf(outp, "%sPkg_J", (printed++ ? delim : ""));
722                 if (DO_BIC(BIC_Cor_J))
723                         outp += sprintf(outp, "%sCor_J", (printed++ ? delim : ""));
724                 if (DO_BIC(BIC_GFX_J))
725                         outp += sprintf(outp, "%sGFX_J", (printed++ ? delim : ""));
726                 if (DO_BIC(BIC_RAM_J))
727                         outp += sprintf(outp, "%sRAM_J", (printed++ ? delim : ""));
728                 if (DO_BIC(BIC_PKG__))
729                         outp += sprintf(outp, "%sPKG_%%", (printed++ ? delim : ""));
730                 if (DO_BIC(BIC_RAM__))
731                         outp += sprintf(outp, "%sRAM_%%", (printed++ ? delim : ""));
732         }
733         for (mp = sys.pp; mp; mp = mp->next) {
734                 if (mp->format == FORMAT_RAW) {
735                         if (mp->width == 64)
736                                 outp += sprintf(outp, "%s%18.18s", delim, mp->name);
737                         else
738                                 outp += sprintf(outp, "%s%10.10s", delim, mp->name);
739                 } else {
740                         if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
741                                 outp += sprintf(outp, "%s%8s", delim, mp->name);
742                         else
743                                 outp += sprintf(outp, "%s%s", delim, mp->name);
744                 }
745         }
746
747         outp += sprintf(outp, "\n");
748 }
749
750 int dump_counters(struct thread_data *t, struct core_data *c,
751         struct pkg_data *p)
752 {
753         int i;
754         struct msr_counter *mp;
755
756         outp += sprintf(outp, "t %p, c %p, p %p\n", t, c, p);
757
758         if (t) {
759                 outp += sprintf(outp, "CPU: %d flags 0x%x\n",
760                         t->cpu_id, t->flags);
761                 outp += sprintf(outp, "TSC: %016llX\n", t->tsc);
762                 outp += sprintf(outp, "aperf: %016llX\n", t->aperf);
763                 outp += sprintf(outp, "mperf: %016llX\n", t->mperf);
764                 outp += sprintf(outp, "c1: %016llX\n", t->c1);
765
766                 if (DO_BIC(BIC_IRQ))
767                         outp += sprintf(outp, "IRQ: %lld\n", t->irq_count);
768                 if (DO_BIC(BIC_SMI))
769                         outp += sprintf(outp, "SMI: %d\n", t->smi_count);
770
771                 for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
772                         outp += sprintf(outp, "tADDED [%d] msr0x%x: %08llX\n",
773                                 i, mp->msr_num, t->counter[i]);
774                 }
775         }
776
777         if (c) {
778                 outp += sprintf(outp, "core: %d\n", c->core_id);
779                 outp += sprintf(outp, "c3: %016llX\n", c->c3);
780                 outp += sprintf(outp, "c6: %016llX\n", c->c6);
781                 outp += sprintf(outp, "c7: %016llX\n", c->c7);
782                 outp += sprintf(outp, "DTS: %dC\n", c->core_temp_c);
783
784                 for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
785                         outp += sprintf(outp, "cADDED [%d] msr0x%x: %08llX\n",
786                                 i, mp->msr_num, c->counter[i]);
787                 }
788                 outp += sprintf(outp, "mc6_us: %016llX\n", c->mc6_us);
789         }
790
791         if (p) {
792                 outp += sprintf(outp, "package: %d\n", p->package_id);
793
794                 outp += sprintf(outp, "Weighted cores: %016llX\n", p->pkg_wtd_core_c0);
795                 outp += sprintf(outp, "Any cores: %016llX\n", p->pkg_any_core_c0);
796                 outp += sprintf(outp, "Any GFX: %016llX\n", p->pkg_any_gfxe_c0);
797                 outp += sprintf(outp, "CPU + GFX: %016llX\n", p->pkg_both_core_gfxe_c0);
798
799                 outp += sprintf(outp, "pc2: %016llX\n", p->pc2);
800                 if (DO_BIC(BIC_Pkgpc3))
801                         outp += sprintf(outp, "pc3: %016llX\n", p->pc3);
802                 if (DO_BIC(BIC_Pkgpc6))
803                         outp += sprintf(outp, "pc6: %016llX\n", p->pc6);
804                 if (DO_BIC(BIC_Pkgpc7))
805                         outp += sprintf(outp, "pc7: %016llX\n", p->pc7);
806                 outp += sprintf(outp, "pc8: %016llX\n", p->pc8);
807                 outp += sprintf(outp, "pc9: %016llX\n", p->pc9);
808                 outp += sprintf(outp, "pc10: %016llX\n", p->pc10);
809                 outp += sprintf(outp, "pc10: %016llX\n", p->pc10);
810                 outp += sprintf(outp, "cpu_lpi: %016llX\n", p->cpu_lpi);
811                 outp += sprintf(outp, "sys_lpi: %016llX\n", p->sys_lpi);
812                 outp += sprintf(outp, "Joules PKG: %0X\n", p->energy_pkg);
813                 outp += sprintf(outp, "Joules COR: %0X\n", p->energy_cores);
814                 outp += sprintf(outp, "Joules GFX: %0X\n", p->energy_gfx);
815                 outp += sprintf(outp, "Joules RAM: %0X\n", p->energy_dram);
816                 outp += sprintf(outp, "Throttle PKG: %0X\n",
817                         p->rapl_pkg_perf_status);
818                 outp += sprintf(outp, "Throttle RAM: %0X\n",
819                         p->rapl_dram_perf_status);
820                 outp += sprintf(outp, "PTM: %dC\n", p->pkg_temp_c);
821
822                 for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
823                         outp += sprintf(outp, "pADDED [%d] msr0x%x: %08llX\n",
824                                 i, mp->msr_num, p->counter[i]);
825                 }
826         }
827
828         outp += sprintf(outp, "\n");
829
830         return 0;
831 }
832
833 /*
834  * column formatting convention & formats
835  */
836 int format_counters(struct thread_data *t, struct core_data *c,
837         struct pkg_data *p)
838 {
839         double interval_float, tsc;
840         char *fmt8;
841         int i;
842         struct msr_counter *mp;
843         char *delim = "\t";
844         int printed = 0;
845
846          /* if showing only 1st thread in core and this isn't one, bail out */
847         if (show_core_only && !(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
848                 return 0;
849
850          /* if showing only 1st thread in pkg and this isn't one, bail out */
851         if (show_pkg_only && !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
852                 return 0;
853
854         /*if not summary line and --cpu is used */
855         if ((t != &average.threads) &&
856                 (cpu_subset && !CPU_ISSET_S(t->cpu_id, cpu_subset_size, cpu_subset)))
857                 return 0;
858
859         if (DO_BIC(BIC_USEC)) {
860                 /* on each row, print how many usec each timestamp took to gather */
861                 struct timeval tv;
862
863                 timersub(&t->tv_end, &t->tv_begin, &tv);
864                 outp += sprintf(outp, "%5ld\t", tv.tv_sec * 1000000 + tv.tv_usec);
865         }
866
867         /* Time_Of_Day_Seconds: on each row, print sec.usec last timestamp taken */
868         if (DO_BIC(BIC_TOD))
869                 outp += sprintf(outp, "%10ld.%06ld\t", t->tv_end.tv_sec, t->tv_end.tv_usec);
870
871         interval_float = tv_delta.tv_sec + tv_delta.tv_usec/1000000.0;
872
873         tsc = t->tsc * tsc_tweak;
874
875         /* topo columns, print blanks on 1st (average) line */
876         if (t == &average.threads) {
877                 if (DO_BIC(BIC_Package))
878                         outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
879                 if (DO_BIC(BIC_Node))
880                         outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
881                 if (DO_BIC(BIC_Core))
882                         outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
883                 if (DO_BIC(BIC_CPU))
884                         outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
885         } else {
886                 if (DO_BIC(BIC_Package)) {
887                         if (p)
888                                 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->package_id);
889                         else
890                                 outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
891                 }
892                 if (DO_BIC(BIC_Node)) {
893                         if (t)
894                                 outp += sprintf(outp, "%s%d",
895                                                 (printed++ ? delim : ""),
896                                               cpus[t->cpu_id].physical_node_id);
897                         else
898                                 outp += sprintf(outp, "%s-",
899                                                 (printed++ ? delim : ""));
900                 }
901                 if (DO_BIC(BIC_Core)) {
902                         if (c)
903                                 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), c->core_id);
904                         else
905                                 outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
906                 }
907                 if (DO_BIC(BIC_CPU))
908                         outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->cpu_id);
909         }
910
911         if (DO_BIC(BIC_Avg_MHz))
912                 outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""),
913                         1.0 / units * t->aperf / interval_float);
914
915         if (DO_BIC(BIC_Busy))
916                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->mperf/tsc);
917
918         if (DO_BIC(BIC_Bzy_MHz)) {
919                 if (has_base_hz)
920                         outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), base_hz / units * t->aperf / t->mperf);
921                 else
922                         outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""),
923                                 tsc / units * t->aperf / t->mperf / interval_float);
924         }
925
926         if (DO_BIC(BIC_TSC_MHz))
927                 outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), 1.0 * t->tsc/units/interval_float);
928
929         /* IRQ */
930         if (DO_BIC(BIC_IRQ)) {
931                 if (sums_need_wide_columns)
932                         outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), t->irq_count);
933                 else
934                         outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), t->irq_count);
935         }
936
937         /* SMI */
938         if (DO_BIC(BIC_SMI))
939                 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->smi_count);
940
941         /* Added counters */
942         for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
943                 if (mp->format == FORMAT_RAW) {
944                         if (mp->width == 32)
945                                 outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int) t->counter[i]);
946                         else
947                                 outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), t->counter[i]);
948                 } else if (mp->format == FORMAT_DELTA) {
949                         if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
950                                 outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), t->counter[i]);
951                         else
952                                 outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), t->counter[i]);
953                 } else if (mp->format == FORMAT_PERCENT) {
954                         if (mp->type == COUNTER_USEC)
955                                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), t->counter[i]/interval_float/10000);
956                         else
957                                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->counter[i]/tsc);
958                 }
959         }
960
961         /* C1 */
962         if (DO_BIC(BIC_CPU_c1))
963                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->c1/tsc);
964
965
966         /* print per-core data only for 1st thread in core */
967         if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
968                 goto done;
969
970         if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates && !do_cnl_cstates)
971                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c3/tsc);
972         if (DO_BIC(BIC_CPU_c6))
973                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c6/tsc);
974         if (DO_BIC(BIC_CPU_c7))
975                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c7/tsc);
976
977         /* Mod%c6 */
978         if (DO_BIC(BIC_Mod_c6))
979                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->mc6_us / tsc);
980
981         if (DO_BIC(BIC_CoreTmp))
982                 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), c->core_temp_c);
983
984         for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
985                 if (mp->format == FORMAT_RAW) {
986                         if (mp->width == 32)
987                                 outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int) c->counter[i]);
988                         else
989                                 outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), c->counter[i]);
990                 } else if (mp->format == FORMAT_DELTA) {
991                         if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
992                                 outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), c->counter[i]);
993                         else
994                                 outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), c->counter[i]);
995                 } else if (mp->format == FORMAT_PERCENT) {
996                         outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->counter[i]/tsc);
997                 }
998         }
999
1000         /* print per-package data only for 1st core in package */
1001         if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
1002                 goto done;
1003
1004         /* PkgTmp */
1005         if (DO_BIC(BIC_PkgTmp))
1006                 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->pkg_temp_c);
1007
1008         /* GFXrc6 */
1009         if (DO_BIC(BIC_GFX_rc6)) {
1010                 if (p->gfx_rc6_ms == -1) {      /* detect GFX counter reset */
1011                         outp += sprintf(outp, "%s**.**", (printed++ ? delim : ""));
1012                 } else {
1013                         outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""),
1014                                 p->gfx_rc6_ms / 10.0 / interval_float);
1015                 }
1016         }
1017
1018         /* GFXMHz */
1019         if (DO_BIC(BIC_GFXMHz))
1020                 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->gfx_mhz);
1021
1022         /* Totl%C0, Any%C0 GFX%C0 CPUGFX% */
1023         if (DO_BIC(BIC_Totl_c0))
1024                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_wtd_core_c0/tsc);
1025         if (DO_BIC(BIC_Any_c0))
1026                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_any_core_c0/tsc);
1027         if (DO_BIC(BIC_GFX_c0))
1028                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_any_gfxe_c0/tsc);
1029         if (DO_BIC(BIC_CPUGFX))
1030                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_both_core_gfxe_c0/tsc);
1031
1032         if (DO_BIC(BIC_Pkgpc2))
1033                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc2/tsc);
1034         if (DO_BIC(BIC_Pkgpc3))
1035                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc3/tsc);
1036         if (DO_BIC(BIC_Pkgpc6))
1037                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc6/tsc);
1038         if (DO_BIC(BIC_Pkgpc7))
1039                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc7/tsc);
1040         if (DO_BIC(BIC_Pkgpc8))
1041                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc8/tsc);
1042         if (DO_BIC(BIC_Pkgpc9))
1043                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc9/tsc);
1044         if (DO_BIC(BIC_Pkgpc10))
1045                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc10/tsc);
1046
1047         if (DO_BIC(BIC_CPU_LPI))
1048                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->cpu_lpi / 1000000.0 / interval_float);
1049         if (DO_BIC(BIC_SYS_LPI))
1050                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->sys_lpi / 1000000.0 / interval_float);
1051
1052         /*
1053          * If measurement interval exceeds minimum RAPL Joule Counter range,
1054          * indicate that results are suspect by printing "**" in fraction place.
1055          */
1056         if (interval_float < rapl_joule_counter_range)
1057                 fmt8 = "%s%.2f";
1058         else
1059                 fmt8 = "%6.0f**";
1060
1061         if (DO_BIC(BIC_PkgWatt))
1062                 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_pkg * rapl_energy_units / interval_float);
1063         if (DO_BIC(BIC_CorWatt))
1064                 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_cores * rapl_energy_units / interval_float);
1065         if (DO_BIC(BIC_GFXWatt))
1066                 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_gfx * rapl_energy_units / interval_float);
1067         if (DO_BIC(BIC_RAMWatt))
1068                 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_dram * rapl_dram_energy_units / interval_float);
1069         if (DO_BIC(BIC_Pkg_J))
1070                 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_pkg * rapl_energy_units);
1071         if (DO_BIC(BIC_Cor_J))
1072                 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_cores * rapl_energy_units);
1073         if (DO_BIC(BIC_GFX_J))
1074                 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_gfx * rapl_energy_units);
1075         if (DO_BIC(BIC_RAM_J))
1076                 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_dram * rapl_dram_energy_units);
1077         if (DO_BIC(BIC_PKG__))
1078                 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float);
1079         if (DO_BIC(BIC_RAM__))
1080                 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), 100.0 * p->rapl_dram_perf_status * rapl_time_units / interval_float);
1081
1082         for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
1083                 if (mp->format == FORMAT_RAW) {
1084                         if (mp->width == 32)
1085                                 outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int) p->counter[i]);
1086                         else
1087                                 outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), p->counter[i]);
1088                 } else if (mp->format == FORMAT_DELTA) {
1089                         if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
1090                                 outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), p->counter[i]);
1091                         else
1092                                 outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), p->counter[i]);
1093                 } else if (mp->format == FORMAT_PERCENT) {
1094                         outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->counter[i]/tsc);
1095                 }
1096         }
1097
1098 done:
1099         if (*(outp - 1) != '\n')
1100                 outp += sprintf(outp, "\n");
1101
1102         return 0;
1103 }
1104
1105 void flush_output_stdout(void)
1106 {
1107         FILE *filep;
1108
1109         if (outf == stderr)
1110                 filep = stdout;
1111         else
1112                 filep = outf;
1113
1114         fputs(output_buffer, filep);
1115         fflush(filep);
1116
1117         outp = output_buffer;
1118 }
1119 void flush_output_stderr(void)
1120 {
1121         fputs(output_buffer, outf);
1122         fflush(outf);
1123         outp = output_buffer;
1124 }
1125 void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
1126 {
1127         static int printed;
1128
1129         if (!printed || !summary_only)
1130                 print_header("\t");
1131
1132         if (topo.num_cpus > 1)
1133                 format_counters(&average.threads, &average.cores,
1134                         &average.packages);
1135
1136         printed = 1;
1137
1138         if (summary_only)
1139                 return;
1140
1141         for_all_cpus(format_counters, t, c, p);
1142 }
1143
/*
 * DELTA_WRAP32(new, old)
 * Replace old with the difference (new - old) for a counter that may
 * have wrapped at 2^32 between the two readings: when new is not
 * greater than old, 2^32 is added before subtracting.
 *
 * Both arguments are evaluated more than once, so they must be
 * side-effect free lvalue expressions.  The do/while (0) wrapper makes
 * the macro behave as a single statement, so it is safe as the body of
 * an unbraced if/else.
 */
#define DELTA_WRAP32(new, old)					\
	do {							\
		if ((new) > (old)) {				\
			(old) = (new) - (old);			\
		} else {					\
			(old) = 0x100000000 + (new) - (old);	\
		}						\
	} while (0)
1150
1151 int
1152 delta_package(struct pkg_data *new, struct pkg_data *old)
1153 {
1154         int i;
1155         struct msr_counter *mp;
1156
1157
1158         if (DO_BIC(BIC_Totl_c0))
1159                 old->pkg_wtd_core_c0 = new->pkg_wtd_core_c0 - old->pkg_wtd_core_c0;
1160         if (DO_BIC(BIC_Any_c0))
1161                 old->pkg_any_core_c0 = new->pkg_any_core_c0 - old->pkg_any_core_c0;
1162         if (DO_BIC(BIC_GFX_c0))
1163                 old->pkg_any_gfxe_c0 = new->pkg_any_gfxe_c0 - old->pkg_any_gfxe_c0;
1164         if (DO_BIC(BIC_CPUGFX))
1165                 old->pkg_both_core_gfxe_c0 = new->pkg_both_core_gfxe_c0 - old->pkg_both_core_gfxe_c0;
1166
1167         old->pc2 = new->pc2 - old->pc2;
1168         if (DO_BIC(BIC_Pkgpc3))
1169                 old->pc3 = new->pc3 - old->pc3;
1170         if (DO_BIC(BIC_Pkgpc6))
1171                 old->pc6 = new->pc6 - old->pc6;
1172         if (DO_BIC(BIC_Pkgpc7))
1173                 old->pc7 = new->pc7 - old->pc7;
1174         old->pc8 = new->pc8 - old->pc8;
1175         old->pc9 = new->pc9 - old->pc9;
1176         old->pc10 = new->pc10 - old->pc10;
1177         old->cpu_lpi = new->cpu_lpi - old->cpu_lpi;
1178         old->sys_lpi = new->sys_lpi - old->sys_lpi;
1179         old->pkg_temp_c = new->pkg_temp_c;
1180
1181         /* flag an error when rc6 counter resets/wraps */
1182         if (old->gfx_rc6_ms >  new->gfx_rc6_ms)
1183                 old->gfx_rc6_ms = -1;
1184         else
1185                 old->gfx_rc6_ms = new->gfx_rc6_ms - old->gfx_rc6_ms;
1186
1187         old->gfx_mhz = new->gfx_mhz;
1188
1189         DELTA_WRAP32(new->energy_pkg, old->energy_pkg);
1190         DELTA_WRAP32(new->energy_cores, old->energy_cores);
1191         DELTA_WRAP32(new->energy_gfx, old->energy_gfx);
1192         DELTA_WRAP32(new->energy_dram, old->energy_dram);
1193         DELTA_WRAP32(new->rapl_pkg_perf_status, old->rapl_pkg_perf_status);
1194         DELTA_WRAP32(new->rapl_dram_perf_status, old->rapl_dram_perf_status);
1195
1196         for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
1197                 if (mp->format == FORMAT_RAW)
1198                         old->counter[i] = new->counter[i];
1199                 else
1200                         old->counter[i] = new->counter[i] - old->counter[i];
1201         }
1202
1203         return 0;
1204 }
1205
1206 void
1207 delta_core(struct core_data *new, struct core_data *old)
1208 {
1209         int i;
1210         struct msr_counter *mp;
1211
1212         old->c3 = new->c3 - old->c3;
1213         old->c6 = new->c6 - old->c6;
1214         old->c7 = new->c7 - old->c7;
1215         old->core_temp_c = new->core_temp_c;
1216         old->mc6_us = new->mc6_us - old->mc6_us;
1217
1218         for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
1219                 if (mp->format == FORMAT_RAW)
1220                         old->counter[i] = new->counter[i];
1221                 else
1222                         old->counter[i] = new->counter[i] - old->counter[i];
1223         }
1224 }
1225
1226 /*
1227  * old = new - old
1228  */
1229 int
1230 delta_thread(struct thread_data *new, struct thread_data *old,
1231         struct core_data *core_delta)
1232 {
1233         int i;
1234         struct msr_counter *mp;
1235
1236         /*
1237          * the timestamps from start of measurement interval are in "old"
1238          * the timestamp from end of measurement interval are in "new"
1239          * over-write old w/ new so we can print end of interval values
1240          */
1241
1242         old->tv_begin = new->tv_begin;
1243         old->tv_end = new->tv_end;
1244
1245         old->tsc = new->tsc - old->tsc;
1246
1247         /* check for TSC < 1 Mcycles over interval */
1248         if (old->tsc < (1000 * 1000))
1249                 errx(-3, "Insanely slow TSC rate, TSC stops in idle?\n"
1250                      "You can disable all c-states by booting with \"idle=poll\"\n"
1251                      "or just the deep ones with \"processor.max_cstate=1\"");
1252
1253         old->c1 = new->c1 - old->c1;
1254
1255         if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz)) {
1256                 if ((new->aperf > old->aperf) && (new->mperf > old->mperf)) {
1257                         old->aperf = new->aperf - old->aperf;
1258                         old->mperf = new->mperf - old->mperf;
1259                 } else {
1260                         return -1;
1261                 }
1262         }
1263
1264
1265         if (use_c1_residency_msr) {
1266                 /*
1267                  * Some models have a dedicated C1 residency MSR,
1268                  * which should be more accurate than the derivation below.
1269                  */
1270         } else {
1271                 /*
1272                  * As counter collection is not atomic,
1273                  * it is possible for mperf's non-halted cycles + idle states
1274                  * to exceed TSC's all cycles: show c1 = 0% in that case.
1275                  */
1276                 if ((old->mperf + core_delta->c3 + core_delta->c6 + core_delta->c7) > (old->tsc * tsc_tweak))
1277                         old->c1 = 0;
1278                 else {
1279                         /* normal case, derive c1 */
1280                         old->c1 = (old->tsc * tsc_tweak) - old->mperf - core_delta->c3
1281                                 - core_delta->c6 - core_delta->c7;
1282                 }
1283         }
1284
1285         if (old->mperf == 0) {
1286                 if (debug > 1)
1287                         fprintf(outf, "cpu%d MPERF 0!\n", old->cpu_id);
1288                 old->mperf = 1; /* divide by 0 protection */
1289         }
1290
1291         if (DO_BIC(BIC_IRQ))
1292                 old->irq_count = new->irq_count - old->irq_count;
1293
1294         if (DO_BIC(BIC_SMI))
1295                 old->smi_count = new->smi_count - old->smi_count;
1296
1297         for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
1298                 if (mp->format == FORMAT_RAW)
1299                         old->counter[i] = new->counter[i];
1300                 else
1301                         old->counter[i] = new->counter[i] - old->counter[i];
1302         }
1303         return 0;
1304 }
1305
1306 int delta_cpu(struct thread_data *t, struct core_data *c,
1307         struct pkg_data *p, struct thread_data *t2,
1308         struct core_data *c2, struct pkg_data *p2)
1309 {
1310         int retval = 0;
1311
1312         /* calculate core delta only for 1st thread in core */
1313         if (t->flags & CPU_IS_FIRST_THREAD_IN_CORE)
1314                 delta_core(c, c2);
1315
1316         /* always calculate thread delta */
1317         retval = delta_thread(t, t2, c2);       /* c2 is core delta */
1318         if (retval)
1319                 return retval;
1320
1321         /* calculate package delta only for 1st core in package */
1322         if (t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)
1323                 retval = delta_package(p, p2);
1324
1325         return retval;
1326 }
1327
1328 void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
1329 {
1330         int i;
1331         struct msr_counter  *mp;
1332
1333         t->tv_begin.tv_sec = 0;
1334         t->tv_begin.tv_usec = 0;
1335         t->tv_end.tv_sec = 0;
1336         t->tv_end.tv_usec = 0;
1337
1338         t->tsc = 0;
1339         t->aperf = 0;
1340         t->mperf = 0;
1341         t->c1 = 0;
1342
1343         t->irq_count = 0;
1344         t->smi_count = 0;
1345
1346         /* tells format_counters to dump all fields from this set */
1347         t->flags = CPU_IS_FIRST_THREAD_IN_CORE | CPU_IS_FIRST_CORE_IN_PACKAGE;
1348
1349         c->c3 = 0;
1350         c->c6 = 0;
1351         c->c7 = 0;
1352         c->mc6_us = 0;
1353         c->core_temp_c = 0;
1354
1355         p->pkg_wtd_core_c0 = 0;
1356         p->pkg_any_core_c0 = 0;
1357         p->pkg_any_gfxe_c0 = 0;
1358         p->pkg_both_core_gfxe_c0 = 0;
1359
1360         p->pc2 = 0;
1361         if (DO_BIC(BIC_Pkgpc3))
1362                 p->pc3 = 0;
1363         if (DO_BIC(BIC_Pkgpc6))
1364                 p->pc6 = 0;
1365         if (DO_BIC(BIC_Pkgpc7))
1366                 p->pc7 = 0;
1367         p->pc8 = 0;
1368         p->pc9 = 0;
1369         p->pc10 = 0;
1370         p->cpu_lpi = 0;
1371         p->sys_lpi = 0;
1372
1373         p->energy_pkg = 0;
1374         p->energy_dram = 0;
1375         p->energy_cores = 0;
1376         p->energy_gfx = 0;
1377         p->rapl_pkg_perf_status = 0;
1378         p->rapl_dram_perf_status = 0;
1379         p->pkg_temp_c = 0;
1380
1381         p->gfx_rc6_ms = 0;
1382         p->gfx_mhz = 0;
1383         for (i = 0, mp = sys.tp; mp; i++, mp = mp->next)
1384                 t->counter[i] = 0;
1385
1386         for (i = 0, mp = sys.cp; mp; i++, mp = mp->next)
1387                 c->counter[i] = 0;
1388
1389         for (i = 0, mp = sys.pp; mp; i++, mp = mp->next)
1390                 p->counter[i] = 0;
1391 }
1392 int sum_counters(struct thread_data *t, struct core_data *c,
1393         struct pkg_data *p)
1394 {
1395         int i;
1396         struct msr_counter *mp;
1397
1398         /* remember first tv_begin */
1399         if (average.threads.tv_begin.tv_sec == 0)
1400                 average.threads.tv_begin = t->tv_begin;
1401
1402         /* remember last tv_end */
1403         average.threads.tv_end = t->tv_end;
1404
1405         average.threads.tsc += t->tsc;
1406         average.threads.aperf += t->aperf;
1407         average.threads.mperf += t->mperf;
1408         average.threads.c1 += t->c1;
1409
1410         average.threads.irq_count += t->irq_count;
1411         average.threads.smi_count += t->smi_count;
1412
1413         for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
1414                 if (mp->format == FORMAT_RAW)
1415                         continue;
1416                 average.threads.counter[i] += t->counter[i];
1417         }
1418
1419         /* sum per-core values only for 1st thread in core */
1420         if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
1421                 return 0;
1422
1423         average.cores.c3 += c->c3;
1424         average.cores.c6 += c->c6;
1425         average.cores.c7 += c->c7;
1426         average.cores.mc6_us += c->mc6_us;
1427
1428         average.cores.core_temp_c = MAX(average.cores.core_temp_c, c->core_temp_c);
1429
1430         for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
1431                 if (mp->format == FORMAT_RAW)
1432                         continue;
1433                 average.cores.counter[i] += c->counter[i];
1434         }
1435
1436         /* sum per-pkg values only for 1st core in pkg */
1437         if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
1438                 return 0;
1439
1440         if (DO_BIC(BIC_Totl_c0))
1441                 average.packages.pkg_wtd_core_c0 += p->pkg_wtd_core_c0;
1442         if (DO_BIC(BIC_Any_c0))
1443                 average.packages.pkg_any_core_c0 += p->pkg_any_core_c0;
1444         if (DO_BIC(BIC_GFX_c0))
1445                 average.packages.pkg_any_gfxe_c0 += p->pkg_any_gfxe_c0;
1446         if (DO_BIC(BIC_CPUGFX))
1447                 average.packages.pkg_both_core_gfxe_c0 += p->pkg_both_core_gfxe_c0;
1448
1449         average.packages.pc2 += p->pc2;
1450         if (DO_BIC(BIC_Pkgpc3))
1451                 average.packages.pc3 += p->pc3;
1452         if (DO_BIC(BIC_Pkgpc6))
1453                 average.packages.pc6 += p->pc6;
1454         if (DO_BIC(BIC_Pkgpc7))
1455                 average.packages.pc7 += p->pc7;
1456         average.packages.pc8 += p->pc8;
1457         average.packages.pc9 += p->pc9;
1458         average.packages.pc10 += p->pc10;
1459
1460         average.packages.cpu_lpi = p->cpu_lpi;
1461         average.packages.sys_lpi = p->sys_lpi;
1462
1463         average.packages.energy_pkg += p->energy_pkg;
1464         average.packages.energy_dram += p->energy_dram;
1465         average.packages.energy_cores += p->energy_cores;
1466         average.packages.energy_gfx += p->energy_gfx;
1467
1468         average.packages.gfx_rc6_ms = p->gfx_rc6_ms;
1469         average.packages.gfx_mhz = p->gfx_mhz;
1470
1471         average.packages.pkg_temp_c = MAX(average.packages.pkg_temp_c, p->pkg_temp_c);
1472
1473         average.packages.rapl_pkg_perf_status += p->rapl_pkg_perf_status;
1474         average.packages.rapl_dram_perf_status += p->rapl_dram_perf_status;
1475
1476         for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
1477                 if (mp->format == FORMAT_RAW)
1478                         continue;
1479                 average.packages.counter[i] += p->counter[i];
1480         }
1481         return 0;
1482 }
1483 /*
1484  * sum the counters for all cpus in the system
1485  * compute the weighted average
1486  */
1487 void compute_average(struct thread_data *t, struct core_data *c,
1488         struct pkg_data *p)
1489 {
1490         int i;
1491         struct msr_counter *mp;
1492
1493         clear_counters(&average.threads, &average.cores, &average.packages);
1494
1495         for_all_cpus(sum_counters, t, c, p);
1496
1497         average.threads.tsc /= topo.num_cpus;
1498         average.threads.aperf /= topo.num_cpus;
1499         average.threads.mperf /= topo.num_cpus;
1500         average.threads.c1 /= topo.num_cpus;
1501
1502         if (average.threads.irq_count > 9999999)
1503                 sums_need_wide_columns = 1;
1504
1505         average.cores.c3 /= topo.num_cores;
1506         average.cores.c6 /= topo.num_cores;
1507         average.cores.c7 /= topo.num_cores;
1508         average.cores.mc6_us /= topo.num_cores;
1509
1510         if (DO_BIC(BIC_Totl_c0))
1511                 average.packages.pkg_wtd_core_c0 /= topo.num_packages;
1512         if (DO_BIC(BIC_Any_c0))
1513                 average.packages.pkg_any_core_c0 /= topo.num_packages;
1514         if (DO_BIC(BIC_GFX_c0))
1515                 average.packages.pkg_any_gfxe_c0 /= topo.num_packages;
1516         if (DO_BIC(BIC_CPUGFX))
1517                 average.packages.pkg_both_core_gfxe_c0 /= topo.num_packages;
1518
1519         average.packages.pc2 /= topo.num_packages;
1520         if (DO_BIC(BIC_Pkgpc3))
1521                 average.packages.pc3 /= topo.num_packages;
1522         if (DO_BIC(BIC_Pkgpc6))
1523                 average.packages.pc6 /= topo.num_packages;
1524         if (DO_BIC(BIC_Pkgpc7))
1525                 average.packages.pc7 /= topo.num_packages;
1526
1527         average.packages.pc8 /= topo.num_packages;
1528         average.packages.pc9 /= topo.num_packages;
1529         average.packages.pc10 /= topo.num_packages;
1530
1531         for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
1532                 if (mp->format == FORMAT_RAW)
1533                         continue;
1534                 if (mp->type == COUNTER_ITEMS) {
1535                         if (average.threads.counter[i] > 9999999)
1536                                 sums_need_wide_columns = 1;
1537                         continue;
1538                 }
1539                 average.threads.counter[i] /= topo.num_cpus;
1540         }
1541         for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
1542                 if (mp->format == FORMAT_RAW)
1543                         continue;
1544                 if (mp->type == COUNTER_ITEMS) {
1545                         if (average.cores.counter[i] > 9999999)
1546                                 sums_need_wide_columns = 1;
1547                 }
1548                 average.cores.counter[i] /= topo.num_cores;
1549         }
1550         for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
1551                 if (mp->format == FORMAT_RAW)
1552                         continue;
1553                 if (mp->type == COUNTER_ITEMS) {
1554                         if (average.packages.counter[i] > 9999999)
1555                                 sums_need_wide_columns = 1;
1556                 }
1557                 average.packages.counter[i] /= topo.num_packages;
1558         }
1559 }
1560
/*
 * rdtsc()
 *
 * Execute the x86 RDTSC instruction and return the 64-bit value formed
 * from its EDX (upper 32 bits) and EAX (lower 32 bits) outputs.
 */
static unsigned long long rdtsc(void)
{
	unsigned int eax, edx;
	unsigned long long stamp;

	__asm__ __volatile__("rdtsc" : "=a" (eax), "=d" (edx));

	stamp = (unsigned long long)edx << 32;
	stamp |= eax;

	return stamp;
}
1569
/*
 * fopen_or_die()
 *
 * fopen() wrapper: returns the opened stream, or terminates the program
 * through err() with exit status 1 when the open fails.
 */
FILE *fopen_or_die(const char *path, const char *mode)
{
	FILE *stream;

	stream = fopen(path, mode);
	if (stream == NULL) {
		err(1, "%s: open failed", path);
	}

	return stream;
}
/*
 * snapshot_sysfs_counter()
 *
 * Open the file at path, parse one unsigned decimal integer from it and
 * return that value.
 *
 * The program exits through err() if the file cannot be opened
 * (see fopen_or_die()) or if no integer can be parsed.
 */
unsigned long long snapshot_sysfs_counter(char *path)
{
	FILE *fp;
	int retval;
	unsigned long long counter;

	fp = fopen_or_die(path, "r");

	/* %llu matches the unsigned long long destination */
	retval = fscanf(fp, "%llu", &counter);
	if (retval != 1)
		err(1, "snapshot_sysfs_counter(%s)", path);

	fclose(fp);

	return counter;
}
1602
1603 int get_mp(int cpu, struct msr_counter *mp, unsigned long long *counterp)
1604 {
1605         if (mp->msr_num != 0) {
1606                 if (get_msr(cpu, mp->msr_num, counterp))
1607                         return -1;
1608         } else {
1609                 char path[128 + PATH_BYTES];
1610
1611                 if (mp->flags & SYSFS_PERCPU) {
1612                         sprintf(path, "/sys/devices/system/cpu/cpu%d/%s",
1613                                  cpu, mp->path);
1614
1615                         *counterp = snapshot_sysfs_counter(path);
1616                 } else {
1617                         *counterp = snapshot_sysfs_counter(mp->path);
1618                 }
1619         }
1620
1621         return 0;
1622 }
1623
1624 /*
1625  * get_counters(...)
1626  * migrate to cpu
1627  * acquire and record local counters for that cpu
1628  */
1629 int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
1630 {
1631         int cpu = t->cpu_id;
1632         unsigned long long msr;
1633         int aperf_mperf_retry_count = 0;
1634         struct msr_counter *mp;
1635         int i;
1636
1637
1638         gettimeofday(&t->tv_begin, (struct timezone *)NULL);
1639
1640         if (cpu_migrate(cpu)) {
1641                 fprintf(outf, "Could not migrate to CPU %d\n", cpu);
1642                 return -1;
1643         }
1644
1645 retry:
1646         t->tsc = rdtsc();       /* we are running on local CPU of interest */
1647
1648         if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz)) {
1649                 unsigned long long tsc_before, tsc_between, tsc_after, aperf_time, mperf_time;
1650
1651                 /*
1652                  * The TSC, APERF and MPERF must be read together for
1653                  * APERF/MPERF and MPERF/TSC to give accurate results.
1654                  *
1655                  * Unfortunately, APERF and MPERF are read by
1656                  * individual system call, so delays may occur
1657                  * between them.  If the time to read them
1658                  * varies by a large amount, we re-read them.
1659                  */
1660
1661                 /*
1662                  * This initial dummy APERF read has been seen to
1663                  * reduce jitter in the subsequent reads.
1664                  */
1665
1666                 if (get_msr(cpu, MSR_IA32_APERF, &t->aperf))
1667                         return -3;
1668
1669                 t->tsc = rdtsc();       /* re-read close to APERF */
1670
1671                 tsc_before = t->tsc;
1672
1673                 if (get_msr(cpu, MSR_IA32_APERF, &t->aperf))
1674                         return -3;
1675
1676                 tsc_between = rdtsc();
1677
1678                 if (get_msr(cpu, MSR_IA32_MPERF, &t->mperf))
1679                         return -4;
1680
1681                 tsc_after = rdtsc();
1682
1683                 aperf_time = tsc_between - tsc_before;
1684                 mperf_time = tsc_after - tsc_between;
1685
1686                 /*
1687                  * If the system call latency to read APERF and MPERF
1688                  * differ by more than 2x, then try again.
1689                  */
1690                 if ((aperf_time > (2 * mperf_time)) || (mperf_time > (2 * aperf_time))) {
1691                         aperf_mperf_retry_count++;
1692                         if (aperf_mperf_retry_count < 5)
1693                                 goto retry;
1694                         else
1695                                 warnx("cpu%d jitter %lld %lld",
1696                                         cpu, aperf_time, mperf_time);
1697                 }
1698                 aperf_mperf_retry_count = 0;
1699
1700                 t->aperf = t->aperf * aperf_mperf_multiplier;
1701                 t->mperf = t->mperf * aperf_mperf_multiplier;
1702         }
1703
1704         if (DO_BIC(BIC_IRQ))
1705                 t->irq_count = irqs_per_cpu[cpu];
1706         if (DO_BIC(BIC_SMI)) {
1707                 if (get_msr(cpu, MSR_SMI_COUNT, &msr))
1708                         return -5;
1709                 t->smi_count = msr & 0xFFFFFFFF;
1710         }
1711         if (DO_BIC(BIC_CPU_c1) && use_c1_residency_msr) {
1712                 if (get_msr(cpu, MSR_CORE_C1_RES, &t->c1))
1713                         return -6;
1714         }
1715
1716         for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
1717                 if (get_mp(cpu, mp, &t->counter[i]))
1718                         return -10;
1719         }
1720
1721         /* collect core counters only for 1st thread in core */
1722         if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
1723                 goto done;
1724
1725         if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates && !do_cnl_cstates) {
1726                 if (get_msr(cpu, MSR_CORE_C3_RESIDENCY, &c->c3))
1727                         return -6;
1728         }
1729
1730         if (DO_BIC(BIC_CPU_c6) && !do_knl_cstates) {
1731                 if (get_msr(cpu, MSR_CORE_C6_RESIDENCY, &c->c6))
1732                         return -7;
1733         } else if (do_knl_cstates) {
1734                 if (get_msr(cpu, MSR_KNL_CORE_C6_RESIDENCY, &c->c6))
1735                         return -7;
1736         }
1737
1738         if (DO_BIC(BIC_CPU_c7))
1739                 if (get_msr(cpu, MSR_CORE_C7_RESIDENCY, &c->c7))
1740                         return -8;
1741
1742         if (DO_BIC(BIC_Mod_c6))
1743                 if (get_msr(cpu, MSR_MODULE_C6_RES_MS, &c->mc6_us))
1744                         return -8;
1745
1746         if (DO_BIC(BIC_CoreTmp)) {
1747                 if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr))
1748                         return -9;
1749                 c->core_temp_c = tcc_activation_temp - ((msr >> 16) & 0x7F);
1750         }
1751
1752         for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
1753                 if (get_mp(cpu, mp, &c->counter[i]))
1754                         return -10;
1755         }
1756
1757         /* collect package counters only for 1st core in package */
1758         if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
1759                 goto done;
1760
1761         if (DO_BIC(BIC_Totl_c0)) {
1762                 if (get_msr(cpu, MSR_PKG_WEIGHTED_CORE_C0_RES, &p->pkg_wtd_core_c0))
1763                         return -10;
1764         }
1765         if (DO_BIC(BIC_Any_c0)) {
1766                 if (get_msr(cpu, MSR_PKG_ANY_CORE_C0_RES, &p->pkg_any_core_c0))
1767                         return -11;
1768         }
1769         if (DO_BIC(BIC_GFX_c0)) {
1770                 if (get_msr(cpu, MSR_PKG_ANY_GFXE_C0_RES, &p->pkg_any_gfxe_c0))
1771                         return -12;
1772         }
1773         if (DO_BIC(BIC_CPUGFX)) {
1774                 if (get_msr(cpu, MSR_PKG_BOTH_CORE_GFXE_C0_RES, &p->pkg_both_core_gfxe_c0))
1775                         return -13;
1776         }
1777         if (DO_BIC(BIC_Pkgpc3))
1778                 if (get_msr(cpu, MSR_PKG_C3_RESIDENCY, &p->pc3))
1779                         return -9;
1780         if (DO_BIC(BIC_Pkgpc6)) {
1781                 if (do_slm_cstates) {
1782                         if (get_msr(cpu, MSR_ATOM_PKG_C6_RESIDENCY, &p->pc6))
1783                                 return -10;
1784                 } else {
1785                         if (get_msr(cpu, MSR_PKG_C6_RESIDENCY, &p->pc6))
1786                                 return -10;
1787                 }
1788         }
1789
1790         if (DO_BIC(BIC_Pkgpc2))
1791                 if (get_msr(cpu, MSR_PKG_C2_RESIDENCY, &p->pc2))
1792                         return -11;
1793         if (DO_BIC(BIC_Pkgpc7))
1794                 if (get_msr(cpu, MSR_PKG_C7_RESIDENCY, &p->pc7))
1795                         return -12;
1796         if (DO_BIC(BIC_Pkgpc8))
1797                 if (get_msr(cpu, MSR_PKG_C8_RESIDENCY, &p->pc8))
1798                         return -13;
1799         if (DO_BIC(BIC_Pkgpc9))
1800                 if (get_msr(cpu, MSR_PKG_C9_RESIDENCY, &p->pc9))
1801                         return -13;
1802         if (DO_BIC(BIC_Pkgpc10))
1803                 if (get_msr(cpu, MSR_PKG_C10_RESIDENCY, &p->pc10))
1804                         return -13;
1805
1806         if (DO_BIC(BIC_CPU_LPI))
1807                 p->cpu_lpi = cpuidle_cur_cpu_lpi_us;
1808         if (DO_BIC(BIC_SYS_LPI))
1809                 p->sys_lpi = cpuidle_cur_sys_lpi_us;
1810
1811         if (do_rapl & RAPL_PKG) {
1812                 if (get_msr(cpu, MSR_PKG_ENERGY_STATUS, &msr))
1813                         return -13;
1814                 p->energy_pkg = msr & 0xFFFFFFFF;
1815         }
1816         if (do_rapl & RAPL_CORES_ENERGY_STATUS) {
1817                 if (get_msr(cpu, MSR_PP0_ENERGY_STATUS, &msr))
1818                         return -14;
1819                 p->energy_cores = msr & 0xFFFFFFFF;
1820         }
1821         if (do_rapl & RAPL_DRAM) {
1822                 if (get_msr(cpu, MSR_DRAM_ENERGY_STATUS, &msr))
1823                         return -15;
1824                 p->energy_dram = msr & 0xFFFFFFFF;
1825         }
1826         if (do_rapl & RAPL_GFX) {
1827                 if (get_msr(cpu, MSR_PP1_ENERGY_STATUS, &msr))
1828                         return -16;
1829                 p->energy_gfx = msr & 0xFFFFFFFF;
1830         }
1831         if (do_rapl & RAPL_PKG_PERF_STATUS) {
1832                 if (get_msr(cpu, MSR_PKG_PERF_STATUS, &msr))
1833                         return -16;
1834                 p->rapl_pkg_perf_status = msr & 0xFFFFFFFF;
1835         }
1836         if (do_rapl & RAPL_DRAM_PERF_STATUS) {
1837                 if (get_msr(cpu, MSR_DRAM_PERF_STATUS, &msr))
1838                         return -16;
1839                 p->rapl_dram_perf_status = msr & 0xFFFFFFFF;
1840         }
1841         if (DO_BIC(BIC_PkgTmp)) {
1842                 if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr))
1843                         return -17;
1844                 p->pkg_temp_c = tcc_activation_temp - ((msr >> 16) & 0x7F);
1845         }
1846
1847         if (DO_BIC(BIC_GFX_rc6))
1848                 p->gfx_rc6_ms = gfx_cur_rc6_ms;
1849
1850         if (DO_BIC(BIC_GFXMHz))
1851                 p->gfx_mhz = gfx_cur_mhz;
1852
1853         for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
1854                 if (get_mp(cpu, mp, &p->counter[i]))
1855                         return -10;
1856         }
1857 done:
1858         gettimeofday(&t->tv_end, (struct timezone *)NULL);
1859
1860         return 0;
1861 }
1862
/*
 * MSR_PKG_CST_CONFIG_CONTROL decoding for pkg_cstate_limit:
 * If you change the values, note they are used both in comparisons
 * (>= PCL__7) and to index pkg_cstate_limit_strings[].
 */

#define PCLUKN 0 /* Unknown */
#define PCLRSV 1 /* Reserved */
#define PCL__0 2 /* PC0 */
#define PCL__1 3 /* PC1 */
#define PCL__2 4 /* PC2 */
#define PCL__3 5 /* PC3 */
#define PCL__4 6 /* PC4 */
#define PCL__6 7 /* PC6 */
#define PCL_6N 8 /* PC6 No Retention */
#define PCL_6R 9 /* PC6 Retention */
#define PCL__7 10 /* PC7 */
#define PCL_7S 11 /* PC7 Shrink */
#define PCL__8 12 /* PC8 */
#define PCL__9 13 /* PC9 */
#define PCLUNL 14 /* Unlimited */

int pkg_cstate_limit = PCLUKN;

/*
 * Printable name for each PCL* value.  Designated initializers tie every
 * string to its index, so PCLUKN prints "unknown" and PCLRSV prints
 * "reserved" (the positional table previously had these two swapped).
 */
char *pkg_cstate_limit_strings[] = {
	[PCLUKN] = "unknown",
	[PCLRSV] = "reserved",
	[PCL__0] = "pc0",
	[PCL__1] = "pc1",
	[PCL__2] = "pc2",
	[PCL__3] = "pc3",
	[PCL__4] = "pc4",
	[PCL__6] = "pc6",
	[PCL_6N] = "pc6n",
	[PCL_6R] = "pc6r",
	[PCL__7] = "pc7",
	[PCL_7S] = "pc7s",
	[PCL__8] = "pc8",
	[PCL__9] = "pc9",
	[PCLUNL] = "unlimited",
};

/* 16-entry lookup tables whose elements are PCL* values */
int nhm_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCL__3, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
int snb_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCL__7, PCL_7S, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
int hsw_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL__3, PCL__6, PCL__7, PCL_7S, PCL__8, PCL__9, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
int slv_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCLRSV, PCLRSV, PCL__4, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7};
int amt_pkg_cstate_limits[16] = {PCLUNL, PCL__1, PCL__2, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
int phi_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
int bxt_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
int skx_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
1897
1898
1899 static void
1900 calculate_tsc_tweak()
1901 {
1902         tsc_tweak = base_hz / tsc_hz;
1903 }
1904
1905 static void
1906 dump_nhm_platform_info(void)
1907 {
1908         unsigned long long msr;
1909         unsigned int ratio;
1910
1911         get_msr(base_cpu, MSR_PLATFORM_INFO, &msr);
1912
1913         fprintf(outf, "cpu%d: MSR_PLATFORM_INFO: 0x%08llx\n", base_cpu, msr);
1914
1915         ratio = (msr >> 40) & 0xFF;
1916         fprintf(outf, "%d * %.1f = %.1f MHz max efficiency frequency\n",
1917                 ratio, bclk, ratio * bclk);
1918
1919         ratio = (msr >> 8) & 0xFF;
1920         fprintf(outf, "%d * %.1f = %.1f MHz base frequency\n",
1921                 ratio, bclk, ratio * bclk);
1922
1923         get_msr(base_cpu, MSR_IA32_POWER_CTL, &msr);
1924         fprintf(outf, "cpu%d: MSR_IA32_POWER_CTL: 0x%08llx (C1E auto-promotion: %sabled)\n",
1925                 base_cpu, msr, msr & 0x2 ? "EN" : "DIS");
1926
1927         return;
1928 }
1929
1930 static void
1931 dump_hsw_turbo_ratio_limits(void)
1932 {
1933         unsigned long long msr;
1934         unsigned int ratio;
1935
1936         get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT2, &msr);
1937
1938         fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT2: 0x%08llx\n", base_cpu, msr);
1939
1940         ratio = (msr >> 8) & 0xFF;
1941         if (ratio)
1942                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 18 active cores\n",
1943                         ratio, bclk, ratio * bclk);
1944
1945         ratio = (msr >> 0) & 0xFF;
1946         if (ratio)
1947                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 17 active cores\n",
1948                         ratio, bclk, ratio * bclk);
1949         return;
1950 }
1951
1952 static void
1953 dump_ivt_turbo_ratio_limits(void)
1954 {
1955         unsigned long long msr;
1956         unsigned int ratio;
1957
1958         get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT1, &msr);
1959
1960         fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", base_cpu, msr);
1961
1962         ratio = (msr >> 56) & 0xFF;
1963         if (ratio)
1964                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 16 active cores\n",
1965                         ratio, bclk, ratio * bclk);
1966
1967         ratio = (msr >> 48) & 0xFF;
1968         if (ratio)
1969                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 15 active cores\n",
1970                         ratio, bclk, ratio * bclk);
1971
1972         ratio = (msr >> 40) & 0xFF;
1973         if (ratio)
1974                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 14 active cores\n",
1975                         ratio, bclk, ratio * bclk);
1976
1977         ratio = (msr >> 32) & 0xFF;
1978         if (ratio)
1979                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 13 active cores\n",
1980                         ratio, bclk, ratio * bclk);
1981
1982         ratio = (msr >> 24) & 0xFF;
1983         if (ratio)
1984                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 12 active cores\n",
1985                         ratio, bclk, ratio * bclk);
1986
1987         ratio = (msr >> 16) & 0xFF;
1988         if (ratio)
1989                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 11 active cores\n",
1990                         ratio, bclk, ratio * bclk);
1991
1992         ratio = (msr >> 8) & 0xFF;
1993         if (ratio)
1994                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 10 active cores\n",
1995                         ratio, bclk, ratio * bclk);
1996
1997         ratio = (msr >> 0) & 0xFF;
1998         if (ratio)
1999                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 9 active cores\n",
2000                         ratio, bclk, ratio * bclk);
2001         return;
2002 }
2003 int has_turbo_ratio_group_limits(int family, int model)
2004 {
2005
2006         if (!genuine_intel)
2007                 return 0;
2008
2009         switch (model) {
2010         case INTEL_FAM6_ATOM_GOLDMONT:
2011         case INTEL_FAM6_SKYLAKE_X:
2012         case INTEL_FAM6_ATOM_DENVERTON:
2013                 return 1;
2014         }
2015         return 0;
2016 }
2017
2018 static void
2019 dump_turbo_ratio_limits(int family, int model)
2020 {
2021         unsigned long long msr, core_counts;
2022         unsigned int ratio, group_size;
2023
2024         get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT, &msr);
2025         fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n", base_cpu, msr);
2026
2027         if (has_turbo_ratio_group_limits(family, model)) {
2028                 get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT1, &core_counts);
2029                 fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", base_cpu, core_counts);
2030         } else {
2031                 core_counts = 0x0807060504030201;
2032         }
2033
2034         ratio = (msr >> 56) & 0xFF;
2035         group_size = (core_counts >> 56) & 0xFF;
2036         if (ratio)
2037                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
2038                         ratio, bclk, ratio * bclk, group_size);
2039
2040         ratio = (msr >> 48) & 0xFF;
2041         group_size = (core_counts >> 48) & 0xFF;
2042         if (ratio)
2043                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
2044                         ratio, bclk, ratio * bclk, group_size);
2045
2046         ratio = (msr >> 40) & 0xFF;
2047         group_size = (core_counts >> 40) & 0xFF;
2048         if (ratio)
2049                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
2050                         ratio, bclk, ratio * bclk, group_size);
2051
2052         ratio = (msr >> 32) & 0xFF;
2053         group_size = (core_counts >> 32) & 0xFF;
2054         if (ratio)
2055                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
2056                         ratio, bclk, ratio * bclk, group_size);
2057
2058         ratio = (msr >> 24) & 0xFF;
2059         group_size = (core_counts >> 24) & 0xFF;
2060         if (ratio)
2061                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
2062                         ratio, bclk, ratio * bclk, group_size);
2063
2064         ratio = (msr >> 16) & 0xFF;
2065         group_size = (core_counts >> 16) & 0xFF;
2066         if (ratio)
2067                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
2068                         ratio, bclk, ratio * bclk, group_size);
2069
2070         ratio = (msr >> 8) & 0xFF;
2071         group_size = (core_counts >> 8) & 0xFF;
2072         if (ratio)
2073                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
2074                         ratio, bclk, ratio * bclk, group_size);
2075
2076         ratio = (msr >> 0) & 0xFF;
2077         group_size = (core_counts >> 0) & 0xFF;
2078         if (ratio)
2079                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
2080                         ratio, bclk, ratio * bclk, group_size);
2081         return;
2082 }
2083
2084 static void
2085 dump_atom_turbo_ratio_limits(void)
2086 {
2087         unsigned long long msr;
2088         unsigned int ratio;
2089
2090         get_msr(base_cpu, MSR_ATOM_CORE_RATIOS, &msr);
2091         fprintf(outf, "cpu%d: MSR_ATOM_CORE_RATIOS: 0x%08llx\n", base_cpu, msr & 0xFFFFFFFF);
2092
2093         ratio = (msr >> 0) & 0x3F;
2094         if (ratio)
2095                 fprintf(outf, "%d * %.1f = %.1f MHz minimum operating frequency\n",
2096                         ratio, bclk, ratio * bclk);
2097
2098         ratio = (msr >> 8) & 0x3F;
2099         if (ratio)
2100                 fprintf(outf, "%d * %.1f = %.1f MHz low frequency mode (LFM)\n",
2101                         ratio, bclk, ratio * bclk);
2102
2103         ratio = (msr >> 16) & 0x3F;
2104         if (ratio)
2105                 fprintf(outf, "%d * %.1f = %.1f MHz base frequency\n",
2106                         ratio, bclk, ratio * bclk);
2107
2108         get_msr(base_cpu, MSR_ATOM_CORE_TURBO_RATIOS, &msr);
2109         fprintf(outf, "cpu%d: MSR_ATOM_CORE_TURBO_RATIOS: 0x%08llx\n", base_cpu, msr & 0xFFFFFFFF);
2110
2111         ratio = (msr >> 24) & 0x3F;
2112         if (ratio)
2113                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 4 active cores\n",
2114                         ratio, bclk, ratio * bclk);
2115
2116         ratio = (msr >> 16) & 0x3F;
2117         if (ratio)
2118                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 3 active cores\n",
2119                         ratio, bclk, ratio * bclk);
2120
2121         ratio = (msr >> 8) & 0x3F;
2122         if (ratio)
2123                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 2 active cores\n",
2124                         ratio, bclk, ratio * bclk);
2125
2126         ratio = (msr >> 0) & 0x3F;
2127         if (ratio)
2128                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 1 active core\n",
2129                         ratio, bclk, ratio * bclk);
2130 }
2131
2132 static void
2133 dump_knl_turbo_ratio_limits(void)
2134 {
2135         const unsigned int buckets_no = 7;
2136
2137         unsigned long long msr;
2138         int delta_cores, delta_ratio;
2139         int i, b_nr;
2140         unsigned int cores[buckets_no];
2141         unsigned int ratio[buckets_no];
2142
2143         get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT, &msr);
2144
2145         fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n",
2146                 base_cpu, msr);
2147
2148         /**
2149          * Turbo encoding in KNL is as follows:
2150          * [0] -- Reserved
2151          * [7:1] -- Base value of number of active cores of bucket 1.
2152          * [15:8] -- Base value of freq ratio of bucket 1.
2153          * [20:16] -- +ve delta of number of active cores of bucket 2.
2154          * i.e. active cores of bucket 2 =
2155          * active cores of bucket 1 + delta
2156          * [23:21] -- Negative delta of freq ratio of bucket 2.
2157          * i.e. freq ratio of bucket 2 =
2158          * freq ratio of bucket 1 - delta
2159          * [28:24]-- +ve delta of number of active cores of bucket 3.
2160          * [31:29]-- -ve delta of freq ratio of bucket 3.
2161          * [36:32]-- +ve delta of number of active cores of bucket 4.
2162          * [39:37]-- -ve delta of freq ratio of bucket 4.
2163          * [44:40]-- +ve delta of number of active cores of bucket 5.
2164          * [47:45]-- -ve delta of freq ratio of bucket 5.
2165          * [52:48]-- +ve delta of number of active cores of bucket 6.
2166          * [55:53]-- -ve delta of freq ratio of bucket 6.
2167          * [60:56]-- +ve delta of number of active cores of bucket 7.
2168          * [63:61]-- -ve delta of freq ratio of bucket 7.
2169          */
2170
2171         b_nr = 0;
2172         cores[b_nr] = (msr & 0xFF) >> 1;
2173         ratio[b_nr] = (msr >> 8) & 0xFF;
2174
2175         for (i = 16; i < 64; i += 8) {
2176                 delta_cores = (msr >> i) & 0x1F;
2177                 delta_ratio = (msr >> (i + 5)) & 0x7;
2178
2179                 cores[b_nr + 1] = cores[b_nr] + delta_cores;
2180                 ratio[b_nr + 1] = ratio[b_nr] - delta_ratio;
2181                 b_nr++;
2182         }
2183
2184         for (i = buckets_no - 1; i >= 0; i--)
2185                 if (i > 0 ? ratio[i] != ratio[i - 1] : 1)
2186                         fprintf(outf,
2187                                 "%d * %.1f = %.1f MHz max turbo %d active cores\n",
2188                                 ratio[i], bclk, ratio[i] * bclk, cores[i]);
2189 }
2190
2191 static void
2192 dump_nhm_cst_cfg(void)
2193 {
2194         unsigned long long msr;
2195
2196         get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr);
2197
2198         fprintf(outf, "cpu%d: MSR_PKG_CST_CONFIG_CONTROL: 0x%08llx", base_cpu, msr);
2199
2200         fprintf(outf, " (%s%s%s%s%slocked, pkg-cstate-limit=%d (%s)",
2201                 (msr & SNB_C3_AUTO_UNDEMOTE) ? "UNdemote-C3, " : "",
2202                 (msr & SNB_C1_AUTO_UNDEMOTE) ? "UNdemote-C1, " : "",
2203                 (msr & NHM_C3_AUTO_DEMOTE) ? "demote-C3, " : "",
2204                 (msr & NHM_C1_AUTO_DEMOTE) ? "demote-C1, " : "",
2205                 (msr & (1 << 15)) ? "" : "UN",
2206                 (unsigned int)msr & 0xF,
2207                 pkg_cstate_limit_strings[pkg_cstate_limit]);
2208
2209 #define AUTOMATIC_CSTATE_CONVERSION             (1UL << 16)
2210         if (has_automatic_cstate_conversion) {
2211                 fprintf(outf, ", automatic c-state conversion=%s",
2212                         (msr & AUTOMATIC_CSTATE_CONVERSION) ? "on" : "off");
2213         }
2214
2215         fprintf(outf, ")\n");
2216
2217         return;
2218 }
2219
2220 static void
2221 dump_config_tdp(void)
2222 {
2223         unsigned long long msr;
2224
2225         get_msr(base_cpu, MSR_CONFIG_TDP_NOMINAL, &msr);
2226         fprintf(outf, "cpu%d: MSR_CONFIG_TDP_NOMINAL: 0x%08llx", base_cpu, msr);
2227         fprintf(outf, " (base_ratio=%d)\n", (unsigned int)msr & 0xFF);
2228
2229         get_msr(base_cpu, MSR_CONFIG_TDP_LEVEL_1, &msr);
2230         fprintf(outf, "cpu%d: MSR_CONFIG_TDP_LEVEL_1: 0x%08llx (", base_cpu, msr);
2231         if (msr) {
2232                 fprintf(outf, "PKG_MIN_PWR_LVL1=%d ", (unsigned int)(msr >> 48) & 0x7FFF);
2233                 fprintf(outf, "PKG_MAX_PWR_LVL1=%d ", (unsigned int)(msr >> 32) & 0x7FFF);
2234                 fprintf(outf, "LVL1_RATIO=%d ", (unsigned int)(msr >> 16) & 0xFF);
2235                 fprintf(outf, "PKG_TDP_LVL1=%d", (unsigned int)(msr) & 0x7FFF);
2236         }
2237         fprintf(outf, ")\n");
2238
2239         get_msr(base_cpu, MSR_CONFIG_TDP_LEVEL_2, &msr);
2240         fprintf(outf, "cpu%d: MSR_CONFIG_TDP_LEVEL_2: 0x%08llx (", base_cpu, msr);
2241         if (msr) {
2242                 fprintf(outf, "PKG_MIN_PWR_LVL2=%d ", (unsigned int)(msr >> 48) & 0x7FFF);
2243                 fprintf(outf, "PKG_MAX_PWR_LVL2=%d ", (unsigned int)(msr >> 32) & 0x7FFF);
2244                 fprintf(outf, "LVL2_RATIO=%d ", (unsigned int)(msr >> 16) & 0xFF);
2245                 fprintf(outf, "PKG_TDP_LVL2=%d", (unsigned int)(msr) & 0x7FFF);
2246         }
2247         fprintf(outf, ")\n");
2248
2249         get_msr(base_cpu, MSR_CONFIG_TDP_CONTROL, &msr);
2250         fprintf(outf, "cpu%d: MSR_CONFIG_TDP_CONTROL: 0x%08llx (", base_cpu, msr);
2251         if ((msr) & 0x3)
2252                 fprintf(outf, "TDP_LEVEL=%d ", (unsigned int)(msr) & 0x3);
2253         fprintf(outf, " lock=%d", (unsigned int)(msr >> 31) & 1);
2254         fprintf(outf, ")\n");
2255
2256         get_msr(base_cpu, MSR_TURBO_ACTIVATION_RATIO, &msr);
2257         fprintf(outf, "cpu%d: MSR_TURBO_ACTIVATION_RATIO: 0x%08llx (", base_cpu, msr);
2258         fprintf(outf, "MAX_NON_TURBO_RATIO=%d", (unsigned int)(msr) & 0xFF);
2259         fprintf(outf, " lock=%d", (unsigned int)(msr >> 31) & 1);
2260         fprintf(outf, ")\n");
2261 }
2262
2263 unsigned int irtl_time_units[] = {1, 32, 1024, 32768, 1048576, 33554432, 0, 0 };
2264
2265 void print_irtl(void)
2266 {
2267         unsigned long long msr;
2268
2269         get_msr(base_cpu, MSR_PKGC3_IRTL, &msr);
2270         fprintf(outf, "cpu%d: MSR_PKGC3_IRTL: 0x%08llx (", base_cpu, msr);
2271         fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
2272                 (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
2273
2274         get_msr(base_cpu, MSR_PKGC6_IRTL, &msr);
2275         fprintf(outf, "cpu%d: MSR_PKGC6_IRTL: 0x%08llx (", base_cpu, msr);
2276         fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
2277                 (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
2278
2279         get_msr(base_cpu, MSR_PKGC7_IRTL, &msr);
2280         fprintf(outf, "cpu%d: MSR_PKGC7_IRTL: 0x%08llx (", base_cpu, msr);
2281         fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
2282                 (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
2283
2284         if (!do_irtl_hsw)
2285                 return;
2286
2287         get_msr(base_cpu, MSR_PKGC8_IRTL, &msr);
2288         fprintf(outf, "cpu%d: MSR_PKGC8_IRTL: 0x%08llx (", base_cpu, msr);
2289         fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
2290                 (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
2291
2292         get_msr(base_cpu, MSR_PKGC9_IRTL, &msr);
2293         fprintf(outf, "cpu%d: MSR_PKGC9_IRTL: 0x%08llx (", base_cpu, msr);
2294         fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
2295                 (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
2296
2297         get_msr(base_cpu, MSR_PKGC10_IRTL, &msr);
2298         fprintf(outf, "cpu%d: MSR_PKGC10_IRTL: 0x%08llx (", base_cpu, msr);
2299         fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
2300                 (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
2301
2302 }
2303 void free_fd_percpu(void)
2304 {
2305         int i;
2306
2307         for (i = 0; i < topo.max_cpu_num + 1; ++i) {
2308                 if (fd_percpu[i] != 0)
2309                         close(fd_percpu[i]);
2310         }
2311
2312         free(fd_percpu);
2313 }
2314
2315 void free_all_buffers(void)
2316 {
2317         int i;
2318
2319         CPU_FREE(cpu_present_set);
2320         cpu_present_set = NULL;
2321         cpu_present_setsize = 0;
2322
2323         CPU_FREE(cpu_affinity_set);
2324         cpu_affinity_set = NULL;
2325         cpu_affinity_setsize = 0;
2326
2327         free(thread_even);
2328         free(core_even);
2329         free(package_even);
2330
2331         thread_even = NULL;
2332         core_even = NULL;
2333         package_even = NULL;
2334
2335         free(thread_odd);
2336         free(core_odd);
2337         free(package_odd);
2338
2339         thread_odd = NULL;
2340         core_odd = NULL;
2341         package_odd = NULL;
2342
2343         free(output_buffer);
2344         output_buffer = NULL;
2345         outp = NULL;
2346
2347         free_fd_percpu();
2348
2349         free(irq_column_2_cpu);
2350         free(irqs_per_cpu);
2351
2352         for (i = 0; i <= topo.max_cpu_num; ++i) {
2353                 if (cpus[i].put_ids)
2354                         CPU_FREE(cpus[i].put_ids);
2355         }
2356         free(cpus);
2357 }
2358
2359
2360 /*
2361  * Parse a file containing a single int.
2362  */
2363 int parse_int_file(const char *fmt, ...)
2364 {
2365         va_list args;
2366         char path[PATH_MAX];
2367         FILE *filep;
2368         int value;
2369
2370         va_start(args, fmt);
2371         vsnprintf(path, sizeof(path), fmt, args);
2372         va_end(args);
2373         filep = fopen_or_die(path, "r");
2374         if (fscanf(filep, "%d", &value) != 1)
2375                 err(1, "%s: failed to parse number from file", path);
2376         fclose(filep);
2377         return value;
2378 }
2379
/*
 * cpu_is_first_core_in_package(cpu)
 * return 1 if the leading number in this cpu's core_siblings_list
 * is the cpu itself, else 0
 */
int cpu_is_first_core_in_package(int cpu)
{
	int first_sibling;

	first_sibling = parse_int_file("/sys/devices/system/cpu/cpu%d/topology/core_siblings_list", cpu);

	return first_sibling == cpu;
}
2388
/*
 * get_physical_package_id(cpu)
 * return the integer stored in this cpu's topology/physical_package_id
 */
int get_physical_package_id(int cpu)
{
	int package_id;

	package_id = parse_int_file("/sys/devices/system/cpu/cpu%d/topology/physical_package_id", cpu);

	return package_id;
}
2393
/*
 * get_core_id(cpu)
 * return the integer stored in this cpu's topology/core_id
 */
int get_core_id(int cpu)
{
	int core_id;

	core_id = parse_int_file("/sys/devices/system/cpu/cpu%d/topology/core_id", cpu);

	return core_id;
}
2398
2399 void set_node_data(void)
2400 {
2401         char path[80];
2402         FILE *filep;
2403         int pkg, node, cpu;
2404
2405         struct pkg_node_info {
2406                 int count;
2407                 int min;
2408         } *pni;
2409
2410         pni = calloc(topo.num_packages, sizeof(struct pkg_node_info));
2411         if (!pni)
2412                 err(1, "calloc pkg_node_count");
2413
2414         for (pkg = 0; pkg < topo.num_packages; pkg++)
2415                 pni[pkg].min = topo.num_cpus;
2416
2417         for (node = 0; node <= topo.max_node_num; node++) {
2418                 /* find the "first" cpu in the node */
2419                 sprintf(path, "/sys/bus/node/devices/node%d/cpulist", node);
2420                 filep = fopen(path, "r");
2421                 if (!filep)
2422                         continue;
2423                 fscanf(filep, "%d", &cpu);
2424                 fclose(filep);
2425
2426                 pkg = cpus[cpu].physical_package_id;
2427                 pni[pkg].count++;
2428
2429                 if (node < pni[pkg].min)
2430                         pni[pkg].min = node;
2431         }
2432
2433         for (pkg = 0; pkg < topo.num_packages; pkg++)
2434                 if (pni[pkg].count > topo.nodes_per_pkg)
2435                         topo.nodes_per_pkg = pni[0].count;
2436
2437         for (cpu = 0; cpu < topo.num_cpus; cpu++) {
2438                 pkg = cpus[cpu].physical_package_id;
2439                 node = cpus[cpu].physical_node_id;
2440                 cpus[cpu].logical_node_id = node - pni[pkg].min;
2441         }
2442         free(pni);
2443
2444 }
2445
2446 int get_physical_node_id(struct cpu_topology *thiscpu)
2447 {
2448         char path[80];
2449         FILE *filep;
2450         int i;
2451         int cpu = thiscpu->logical_cpu_id;
2452
2453         for (i = 0; i <= topo.max_cpu_num; i++) {
2454                 sprintf(path, "/sys/devices/system/cpu/cpu%d/node%i/cpulist",
2455                         cpu, i);
2456                 filep = fopen(path, "r");
2457                 if (!filep)
2458                         continue;
2459                 fclose(filep);
2460                 return i;
2461         }
2462         return -1;
2463 }
2464
2465 int get_thread_siblings(struct cpu_topology *thiscpu)
2466 {
2467         char path[80], character;
2468         FILE *filep;
2469         unsigned long map;
2470         int so, shift, sib_core;
2471         int cpu = thiscpu->logical_cpu_id;
2472         int offset = topo.max_cpu_num + 1;
2473         size_t size;
2474         int thread_id = 0;
2475
2476         thiscpu->put_ids = CPU_ALLOC((topo.max_cpu_num + 1));
2477         if (thiscpu->thread_id < 0)
2478                 thiscpu->thread_id = thread_id++;
2479         if (!thiscpu->put_ids)
2480                 return -1;
2481
2482         size = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
2483         CPU_ZERO_S(size, thiscpu->put_ids);
2484
2485         sprintf(path,
2486                 "/sys/devices/system/cpu/cpu%d/topology/thread_siblings", cpu);
2487         filep = fopen_or_die(path, "r");
2488         do {
2489                 offset -= BITMASK_SIZE;
2490                 fscanf(filep, "%lx%c", &map, &character);
2491                 for (shift = 0; shift < BITMASK_SIZE; shift++) {
2492                         if ((map >> shift) & 0x1) {
2493                                 so = shift + offset;
2494                                 sib_core = get_core_id(so);
2495                                 if (sib_core == thiscpu->physical_core_id) {
2496                                         CPU_SET_S(so, size, thiscpu->put_ids);
2497                                         if ((so != cpu) &&
2498                                             (cpus[so].thread_id < 0))
2499                                                 cpus[so].thread_id =
2500                                                                     thread_id++;
2501                                 }
2502                         }
2503                 }
2504         } while (!strncmp(&character, ",", 1));
2505         fclose(filep);
2506
2507         return CPU_COUNT_S(size, thiscpu->put_ids);
2508 }
2509
2510 /*
2511  * run func(thread, core, package) in topology order
2512  * skip non-present cpus
2513  */
2514
2515 int for_all_cpus_2(int (func)(struct thread_data *, struct core_data *,
2516         struct pkg_data *, struct thread_data *, struct core_data *,
2517         struct pkg_data *), struct thread_data *thread_base,
2518         struct core_data *core_base, struct pkg_data *pkg_base,
2519         struct thread_data *thread_base2, struct core_data *core_base2,
2520         struct pkg_data *pkg_base2)
2521 {
2522         int retval, pkg_no, node_no, core_no, thread_no;
2523
2524         for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) {
2525                 for (node_no = 0; node_no < topo.nodes_per_pkg; ++node_no) {
2526                         for (core_no = 0; core_no < topo.cores_per_node;
2527                              ++core_no) {
2528                                 for (thread_no = 0; thread_no <
2529                                         topo.threads_per_core; ++thread_no) {
2530                                         struct thread_data *t, *t2;
2531                                         struct core_data *c, *c2;
2532                                         struct pkg_data *p, *p2;
2533
2534                                         t = GET_THREAD(thread_base, thread_no,
2535                                                        core_no, node_no,
2536                                                        pkg_no);
2537
2538                                         if (cpu_is_not_present(t->cpu_id))
2539                                                 continue;
2540
2541                                         t2 = GET_THREAD(thread_base2, thread_no,
2542                                                         core_no, node_no,
2543                                                         pkg_no);
2544
2545                                         c = GET_CORE(core_base, core_no,
2546                                                      node_no, pkg_no);
2547                                         c2 = GET_CORE(core_base2, core_no,
2548                                                       node_no,
2549                                                       pkg_no);
2550
2551                                         p = GET_PKG(pkg_base, pkg_no);
2552                                         p2 = GET_PKG(pkg_base2, pkg_no);
2553
2554                                         retval = func(t, c, p, t2, c2, p2);
2555                                         if (retval)
2556                                                 return retval;
2557                                 }
2558                         }
2559                 }
2560         }
2561         return 0;
2562 }
2563
2564 /*
2565  * run func(cpu) on every cpu in /proc/stat
2566  * return max_cpu number
2567  */
2568 int for_all_proc_cpus(int (func)(int))
2569 {
2570         FILE *fp;
2571         int cpu_num;
2572         int retval;
2573
2574         fp = fopen_or_die(proc_stat, "r");
2575
2576         retval = fscanf(fp, "cpu %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n");
2577         if (retval != 0)
2578                 err(1, "%s: failed to parse format", proc_stat);
2579
2580         while (1) {
2581                 retval = fscanf(fp, "cpu%u %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n", &cpu_num);
2582                 if (retval != 1)
2583                         break;
2584
2585                 retval = func(cpu_num);
2586                 if (retval) {
2587                         fclose(fp);
2588                         return(retval);
2589                 }
2590         }
2591         fclose(fp);
2592         return 0;
2593 }
2594
2595 void re_initialize(void)
2596 {
2597         free_all_buffers();
2598         setup_all_buffers();
2599         printf("turbostat: re-initialized with num_cpus %d\n", topo.num_cpus);
2600 }
2601
2602 void set_max_cpu_num(void)
2603 {
2604         FILE *filep;
2605         unsigned long dummy;
2606
2607         topo.max_cpu_num = 0;
2608         filep = fopen_or_die(
2609                         "/sys/devices/system/cpu/cpu0/topology/thread_siblings",
2610                         "r");
2611         while (fscanf(filep, "%lx,", &dummy) == 1)
2612                 topo.max_cpu_num += BITMASK_SIZE;
2613         fclose(filep);
2614         topo.max_cpu_num--; /* 0 based */
2615 }
2616
2617 /*
2618  * count_cpus()
2619  * remember the last one seen, it will be the max
2620  */
2621 int count_cpus(int cpu)
2622 {
2623         topo.num_cpus++;
2624         return 0;
2625 }
2626 int mark_cpu_present(int cpu)
2627 {
2628         CPU_SET_S(cpu, cpu_present_setsize, cpu_present_set);
2629         return 0;
2630 }
2631
2632 int init_thread_id(int cpu)
2633 {
2634         cpus[cpu].thread_id = -1;
2635         return 0;
2636 }
2637
2638 /*
2639  * snapshot_proc_interrupts()
2640  *
2641  * read and record summary of /proc/interrupts
2642  *
2643  * return 1 if config change requires a restart, else return 0
2644  */
2645 int snapshot_proc_interrupts(void)
2646 {
2647         static FILE *fp;
2648         int column, retval;
2649
2650         if (fp == NULL)
2651                 fp = fopen_or_die("/proc/interrupts", "r");
2652         else
2653                 rewind(fp);
2654
2655         /* read 1st line of /proc/interrupts to get cpu* name for each column */
2656         for (column = 0; column < topo.num_cpus; ++column) {
2657                 int cpu_number;
2658
2659                 retval = fscanf(fp, " CPU%d", &cpu_number);
2660                 if (retval != 1)
2661                         break;
2662
2663                 if (cpu_number > topo.max_cpu_num) {
2664                         warn("/proc/interrupts: cpu%d: > %d", cpu_number, topo.max_cpu_num);
2665                         return 1;
2666                 }
2667
2668                 irq_column_2_cpu[column] = cpu_number;
2669                 irqs_per_cpu[cpu_number] = 0;
2670         }
2671
2672         /* read /proc/interrupt count lines and sum up irqs per cpu */
2673         while (1) {
2674                 int column;
2675                 char buf[64];
2676
2677                 retval = fscanf(fp, " %s:", buf);       /* flush irq# "N:" */
2678                 if (retval != 1)
2679                         break;
2680
2681                 /* read the count per cpu */
2682                 for (column = 0; column < topo.num_cpus; ++column) {
2683
2684                         int cpu_number, irq_count;
2685
2686                         retval = fscanf(fp, " %d", &irq_count);
2687                         if (retval != 1)
2688                                 break;
2689
2690                         cpu_number = irq_column_2_cpu[column];
2691                         irqs_per_cpu[cpu_number] += irq_count;
2692
2693                 }
2694
2695                 while (getc(fp) != '\n')
2696                         ;       /* flush interrupt description */
2697
2698         }
2699         return 0;
2700 }
2701 /*
2702  * snapshot_gfx_rc6_ms()
2703  *
2704  * record snapshot of
2705  * /sys/class/drm/card0/power/rc6_residency_ms
2706  *
2707  * return 1 if config change requires a restart, else return 0
2708  */
2709 int snapshot_gfx_rc6_ms(void)
2710 {
2711         FILE *fp;
2712         int retval;
2713
2714         fp = fopen_or_die("/sys/class/drm/card0/power/rc6_residency_ms", "r");
2715
2716         retval = fscanf(fp, "%lld", &gfx_cur_rc6_ms);
2717         if (retval != 1)
2718                 err(1, "GFX rc6");
2719
2720         fclose(fp);
2721
2722         return 0;
2723 }
2724 /*
2725  * snapshot_gfx_mhz()
2726  *
2727  * record snapshot of
2728  * /sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz
2729  *
2730  * return 1 if config change requires a restart, else return 0
2731  */
2732 int snapshot_gfx_mhz(void)
2733 {
2734         static FILE *fp;
2735         int retval;
2736
2737         if (fp == NULL)
2738                 fp = fopen_or_die("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", "r");
2739         else {
2740                 rewind(fp);
2741                 fflush(fp);
2742         }
2743
2744         retval = fscanf(fp, "%d", &gfx_cur_mhz);
2745         if (retval != 1)
2746                 err(1, "GFX MHz");
2747
2748         return 0;
2749 }
2750
2751 /*
2752  * snapshot_cpu_lpi()
2753  *
2754  * record snapshot of
2755  * /sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us
2756  *
2757  * return 1 if config change requires a restart, else return 0
2758  */
2759 int snapshot_cpu_lpi_us(void)
2760 {
2761         FILE *fp;
2762         int retval;
2763
2764         fp = fopen_or_die("/sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us", "r");
2765
2766         retval = fscanf(fp, "%lld", &cpuidle_cur_cpu_lpi_us);
2767         if (retval != 1)
2768                 err(1, "CPU LPI");
2769
2770         fclose(fp);
2771
2772         return 0;
2773 }
2774 /*
2775  * snapshot_sys_lpi()
2776  *
2777  * record snapshot of
2778  * /sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us
2779  *
2780  * return 1 if config change requires a restart, else return 0
2781  */
2782 int snapshot_sys_lpi_us(void)
2783 {
2784         FILE *fp;
2785         int retval;
2786
2787         fp = fopen_or_die("/sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us", "r");
2788
2789         retval = fscanf(fp, "%lld", &cpuidle_cur_sys_lpi_us);
2790         if (retval != 1)
2791                 err(1, "SYS LPI");
2792
2793         fclose(fp);
2794
2795         return 0;
2796 }
2797 /*
2798  * snapshot /proc and /sys files
2799  *
2800  * return 1 if configuration restart needed, else return 0
2801  */
2802 int snapshot_proc_sysfs_files(void)
2803 {
2804         if (DO_BIC(BIC_IRQ))
2805                 if (snapshot_proc_interrupts())
2806                         return 1;
2807
2808         if (DO_BIC(BIC_GFX_rc6))
2809                 snapshot_gfx_rc6_ms();
2810
2811         if (DO_BIC(BIC_GFXMHz))
2812                 snapshot_gfx_mhz();
2813
2814         if (DO_BIC(BIC_CPU_LPI))
2815                 snapshot_cpu_lpi_us();
2816
2817         if (DO_BIC(BIC_SYS_LPI))
2818                 snapshot_sys_lpi_us();
2819
2820         return 0;
2821 }
2822
2823 int exit_requested;
2824
2825 static void signal_handler (int signal)
2826 {
2827         switch (signal) {
2828         case SIGINT:
2829                 exit_requested = 1;
2830                 if (debug)
2831                         fprintf(stderr, " SIGINT\n");
2832                 break;
2833         case SIGUSR1:
2834                 if (debug > 1)
2835                         fprintf(stderr, "SIGUSR1\n");
2836                 break;
2837         }
2838         /* make sure this manually-invoked interval is at least 1ms long */
2839         nanosleep(&one_msec, NULL);
2840 }
2841
2842 void setup_signal_handler(void)
2843 {
2844         struct sigaction sa;
2845
2846         memset(&sa, 0, sizeof(sa));
2847
2848         sa.sa_handler = &signal_handler;
2849
2850         if (sigaction(SIGINT, &sa, NULL) < 0)
2851                 err(1, "sigaction SIGINT");
2852         if (sigaction(SIGUSR1, &sa, NULL) < 0)
2853                 err(1, "sigaction SIGUSR1");
2854 }
2855
2856 void do_sleep(void)
2857 {
2858         struct timeval select_timeout;
2859         fd_set readfds;
2860         int retval;
2861
2862         FD_ZERO(&readfds);
2863         FD_SET(0, &readfds);
2864
2865         if (!isatty(fileno(stdin))) {
2866                 nanosleep(&interval_ts, NULL);
2867                 return;
2868         }
2869
2870         select_timeout = interval_tv;
2871         retval = select(1, &readfds, NULL, NULL, &select_timeout);
2872
2873         if (retval == 1) {
2874                 switch (getc(stdin)) {
2875                 case 'q':
2876                         exit_requested = 1;
2877                         break;
2878                 }
2879                 /* make sure this manually-invoked interval is at least 1ms long */
2880                 nanosleep(&one_msec, NULL);
2881         }
2882 }
2883
2884 void turbostat_loop()
2885 {
2886         int retval;
2887         int restarted = 0;
2888         int done_iters = 0;
2889
2890         setup_signal_handler();
2891
2892 restart:
2893         restarted++;
2894
2895         snapshot_proc_sysfs_files();
2896         retval = for_all_cpus(get_counters, EVEN_COUNTERS);
2897         if (retval < -1) {
2898                 exit(retval);
2899         } else if (retval == -1) {
2900                 if (restarted > 1) {
2901                         exit(retval);
2902                 }
2903                 re_initialize();
2904                 goto restart;
2905         }
2906         restarted = 0;
2907         done_iters = 0;
2908         gettimeofday(&tv_even, (struct timezone *)NULL);
2909
2910         while (1) {
2911                 if (for_all_proc_cpus(cpu_is_not_present)) {
2912                         re_initialize();
2913                         goto restart;
2914                 }
2915                 do_sleep();
2916                 if (snapshot_proc_sysfs_files())
2917                         goto restart;
2918                 retval = for_all_cpus(get_counters, ODD_COUNTERS);
2919                 if (retval < -1) {
2920                         exit(retval);
2921                 } else if (retval == -1) {
2922                         re_initialize();
2923                         goto restart;
2924                 }
2925                 gettimeofday(&tv_odd, (struct timezone *)NULL);
2926                 timersub(&tv_odd, &tv_even, &tv_delta);
2927                 if (for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS)) {
2928                         re_initialize();
2929                         goto restart;
2930                 }
2931                 compute_average(EVEN_COUNTERS);
2932                 format_all_counters(EVEN_COUNTERS);
2933                 flush_output_stdout();
2934                 if (exit_requested)
2935                         break;
2936                 if (num_iterations && ++done_iters >= num_iterations)
2937                         break;
2938                 do_sleep();
2939                 if (snapshot_proc_sysfs_files())
2940                         goto restart;
2941                 retval = for_all_cpus(get_counters, EVEN_COUNTERS);
2942                 if (retval < -1) {
2943                         exit(retval);
2944                 } else if (retval == -1) {
2945                         re_initialize();
2946                         goto restart;
2947                 }
2948                 gettimeofday(&tv_even, (struct timezone *)NULL);
2949                 timersub(&tv_even, &tv_odd, &tv_delta);
2950                 if (for_all_cpus_2(delta_cpu, EVEN_COUNTERS, ODD_COUNTERS)) {
2951                         re_initialize();
2952                         goto restart;
2953                 }
2954                 compute_average(ODD_COUNTERS);
2955                 format_all_counters(ODD_COUNTERS);
2956                 flush_output_stdout();
2957                 if (exit_requested)
2958                         break;
2959                 if (num_iterations && ++done_iters >= num_iterations)
2960                         break;
2961         }
2962 }
2963
2964 void check_dev_msr()
2965 {
2966         struct stat sb;
2967         char pathname[32];
2968
2969         sprintf(pathname, "/dev/cpu/%d/msr", base_cpu);
2970         if (stat(pathname, &sb))
2971                 if (system("/sbin/modprobe msr > /dev/null 2>&1"))
2972                         err(-5, "no /dev/cpu/0/msr, Try \"# modprobe msr\" ");
2973 }
2974
2975 void check_permissions()
2976 {
2977         struct __user_cap_header_struct cap_header_data;
2978         cap_user_header_t cap_header = &cap_header_data;
2979         struct __user_cap_data_struct cap_data_data;
2980         cap_user_data_t cap_data = &cap_data_data;
2981         extern int capget(cap_user_header_t hdrp, cap_user_data_t datap);
2982         int do_exit = 0;
2983         char pathname[32];
2984
2985         /* check for CAP_SYS_RAWIO */
2986         cap_header->pid = getpid();
2987         cap_header->version = _LINUX_CAPABILITY_VERSION;
2988         if (capget(cap_header, cap_data) < 0)
2989                 err(-6, "capget(2) failed");
2990
2991         if ((cap_data->effective & (1 << CAP_SYS_RAWIO)) == 0) {
2992                 do_exit++;
2993                 warnx("capget(CAP_SYS_RAWIO) failed,"
2994                         " try \"# setcap cap_sys_rawio=ep %s\"", progname);
2995         }
2996
2997         /* test file permissions */
2998         sprintf(pathname, "/dev/cpu/%d/msr", base_cpu);
2999         if (euidaccess(pathname, R_OK)) {
3000                 do_exit++;
3001                 warn("/dev/cpu/0/msr open failed, try chown or chmod +r /dev/cpu/*/msr");
3002         }
3003
3004         /* if all else fails, thell them to be root */
3005         if (do_exit)
3006                 if (getuid() != 0)
3007                         warnx("... or simply run as root");
3008
3009         if (do_exit)
3010                 exit(-6);
3011 }
3012
3013 /*
3014  * NHM adds support for additional MSRs:
3015  *
3016  * MSR_SMI_COUNT                   0x00000034
3017  *
3018  * MSR_PLATFORM_INFO               0x000000ce
3019  * MSR_PKG_CST_CONFIG_CONTROL     0x000000e2
3020  *
3021  * MSR_MISC_PWR_MGMT               0x000001aa
3022  *
3023  * MSR_PKG_C3_RESIDENCY            0x000003f8
3024  * MSR_PKG_C6_RESIDENCY            0x000003f9
3025  * MSR_CORE_C3_RESIDENCY           0x000003fc
3026  * MSR_CORE_C6_RESIDENCY           0x000003fd
3027  *
3028  * Side effect:
3029  * sets global pkg_cstate_limit to decode MSR_PKG_CST_CONFIG_CONTROL
3030  * sets has_misc_feature_control
3031  */
3032 int probe_nhm_msrs(unsigned int family, unsigned int model)
3033 {
3034         unsigned long long msr;
3035         unsigned int base_ratio;
3036         int *pkg_cstate_limits;
3037
3038         if (!genuine_intel)
3039                 return 0;
3040
3041         if (family != 6)
3042                 return 0;
3043
3044         bclk = discover_bclk(family, model);
3045
3046         switch (model) {
3047         case INTEL_FAM6_NEHALEM_EP:     /* Core i7, Xeon 5500 series - Bloomfield, Gainstown NHM-EP */
3048         case INTEL_FAM6_NEHALEM:        /* Core i7 and i5 Processor - Clarksfield, Lynnfield, Jasper Forest */
3049         case 0x1F:      /* Core i7 and i5 Processor - Nehalem */
3050         case INTEL_FAM6_WESTMERE:       /* Westmere Client - Clarkdale, Arrandale */
3051         case INTEL_FAM6_WESTMERE_EP:    /* Westmere EP - Gulftown */
3052         case INTEL_FAM6_NEHALEM_EX:     /* Nehalem-EX Xeon - Beckton */
3053         case INTEL_FAM6_WESTMERE_EX:    /* Westmere-EX Xeon - Eagleton */
3054                 pkg_cstate_limits = nhm_pkg_cstate_limits;
3055                 break;
3056         case INTEL_FAM6_SANDYBRIDGE:    /* SNB */
3057         case INTEL_FAM6_SANDYBRIDGE_X:  /* SNB Xeon */
3058         case INTEL_FAM6_IVYBRIDGE:      /* IVB */
3059         case INTEL_FAM6_IVYBRIDGE_X:    /* IVB Xeon */
3060                 pkg_cstate_limits = snb_pkg_cstate_limits;
3061                 has_misc_feature_control = 1;
3062                 break;
3063         case INTEL_FAM6_HASWELL_CORE:   /* HSW */
3064         case INTEL_FAM6_HASWELL_X:      /* HSX */
3065         case INTEL_FAM6_HASWELL_ULT:    /* HSW */
3066         case INTEL_FAM6_HASWELL_GT3E:   /* HSW */
3067         case INTEL_FAM6_BROADWELL_CORE: /* BDW */
3068         case INTEL_FAM6_BROADWELL_GT3E: /* BDW */
3069         case INTEL_FAM6_BROADWELL_X:    /* BDX */
3070         case INTEL_FAM6_BROADWELL_XEON_D:       /* BDX-DE */
3071         case INTEL_FAM6_SKYLAKE_MOBILE: /* SKL */
3072         case INTEL_FAM6_SKYLAKE_DESKTOP:        /* SKL */
3073         case INTEL_FAM6_KABYLAKE_MOBILE:        /* KBL */
3074         case INTEL_FAM6_KABYLAKE_DESKTOP:       /* KBL */
3075         case INTEL_FAM6_CANNONLAKE_MOBILE:      /* CNL */
3076                 pkg_cstate_limits = hsw_pkg_cstate_limits;
3077                 has_misc_feature_control = 1;
3078                 break;
3079         case INTEL_FAM6_SKYLAKE_X:      /* SKX */
3080                 pkg_cstate_limits = skx_pkg_cstate_limits;
3081                 has_misc_feature_control = 1;
3082                 break;
3083         case INTEL_FAM6_ATOM_SILVERMONT1:       /* BYT */
3084                 no_MSR_MISC_PWR_MGMT = 1;
3085         case INTEL_FAM6_ATOM_SILVERMONT2:       /* AVN */
3086                 pkg_cstate_limits = slv_pkg_cstate_limits;
3087                 break;
3088         case INTEL_FAM6_ATOM_AIRMONT:   /* AMT */
3089                 pkg_cstate_limits = amt_pkg_cstate_limits;
3090                 no_MSR_MISC_PWR_MGMT = 1;
3091                 break;
3092         case INTEL_FAM6_XEON_PHI_KNL:   /* PHI */
3093         case INTEL_FAM6_XEON_PHI_KNM:
3094                 pkg_cstate_limits = phi_pkg_cstate_limits;
3095                 break;
3096         case INTEL_FAM6_ATOM_GOLDMONT:  /* BXT */
3097         case INTEL_FAM6_ATOM_GEMINI_LAKE:
3098         case INTEL_FAM6_ATOM_DENVERTON: /* DNV */
3099                 pkg_cstate_limits = bxt_pkg_cstate_limits;
3100                 break;
3101         default:
3102                 return 0;
3103         }
3104         get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr);
3105         pkg_cstate_limit = pkg_cstate_limits[msr & 0xF];
3106
3107         get_msr(base_cpu, MSR_PLATFORM_INFO, &msr);
3108         base_ratio = (msr >> 8) & 0xFF;
3109
3110         base_hz = base_ratio * bclk * 1000000;
3111         has_base_hz = 1;
3112         return 1;
3113 }
3114 /*
3115  * SLV client has support for unique MSRs:
3116  *
3117  * MSR_CC6_DEMOTION_POLICY_CONFIG
3118  * MSR_MC6_DEMOTION_POLICY_CONFIG
3119  */
3120
3121 int has_slv_msrs(unsigned int family, unsigned int model)
3122 {
3123         if (!genuine_intel)
3124                 return 0;
3125
3126         switch (model) {
3127         case INTEL_FAM6_ATOM_SILVERMONT1:
3128         case INTEL_FAM6_ATOM_MERRIFIELD:
3129         case INTEL_FAM6_ATOM_MOOREFIELD:
3130                 return 1;
3131         }
3132         return 0;
3133 }
3134 int is_dnv(unsigned int family, unsigned int model)
3135 {
3136
3137         if (!genuine_intel)
3138                 return 0;
3139
3140         switch (model) {
3141         case INTEL_FAM6_ATOM_DENVERTON:
3142                 return 1;
3143         }
3144         return 0;
3145 }
3146 int is_bdx(unsigned int family, unsigned int model)
3147 {
3148
3149         if (!genuine_intel)
3150                 return 0;
3151
3152         switch (model) {
3153         case INTEL_FAM6_BROADWELL_X:
3154         case INTEL_FAM6_BROADWELL_XEON_D:
3155                 return 1;
3156         }
3157         return 0;
3158 }
3159 int is_skx(unsigned int family, unsigned int model)
3160 {
3161
3162         if (!genuine_intel)
3163                 return 0;
3164
3165         switch (model) {
3166         case INTEL_FAM6_SKYLAKE_X:
3167                 return 1;
3168         }
3169         return 0;
3170 }
3171
3172 int has_turbo_ratio_limit(unsigned int family, unsigned int model)
3173 {
3174         if (has_slv_msrs(family, model))
3175                 return 0;
3176
3177         switch (model) {
3178         /* Nehalem compatible, but do not include turbo-ratio limit support */
3179         case INTEL_FAM6_NEHALEM_EX:     /* Nehalem-EX Xeon - Beckton */
3180         case INTEL_FAM6_WESTMERE_EX:    /* Westmere-EX Xeon - Eagleton */
3181         case INTEL_FAM6_XEON_PHI_KNL:   /* PHI - Knights Landing (different MSR definition) */
3182         case INTEL_FAM6_XEON_PHI_KNM:
3183                 return 0;
3184         default:
3185                 return 1;
3186         }
3187 }
/*
 * has_atom_turbo_ratio_limit()
 * Returns 1 exactly when has_slv_msrs() reports true for this
 * family/model, 0 otherwise.
 */
int has_atom_turbo_ratio_limit(unsigned int family, unsigned int model)
{
	return has_slv_msrs(family, model) ? 1 : 0;
}
3195 int has_ivt_turbo_ratio_limit(unsigned int family, unsigned int model)
3196 {
3197         if (!genuine_intel)
3198                 return 0;
3199
3200         if (family != 6)
3201                 return 0;
3202
3203         switch (model) {
3204         case INTEL_FAM6_IVYBRIDGE_X:    /* IVB Xeon */
3205         case INTEL_FAM6_HASWELL_X:      /* HSW Xeon */
3206                 return 1;
3207         default:
3208                 return 0;
3209         }
3210 }
3211 int has_hsw_turbo_ratio_limit(unsigned int family, unsigned int model)
3212 {
3213         if (!genuine_intel)
3214                 return 0;
3215
3216         if (family != 6)
3217                 return 0;
3218
3219         switch (model) {
3220         case INTEL_FAM6_HASWELL_X:      /* HSW Xeon */
3221                 return 1;
3222         default:
3223                 return 0;
3224         }
3225 }
3226
3227 int has_knl_turbo_ratio_limit(unsigned int family, unsigned int model)
3228 {
3229         if (!genuine_intel)
3230                 return 0;
3231
3232         if (family != 6)
3233                 return 0;
3234
3235         switch (model) {
3236         case INTEL_FAM6_XEON_PHI_KNL:   /* Knights Landing */
3237         case INTEL_FAM6_XEON_PHI_KNM:
3238                 return 1;
3239         default:
3240                 return 0;
3241         }
3242 }
3243 int has_glm_turbo_ratio_limit(unsigned int family, unsigned int model)
3244 {
3245         if (!genuine_intel)
3246                 return 0;
3247
3248         if (family != 6)
3249                 return 0;
3250
3251         switch (model) {
3252         case INTEL_FAM6_ATOM_GOLDMONT:
3253         case INTEL_FAM6_SKYLAKE_X:
3254                 return 1;
3255         default:
3256                 return 0;
3257         }
3258 }
3259 int has_config_tdp(unsigned int family, unsigned int model)
3260 {
3261         if (!genuine_intel)
3262                 return 0;
3263
3264         if (family != 6)
3265                 return 0;
3266
3267         switch (model) {
3268         case INTEL_FAM6_IVYBRIDGE:      /* IVB */
3269         case INTEL_FAM6_HASWELL_CORE:   /* HSW */
3270         case INTEL_FAM6_HASWELL_X:      /* HSX */
3271         case INTEL_FAM6_HASWELL_ULT:    /* HSW */
3272         case INTEL_FAM6_HASWELL_GT3E:   /* HSW */
3273         case INTEL_FAM6_BROADWELL_CORE: /* BDW */
3274         case INTEL_FAM6_BROADWELL_GT3E: /* BDW */
3275         case INTEL_FAM6_BROADWELL_X:    /* BDX */
3276         case INTEL_FAM6_BROADWELL_XEON_D:       /* BDX-DE */
3277         case INTEL_FAM6_SKYLAKE_MOBILE: /* SKL */
3278         case INTEL_FAM6_SKYLAKE_DESKTOP:        /* SKL */
3279         case INTEL_FAM6_KABYLAKE_MOBILE:        /* KBL */
3280         case INTEL_FAM6_KABYLAKE_DESKTOP:       /* KBL */
3281         case INTEL_FAM6_CANNONLAKE_MOBILE:      /* CNL */
3282         case INTEL_FAM6_SKYLAKE_X:      /* SKX */
3283
3284         case INTEL_FAM6_XEON_PHI_KNL:   /* Knights Landing */
3285         case INTEL_FAM6_XEON_PHI_KNM:
3286                 return 1;
3287         default:
3288                 return 0;
3289         }
3290 }
3291
3292 static void
3293 dump_cstate_pstate_config_info(unsigned int family, unsigned int model)
3294 {
3295         if (!do_nhm_platform_info)
3296                 return;
3297
3298         dump_nhm_platform_info();
3299
3300         if (has_hsw_turbo_ratio_limit(family, model))
3301                 dump_hsw_turbo_ratio_limits();
3302
3303         if (has_ivt_turbo_ratio_limit(family, model))
3304                 dump_ivt_turbo_ratio_limits();
3305
3306         if (has_turbo_ratio_limit(family, model))
3307                 dump_turbo_ratio_limits(family, model);
3308
3309         if (has_atom_turbo_ratio_limit(family, model))
3310                 dump_atom_turbo_ratio_limits();
3311
3312         if (has_knl_turbo_ratio_limit(family, model))
3313                 dump_knl_turbo_ratio_limits();
3314
3315         if (has_config_tdp(family, model))
3316                 dump_config_tdp();
3317
3318         dump_nhm_cst_cfg();
3319 }
3320
3321 static void
3322 dump_sysfs_cstate_config(void)
3323 {
3324         char path[64];
3325         char name_buf[16];
3326         char desc[64];
3327         FILE *input;
3328         int state;
3329         char *sp;
3330
3331         if (!DO_BIC(BIC_sysfs))
3332                 return;
3333
3334         for (state = 0; state < 10; ++state) {
3335
3336                 sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name",
3337                         base_cpu, state);
3338                 input = fopen(path, "r");
3339                 if (input == NULL)
3340                         continue;
3341                 fgets(name_buf, sizeof(name_buf), input);
3342
3343                  /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */
3344                 sp = strchr(name_buf, '-');
3345                 if (!sp)
3346                         sp = strchrnul(name_buf, '\n');
3347                 *sp = '\0';
3348
3349                 fclose(input);
3350
3351                 sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/desc",
3352                         base_cpu, state);
3353                 input = fopen(path, "r");
3354                 if (input == NULL)
3355                         continue;
3356                 fgets(desc, sizeof(desc), input);
3357
3358                 fprintf(outf, "cpu%d: %s: %s", base_cpu, name_buf, desc);
3359                 fclose(input);
3360         }
3361 }
3362 static void
3363 dump_sysfs_pstate_config(void)
3364 {
3365         char path[64];
3366         char driver_buf[64];
3367         char governor_buf[64];
3368         FILE *input;
3369         int turbo;
3370
3371         sprintf(path, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_driver",
3372                         base_cpu);
3373         input = fopen(path, "r");
3374         if (input == NULL) {
3375                 fprintf(stderr, "NSFOD %s\n", path);
3376                 return;
3377         }
3378         fgets(driver_buf, sizeof(driver_buf), input);
3379         fclose(input);
3380
3381         sprintf(path, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_governor",
3382                         base_cpu);
3383         input = fopen(path, "r");
3384         if (input == NULL) {
3385                 fprintf(stderr, "NSFOD %s\n", path);
3386                 return;
3387         }
3388         fgets(governor_buf, sizeof(governor_buf), input);
3389         fclose(input);
3390
3391         fprintf(outf, "cpu%d: cpufreq driver: %s", base_cpu, driver_buf);
3392         fprintf(outf, "cpu%d: cpufreq governor: %s", base_cpu, governor_buf);
3393
3394         sprintf(path, "/sys/devices/system/cpu/cpufreq/boost");
3395         input = fopen(path, "r");
3396         if (input != NULL) {
3397                 fscanf(input, "%d", &turbo);
3398                 fprintf(outf, "cpufreq boost: %d\n", turbo);
3399                 fclose(input);
3400         }
3401
3402         sprintf(path, "/sys/devices/system/cpu/intel_pstate/no_turbo");
3403         input = fopen(path, "r");
3404         if (input != NULL) {
3405                 fscanf(input, "%d", &turbo);
3406                 fprintf(outf, "cpufreq intel_pstate no_turbo: %d\n", turbo);
3407                 fclose(input);
3408         }
3409 }
3410
3411
3412 /*
3413  * print_epb()
3414  * Decode the ENERGY_PERF_BIAS MSR
3415  */
3416 int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p)
3417 {
3418         unsigned long long msr;
3419         char *epb_string;
3420         int cpu;
3421
3422         if (!has_epb)
3423                 return 0;
3424
3425         cpu = t->cpu_id;
3426
3427         /* EPB is per-package */
3428         if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
3429                 return 0;
3430
3431         if (cpu_migrate(cpu)) {
3432                 fprintf(outf, "Could not migrate to CPU %d\n", cpu);
3433                 return -1;
3434         }
3435
3436         if (get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, &msr))
3437                 return 0;
3438
3439         switch (msr & 0xF) {
3440         case ENERGY_PERF_BIAS_PERFORMANCE:
3441                 epb_string = "performance";
3442                 break;
3443         case ENERGY_PERF_BIAS_NORMAL:
3444                 epb_string = "balanced";
3445                 break;
3446         case ENERGY_PERF_BIAS_POWERSAVE:
3447                 epb_string = "powersave";
3448                 break;
3449         default:
3450                 epb_string = "custom";
3451                 break;
3452         }
3453         fprintf(outf, "cpu%d: MSR_IA32_ENERGY_PERF_BIAS: 0x%08llx (%s)\n", cpu, msr, epb_string);
3454
3455         return 0;
3456 }
3457 /*
3458  * print_hwp()
3459  * Decode the MSR_HWP_CAPABILITIES
3460  */
3461 int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p)
3462 {
3463         unsigned long long msr;
3464         int cpu;
3465
3466         if (!has_hwp)
3467                 return 0;
3468
3469         cpu = t->cpu_id;
3470
3471         /* MSR_HWP_CAPABILITIES is per-package */
3472         if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
3473                 return 0;
3474
3475         if (cpu_migrate(cpu)) {
3476                 fprintf(outf, "Could not migrate to CPU %d\n", cpu);
3477                 return -1;
3478         }
3479
3480         if (get_msr(cpu, MSR_PM_ENABLE, &msr))
3481                 return 0;
3482
3483         fprintf(outf, "cpu%d: MSR_PM_ENABLE: 0x%08llx (%sHWP)\n",
3484                 cpu, msr, (msr & (1 << 0)) ? "" : "No-");
3485
3486         /* MSR_PM_ENABLE[1] == 1 if HWP is enabled and MSRs visible */
3487         if ((msr & (1 << 0)) == 0)
3488                 return 0;
3489
3490         if (get_msr(cpu, MSR_HWP_CAPABILITIES, &msr))
3491                 return 0;
3492
3493         fprintf(outf, "cpu%d: MSR_HWP_CAPABILITIES: 0x%08llx "
3494                         "(high %d guar %d eff %d low %d)\n",
3495                         cpu, msr,
3496                         (unsigned int)HWP_HIGHEST_PERF(msr),
3497                         (unsigned int)HWP_GUARANTEED_PERF(msr),
3498                         (unsigned int)HWP_MOSTEFFICIENT_PERF(msr),
3499                         (unsigned int)HWP_LOWEST_PERF(msr));
3500
3501         if (get_msr(cpu, MSR_HWP_REQUEST, &msr))
3502                 return 0;
3503
3504         fprintf(outf, "cpu%d: MSR_HWP_REQUEST: 0x%08llx "
3505                         "(min %d max %d des %d epp 0x%x window 0x%x pkg 0x%x)\n",
3506                         cpu, msr,
3507                         (unsigned int)(((msr) >> 0) & 0xff),
3508                         (unsigned int)(((msr) >> 8) & 0xff),
3509                         (unsigned int)(((msr) >> 16) & 0xff),
3510                         (unsigned int)(((msr) >> 24) & 0xff),
3511                         (unsigned int)(((msr) >> 32) & 0xff3),
3512                         (unsigned int)(((msr) >> 42) & 0x1));
3513
3514         if (has_hwp_pkg) {
3515                 if (get_msr(cpu, MSR_HWP_REQUEST_PKG, &msr))
3516                         return 0;
3517
3518                 fprintf(outf, "cpu%d: MSR_HWP_REQUEST_PKG: 0x%08llx "
3519                         "(min %d max %d des %d epp 0x%x window 0x%x)\n",
3520                         cpu, msr,
3521                         (unsigned int)(((msr) >> 0) & 0xff),
3522                         (unsigned int)(((msr) >> 8) & 0xff),
3523                         (unsigned int)(((msr) >> 16) & 0xff),
3524                         (unsigned int)(((msr) >> 24) & 0xff),
3525                         (unsigned int)(((msr) >> 32) & 0xff3));
3526         }
3527         if (has_hwp_notify) {
3528                 if (get_msr(cpu, MSR_HWP_INTERRUPT, &msr))
3529                         return 0;
3530
3531                 fprintf(outf, "cpu%d: MSR_HWP_INTERRUPT: 0x%08llx "
3532                         "(%s_Guaranteed_Perf_Change, %s_Excursion_Min)\n",
3533                         cpu, msr,
3534                         ((msr) & 0x1) ? "EN" : "Dis",
3535                         ((msr) & 0x2) ? "EN" : "Dis");
3536         }
3537         if (get_msr(cpu, MSR_HWP_STATUS, &msr))
3538                 return 0;
3539
3540         fprintf(outf, "cpu%d: MSR_HWP_STATUS: 0x%08llx "
3541                         "(%sGuaranteed_Perf_Change, %sExcursion_Min)\n",
3542                         cpu, msr,
3543                         ((msr) & 0x1) ? "" : "No-",
3544                         ((msr) & 0x2) ? "" : "No-");
3545
3546         return 0;
3547 }
3548
3549 /*
3550  * print_perf_limit()
3551  */
3552 int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data *p)
3553 {
3554         unsigned long long msr;
3555         int cpu;
3556
3557         cpu = t->cpu_id;
3558
3559         /* per-package */
3560         if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
3561                 return 0;
3562
3563         if (cpu_migrate(cpu)) {
3564                 fprintf(outf, "Could not migrate to CPU %d\n", cpu);
3565                 return -1;
3566         }
3567
3568         if (do_core_perf_limit_reasons) {
3569                 get_msr(cpu, MSR_CORE_PERF_LIMIT_REASONS, &msr);
3570                 fprintf(outf, "cpu%d: MSR_CORE_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
3571                 fprintf(outf, " (Active: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)",
3572                         (msr & 1 << 15) ? "bit15, " : "",
3573                         (msr & 1 << 14) ? "bit14, " : "",
3574                         (msr & 1 << 13) ? "Transitions, " : "",
3575                         (msr & 1 << 12) ? "MultiCoreTurbo, " : "",
3576                         (msr & 1 << 11) ? "PkgPwrL2, " : "",
3577                         (msr & 1 << 10) ? "PkgPwrL1, " : "",
3578                         (msr & 1 << 9) ? "CorePwr, " : "",
3579                         (msr & 1 << 8) ? "Amps, " : "",
3580                         (msr & 1 << 6) ? "VR-Therm, " : "",
3581                         (msr & 1 << 5) ? "Auto-HWP, " : "",
3582                         (msr & 1 << 4) ? "Graphics, " : "",
3583                         (msr & 1 << 2) ? "bit2, " : "",
3584                         (msr & 1 << 1) ? "ThermStatus, " : "",
3585                         (msr & 1 << 0) ? "PROCHOT, " : "");
3586                 fprintf(outf, " (Logged: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)\n",
3587                         (msr & 1 << 31) ? "bit31, " : "",
3588                         (msr & 1 << 30) ? "bit30, " : "",
3589                         (msr & 1 << 29) ? "Transitions, " : "",
3590                         (msr & 1 << 28) ? "MultiCoreTurbo, " : "",
3591                         (msr & 1 << 27) ? "PkgPwrL2, " : "",
3592                         (msr & 1 << 26) ? "PkgPwrL1, " : "",
3593                         (msr & 1 << 25) ? "CorePwr, " : "",
3594                         (msr & 1 << 24) ? "Amps, " : "",
3595                         (msr & 1 << 22) ? "VR-Therm, " : "",
3596                         (msr & 1 << 21) ? "Auto-HWP, " : "",
3597                         (msr & 1 << 20) ? "Graphics, " : "",
3598                         (msr & 1 << 18) ? "bit18, " : "",
3599                         (msr & 1 << 17) ? "ThermStatus, " : "",
3600                         (msr & 1 << 16) ? "PROCHOT, " : "");
3601
3602         }
3603         if (do_gfx_perf_limit_reasons) {
3604                 get_msr(cpu, MSR_GFX_PERF_LIMIT_REASONS, &msr);
3605                 fprintf(outf, "cpu%d: MSR_GFX_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
3606                 fprintf(outf, " (Active: %s%s%s%s%s%s%s%s)",
3607                         (msr & 1 << 0) ? "PROCHOT, " : "",
3608                         (msr & 1 << 1) ? "ThermStatus, " : "",
3609                         (msr & 1 << 4) ? "Graphics, " : "",
3610                         (msr & 1 << 6) ? "VR-Therm, " : "",
3611                         (msr & 1 << 8) ? "Amps, " : "",
3612                         (msr & 1 << 9) ? "GFXPwr, " : "",
3613                         (msr & 1 << 10) ? "PkgPwrL1, " : "",
3614                         (msr & 1 << 11) ? "PkgPwrL2, " : "");
3615                 fprintf(outf, " (Logged: %s%s%s%s%s%s%s%s)\n",
3616                         (msr & 1 << 16) ? "PROCHOT, " : "",
3617                         (msr & 1 << 17) ? "ThermStatus, " : "",
3618                         (msr & 1 << 20) ? "Graphics, " : "",
3619                         (msr & 1 << 22) ? "VR-Therm, " : "",
3620                         (msr & 1 << 24) ? "Amps, " : "",
3621                         (msr & 1 << 25) ? "GFXPwr, " : "",
3622                         (msr & 1 << 26) ? "PkgPwrL1, " : "",
3623                         (msr & 1 << 27) ? "PkgPwrL2, " : "");
3624         }
3625         if (do_ring_perf_limit_reasons) {
3626                 get_msr(cpu, MSR_RING_PERF_LIMIT_REASONS, &msr);
3627                 fprintf(outf, "cpu%d: MSR_RING_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
3628                 fprintf(outf, " (Active: %s%s%s%s%s%s)",
3629                         (msr & 1 << 0) ? "PROCHOT, " : "",
3630                         (msr & 1 << 1) ? "ThermStatus, " : "",
3631                         (msr & 1 << 6) ? "VR-Therm, " : "",
3632                         (msr & 1 << 8) ? "Amps, " : "",
3633                         (msr & 1 << 10) ? "PkgPwrL1, " : "",
3634                         (msr & 1 << 11) ? "PkgPwrL2, " : "");
3635                 fprintf(outf, " (Logged: %s%s%s%s%s%s)\n",
3636                         (msr & 1 << 16) ? "PROCHOT, " : "",
3637                         (msr & 1 << 17) ? "ThermStatus, " : "",
3638                         (msr & 1 << 22) ? "VR-Therm, " : "",
3639                         (msr & 1 << 24) ? "Amps, " : "",
3640                         (msr & 1 << 26) ? "PkgPwrL1, " : "",
3641                         (msr & 1 << 27) ? "PkgPwrL2, " : "");
3642         }
3643         return 0;
3644 }
3645
3646 #define RAPL_POWER_GRANULARITY  0x7FFF  /* 15 bit power granularity */
3647 #define RAPL_TIME_GRANULARITY   0x3F /* 6 bit time granularity */
3648
3649 double get_tdp(unsigned int model)
3650 {
3651         unsigned long long msr;
3652
3653         if (do_rapl & RAPL_PKG_POWER_INFO)
3654                 if (!get_msr(base_cpu, MSR_PKG_POWER_INFO, &msr))
3655                         return ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units;
3656
3657         switch (model) {
3658         case INTEL_FAM6_ATOM_SILVERMONT1:
3659         case INTEL_FAM6_ATOM_SILVERMONT2:
3660                 return 30.0;
3661         default:
3662                 return 135.0;
3663         }
3664 }
3665
3666 /*
3667  * rapl_dram_energy_units_probe()
3668  * Energy units are either hard-coded, or come from RAPL Energy Unit MSR.
3669  */
3670 static double
3671 rapl_dram_energy_units_probe(int  model, double rapl_energy_units)
3672 {
3673         /* only called for genuine_intel, family 6 */
3674
3675         switch (model) {
3676         case INTEL_FAM6_HASWELL_X:      /* HSX */
3677         case INTEL_FAM6_BROADWELL_X:    /* BDX */
3678         case INTEL_FAM6_BROADWELL_XEON_D:       /* BDX-DE */
3679         case INTEL_FAM6_XEON_PHI_KNL:   /* KNL */
3680         case INTEL_FAM6_XEON_PHI_KNM:
3681                 return (rapl_dram_energy_units = 15.3 / 1000000);
3682         default:
3683                 return (rapl_energy_units);
3684         }
3685 }
3686
3687
3688 /*
3689  * rapl_probe()
3690  *
3691  * sets do_rapl, rapl_power_units, rapl_energy_units, rapl_time_units
3692  */
3693 void rapl_probe(unsigned int family, unsigned int model)
3694 {
3695         unsigned long long msr;
3696         unsigned int time_unit;
3697         double tdp;
3698
3699         if (!genuine_intel)
3700                 return;
3701
3702         if (family != 6)
3703                 return;
3704
3705         switch (model) {
3706         case INTEL_FAM6_SANDYBRIDGE:
3707         case INTEL_FAM6_IVYBRIDGE:
3708         case INTEL_FAM6_HASWELL_CORE:   /* HSW */
3709         case INTEL_FAM6_HASWELL_ULT:    /* HSW */
3710         case INTEL_FAM6_HASWELL_GT3E:   /* HSW */
3711         case INTEL_FAM6_BROADWELL_CORE: /* BDW */
3712         case INTEL_FAM6_BROADWELL_GT3E: /* BDW */
3713                 do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_GFX | RAPL_PKG_POWER_INFO;
3714                 if (rapl_joules) {
3715                         BIC_PRESENT(BIC_Pkg_J);
3716                         BIC_PRESENT(BIC_Cor_J);
3717                         BIC_PRESENT(BIC_GFX_J);
3718                 } else {
3719                         BIC_PRESENT(BIC_PkgWatt);
3720                         BIC_PRESENT(BIC_CorWatt);
3721                         BIC_PRESENT(BIC_GFXWatt);
3722                 }
3723                 break;
3724         case INTEL_FAM6_ATOM_GOLDMONT:  /* BXT */
3725         case INTEL_FAM6_ATOM_GEMINI_LAKE:
3726                 do_rapl = RAPL_PKG | RAPL_PKG_POWER_INFO;
3727                 if (rapl_joules)
3728                         BIC_PRESENT(BIC_Pkg_J);
3729                 else
3730                         BIC_PRESENT(BIC_PkgWatt);
3731                 break;
3732         case INTEL_FAM6_SKYLAKE_MOBILE: /* SKL */
3733         case INTEL_FAM6_SKYLAKE_DESKTOP:        /* SKL */
3734         case INTEL_FAM6_KABYLAKE_MOBILE:        /* KBL */
3735         case INTEL_FAM6_KABYLAKE_DESKTOP:       /* KBL */
3736         case INTEL_FAM6_CANNONLAKE_MOBILE:      /* CNL */
3737                 do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_GFX | RAPL_PKG_POWER_INFO;
3738                 BIC_PRESENT(BIC_PKG__);
3739                 BIC_PRESENT(BIC_RAM__);
3740                 if (rapl_joules) {
3741                         BIC_PRESENT(BIC_Pkg_J);
3742                         BIC_PRESENT(BIC_Cor_J);
3743                         BIC_PRESENT(BIC_RAM_J);
3744                         BIC_PRESENT(BIC_GFX_J);
3745                 } else {
3746                         BIC_PRESENT(BIC_PkgWatt);
3747                         BIC_PRESENT(BIC_CorWatt);
3748                         BIC_PRESENT(BIC_RAMWatt);
3749                         BIC_PRESENT(BIC_GFXWatt);
3750                 }
3751                 break;
3752         case INTEL_FAM6_HASWELL_X:      /* HSX */
3753         case INTEL_FAM6_BROADWELL_X:    /* BDX */
3754         case INTEL_FAM6_BROADWELL_XEON_D:       /* BDX-DE */
3755         case INTEL_FAM6_SKYLAKE_X:      /* SKX */
3756         case INTEL_FAM6_XEON_PHI_KNL:   /* KNL */
3757         case INTEL_FAM6_XEON_PHI_KNM:
3758                 do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO;
3759                 BIC_PRESENT(BIC_PKG__);
3760                 BIC_PRESENT(BIC_RAM__);
3761                 if (rapl_joules) {
3762                         BIC_PRESENT(BIC_Pkg_J);
3763                         BIC_PRESENT(BIC_RAM_J);
3764                 } else {
3765                         BIC_PRESENT(BIC_PkgWatt);
3766                         BIC_PRESENT(BIC_RAMWatt);
3767                 }
3768                 break;
3769         case INTEL_FAM6_SANDYBRIDGE_X:
3770         case INTEL_FAM6_IVYBRIDGE_X:
3771                 do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_PKG_PERF_STATUS | RAPL_DRAM_PERF_STATUS | RAPL_PKG_POWER_INFO;
3772                 BIC_PRESENT(BIC_PKG__);
3773                 BIC_PRESENT(BIC_RAM__);
3774                 if (rapl_joules) {
3775                         BIC_PRESENT(BIC_Pkg_J);
3776                         BIC_PRESENT(BIC_Cor_J);
3777                         BIC_PRESENT(BIC_RAM_J);
3778                 } else {
3779                         BIC_PRESENT(BIC_PkgWatt);
3780                         BIC_PRESENT(BIC_CorWatt);
3781                         BIC_PRESENT(BIC_RAMWatt);
3782                 }
3783                 break;
3784         case INTEL_FAM6_ATOM_SILVERMONT1:       /* BYT */
3785         case INTEL_FAM6_ATOM_SILVERMONT2:       /* AVN */
3786                 do_rapl = RAPL_PKG | RAPL_CORES;
3787                 if (rapl_joules) {
3788                         BIC_PRESENT(BIC_Pkg_J);
3789                         BIC_PRESENT(BIC_Cor_J);
3790                 } else {
3791                         BIC_PRESENT(BIC_PkgWatt);
3792                         BIC_PRESENT(BIC_CorWatt);
3793                 }
3794                 break;
3795         case INTEL_FAM6_ATOM_DENVERTON: /* DNV */
3796                 do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO | RAPL_CORES_ENERGY_STATUS;
3797                 BIC_PRESENT(BIC_PKG__);
3798                 BIC_PRESENT(BIC_RAM__);
3799                 if (rapl_joules) {
3800                         BIC_PRESENT(BIC_Pkg_J);
3801                         BIC_PRESENT(BIC_Cor_J);
3802                         BIC_PRESENT(BIC_RAM_J);
3803                 } else {
3804                         BIC_PRESENT(BIC_PkgWatt);
3805                         BIC_PRESENT(BIC_CorWatt);
3806                         BIC_PRESENT(BIC_RAMWatt);
3807                 }
3808                 break;
3809         default:
3810                 return;
3811         }
3812
3813         /* units on package 0, verify later other packages match */
3814         if (get_msr(base_cpu, MSR_RAPL_POWER_UNIT, &msr))
3815                 return;
3816
3817         rapl_power_units = 1.0 / (1 << (msr & 0xF));
3818         if (model == INTEL_FAM6_ATOM_SILVERMONT1)
3819                 rapl_energy_units = 1.0 * (1 << (msr >> 8 & 0x1F)) / 1000000;
3820         else
3821                 rapl_energy_units = 1.0 / (1 << (msr >> 8 & 0x1F));
3822
3823         rapl_dram_energy_units = rapl_dram_energy_units_probe(model, rapl_energy_units);
3824
3825         time_unit = msr >> 16 & 0xF;
3826         if (time_unit == 0)
3827                 time_unit = 0xA;
3828
3829         rapl_time_units = 1.0 / (1 << (time_unit));
3830
3831         tdp = get_tdp(model);
3832
3833         rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp;
3834         if (!quiet)
3835                 fprintf(outf, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp);
3836
3837         return;
3838 }
3839
3840 void perf_limit_reasons_probe(unsigned int family, unsigned int model)
3841 {
3842         if (!genuine_intel)
3843                 return;
3844
3845         if (family != 6)
3846                 return;
3847
3848         switch (model) {
3849         case INTEL_FAM6_HASWELL_CORE:   /* HSW */
3850         case INTEL_FAM6_HASWELL_ULT:    /* HSW */
3851         case INTEL_FAM6_HASWELL_GT3E:   /* HSW */
3852                 do_gfx_perf_limit_reasons = 1;
3853         case INTEL_FAM6_HASWELL_X:      /* HSX */
3854                 do_core_perf_limit_reasons = 1;
3855                 do_ring_perf_limit_reasons = 1;
3856         default:
3857                 return;
3858         }
3859 }
3860
3861 void automatic_cstate_conversion_probe(unsigned int family, unsigned int model)
3862 {
3863         if (is_skx(family, model) || is_bdx(family, model))
3864                 has_automatic_cstate_conversion = 1;
3865 }
3866
3867 int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p)
3868 {
3869         unsigned long long msr;
3870         unsigned int dts, dts2;
3871         int cpu;
3872
3873         if (!(do_dts || do_ptm))
3874                 return 0;
3875
3876         cpu = t->cpu_id;
3877
3878         /* DTS is per-core, no need to print for each thread */
3879         if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
3880                 return 0;
3881
3882         if (cpu_migrate(cpu)) {
3883                 fprintf(outf, "Could not migrate to CPU %d\n", cpu);
3884                 return -1;
3885         }
3886
3887         if (do_ptm && (t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) {
3888                 if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr))
3889                         return 0;
3890
3891                 dts = (msr >> 16) & 0x7F;
3892                 fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_STATUS: 0x%08llx (%d C)\n",
3893                         cpu, msr, tcc_activation_temp - dts);
3894
3895                 if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, &msr))
3896                         return 0;
3897
3898                 dts = (msr >> 16) & 0x7F;
3899                 dts2 = (msr >> 8) & 0x7F;
3900                 fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
3901                         cpu, msr, tcc_activation_temp - dts, tcc_activation_temp - dts2);
3902         }
3903
3904
3905         if (do_dts && debug) {
3906                 unsigned int resolution;
3907
3908                 if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr))
3909                         return 0;
3910
3911                 dts = (msr >> 16) & 0x7F;
3912                 resolution = (msr >> 27) & 0xF;
3913                 fprintf(outf, "cpu%d: MSR_IA32_THERM_STATUS: 0x%08llx (%d C +/- %d)\n",
3914                         cpu, msr, tcc_activation_temp - dts, resolution);
3915
3916                 if (get_msr(cpu, MSR_IA32_THERM_INTERRUPT, &msr))
3917                         return 0;
3918
3919                 dts = (msr >> 16) & 0x7F;
3920                 dts2 = (msr >> 8) & 0x7F;
3921                 fprintf(outf, "cpu%d: MSR_IA32_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
3922                         cpu, msr, tcc_activation_temp - dts, tcc_activation_temp - dts2);
3923         }
3924
3925         return 0;
3926 }
3927
3928 void print_power_limit_msr(int cpu, unsigned long long msr, char *label)
3929 {
3930         fprintf(outf, "cpu%d: %s: %sabled (%f Watts, %f sec, clamp %sabled)\n",
3931                 cpu, label,
3932                 ((msr >> 15) & 1) ? "EN" : "DIS",
3933                 ((msr >> 0) & 0x7FFF) * rapl_power_units,
3934                 (1.0 + (((msr >> 22) & 0x3)/4.0)) * (1 << ((msr >> 17) & 0x1F)) * rapl_time_units,
3935                 (((msr >> 16) & 1) ? "EN" : "DIS"));
3936
3937         return;
3938 }
3939
3940 int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
3941 {
3942         unsigned long long msr;
3943         int cpu;
3944
3945         if (!do_rapl)
3946                 return 0;
3947
3948         /* RAPL counters are per package, so print only for 1st thread/package */
3949         if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
3950                 return 0;
3951
3952         cpu = t->cpu_id;
3953         if (cpu_migrate(cpu)) {
3954                 fprintf(outf, "Could not migrate to CPU %d\n", cpu);
3955                 return -1;
3956         }
3957
3958         if (get_msr(cpu, MSR_RAPL_POWER_UNIT, &msr))
3959                 return -1;
3960
3961         fprintf(outf, "cpu%d: MSR_RAPL_POWER_UNIT: 0x%08llx (%f Watts, %f Joules, %f sec.)\n", cpu, msr,
3962                 rapl_power_units, rapl_energy_units, rapl_time_units);
3963
3964         if (do_rapl & RAPL_PKG_POWER_INFO) {
3965
3966                 if (get_msr(cpu, MSR_PKG_POWER_INFO, &msr))
3967                         return -5;
3968
3969
3970                 fprintf(outf, "cpu%d: MSR_PKG_POWER_INFO: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n",
3971                         cpu, msr,
3972                         ((msr >>  0) & RAPL_POWER_GRANULARITY) * rapl_power_units,
3973                         ((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units,
3974                         ((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units,
3975                         ((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units);
3976
3977         }
3978         if (do_rapl & RAPL_PKG) {
3979
3980                 if (get_msr(cpu, MSR_PKG_POWER_LIMIT, &msr))
3981                         return -9;
3982
3983                 fprintf(outf, "cpu%d: MSR_PKG_POWER_LIMIT: 0x%08llx (%slocked)\n",
3984                         cpu, msr, (msr >> 63) & 1 ? "" : "UN");
3985
3986                 print_power_limit_msr(cpu, msr, "PKG Limit #1");
3987                 fprintf(outf, "cpu%d: PKG Limit #2: %sabled (%f Watts, %f* sec, clamp %sabled)\n",
3988                         cpu,
3989                         ((msr >> 47) & 1) ? "EN" : "DIS",
3990                         ((msr >> 32) & 0x7FFF) * rapl_power_units,
3991                         (1.0 + (((msr >> 54) & 0x3)/4.0)) * (1 << ((msr >> 49) & 0x1F)) * rapl_time_units,
3992                         ((msr >> 48) & 1) ? "EN" : "DIS");
3993         }
3994
3995         if (do_rapl & RAPL_DRAM_POWER_INFO) {
3996                 if (get_msr(cpu, MSR_DRAM_POWER_INFO, &msr))
3997                         return -6;
3998
3999                 fprintf(outf, "cpu%d: MSR_DRAM_POWER_INFO,: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n",
4000                         cpu, msr,
4001                         ((msr >>  0) & RAPL_POWER_GRANULARITY) * rapl_power_units,
4002                         ((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units,
4003                         ((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units,
4004                         ((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units);
4005         }
4006         if (do_rapl & RAPL_DRAM) {
4007                 if (get_msr(cpu, MSR_DRAM_POWER_LIMIT, &msr))
4008                         return -9;
4009                 fprintf(outf, "cpu%d: MSR_DRAM_POWER_LIMIT: 0x%08llx (%slocked)\n",
4010                                 cpu, msr, (msr >> 31) & 1 ? "" : "UN");
4011
4012                 print_power_limit_msr(cpu, msr, "DRAM Limit");
4013         }
4014         if (do_rapl & RAPL_CORE_POLICY) {
4015                 if (get_msr(cpu, MSR_PP0_POLICY, &msr))
4016                         return -7;
4017
4018                 fprintf(outf, "cpu%d: MSR_PP0_POLICY: %lld\n", cpu, msr & 0xF);
4019         }
4020         if (do_rapl & RAPL_CORES_POWER_LIMIT) {
4021                 if (get_msr(cpu, MSR_PP0_POWER_LIMIT, &msr))
4022                         return -9;
4023                 fprintf(outf, "cpu%d: MSR_PP0_POWER_LIMIT: 0x%08llx (%slocked)\n",
4024                                 cpu, msr, (msr >> 31) & 1 ? "" : "UN");
4025                 print_power_limit_msr(cpu, msr, "Cores Limit");
4026         }
4027         if (do_rapl & RAPL_GFX) {
4028                 if (get_msr(cpu, MSR_PP1_POLICY, &msr))
4029                         return -8;
4030
4031                 fprintf(outf, "cpu%d: MSR_PP1_POLICY: %lld\n", cpu, msr & 0xF);
4032
4033                 if (get_msr(cpu, MSR_PP1_POWER_LIMIT, &msr))
4034                         return -9;
4035                 fprintf(outf, "cpu%d: MSR_PP1_POWER_LIMIT: 0x%08llx (%slocked)\n",
4036                                 cpu, msr, (msr >> 31) & 1 ? "" : "UN");
4037                 print_power_limit_msr(cpu, msr, "GFX Limit");
4038         }
4039         return 0;
4040 }
4041
4042 /*
4043  * SNB adds support for additional MSRs:
4044  *
4045  * MSR_PKG_C7_RESIDENCY            0x000003fa
4046  * MSR_CORE_C7_RESIDENCY           0x000003fe
4047  * MSR_PKG_C2_RESIDENCY            0x0000060d
4048  */
4049
4050 int has_snb_msrs(unsigned int family, unsigned int model)
4051 {
4052         if (!genuine_intel)
4053                 return 0;
4054
4055         switch (model) {
4056         case INTEL_FAM6_SANDYBRIDGE:
4057         case INTEL_FAM6_SANDYBRIDGE_X:
4058         case INTEL_FAM6_IVYBRIDGE:      /* IVB */
4059         case INTEL_FAM6_IVYBRIDGE_X:    /* IVB Xeon */
4060         case INTEL_FAM6_HASWELL_CORE:   /* HSW */
4061         case INTEL_FAM6_HASWELL_X:      /* HSW */
4062         case INTEL_FAM6_HASWELL_ULT:    /* HSW */
4063         case INTEL_FAM6_HASWELL_GT3E:   /* HSW */
4064         case INTEL_FAM6_BROADWELL_CORE: /* BDW */
4065         case INTEL_FAM6_BROADWELL_GT3E: /* BDW */
4066         case INTEL_FAM6_BROADWELL_X:    /* BDX */
4067         case INTEL_FAM6_BROADWELL_XEON_D:       /* BDX-DE */
4068         case INTEL_FAM6_SKYLAKE_MOBILE: /* SKL */
4069         case INTEL_FAM6_SKYLAKE_DESKTOP:        /* SKL */
4070         case INTEL_FAM6_KABYLAKE_MOBILE:        /* KBL */
4071         case INTEL_FAM6_KABYLAKE_DESKTOP:       /* KBL */
4072         case INTEL_FAM6_CANNONLAKE_MOBILE:      /* CNL */
4073         case INTEL_FAM6_SKYLAKE_X:      /* SKX */
4074         case INTEL_FAM6_ATOM_GOLDMONT:  /* BXT */
4075         case INTEL_FAM6_ATOM_GEMINI_LAKE:
4076         case INTEL_FAM6_ATOM_DENVERTON: /* DNV */
4077                 return 1;
4078         }
4079         return 0;
4080 }
4081
4082 /*
4083  * HSW adds support for additional MSRs:
4084  *
4085  * MSR_PKG_C8_RESIDENCY         0x00000630
4086  * MSR_PKG_C9_RESIDENCY         0x00000631
4087  * MSR_PKG_C10_RESIDENCY        0x00000632
4088  *
4089  * MSR_PKGC8_IRTL               0x00000633
4090  * MSR_PKGC9_IRTL               0x00000634
4091  * MSR_PKGC10_IRTL              0x00000635
4092  *
4093  */
4094 int has_hsw_msrs(unsigned int family, unsigned int model)
4095 {
4096         if (!genuine_intel)
4097                 return 0;
4098
4099         switch (model) {
4100         case INTEL_FAM6_HASWELL_ULT:    /* HSW */
4101         case INTEL_FAM6_BROADWELL_CORE: /* BDW */
4102         case INTEL_FAM6_SKYLAKE_MOBILE: /* SKL */
4103         case INTEL_FAM6_SKYLAKE_DESKTOP:        /* SKL */
4104         case INTEL_FAM6_KABYLAKE_MOBILE:        /* KBL */
4105         case INTEL_FAM6_KABYLAKE_DESKTOP:       /* KBL */
4106         case INTEL_FAM6_CANNONLAKE_MOBILE:      /* CNL */
4107         case INTEL_FAM6_ATOM_GOLDMONT:  /* BXT */
4108         case INTEL_FAM6_ATOM_GEMINI_LAKE:
4109                 return 1;
4110         }
4111         return 0;
4112 }
4113
4114 /*
4115  * SKL adds support for additional MSRS:
4116  *
4117  * MSR_PKG_WEIGHTED_CORE_C0_RES    0x00000658
4118  * MSR_PKG_ANY_CORE_C0_RES         0x00000659
4119  * MSR_PKG_ANY_GFXE_C0_RES         0x0000065A
4120  * MSR_PKG_BOTH_CORE_GFXE_C0_RES   0x0000065B
4121  */
4122 int has_skl_msrs(unsigned int family, unsigned int model)
4123 {
4124         if (!genuine_intel)
4125                 return 0;
4126
4127         switch (model) {
4128         case INTEL_FAM6_SKYLAKE_MOBILE: /* SKL */
4129         case INTEL_FAM6_SKYLAKE_DESKTOP:        /* SKL */
4130         case INTEL_FAM6_KABYLAKE_MOBILE:        /* KBL */
4131         case INTEL_FAM6_KABYLAKE_DESKTOP:       /* KBL */
4132         case INTEL_FAM6_CANNONLAKE_MOBILE:      /* CNL */
4133                 return 1;
4134         }
4135         return 0;
4136 }
4137
4138 int is_slm(unsigned int family, unsigned int model)
4139 {
4140         if (!genuine_intel)
4141                 return 0;
4142         switch (model) {
4143         case INTEL_FAM6_ATOM_SILVERMONT1:       /* BYT */
4144         case INTEL_FAM6_ATOM_SILVERMONT2:       /* AVN */
4145                 return 1;
4146         }
4147         return 0;
4148 }
4149
4150 int is_knl(unsigned int family, unsigned int model)
4151 {
4152         if (!genuine_intel)
4153                 return 0;
4154         switch (model) {
4155         case INTEL_FAM6_XEON_PHI_KNL:   /* KNL */
4156         case INTEL_FAM6_XEON_PHI_KNM:
4157                 return 1;
4158         }
4159         return 0;
4160 }
4161
4162 int is_cnl(unsigned int family, unsigned int model)
4163 {
4164         if (!genuine_intel)
4165                 return 0;
4166
4167         switch (model) {
4168         case INTEL_FAM6_CANNONLAKE_MOBILE: /* CNL */
4169                 return 1;
4170         }
4171
4172         return 0;
4173 }
4174
/*
 * get_aperf_mperf_multiplier()
 *
 * Return 1024 when is_knl() accepts this family/model, otherwise 1.
 */
unsigned int get_aperf_mperf_multiplier(unsigned int family, unsigned int model)
{
        return is_knl(family, model) ? 1024 : 1;
}
4181
4182 #define SLM_BCLK_FREQS 5
4183 double slm_freq_table[SLM_BCLK_FREQS] = { 83.3, 100.0, 133.3, 116.7, 80.0};
4184
4185 double slm_bclk(void)
4186 {
4187         unsigned long long msr = 3;
4188         unsigned int i;
4189         double freq;
4190
4191         if (get_msr(base_cpu, MSR_FSB_FREQ, &msr))
4192                 fprintf(outf, "SLM BCLK: unknown\n");
4193
4194         i = msr & 0xf;
4195         if (i >= SLM_BCLK_FREQS) {
4196                 fprintf(outf, "SLM BCLK[%d] invalid\n", i);
4197                 i = 3;
4198         }
4199         freq = slm_freq_table[i];
4200
4201         if (!quiet)
4202                 fprintf(outf, "SLM BCLK: %.1f Mhz\n", freq);
4203
4204         return freq;
4205 }
4206
/*
 * discover_bclk()
 *
 * Return the bus clock in MHz for this family/model:
 *   100.00     when has_snb_msrs() or is_knl() accepts it,
 *   slm_bclk() when is_slm() accepts it,
 *   133.33     otherwise.
 */
double discover_bclk(unsigned int family, unsigned int model)
{
        if (has_snb_msrs(family, model))
                return 100.00;

        if (is_knl(family, model))
                return 100.00;

        if (is_slm(family, model))
                return slm_bclk();

        return 133.33;
}
4216
4217 /*
4218  * MSR_IA32_TEMPERATURE_TARGET indicates the temperature where
4219  * the Thermal Control Circuit (TCC) activates.
4220  * This is usually equal to tjMax.
4221  *
4222  * Older processors do not have this MSR, so there we guess,
4223  * but also allow cmdline over-ride with -T.
4224  *
4225  * Several MSR temperature values are in units of degrees-C
4226  * below this value, including the Digital Thermal Sensor (DTS),
4227  * Package Thermal Management Sensor (PTM), and thermal event thresholds.
4228  */
4229 int set_temperature_target(struct thread_data *t, struct core_data *c, struct pkg_data *p)
4230 {
4231         unsigned long long msr;
4232         unsigned int target_c_local;
4233         int cpu;
4234
4235         /* tcc_activation_temp is used only for dts or ptm */
4236         if (!(do_dts || do_ptm))
4237                 return 0;
4238
4239         /* this is a per-package concept */
4240         if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
4241                 return 0;
4242
4243         cpu = t->cpu_id;
4244         if (cpu_migrate(cpu)) {
4245                 fprintf(outf, "Could not migrate to CPU %d\n", cpu);
4246                 return -1;
4247         }
4248
4249         if (tcc_activation_temp_override != 0) {
4250                 tcc_activation_temp = tcc_activation_temp_override;
4251                 fprintf(outf, "cpu%d: Using cmdline TCC Target (%d C)\n",
4252                         cpu, tcc_activation_temp);
4253                 return 0;
4254         }
4255
4256         /* Temperature Target MSR is Nehalem and newer only */
4257         if (!do_nhm_platform_info)
4258                 goto guess;
4259
4260         if (get_msr(base_cpu, MSR_IA32_TEMPERATURE_TARGET, &msr))
4261                 goto guess;
4262
4263         target_c_local = (msr >> 16) & 0xFF;
4264
4265         if (!quiet)
4266                 fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C)\n",
4267                         cpu, msr, target_c_local);
4268
4269         if (!target_c_local)
4270                 goto guess;
4271
4272         tcc_activation_temp = target_c_local;
4273
4274         return 0;
4275
4276 guess:
4277         tcc_activation_temp = TJMAX_DEFAULT;
4278         fprintf(outf, "cpu%d: Guessing tjMax %d C, Please use -T to specify\n",
4279                 cpu, tcc_activation_temp);
4280
4281         return 0;
4282 }
4283
4284 void decode_feature_control_msr(void)
4285 {
4286         unsigned long long msr;
4287
4288         if (!get_msr(base_cpu, MSR_IA32_FEATURE_CONTROL, &msr))
4289                 fprintf(outf, "cpu%d: MSR_IA32_FEATURE_CONTROL: 0x%08llx (%sLocked %s)\n",
4290                         base_cpu, msr,
4291                         msr & FEATURE_CONTROL_LOCKED ? "" : "UN-",
4292                         msr & (1 << 18) ? "SGX" : "");
4293 }
4294
4295 void decode_misc_enable_msr(void)
4296 {
4297         unsigned long long msr;
4298
4299         if (!genuine_intel)
4300                 return;
4301
4302         if (!get_msr(base_cpu, MSR_IA32_MISC_ENABLE, &msr))
4303                 fprintf(outf, "cpu%d: MSR_IA32_MISC_ENABLE: 0x%08llx (%sTCC %sEIST %sMWAIT %sPREFETCH %sTURBO)\n",
4304                         base_cpu, msr,
4305                         msr & MSR_IA32_MISC_ENABLE_TM1 ? "" : "No-",
4306                         msr & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP ? "" : "No-",
4307                         msr & MSR_IA32_MISC_ENABLE_MWAIT ? "" : "No-",
4308                         msr & MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE ? "No-" : "",
4309                         msr & MSR_IA32_MISC_ENABLE_TURBO_DISABLE ? "No-" : "");
4310 }
4311
4312 void decode_misc_feature_control(void)
4313 {
4314         unsigned long long msr;
4315
4316         if (!has_misc_feature_control)
4317                 return;
4318
4319         if (!get_msr(base_cpu, MSR_MISC_FEATURE_CONTROL, &msr))
4320                 fprintf(outf, "cpu%d: MSR_MISC_FEATURE_CONTROL: 0x%08llx (%sL2-Prefetch %sL2-Prefetch-pair %sL1-Prefetch %sL1-IP-Prefetch)\n",
4321                         base_cpu, msr,
4322                         msr & (0 << 0) ? "No-" : "",
4323                         msr & (1 << 0) ? "No-" : "",
4324                         msr & (2 << 0) ? "No-" : "",
4325                         msr & (3 << 0) ? "No-" : "");
4326 }
4327 /*
4328  * Decode MSR_MISC_PWR_MGMT
4329  *
4330  * Decode the bits according to the Nehalem documentation
4331  * bit[0] seems to continue to have same meaning going forward
4332  * bit[1] less so...
4333  */
4334 void decode_misc_pwr_mgmt_msr(void)
4335 {
4336         unsigned long long msr;
4337
4338         if (!do_nhm_platform_info)
4339                 return;
4340
4341         if (no_MSR_MISC_PWR_MGMT)
4342                 return;
4343
4344         if (!get_msr(base_cpu, MSR_MISC_PWR_MGMT, &msr))
4345                 fprintf(outf, "cpu%d: MSR_MISC_PWR_MGMT: 0x%08llx (%sable-EIST_Coordination %sable-EPB %sable-OOB)\n",
4346                         base_cpu, msr,
4347                         msr & (1 << 0) ? "DIS" : "EN",
4348                         msr & (1 << 1) ? "EN" : "DIS",
4349                         msr & (1 << 8) ? "EN" : "DIS");
4350 }
4351 /*
4352  * Decode MSR_CC6_DEMOTION_POLICY_CONFIG, MSR_MC6_DEMOTION_POLICY_CONFIG
4353  *
4354  * This MSRs are present on Silvermont processors,
4355  * Intel Atom processor E3000 series (Baytrail), and friends.
4356  */
4357 void decode_c6_demotion_policy_msr(void)
4358 {
4359         unsigned long long msr;
4360
4361         if (!get_msr(base_cpu, MSR_CC6_DEMOTION_POLICY_CONFIG, &msr))
4362                 fprintf(outf, "cpu%d: MSR_CC6_DEMOTION_POLICY_CONFIG: 0x%08llx (%sable-CC6-Demotion)\n",
4363                         base_cpu, msr, msr & (1 << 0) ? "EN" : "DIS");
4364
4365         if (!get_msr(base_cpu, MSR_MC6_DEMOTION_POLICY_CONFIG, &msr))
4366                 fprintf(outf, "cpu%d: MSR_MC6_DEMOTION_POLICY_CONFIG: 0x%08llx (%sable-MC6-Demotion)\n",
4367                         base_cpu, msr, msr & (1 << 0) ? "EN" : "DIS");
4368 }
4369
4370 void process_cpuid()
4371 {
4372         unsigned int eax, ebx, ecx, edx, max_level, max_extended_level;
4373         unsigned int fms, family, model, stepping;
4374         unsigned int has_turbo;
4375
4376         eax = ebx = ecx = edx = 0;
4377
4378         __cpuid(0, max_level, ebx, ecx, edx);
4379
4380         if (ebx == 0x756e6547 && edx == 0x49656e69 && ecx == 0x6c65746e)
4381                 genuine_intel = 1;
4382
4383         if (!quiet)
4384                 fprintf(outf, "CPUID(0): %.4s%.4s%.4s ",
4385                         (char *)&ebx, (char *)&edx, (char *)&ecx);
4386
4387         __cpuid(1, fms, ebx, ecx, edx);
4388         family = (fms >> 8) & 0xf;
4389         model = (fms >> 4) & 0xf;
4390         stepping = fms & 0xf;
4391         if (family == 6 || family == 0xf)
4392                 model += ((fms >> 16) & 0xf) << 4;
4393
4394         if (!quiet) {
4395                 fprintf(outf, "%d CPUID levels; family:model:stepping 0x%x:%x:%x (%d:%d:%d)\n",
4396                         max_level, family, model, stepping, family, model, stepping);
4397                 fprintf(outf, "CPUID(1): %s %s %s %s %s %s %s %s %s\n",
4398                         ecx & (1 << 0) ? "SSE3" : "-",
4399                         ecx & (1 << 3) ? "MONITOR" : "-",
4400                         ecx & (1 << 6) ? "SMX" : "-",
4401                         ecx & (1 << 7) ? "EIST" : "-",
4402                         ecx & (1 << 8) ? "TM2" : "-",
4403                         edx & (1 << 4) ? "TSC" : "-",
4404                         edx & (1 << 5) ? "MSR" : "-",
4405                         edx & (1 << 22) ? "ACPI-TM" : "-",
4406                         edx & (1 << 29) ? "TM" : "-");
4407         }
4408
4409         if (!(edx & (1 << 5)))
4410                 errx(1, "CPUID: no MSR");
4411
4412         /*
4413          * check max extended function levels of CPUID.
4414          * This is needed to check for invariant TSC.
4415          * This check is valid for both Intel and AMD.
4416          */
4417         ebx = ecx = edx = 0;
4418         __cpuid(0x80000000, max_extended_level, ebx, ecx, edx);
4419
4420         if (max_extended_level >= 0x80000007) {
4421
4422                 /*
4423                  * Non-Stop TSC is advertised by CPUID.EAX=0x80000007: EDX.bit8
4424                  * this check is valid for both Intel and AMD
4425                  */
4426                 __cpuid(0x80000007, eax, ebx, ecx, edx);
4427                 has_invariant_tsc = edx & (1 << 8);
4428         }
4429
4430         /*
4431          * APERF/MPERF is advertised by CPUID.EAX=0x6: ECX.bit0
4432          * this check is valid for both Intel and AMD
4433          */
4434
4435         __cpuid(0x6, eax, ebx, ecx, edx);
4436         has_aperf = ecx & (1 << 0);
4437         if (has_aperf) {
4438                 BIC_PRESENT(BIC_Avg_MHz);
4439                 BIC_PRESENT(BIC_Busy);
4440                 BIC_PRESENT(BIC_Bzy_MHz);
4441         }
4442         do_dts = eax & (1 << 0);
4443         if (do_dts)
4444                 BIC_PRESENT(BIC_CoreTmp);
4445         has_turbo = eax & (1 << 1);
4446         do_ptm = eax & (1 << 6);
4447         if (do_ptm)
4448                 BIC_PRESENT(BIC_PkgTmp);
4449         has_hwp = eax & (1 << 7);
4450         has_hwp_notify = eax & (1 << 8);
4451         has_hwp_activity_window = eax & (1 << 9);
4452         has_hwp_epp = eax & (1 << 10);
4453         has_hwp_pkg = eax & (1 << 11);
4454         has_epb = ecx & (1 << 3);
4455
4456         if (!quiet)
4457                 fprintf(outf, "CPUID(6): %sAPERF, %sTURBO, %sDTS, %sPTM, %sHWP, "
4458                         "%sHWPnotify, %sHWPwindow, %sHWPepp, %sHWPpkg, %sEPB\n",
4459                         has_aperf ? "" : "No-",
4460                         has_turbo ? "" : "No-",
4461                         do_dts ? "" : "No-",
4462                         do_ptm ? "" : "No-",
4463                         has_hwp ? "" : "No-",
4464                         has_hwp_notify ? "" : "No-",
4465                         has_hwp_activity_window ? "" : "No-",
4466                         has_hwp_epp ? "" : "No-",
4467                         has_hwp_pkg ? "" : "No-",
4468                         has_epb ? "" : "No-");
4469
4470         if (!quiet)
4471                 decode_misc_enable_msr();
4472
4473
4474         if (max_level >= 0x7 && !quiet) {
4475                 int has_sgx;
4476
4477                 ecx = 0;
4478
4479                 __cpuid_count(0x7, 0, eax, ebx, ecx, edx);
4480
4481                 has_sgx = ebx & (1 << 2);
4482                 fprintf(outf, "CPUID(7): %sSGX\n", has_sgx ? "" : "No-");
4483
4484                 if (has_sgx)
4485                         decode_feature_control_msr();
4486         }
4487
4488         if (max_level >= 0x15) {
4489                 unsigned int eax_crystal;
4490                 unsigned int ebx_tsc;
4491
4492                 /*
4493                  * CPUID 15H TSC/Crystal ratio, possibly Crystal Hz
4494                  */
4495                 eax_crystal = ebx_tsc = crystal_hz = edx = 0;
4496                 __cpuid(0x15, eax_crystal, ebx_tsc, crystal_hz, edx);
4497
4498                 if (ebx_tsc != 0) {
4499
4500                         if (!quiet && (ebx != 0))
4501                                 fprintf(outf, "CPUID(0x15): eax_crystal: %d ebx_tsc: %d ecx_crystal_hz: %d\n",
4502                                         eax_crystal, ebx_tsc, crystal_hz);
4503
4504                         if (crystal_hz == 0)
4505                                 switch(model) {
4506                                 case INTEL_FAM6_SKYLAKE_MOBILE: /* SKL */
4507                                 case INTEL_FAM6_SKYLAKE_DESKTOP:        /* SKL */
4508                                 case INTEL_FAM6_KABYLAKE_MOBILE:        /* KBL */
4509                                 case INTEL_FAM6_KABYLAKE_DESKTOP:       /* KBL */
4510                                         crystal_hz = 24000000;  /* 24.0 MHz */
4511                                         break;
4512                                 case INTEL_FAM6_ATOM_DENVERTON: /* DNV */
4513                                         crystal_hz = 25000000;  /* 25.0 MHz */
4514                                         break;
4515                                 case INTEL_FAM6_ATOM_GOLDMONT:  /* BXT */
4516                                 case INTEL_FAM6_ATOM_GEMINI_LAKE:
4517                                         crystal_hz = 19200000;  /* 19.2 MHz */
4518                                         break;
4519                                 default:
4520                                         crystal_hz = 0;
4521                         }
4522
4523                         if (crystal_hz) {
4524                                 tsc_hz =  (unsigned long long) crystal_hz * ebx_tsc / eax_crystal;
4525                                 if (!quiet)
4526                                         fprintf(outf, "TSC: %lld MHz (%d Hz * %d / %d / 1000000)\n",
4527                                                 tsc_hz / 1000000, crystal_hz, ebx_tsc,  eax_crystal);
4528                         }
4529                 }
4530         }
4531         if (max_level >= 0x16) {
4532                 unsigned int base_mhz, max_mhz, bus_mhz, edx;
4533
4534                 /*
4535                  * CPUID 16H Base MHz, Max MHz, Bus MHz
4536                  */
4537                 base_mhz = max_mhz = bus_mhz = edx = 0;
4538
4539                 __cpuid(0x16, base_mhz, max_mhz, bus_mhz, edx);
4540                 if (!quiet)
4541                         fprintf(outf, "CPUID(0x16): base_mhz: %d max_mhz: %d bus_mhz: %d\n",
4542                                 base_mhz, max_mhz, bus_mhz);
4543         }
4544
4545         if (has_aperf)
4546                 aperf_mperf_multiplier = get_aperf_mperf_multiplier(family, model);
4547
4548         BIC_PRESENT(BIC_IRQ);
4549         BIC_PRESENT(BIC_TSC_MHz);
4550
4551         if (probe_nhm_msrs(family, model)) {
4552                 do_nhm_platform_info = 1;
4553                 BIC_PRESENT(BIC_CPU_c1);
4554                 BIC_PRESENT(BIC_CPU_c3);
4555                 BIC_PRESENT(BIC_CPU_c6);
4556                 BIC_PRESENT(BIC_SMI);
4557         }
4558         do_snb_cstates = has_snb_msrs(family, model);
4559
4560         if (do_snb_cstates)
4561                 BIC_PRESENT(BIC_CPU_c7);
4562
4563         do_irtl_snb = has_snb_msrs(family, model);
4564         if (do_snb_cstates && (pkg_cstate_limit >= PCL__2))
4565                 BIC_PRESENT(BIC_Pkgpc2);
4566         if (pkg_cstate_limit >= PCL__3)
4567                 BIC_PRESENT(BIC_Pkgpc3);
4568         if (pkg_cstate_limit >= PCL__6)
4569                 BIC_PRESENT(BIC_Pkgpc6);
4570         if (do_snb_cstates && (pkg_cstate_limit >= PCL__7))
4571                 BIC_PRESENT(BIC_Pkgpc7);
4572         if (has_slv_msrs(family, model)) {
4573                 BIC_NOT_PRESENT(BIC_Pkgpc2);
4574                 BIC_NOT_PRESENT(BIC_Pkgpc3);
4575                 BIC_PRESENT(BIC_Pkgpc6);
4576                 BIC_NOT_PRESENT(BIC_Pkgpc7);
4577                 BIC_PRESENT(BIC_Mod_c6);
4578                 use_c1_residency_msr = 1;
4579         }
4580         if (is_dnv(family, model)) {
4581                 BIC_PRESENT(BIC_CPU_c1);
4582                 BIC_NOT_PRESENT(BIC_CPU_c3);
4583                 BIC_NOT_PRESENT(BIC_Pkgpc3);
4584                 BIC_NOT_PRESENT(BIC_CPU_c7);
4585                 BIC_NOT_PRESENT(BIC_Pkgpc7);
4586                 use_c1_residency_msr = 1;
4587         }
4588         if (is_skx(family, model)) {
4589                 BIC_NOT_PRESENT(BIC_CPU_c3);
4590                 BIC_NOT_PRESENT(BIC_Pkgpc3);
4591                 BIC_NOT_PRESENT(BIC_CPU_c7);
4592                 BIC_NOT_PRESENT(BIC_Pkgpc7);
4593         }
4594         if (is_bdx(family, model)) {
4595                 BIC_NOT_PRESENT(BIC_CPU_c7);
4596                 BIC_NOT_PRESENT(BIC_Pkgpc7);
4597         }
4598         if (has_hsw_msrs(family, model)) {
4599                 BIC_PRESENT(BIC_Pkgpc8);
4600                 BIC_PRESENT(BIC_Pkgpc9);
4601                 BIC_PRESENT(BIC_Pkgpc10);
4602         }
4603         do_irtl_hsw = has_hsw_msrs(family, model);
4604         if (has_skl_msrs(family, model)) {
4605                 BIC_PRESENT(BIC_Totl_c0);
4606                 BIC_PRESENT(BIC_Any_c0);
4607                 BIC_PRESENT(BIC_GFX_c0);
4608                 BIC_PRESENT(BIC_CPUGFX);
4609         }
4610         do_slm_cstates = is_slm(family, model);
4611         do_knl_cstates  = is_knl(family, model);
4612         do_cnl_cstates = is_cnl(family, model);
4613
4614         if (!quiet)
4615                 decode_misc_pwr_mgmt_msr();
4616
4617         if (!quiet && has_slv_msrs(family, model))
4618                 decode_c6_demotion_policy_msr();
4619
4620         rapl_probe(family, model);
4621         perf_limit_reasons_probe(family, model);
4622         automatic_cstate_conversion_probe(family, model);
4623
4624         if (!quiet)
4625                 dump_cstate_pstate_config_info(family, model);
4626
4627         if (!quiet)
4628                 dump_sysfs_cstate_config();
4629         if (!quiet)
4630                 dump_sysfs_pstate_config();
4631
4632         if (has_skl_msrs(family, model))
4633                 calculate_tsc_tweak();
4634
4635         if (!access("/sys/class/drm/card0/power/rc6_residency_ms", R_OK))
4636                 BIC_PRESENT(BIC_GFX_rc6);
4637
4638         if (!access("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", R_OK))
4639                 BIC_PRESENT(BIC_GFXMHz);
4640
4641         if (!access("/sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us", R_OK))
4642                 BIC_PRESENT(BIC_CPU_LPI);
4643         else
4644                 BIC_NOT_PRESENT(BIC_CPU_LPI);
4645
4646         if (!access("/sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us", R_OK))
4647                 BIC_PRESENT(BIC_SYS_LPI);
4648         else
4649                 BIC_NOT_PRESENT(BIC_SYS_LPI);
4650
4651         if (!quiet)
4652                 decode_misc_feature_control();
4653
4654         return;
4655 }
4656
4657
/*
 * dir_filter()
 *
 * in /dev/cpu/ return success for names that are numbers
 * ie. filter out ".", "..", "microcode".
 *
 * Returns 1 when the first character of the entry name is a decimal
 * digit, 0 otherwise.  Only the first character is examined.
 */
int dir_filter(const struct dirent *dirp)
{
        /*
         * <ctype.h> functions are only defined for EOF and values
         * representable as unsigned char; plain char may be negative.
         */
        return isdigit((unsigned char)dirp->d_name[0]) ? 1 : 0;
}
4669
/*
 * open_dev_cpu_msr()
 *
 * Placeholder that ignores its argument and always returns 0.
 */
int open_dev_cpu_msr(int dummy1)
{
        (void)dummy1;   /* deliberately unused */

        return 0;
}
4674
4675 void topology_probe()
4676 {
4677         int i;
4678         int max_core_id = 0;
4679         int max_package_id = 0;
4680         int max_siblings = 0;
4681
4682         /* Initialize num_cpus, max_cpu_num */
4683         set_max_cpu_num();
4684         topo.num_cpus = 0;
4685         for_all_proc_cpus(count_cpus);
4686         if (!summary_only && topo.num_cpus > 1)
4687                 BIC_PRESENT(BIC_CPU);
4688
4689         if (debug > 1)
4690                 fprintf(outf, "num_cpus %d max_cpu_num %d\n", topo.num_cpus, topo.max_cpu_num);
4691
4692         cpus = calloc(1, (topo.max_cpu_num  + 1) * sizeof(struct cpu_topology));
4693         if (cpus == NULL)
4694                 err(1, "calloc cpus");
4695
4696         /*
4697          * Allocate and initialize cpu_present_set
4698          */
4699         cpu_present_set = CPU_ALLOC((topo.max_cpu_num + 1));
4700         if (cpu_present_set == NULL)
4701                 err(3, "CPU_ALLOC");
4702         cpu_present_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
4703         CPU_ZERO_S(cpu_present_setsize, cpu_present_set);
4704         for_all_proc_cpus(mark_cpu_present);
4705
4706         /*
4707          * Validate that all cpus in cpu_subset are also in cpu_present_set
4708          */
4709         for (i = 0; i < CPU_SUBSET_MAXCPUS; ++i) {
4710                 if (CPU_ISSET_S(i, cpu_subset_size, cpu_subset))
4711                         if (!CPU_ISSET_S(i, cpu_present_setsize, cpu_present_set))
4712                                 err(1, "cpu%d not present", i);
4713         }
4714
4715         /*
4716          * Allocate and initialize cpu_affinity_set
4717          */
4718         cpu_affinity_set = CPU_ALLOC((topo.max_cpu_num + 1));
4719         if (cpu_affinity_set == NULL)
4720                 err(3, "CPU_ALLOC");
4721         cpu_affinity_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
4722         CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set);
4723
4724         for_all_proc_cpus(init_thread_id);
4725
4726         /*
4727          * For online cpus
4728          * find max_core_id, max_package_id
4729          */
4730         for (i = 0; i <= topo.max_cpu_num; ++i) {
4731                 int siblings;
4732
4733                 if (cpu_is_not_present(i)) {
4734                         if (debug > 1)
4735                                 fprintf(outf, "cpu%d NOT PRESENT\n", i);
4736                         continue;
4737                 }
4738
4739                 cpus[i].logical_cpu_id = i;
4740
4741                 /* get package information */
4742                 cpus[i].physical_package_id = get_physical_package_id(i);
4743                 if (cpus[i].physical_package_id > max_package_id)
4744                         max_package_id = cpus[i].physical_package_id;
4745
4746                 /* get numa node information */
4747                 cpus[i].physical_node_id = get_physical_node_id(&cpus[i]);
4748                 if (cpus[i].physical_node_id > topo.max_node_num)
4749                         topo.max_node_num = cpus[i].physical_node_id;
4750
4751                 /* get core information */
4752                 cpus[i].physical_core_id = get_core_id(i);
4753                 if (cpus[i].physical_core_id > max_core_id)
4754                         max_core_id = cpus[i].physical_core_id;
4755
4756                 /* get thread information */
4757                 siblings = get_thread_siblings(&cpus[i]);
4758                 if (siblings > max_siblings)
4759                         max_siblings = siblings;
4760                 if (cpus[i].thread_id != -1)
4761                         topo.num_cores++;
4762
4763                 if (debug > 1)
4764                         fprintf(outf,
4765                                 "cpu %d pkg %d node %d core %d thread %d\n",
4766                                 i, cpus[i].physical_package_id,
4767                                 cpus[i].physical_node_id,
4768                                 cpus[i].physical_core_id,
4769                                 cpus[i].thread_id);
4770         }
4771
4772         topo.cores_per_node = max_core_id + 1;
4773         if (debug > 1)
4774                 fprintf(outf, "max_core_id %d, sizing for %d cores per package\n",
4775                         max_core_id, topo.cores_per_node);
4776         if (!summary_only && topo.cores_per_node > 1)
4777                 BIC_PRESENT(BIC_Core);
4778
4779         topo.num_packages = max_package_id + 1;
4780         if (debug > 1)
4781                 fprintf(outf, "max_package_id %d, sizing for %d packages\n",
4782                         max_package_id, topo.num_packages);
4783         if (!summary_only && topo.num_packages > 1)
4784                 BIC_PRESENT(BIC_Package);
4785
4786         set_node_data();
4787         if (debug > 1)
4788                 fprintf(outf, "nodes_per_pkg %d\n", topo.nodes_per_pkg);
4789         if (!summary_only && topo.nodes_per_pkg > 1)
4790                 BIC_PRESENT(BIC_Node);
4791
4792         topo.threads_per_core = max_siblings;
4793         if (debug > 1)
4794                 fprintf(outf, "max_siblings %d\n", max_siblings);
4795 }
4796
4797 void
4798 allocate_counters(struct thread_data **t, struct core_data **c,
4799                   struct pkg_data **p)
4800 {
4801         int i;
4802         int num_cores = topo.cores_per_node * topo.nodes_per_pkg *
4803                         topo.num_packages;
4804         int num_threads = topo.threads_per_core * num_cores;
4805
4806         *t = calloc(num_threads, sizeof(struct thread_data));
4807         if (*t == NULL)
4808                 goto error;
4809
4810         for (i = 0; i < num_threads; i++)
4811                 (*t)[i].cpu_id = -1;
4812
4813         *c = calloc(num_cores, sizeof(struct core_data));
4814         if (*c == NULL)
4815                 goto error;
4816
4817         for (i = 0; i < num_cores; i++)
4818                 (*c)[i].core_id = -1;
4819
4820         *p = calloc(topo.num_packages, sizeof(struct pkg_data));
4821         if (*p == NULL)
4822                 goto error;
4823
4824         for (i = 0; i < topo.num_packages; i++)
4825                 (*p)[i].package_id = i;
4826
4827         return;
4828 error:
4829         err(1, "calloc counters");
4830 }
4831 /*
4832  * init_counter()
4833  *
4834  * set FIRST_THREAD_IN_CORE and FIRST_CORE_IN_PACKAGE
4835  */
4836 void init_counter(struct thread_data *thread_base, struct core_data *core_base,
4837         struct pkg_data *pkg_base, int cpu_id)
4838 {
4839         int pkg_id = cpus[cpu_id].physical_package_id;
4840         int node_id = cpus[cpu_id].logical_node_id;
4841         int core_id = cpus[cpu_id].physical_core_id;
4842         int thread_id = cpus[cpu_id].thread_id;
4843         struct thread_data *t;
4844         struct core_data *c;
4845         struct pkg_data *p;
4846
4847         t = GET_THREAD(thread_base, thread_id, core_id, node_id, pkg_id);
4848         c = GET_CORE(core_base, core_id, node_id, pkg_id);
4849         p = GET_PKG(pkg_base, pkg_id);
4850
4851         t->cpu_id = cpu_id;
4852         if (thread_id == 0) {
4853                 t->flags |= CPU_IS_FIRST_THREAD_IN_CORE;
4854                 if (cpu_is_first_core_in_package(cpu_id))
4855                         t->flags |= CPU_IS_FIRST_CORE_IN_PACKAGE;
4856         }
4857
4858         c->core_id = core_id;
4859         p->package_id = pkg_id;
4860 }
4861
4862
4863 int initialize_counters(int cpu_id)
4864 {
4865         init_counter(EVEN_COUNTERS, cpu_id);
4866         init_counter(ODD_COUNTERS, cpu_id);
4867         return 0;
4868 }
4869
4870 void allocate_output_buffer()
4871 {
4872         output_buffer = calloc(1, (1 + topo.num_cpus) * 1024);
4873         outp = output_buffer;
4874         if (outp == NULL)
4875                 err(-1, "calloc output buffer");
4876 }
4877 void allocate_fd_percpu(void)
4878 {
4879         fd_percpu = calloc(topo.max_cpu_num + 1, sizeof(int));
4880         if (fd_percpu == NULL)
4881                 err(-1, "calloc fd_percpu");
4882 }
4883 void allocate_irq_buffers(void)
4884 {
4885         irq_column_2_cpu = calloc(topo.num_cpus, sizeof(int));
4886         if (irq_column_2_cpu == NULL)
4887                 err(-1, "calloc %d", topo.num_cpus);
4888
4889         irqs_per_cpu = calloc(topo.max_cpu_num + 1, sizeof(int));
4890         if (irqs_per_cpu == NULL)
4891                 err(-1, "calloc %d", topo.max_cpu_num + 1);
4892 }
4893 void setup_all_buffers(void)
4894 {
4895         topology_probe();
4896         allocate_irq_buffers();
4897         allocate_fd_percpu();
4898         allocate_counters(&thread_even, &core_even, &package_even);
4899         allocate_counters(&thread_odd, &core_odd, &package_odd);
4900         allocate_output_buffer();
4901         for_all_proc_cpus(initialize_counters);
4902 }
4903
4904 void set_base_cpu(void)
4905 {
4906         base_cpu = sched_getcpu();
4907         if (base_cpu < 0)
4908                 err(-ENODEV, "No valid cpus found");
4909
4910         if (debug > 1)
4911                 fprintf(outf, "base_cpu = %d\n", base_cpu);
4912 }
4913
4914 void turbostat_init()
4915 {
4916         setup_all_buffers();
4917         set_base_cpu();
4918         check_dev_msr();
4919         check_permissions();
4920         process_cpuid();
4921
4922
4923         if (!quiet)
4924                 for_all_cpus(print_hwp, ODD_COUNTERS);
4925
4926         if (!quiet)
4927                 for_all_cpus(print_epb, ODD_COUNTERS);
4928
4929         if (!quiet)
4930                 for_all_cpus(print_perf_limit, ODD_COUNTERS);
4931
4932         if (!quiet)
4933                 for_all_cpus(print_rapl, ODD_COUNTERS);
4934
4935         for_all_cpus(set_temperature_target, ODD_COUNTERS);
4936
4937         if (!quiet)
4938                 for_all_cpus(print_thermal, ODD_COUNTERS);
4939
4940         if (!quiet && do_irtl_snb)
4941                 print_irtl();
4942 }
4943
4944 int fork_it(char **argv)
4945 {
4946         pid_t child_pid;
4947         int status;
4948
4949         snapshot_proc_sysfs_files();
4950         status = for_all_cpus(get_counters, EVEN_COUNTERS);
4951         if (status)
4952                 exit(status);
4953         /* clear affinity side-effect of get_counters() */
4954         sched_setaffinity(0, cpu_present_setsize, cpu_present_set);
4955         gettimeofday(&tv_even, (struct timezone *)NULL);
4956
4957         child_pid = fork();
4958         if (!child_pid) {
4959                 /* child */
4960                 execvp(argv[0], argv);
4961                 err(errno, "exec %s", argv[0]);
4962         } else {
4963
4964                 /* parent */
4965                 if (child_pid == -1)
4966                         err(1, "fork");
4967
4968                 signal(SIGINT, SIG_IGN);
4969                 signal(SIGQUIT, SIG_IGN);
4970                 if (waitpid(child_pid, &status, 0) == -1)
4971                         err(status, "waitpid");
4972         }
4973         /*
4974          * n.b. fork_it() does not check for errors from for_all_cpus()
4975          * because re-starting is problematic when forking
4976          */
4977         snapshot_proc_sysfs_files();
4978         for_all_cpus(get_counters, ODD_COUNTERS);
4979         gettimeofday(&tv_odd, (struct timezone *)NULL);
4980         timersub(&tv_odd, &tv_even, &tv_delta);
4981         if (for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS))
4982                 fprintf(outf, "%s: Counter reset detected\n", progname);
4983         else {
4984                 compute_average(EVEN_COUNTERS);
4985                 format_all_counters(EVEN_COUNTERS);
4986         }
4987
4988         fprintf(outf, "%.6f sec\n", tv_delta.tv_sec + tv_delta.tv_usec/1000000.0);
4989
4990         flush_output_stderr();
4991
4992         return status;
4993 }
4994
4995 int get_and_dump_counters(void)
4996 {
4997         int status;
4998
4999         snapshot_proc_sysfs_files();
5000         status = for_all_cpus(get_counters, ODD_COUNTERS);
5001         if (status)
5002                 return status;
5003
5004         status = for_all_cpus(dump_counters, ODD_COUNTERS);
5005         if (status)
5006                 return status;
5007
5008         flush_output_stdout();
5009
5010         return status;
5011 }
5012
5013 void print_version() {
5014         fprintf(outf, "turbostat version 18.06.01"
5015                 " - Len Brown <lenb@kernel.org>\n");
5016 }
5017
5018 int add_counter(unsigned int msr_num, char *path, char *name,
5019         unsigned int width, enum counter_scope scope,
5020         enum counter_type type, enum counter_format format, int flags)
5021 {
5022         struct msr_counter *msrp;
5023
5024         msrp = calloc(1, sizeof(struct msr_counter));
5025         if (msrp == NULL) {
5026                 perror("calloc");
5027                 exit(1);
5028         }
5029
5030         msrp->msr_num = msr_num;
5031         strncpy(msrp->name, name, NAME_BYTES);
5032         if (path)
5033                 strncpy(msrp->path, path, PATH_BYTES);
5034         msrp->width = width;
5035         msrp->type = type;
5036         msrp->format = format;
5037         msrp->flags = flags;
5038
5039         switch (scope) {
5040
5041         case SCOPE_CPU:
5042                 msrp->next = sys.tp;
5043                 sys.tp = msrp;
5044                 sys.added_thread_counters++;
5045                 if (sys.added_thread_counters > MAX_ADDED_THREAD_COUNTERS) {
5046                         fprintf(stderr, "exceeded max %d added thread counters\n",
5047                                 MAX_ADDED_COUNTERS);
5048                         exit(-1);
5049                 }
5050                 break;
5051
5052         case SCOPE_CORE:
5053                 msrp->next = sys.cp;
5054                 sys.cp = msrp;
5055                 sys.added_core_counters++;
5056                 if (sys.added_core_counters > MAX_ADDED_COUNTERS) {
5057                         fprintf(stderr, "exceeded max %d added core counters\n",
5058                                 MAX_ADDED_COUNTERS);
5059                         exit(-1);
5060                 }
5061                 break;
5062
5063         case SCOPE_PACKAGE:
5064                 msrp->next = sys.pp;
5065                 sys.pp = msrp;
5066                 sys.added_package_counters++;
5067                 if (sys.added_package_counters > MAX_ADDED_COUNTERS) {
5068                         fprintf(stderr, "exceeded max %d added package counters\n",
5069                                 MAX_ADDED_COUNTERS);
5070                         exit(-1);
5071                 }
5072                 break;
5073         }
5074
5075         return 0;
5076 }
5077
5078 void parse_add_command(char *add_command)
5079 {
5080         int msr_num = 0;
5081         char *path = NULL;
5082         char name_buffer[NAME_BYTES] = "";
5083         int width = 64;
5084         int fail = 0;
5085         enum counter_scope scope = SCOPE_CPU;
5086         enum counter_type type = COUNTER_CYCLES;
5087         enum counter_format format = FORMAT_DELTA;
5088
5089         while (add_command) {
5090
5091                 if (sscanf(add_command, "msr0x%x", &msr_num) == 1)
5092                         goto next;
5093
5094                 if (sscanf(add_command, "msr%d", &msr_num) == 1)
5095                         goto next;
5096
5097                 if (*add_command == '/') {
5098                         path = add_command;
5099                         goto next;
5100                 }
5101
5102                 if (sscanf(add_command, "u%d", &width) == 1) {
5103                         if ((width == 32) || (width == 64))
5104                                 goto next;
5105                         width = 64;
5106                 }
5107                 if (!strncmp(add_command, "cpu", strlen("cpu"))) {
5108                         scope = SCOPE_CPU;
5109                         goto next;
5110                 }
5111                 if (!strncmp(add_command, "core", strlen("core"))) {
5112                         scope = SCOPE_CORE;
5113                         goto next;
5114                 }
5115                 if (!strncmp(add_command, "package", strlen("package"))) {
5116                         scope = SCOPE_PACKAGE;
5117                         goto next;
5118                 }
5119                 if (!strncmp(add_command, "cycles", strlen("cycles"))) {
5120                         type = COUNTER_CYCLES;
5121                         goto next;
5122                 }
5123                 if (!strncmp(add_command, "seconds", strlen("seconds"))) {
5124                         type = COUNTER_SECONDS;
5125                         goto next;
5126                 }
5127                 if (!strncmp(add_command, "usec", strlen("usec"))) {
5128                         type = COUNTER_USEC;
5129                         goto next;
5130                 }
5131                 if (!strncmp(add_command, "raw", strlen("raw"))) {
5132                         format = FORMAT_RAW;
5133                         goto next;
5134                 }
5135                 if (!strncmp(add_command, "delta", strlen("delta"))) {
5136                         format = FORMAT_DELTA;
5137                         goto next;
5138                 }
5139                 if (!strncmp(add_command, "percent", strlen("percent"))) {
5140                         format = FORMAT_PERCENT;
5141                         goto next;
5142                 }
5143
5144                 if (sscanf(add_command, "%18s,%*s", name_buffer) == 1) {        /* 18 < NAME_BYTES */
5145                         char *eos;
5146
5147                         eos = strchr(name_buffer, ',');
5148                         if (eos)
5149                                 *eos = '\0';
5150                         goto next;
5151                 }
5152
5153 next:
5154                 add_command = strchr(add_command, ',');
5155                 if (add_command) {
5156                         *add_command = '\0';
5157                         add_command++;
5158                 }
5159
5160         }
5161         if ((msr_num == 0) && (path == NULL)) {
5162                 fprintf(stderr, "--add: (msrDDD | msr0xXXX | /path_to_counter ) required\n");
5163                 fail++;
5164         }
5165
5166         /* generate default column header */
5167         if (*name_buffer == '\0') {
5168                 if (width == 32)
5169                         sprintf(name_buffer, "M0x%x%s", msr_num, format == FORMAT_PERCENT ? "%" : "");
5170                 else
5171                         sprintf(name_buffer, "M0X%x%s", msr_num, format == FORMAT_PERCENT ? "%" : "");
5172         }
5173
5174         if (add_counter(msr_num, path, name_buffer, width, scope, type, format, 0))
5175                 fail++;
5176
5177         if (fail) {
5178                 help();
5179                 exit(1);
5180         }
5181 }
5182
5183 int is_deferred_skip(char *name)
5184 {
5185         int i;
5186
5187         for (i = 0; i < deferred_skip_index; ++i)
5188                 if (!strcmp(name, deferred_skip_names[i]))
5189                         return 1;
5190         return 0;
5191 }
5192
5193 void probe_sysfs(void)
5194 {
5195         char path[64];
5196         char name_buf[16];
5197         FILE *input;
5198         int state;
5199         char *sp;
5200
5201         if (!DO_BIC(BIC_sysfs))
5202                 return;
5203
5204         for (state = 10; state >= 0; --state) {
5205
5206                 sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name",
5207                         base_cpu, state);
5208                 input = fopen(path, "r");
5209                 if (input == NULL)
5210                         continue;
5211                 fgets(name_buf, sizeof(name_buf), input);
5212
5213                  /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */
5214                 sp = strchr(name_buf, '-');
5215                 if (!sp)
5216                         sp = strchrnul(name_buf, '\n');
5217                 *sp = '%';
5218                 *(sp + 1) = '\0';
5219
5220                 fclose(input);
5221
5222                 sprintf(path, "cpuidle/state%d/time", state);
5223
5224                 if (is_deferred_skip(name_buf))
5225                         continue;
5226
5227                 add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_USEC,
5228                                 FORMAT_PERCENT, SYSFS_PERCPU);
5229         }
5230
5231         for (state = 10; state >= 0; --state) {
5232
5233                 sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name",
5234                         base_cpu, state);
5235                 input = fopen(path, "r");
5236                 if (input == NULL)
5237                         continue;
5238                 fgets(name_buf, sizeof(name_buf), input);
5239                  /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */
5240                 sp = strchr(name_buf, '-');
5241                 if (!sp)
5242                         sp = strchrnul(name_buf, '\n');
5243                 *sp = '\0';
5244                 fclose(input);
5245
5246                 sprintf(path, "cpuidle/state%d/usage", state);
5247
5248                 if (is_deferred_skip(name_buf))
5249                         continue;
5250
5251                 add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_ITEMS,
5252                                 FORMAT_DELTA, SYSFS_PERCPU);
5253         }
5254
5255 }
5256
5257
5258 /*
5259  * parse cpuset with following syntax
5260  * 1,2,4..6,8-10 and set bits in cpu_subset
5261  */
5262 void parse_cpu_command(char *optarg)
5263 {
5264         unsigned int start, end;
5265         char *next;
5266
5267         if (!strcmp(optarg, "core")) {
5268                 if (cpu_subset)
5269                         goto error;
5270                 show_core_only++;
5271                 return;
5272         }
5273         if (!strcmp(optarg, "package")) {
5274                 if (cpu_subset)
5275                         goto error;
5276                 show_pkg_only++;
5277                 return;
5278         }
5279         if (show_core_only || show_pkg_only)
5280                 goto error;
5281
5282         cpu_subset = CPU_ALLOC(CPU_SUBSET_MAXCPUS);
5283         if (cpu_subset == NULL)
5284                 err(3, "CPU_ALLOC");
5285         cpu_subset_size = CPU_ALLOC_SIZE(CPU_SUBSET_MAXCPUS);
5286
5287         CPU_ZERO_S(cpu_subset_size, cpu_subset);
5288
5289         next = optarg;
5290
5291         while (next && *next) {
5292
5293                 if (*next == '-')       /* no negative cpu numbers */
5294                         goto error;
5295
5296                 start = strtoul(next, &next, 10);
5297
5298                 if (start >= CPU_SUBSET_MAXCPUS)
5299                         goto error;
5300                 CPU_SET_S(start, cpu_subset_size, cpu_subset);
5301
5302                 if (*next == '\0')
5303                         break;
5304
5305                 if (*next == ',') {
5306                         next += 1;
5307                         continue;
5308                 }
5309
5310                 if (*next == '-') {
5311                         next += 1;      /* start range */
5312                 } else if (*next == '.') {
5313                         next += 1;
5314                         if (*next == '.')
5315                                 next += 1;      /* start range */
5316                         else
5317                                 goto error;
5318                 }
5319
5320                 end = strtoul(next, &next, 10);
5321                 if (end <= start)
5322                         goto error;
5323
5324                 while (++start <= end) {
5325                         if (start >= CPU_SUBSET_MAXCPUS)
5326                                 goto error;
5327                         CPU_SET_S(start, cpu_subset_size, cpu_subset);
5328                 }
5329
5330                 if (*next == ',')
5331                         next += 1;
5332                 else if (*next != '\0')
5333                         goto error;
5334         }
5335
5336         return;
5337
5338 error:
5339         fprintf(stderr, "\"--cpu %s\" malformed\n", optarg);
5340         help();
5341         exit(-1);
5342 }
5343
5344
5345 void cmdline(int argc, char **argv)
5346 {
5347         int opt;
5348         int option_index = 0;
5349         static struct option long_options[] = {
5350                 {"add",         required_argument,      0, 'a'},
5351                 {"cpu",         required_argument,      0, 'c'},
5352                 {"Dump",        no_argument,            0, 'D'},
5353                 {"debug",       no_argument,            0, 'd'},        /* internal, not documented */
5354                 {"enable",      required_argument,      0, 'e'},
5355                 {"interval",    required_argument,      0, 'i'},
5356                 {"num_iterations",      required_argument,      0, 'n'},
5357                 {"help",        no_argument,            0, 'h'},
5358                 {"hide",        required_argument,      0, 'H'},        // meh, -h taken by --help
5359                 {"Joules",      no_argument,            0, 'J'},
5360                 {"list",        no_argument,            0, 'l'},
5361                 {"out",         required_argument,      0, 'o'},
5362                 {"quiet",       no_argument,            0, 'q'},
5363                 {"show",        required_argument,      0, 's'},
5364                 {"Summary",     no_argument,            0, 'S'},
5365                 {"TCC",         required_argument,      0, 'T'},
5366                 {"version",     no_argument,            0, 'v' },
5367                 {0,             0,                      0,  0 }
5368         };
5369
5370         progname = argv[0];
5371
5372         while ((opt = getopt_long_only(argc, argv, "+C:c:Dde:hi:Jn:o:qST:v",
5373                                 long_options, &option_index)) != -1) {
5374                 switch (opt) {
5375                 case 'a':
5376                         parse_add_command(optarg);
5377                         break;
5378                 case 'c':
5379                         parse_cpu_command(optarg);
5380                         break;
5381                 case 'D':
5382                         dump_only++;
5383                         break;
5384                 case 'e':
5385                         /* --enable specified counter */
5386                         bic_enabled |= bic_lookup(optarg, SHOW_LIST);
5387                         break;
5388                 case 'd':
5389                         debug++;
5390                         ENABLE_BIC(BIC_DISABLED_BY_DEFAULT);
5391                         break;
5392                 case 'H':
5393                         /*
5394                          * --hide: do not show those specified
5395                          *  multiple invocations simply clear more bits in enabled mask
5396                          */
5397                         bic_enabled &= ~bic_lookup(optarg, HIDE_LIST);
5398                         break;
5399                 case 'h':
5400                 default:
5401                         help();
5402                         exit(1);
5403                 case 'i':
5404                         {
5405                                 double interval = strtod(optarg, NULL);
5406
5407                                 if (interval < 0.001) {
5408                                         fprintf(outf, "interval %f seconds is too small\n",
5409                                                 interval);
5410                                         exit(2);
5411                                 }
5412
5413                                 interval_tv.tv_sec = interval_ts.tv_sec = interval;
5414                                 interval_tv.tv_usec = (interval - interval_tv.tv_sec) * 1000000;
5415                                 interval_ts.tv_nsec = (interval - interval_ts.tv_sec) * 1000000000;
5416                         }
5417                         break;
5418                 case 'J':
5419                         rapl_joules++;
5420                         break;
5421                 case 'l':
5422                         ENABLE_BIC(BIC_DISABLED_BY_DEFAULT);
5423                         list_header_only++;
5424                         quiet++;
5425                         break;
5426                 case 'o':
5427                         outf = fopen_or_die(optarg, "w");
5428                         break;
5429                 case 'q':
5430                         quiet = 1;
5431                         break;
5432                 case 'n':
5433                         num_iterations = strtod(optarg, NULL);
5434
5435                         if (num_iterations <= 0) {
5436                                 fprintf(outf, "iterations %d should be positive number\n",
5437                                         num_iterations);
5438                                 exit(2);
5439                         }
5440                         break;
5441                 case 's':
5442                         /*
5443                          * --show: show only those specified
5444                          *  The 1st invocation will clear and replace the enabled mask
5445                          *  subsequent invocations can add to it.
5446                          */
5447                         if (shown == 0)
5448                                 bic_enabled = bic_lookup(optarg, SHOW_LIST);
5449                         else
5450                                 bic_enabled |= bic_lookup(optarg, SHOW_LIST);
5451                         shown = 1;
5452                         break;
5453                 case 'S':
5454                         summary_only++;
5455                         break;
5456                 case 'T':
5457                         tcc_activation_temp_override = atoi(optarg);
5458                         break;
5459                 case 'v':
5460                         print_version();
5461                         exit(0);
5462                         break;
5463                 }
5464         }
5465 }
5466
5467 int main(int argc, char **argv)
5468 {
5469         outf = stderr;
5470
5471         cmdline(argc, argv);
5472
5473         if (!quiet)
5474                 print_version();
5475
5476         probe_sysfs();
5477
5478         turbostat_init();
5479
5480         /* dump counters and exit */
5481         if (dump_only)
5482                 return get_and_dump_counters();
5483
5484         /* list header and exit */
5485         if (list_header_only) {
5486                 print_header(",");
5487                 flush_output_stdout();
5488                 return 0;
5489         }
5490
5491         /*
5492          * if any params left, it must be a command to fork
5493          */
5494         if (argc - optind)
5495                 return fork_it(argv + optind);
5496         else
5497                 turbostat_loop();
5498
5499         return 0;
5500 }