tools/power turbostat: add TCC Offset support
[linux-2.6-microblaze.git] / tools / power / x86 / turbostat / turbostat.c
index a7c4f07..6326bee 100644 (file)
 #include <sys/capability.h>
 #include <errno.h>
 #include <math.h>
+#include <linux/perf_event.h>
+#include <asm/unistd.h>
+#include <stdbool.h>
 
 char *proc_stat = "/proc/stat";
 FILE *outf;
 int *fd_percpu;
+int *fd_instr_count_percpu;
 struct timeval interval_tv = {5, 0};
 struct timespec interval_ts = {5, 0};
+
+/* Save original CPU model */
+unsigned int model_orig;
+
 unsigned int num_iterations;
 unsigned int debug;
 unsigned int quiet;
@@ -75,6 +83,7 @@ char *output_buffer, *outp;
 unsigned int do_rapl;
 unsigned int do_dts;
 unsigned int do_ptm;
+unsigned int do_ipc;
 unsigned long long  gfx_cur_rc6_ms;
 unsigned long long cpuidle_cur_cpu_lpi_us;
 unsigned long long cpuidle_cur_sys_lpi_us;
@@ -82,11 +91,13 @@ unsigned int gfx_cur_mhz;
 unsigned int gfx_act_mhz;
 unsigned int tcc_activation_temp;
 unsigned int tcc_activation_temp_override;
+int tcc_offset_bits;
 double rapl_power_units, rapl_time_units;
 double rapl_dram_energy_units, rapl_energy_units;
 double rapl_joule_counter_range;
 unsigned int do_core_perf_limit_reasons;
 unsigned int has_automatic_cstate_conversion;
+unsigned int dis_cstate_prewake;
 unsigned int do_gfx_perf_limit_reasons;
 unsigned int do_ring_perf_limit_reasons;
 unsigned int crystal_hz;
@@ -173,12 +184,14 @@ struct thread_data {
        unsigned long long aperf;
        unsigned long long mperf;
        unsigned long long c1;
+       unsigned long long instr_count;
        unsigned long long  irq_count;
        unsigned int smi_count;
        unsigned int cpu_id;
        unsigned int apic_id;
        unsigned int x2apic_id;
        unsigned int flags;
+       bool is_atom;
 #define CPU_IS_FIRST_THREAD_IN_CORE    0x2
 #define CPU_IS_FIRST_CORE_IN_PACKAGE   0x4
        unsigned long long counter[MAX_ADDED_THREAD_COUNTERS];
@@ -291,13 +304,16 @@ struct msr_sum_array {
 /* The percpu MSR sum array.*/
 struct msr_sum_array *per_cpu_msr_sum;
 
-int idx_to_offset(int idx)
+off_t idx_to_offset(int idx)
 {
-       int offset;
+       off_t offset;
 
        switch (idx) {
        case IDX_PKG_ENERGY:
-               offset = MSR_PKG_ENERGY_STATUS;
+               if (do_rapl & RAPL_AMD_F17H)
+                       offset = MSR_PKG_ENERGY_STAT;
+               else
+                       offset = MSR_PKG_ENERGY_STATUS;
                break;
        case IDX_DRAM_ENERGY:
                offset = MSR_DRAM_ENERGY_STATUS;
@@ -320,12 +336,13 @@ int idx_to_offset(int idx)
        return offset;
 }
 
-int offset_to_idx(int offset)
+int offset_to_idx(off_t offset)
 {
        int idx;
 
        switch (offset) {
        case MSR_PKG_ENERGY_STATUS:
+       case MSR_PKG_ENERGY_STAT:
                idx = IDX_PKG_ENERGY;
                break;
        case MSR_DRAM_ENERGY_STATUS:
@@ -353,7 +370,7 @@ int idx_valid(int idx)
 {
        switch (idx) {
        case IDX_PKG_ENERGY:
-               return do_rapl & RAPL_PKG;
+               return do_rapl & (RAPL_PKG | RAPL_AMD_F17H);
        case IDX_DRAM_ENERGY:
                return do_rapl & RAPL_DRAM;
        case IDX_PP0_ENERGY:
@@ -490,6 +507,39 @@ int get_msr_fd(int cpu)
        return fd;
 }
 
+static long perf_event_open(struct perf_event_attr *hw_event, pid_t pid, int cpu, int group_fd, unsigned long flags)
+{
+       return syscall(__NR_perf_event_open, hw_event, pid, cpu, group_fd, flags);
+}
+
+static int perf_instr_count_open(int cpu_num)
+{
+       struct perf_event_attr pea;
+       int fd;
+
+       memset(&pea, 0, sizeof(struct perf_event_attr));
+       pea.type = PERF_TYPE_HARDWARE;
+       pea.size = sizeof(struct perf_event_attr);
+       pea.config = PERF_COUNT_HW_INSTRUCTIONS;
+
+       /* counter for cpu_num, including user + kernel and all processes */
+       fd = perf_event_open(&pea, -1, cpu_num, -1, 0);
+       if (fd == -1) 
+               err(-1, "cpu%d: perf instruction counter\n", cpu_num);
+
+       return fd;
+}
+
+int get_instr_count_fd(int cpu)
+{
+       if (fd_instr_count_percpu[cpu])
+               return fd_instr_count_percpu[cpu];
+
+       fd_instr_count_percpu[cpu] = perf_instr_count_open(cpu);
+
+       return fd_instr_count_percpu[cpu];
+}
+
 int get_msr(int cpu, off_t offset, unsigned long long *msr)
 {
        ssize_t retval;
@@ -561,6 +611,7 @@ struct msr_counter bic[] = {
        { 0x0, "X2APIC" },
        { 0x0, "Die" },
        { 0x0, "GFXAMHz" },
+       { 0x0, "IPC" },
 };
 
 #define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter))
@@ -616,6 +667,7 @@ struct msr_counter bic[] = {
 #define        BIC_X2APIC      (1ULL << 49)
 #define        BIC_Die         (1ULL << 50)
 #define        BIC_GFXACTMHz   (1ULL << 51)
+#define        BIC_IPC         (1ULL << 52)
 
 #define BIC_DISABLED_BY_DEFAULT        (BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC)
 
@@ -627,6 +679,7 @@ unsigned long long bic_present = BIC_USEC | BIC_TOD | BIC_sysfs | BIC_APIC | BIC
 #define ENABLE_BIC(COUNTER_NAME) (bic_enabled |= COUNTER_NAME)
 #define BIC_PRESENT(COUNTER_BIT) (bic_present |= COUNTER_BIT)
 #define BIC_NOT_PRESENT(COUNTER_BIT) (bic_present &= ~COUNTER_BIT)
+#define BIC_IS_ENABLED(COUNTER_BIT) (bic_enabled & COUNTER_BIT)
 
 
 #define MAX_DEFERRED 16
@@ -764,6 +817,9 @@ void print_header(char *delim)
        if (DO_BIC(BIC_TSC_MHz))
                outp += sprintf(outp, "%sTSC_MHz", (printed++ ? delim : ""));
 
+       if (DO_BIC(BIC_IPC))
+               outp += sprintf(outp, "%sIPC", (printed++ ? delim : ""));
+
        if (DO_BIC(BIC_IRQ)) {
                if (sums_need_wide_columns)
                        outp += sprintf(outp, "%s     IRQ", (printed++ ? delim : ""));
@@ -926,6 +982,9 @@ int dump_counters(struct thread_data *t, struct core_data *c,
                outp += sprintf(outp, "mperf: %016llX\n", t->mperf);
                outp += sprintf(outp, "c1: %016llX\n", t->c1);
 
+               if (DO_BIC(BIC_IPC))
+                       outp += sprintf(outp, "IPC: %lld\n", t->instr_count);
+
                if (DO_BIC(BIC_IRQ))
                        outp += sprintf(outp, "IRQ: %lld\n", t->irq_count);
                if (DO_BIC(BIC_SMI))
@@ -1105,6 +1164,9 @@ int format_counters(struct thread_data *t, struct core_data *c,
        if (DO_BIC(BIC_TSC_MHz))
                outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), 1.0 * t->tsc/units/interval_float);
 
+       if (DO_BIC(BIC_IPC))
+               outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 1.0 * t->instr_count / t->aperf);
+
        /* IRQ */
        if (DO_BIC(BIC_IRQ)) {
                if (sums_need_wide_columns)
@@ -1482,6 +1544,9 @@ delta_thread(struct thread_data *new, struct thread_data *old,
                old->mperf = 1; /* divide by 0 protection */
        }
 
+       if (DO_BIC(BIC_IPC))
+               old->instr_count = new->instr_count - old->instr_count;
+
        if (DO_BIC(BIC_IRQ))
                old->irq_count = new->irq_count - old->irq_count;
 
@@ -1536,6 +1601,8 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data
        t->mperf = 0;
        t->c1 = 0;
 
+       t->instr_count = 0;
+
        t->irq_count = 0;
        t->smi_count = 0;
 
@@ -1611,6 +1678,8 @@ int sum_counters(struct thread_data *t, struct core_data *c,
        average.threads.mperf += t->mperf;
        average.threads.c1 += t->c1;
 
+       average.threads.instr_count += t->instr_count;
+
        average.threads.irq_count += t->irq_count;
        average.threads.smi_count += t->smi_count;
 
@@ -1707,6 +1776,7 @@ void compute_average(struct thread_data *t, struct core_data *c,
        average.threads.tsc /= topo.num_cpus;
        average.threads.aperf /= topo.num_cpus;
        average.threads.mperf /= topo.num_cpus;
+       average.threads.instr_count /= topo.num_cpus;
        average.threads.c1 /= topo.num_cpus;
 
        if (average.threads.irq_count > 9999999)
@@ -1989,6 +2059,10 @@ retry:
                t->mperf = t->mperf * aperf_mperf_multiplier;
        }
 
+       if (DO_BIC(BIC_IPC))
+               if (read(get_instr_count_fd(cpu), &t->instr_count, sizeof(long long)) != sizeof(long long))
+                       return -4;
+
        if (DO_BIC(BIC_IRQ))
                t->irq_count = irqs_per_cpu[cpu];
        if (DO_BIC(BIC_SMI)) {
@@ -2023,9 +2097,19 @@ retry:
                        return -7;
        }
 
-       if (DO_BIC(BIC_CPU_c7) || soft_c1_residency_display(BIC_CPU_c7))
+       if (DO_BIC(BIC_CPU_c7) || soft_c1_residency_display(BIC_CPU_c7)) {
                if (get_msr(cpu, MSR_CORE_C7_RESIDENCY, &c->c7))
                        return -8;
+               else if (t->is_atom) {
+                       /*
+                        * For Atom CPUs that has core cstate deeper than c6,
+                        * MSR_CORE_C6_RESIDENCY returns residency of cc6 and deeper.
+                        * Minus CC7 (and deeper cstates) residency to get
+                        * accturate cc6 residency.
+                        */
+                       c->c6 -= c->c7;
+               }
+       }
 
        if (DO_BIC(BIC_Mod_c6))
                if (get_msr(cpu, MSR_MODULE_C6_RES_MS, &c->mc6_us))
@@ -2197,7 +2281,7 @@ int amt_pkg_cstate_limits[16] = {PCLUNL, PCL__1, PCL__2, PCLRSV, PCLRSV, PCLRSV,
 int phi_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
 int glm_pkg_cstate_limits[16] = {PCLUNL, PCL__1, PCL__3, PCL__6, PCL__7, PCL_7S, PCL__8, PCL__9, PCL_10, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
 int skx_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
-
+int icx_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL__6, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
 
 static void
 calculate_tsc_tweak()
@@ -2205,6 +2289,8 @@ calculate_tsc_tweak()
        tsc_tweak = base_hz / tsc_hz;
 }
 
+void prewake_cstate_probe(unsigned int family, unsigned int model);
+
 static void
 dump_nhm_platform_info(void)
 {
@@ -2227,6 +2313,11 @@ dump_nhm_platform_info(void)
        fprintf(outf, "cpu%d: MSR_IA32_POWER_CTL: 0x%08llx (C1E auto-promotion: %sabled)\n",
                base_cpu, msr, msr & 0x2 ? "EN" : "DIS");
 
+       /* C-state Pre-wake Disable (CSTATE_PREWAKE_DISABLE) */
+       if (dis_cstate_prewake)
+               fprintf(outf, "C-state Pre-wake: %sabled\n",
+                       msr & 0x40000000 ? "DIS" : "EN");
+
        return;
 }
 
@@ -2312,6 +2403,7 @@ int has_turbo_ratio_group_limits(int family, int model)
        switch (model) {
        case INTEL_FAM6_ATOM_GOLDMONT:
        case INTEL_FAM6_SKYLAKE_X:
+       case INTEL_FAM6_ICELAKE_X:
        case INTEL_FAM6_ATOM_GOLDMONT_D:
        case INTEL_FAM6_ATOM_TREMONT_D:
                return 1;
@@ -2449,7 +2541,7 @@ dump_knl_turbo_ratio_limits(void)
        fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n",
                base_cpu, msr);
 
-       /**
+       /*
         * Turbo encoding in KNL is as follows:
         * [0] -- Reserved
         * [7:1] -- Base value of number of active cores of bucket 1.
@@ -3272,7 +3364,7 @@ static int update_msr_sum(struct thread_data *t, struct core_data *c, struct pkg
 
        for (i = IDX_PKG_ENERGY; i < IDX_COUNT; i++) {
                unsigned long long msr_cur, msr_last;
-               int offset;
+               off_t offset;
 
                if (!idx_valid(i))
                        continue;
@@ -3281,7 +3373,8 @@ static int update_msr_sum(struct thread_data *t, struct core_data *c, struct pkg
                        continue;
                ret = get_msr(cpu, offset, &msr_cur);
                if (ret) {
-                       fprintf(outf, "Can not update msr(0x%x)\n", offset);
+                       fprintf(outf, "Can not update msr(0x%llx)\n",
+                               (unsigned long long)offset);
                        continue;
                }
 
@@ -3551,6 +3644,10 @@ int probe_nhm_msrs(unsigned int family, unsigned int model)
                pkg_cstate_limits = skx_pkg_cstate_limits;
                has_misc_feature_control = 1;
                break;
+       case INTEL_FAM6_ICELAKE_X:      /* ICX */
+               pkg_cstate_limits = icx_pkg_cstate_limits;
+               has_misc_feature_control = 1;
+               break;
        case INTEL_FAM6_ATOM_SILVERMONT:        /* BYT */
                no_MSR_MISC_PWR_MGMT = 1;
        case INTEL_FAM6_ATOM_SILVERMONT_D:      /* AVN */
@@ -3639,6 +3736,20 @@ int is_skx(unsigned int family, unsigned int model)
        }
        return 0;
 }
+
+int is_icx(unsigned int family, unsigned int model)
+{
+
+       if (!genuine_intel)
+               return 0;
+
+       switch (model) {
+       case INTEL_FAM6_ICELAKE_X:
+               return 1;
+       }
+       return 0;
+}
+
 int is_ehl(unsigned int family, unsigned int model)
 {
        if (!genuine_intel)
@@ -3741,6 +3852,7 @@ int has_glm_turbo_ratio_limit(unsigned int family, unsigned int model)
        switch (model) {
        case INTEL_FAM6_ATOM_GOLDMONT:
        case INTEL_FAM6_SKYLAKE_X:
+       case INTEL_FAM6_ICELAKE_X:
                return 1;
        default:
                return 0;
@@ -3766,6 +3878,7 @@ int has_config_tdp(unsigned int family, unsigned int model)
        case INTEL_FAM6_SKYLAKE_L:      /* SKL */
        case INTEL_FAM6_CANNONLAKE_L:   /* CNL */
        case INTEL_FAM6_SKYLAKE_X:      /* SKX */
+       case INTEL_FAM6_ICELAKE_X:      /* ICX */
 
        case INTEL_FAM6_XEON_PHI_KNL:   /* Knights Landing */
                return 1;
@@ -3774,6 +3887,40 @@ int has_config_tdp(unsigned int family, unsigned int model)
        }
 }
 
+/*
+ * tcc_offset_bits:
+ * 0: Tcc Offset not supported (Default)
+ * 6: Bit 29:24 of MSR_PLATFORM_INFO
+ * 4: Bit 27:24 of MSR_PLATFORM_INFO
+ */
+void check_tcc_offset(int model)
+{
+       unsigned long long msr;
+
+       if (!genuine_intel)
+               return;
+
+       switch (model) {
+       case INTEL_FAM6_SKYLAKE_L:
+       case INTEL_FAM6_SKYLAKE:
+       case INTEL_FAM6_KABYLAKE_L:
+       case INTEL_FAM6_KABYLAKE:
+       case INTEL_FAM6_ICELAKE_L:
+       case INTEL_FAM6_ICELAKE:
+       case INTEL_FAM6_TIGERLAKE_L:
+       case INTEL_FAM6_TIGERLAKE:
+       case INTEL_FAM6_COMETLAKE:
+               if (!get_msr(base_cpu, MSR_PLATFORM_INFO, &msr)) {
+                       msr = (msr >> 30) & 1;
+                       if (msr)
+                               tcc_offset_bits = 6;
+               }
+               return;
+       default:
+               return;
+       }
+}
+
 static void
 remove_underbar(char *s)
 {
@@ -4210,6 +4357,7 @@ rapl_dram_energy_units_probe(int  model, double rapl_energy_units)
        switch (model) {
        case INTEL_FAM6_HASWELL_X:      /* HSX */
        case INTEL_FAM6_BROADWELL_X:    /* BDX */
+       case INTEL_FAM6_SKYLAKE_X:      /* SKX */
        case INTEL_FAM6_XEON_PHI_KNL:   /* KNL */
                return (rapl_dram_energy_units = 15.3 / 1000000);
        default:
@@ -4295,6 +4443,7 @@ void rapl_probe_intel(unsigned int family, unsigned int model)
        case INTEL_FAM6_HASWELL_X:      /* HSX */
        case INTEL_FAM6_BROADWELL_X:    /* BDX */
        case INTEL_FAM6_SKYLAKE_X:      /* SKX */
+       case INTEL_FAM6_ICELAKE_X:      /* ICX */
        case INTEL_FAM6_XEON_PHI_KNL:   /* KNL */
                do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO;
                BIC_PRESENT(BIC_PKG__);
@@ -4451,10 +4600,17 @@ void perf_limit_reasons_probe(unsigned int family, unsigned int model)
 
 void automatic_cstate_conversion_probe(unsigned int family, unsigned int model)
 {
-       if (is_skx(family, model) || is_bdx(family, model))
+       if (is_skx(family, model) || is_bdx(family, model) ||
+           is_icx(family, model))
                has_automatic_cstate_conversion = 1;
 }
 
+void prewake_cstate_probe(unsigned int family, unsigned int model)
+{
+       if (is_icx(family, model))
+               dis_cstate_prewake = 1;
+}
+
 int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p)
 {
        unsigned long long msr;
@@ -4666,6 +4822,7 @@ int has_snb_msrs(unsigned int family, unsigned int model)
        case INTEL_FAM6_SKYLAKE_L:              /* SKL */
        case INTEL_FAM6_CANNONLAKE_L:           /* CNL */
        case INTEL_FAM6_SKYLAKE_X:              /* SKX */
+       case INTEL_FAM6_ICELAKE_X:              /* ICX */
        case INTEL_FAM6_ATOM_GOLDMONT:          /* BXT */
        case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
        case INTEL_FAM6_ATOM_GOLDMONT_D:        /* DNV */
@@ -4805,6 +4962,28 @@ double discover_bclk(unsigned int family, unsigned int model)
                return 133.33;
 }
 
+int get_cpu_type(struct thread_data *t, struct core_data *c, struct pkg_data *p)
+{
+       unsigned int eax, ebx, ecx, edx;
+
+       if (!genuine_intel)
+               return 0;
+
+       if (cpu_migrate(t->cpu_id)) {
+               fprintf(outf, "Could not migrate to CPU %d\n", t->cpu_id);
+               return -1;
+       }
+
+       if (max_level < 0x1a)
+               return 0;
+
+       __cpuid(0x1a, eax, ebx, ecx, edx);
+       eax = (eax >> 24) & 0xFF;
+       if (eax == 0x20 )
+               t->is_atom = true;
+       return 0;
+}
+
 /*
  * MSR_IA32_TEMPERATURE_TARGET indicates the temperature where
  * the Thermal Control Circuit (TCC) activates.
@@ -4817,33 +4996,12 @@ double discover_bclk(unsigned int family, unsigned int model)
  * below this value, including the Digital Thermal Sensor (DTS),
  * Package Thermal Management Sensor (PTM), and thermal event thresholds.
  */
-int read_tcc_activation_temp()
+int set_temperature_target(struct thread_data *t, struct core_data *c, struct pkg_data *p)
 {
        unsigned long long msr;
-       unsigned int tcc, target_c, offset_c;
-
-       /* Temperature Target MSR is Nehalem and newer only */
-       if (!do_nhm_platform_info)
-               return 0;
-
-       if (get_msr(base_cpu, MSR_IA32_TEMPERATURE_TARGET, &msr))
-               return 0;
-
-       target_c = (msr >> 16) & 0xFF;
-
-       offset_c = (msr >> 24) & 0xF;
-
-       tcc = target_c - offset_c;
-
-       if (!quiet)
-               fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C) (%d default - %d offset)\n",
-                       base_cpu, msr, tcc, target_c, offset_c);
-
-       return tcc;
-}
+       unsigned int target_c_local, tcc_offset;
+       int cpu;
 
-int set_temperature_target(struct thread_data *t, struct core_data *c, struct pkg_data *p)
-{
        /* tcc_activation_temp is used only for dts or ptm */
        if (!(do_dts || do_ptm))
                return 0;
@@ -4852,18 +5010,58 @@ int set_temperature_target(struct thread_data *t, struct core_data *c, struct pk
        if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
                return 0;
 
+       cpu = t->cpu_id;
+       if (cpu_migrate(cpu)) {
+               fprintf(outf, "Could not migrate to CPU %d\n", cpu);
+               return -1;
+       }
+
        if (tcc_activation_temp_override != 0) {
                tcc_activation_temp = tcc_activation_temp_override;
-               fprintf(outf, "Using cmdline TCC Target (%d C)\n", tcc_activation_temp);
+               fprintf(outf, "cpu%d: Using cmdline TCC Target (%d C)\n",
+                       cpu, tcc_activation_temp);
                return 0;
        }
 
-       tcc_activation_temp = read_tcc_activation_temp();
-       if (tcc_activation_temp)
-               return 0;
+       /* Temperature Target MSR is Nehalem and newer only */
+       if (!do_nhm_platform_info)
+               goto guess;
+
+       if (get_msr(base_cpu, MSR_IA32_TEMPERATURE_TARGET, &msr))
+               goto guess;
+
+       target_c_local = (msr >> 16) & 0xFF;
+
+       if (!quiet) {
+               switch (tcc_offset_bits) {
+               case 4:
+                       tcc_offset = (msr >> 24) & 0xF;
+                       fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C) (%d default - %d offset)\n",
+                       cpu, msr, target_c_local - tcc_offset, target_c_local, tcc_offset);
+                       break;
+               case 6:
+                       tcc_offset = (msr >> 24) & 0x3F;
+                       fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C) (%d default - %d offset)\n",
+                       cpu, msr, target_c_local - tcc_offset, target_c_local, tcc_offset);
+                       break;
+               default:
+                       fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C)\n",
+                       cpu, msr, target_c_local);
+                       break;
+               }
+       }
+
+       if (!target_c_local)
+               goto guess;
+
+       tcc_activation_temp = target_c_local;
 
+       return 0;
+
+guess:
        tcc_activation_temp = TJMAX_DEFAULT;
-       fprintf(outf, "Guessing tjMax %d C, Please use -T to specify\n", tcc_activation_temp);
+       fprintf(outf, "cpu%d: Guessing tjMax %d C, Please use -T to specify\n",
+               cpu, tcc_activation_temp);
 
        return 0;
 }
@@ -4994,14 +5192,15 @@ unsigned int intel_model_duplicates(unsigned int model)
        case INTEL_FAM6_ROCKETLAKE:
        case INTEL_FAM6_LAKEFIELD:
        case INTEL_FAM6_ALDERLAKE:
+       case INTEL_FAM6_ALDERLAKE_L:
                return INTEL_FAM6_CANNONLAKE_L;
 
        case INTEL_FAM6_ATOM_TREMONT_L:
                return INTEL_FAM6_ATOM_TREMONT;
 
-       case INTEL_FAM6_ICELAKE_X:
+       case INTEL_FAM6_ICELAKE_D:
        case INTEL_FAM6_SAPPHIRERAPIDS_X:
-               return INTEL_FAM6_SKYLAKE_X;
+               return INTEL_FAM6_ICELAKE_X;
        }
        return model;
 }
@@ -5031,11 +5230,32 @@ void print_dev_latency(void)
        close(fd);
 }
 
+
+/*
+ * Linux-perf manages the the HW instructions-retired counter
+ * by enabling when requested, and hiding rollover
+ */
+void linux_perf_init(void)
+{
+       if (!BIC_IS_ENABLED(BIC_IPC))
+               return;
+
+       if (access("/proc/sys/kernel/perf_event_paranoid", F_OK))
+               return;
+
+       fd_instr_count_percpu = calloc(topo.max_cpu_num + 1, sizeof(int));
+       if (fd_instr_count_percpu == NULL)
+               err(-1, "calloc fd_instr_count_percpu");
+
+       BIC_PRESENT(BIC_IPC);
+}
+
 void process_cpuid()
 {
        unsigned int eax, ebx, ecx, edx;
        unsigned int fms, family, model, stepping, ecx_flags, edx_flags;
        unsigned int has_turbo;
+       unsigned long long ucode_patch = 0;
 
        eax = ebx = ecx = edx = 0;
 
@@ -5049,8 +5269,8 @@ void process_cpuid()
                hygon_genuine = 1;
 
        if (!quiet)
-               fprintf(outf, "CPUID(0): %.4s%.4s%.4s ",
-                       (char *)&ebx, (char *)&edx, (char *)&ecx);
+               fprintf(outf, "CPUID(0): %.4s%.4s%.4s 0x%x CPUID levels\n",
+                       (char *)&ebx, (char *)&edx, (char *)&ecx, max_level);
 
        __cpuid(1, fms, ebx, ecx, edx);
        family = (fms >> 8) & 0xf;
@@ -5063,6 +5283,9 @@ void process_cpuid()
        ecx_flags = ecx;
        edx_flags = edx;
 
+       if (get_msr(sched_getcpu(), MSR_IA32_UCODE_REV, &ucode_patch))
+               warnx("get_msr(UCODE)\n");
+
        /*
         * check max extended function levels of CPUID.
         * This is needed to check for invariant TSC.
@@ -5072,8 +5295,9 @@ void process_cpuid()
        __cpuid(0x80000000, max_extended_level, ebx, ecx, edx);
 
        if (!quiet) {
-               fprintf(outf, "0x%x CPUID levels; 0x%x xlevels; family:model:stepping 0x%x:%x:%x (%d:%d:%d)\n",
-                       max_level, max_extended_level, family, model, stepping, family, model, stepping);
+               fprintf(outf, "CPUID(1): family:model:stepping 0x%x:%x:%x (%d:%d:%d) microcode 0x%x\n",
+                       family, model, stepping, family, model, stepping, (unsigned int)((ucode_patch >> 32) & 0xFFFFFFFF));
+               fprintf(outf, "CPUID(0x80000000): max_extended_levels: 0x%x\n", max_extended_level);
                fprintf(outf, "CPUID(1): %s %s %s %s %s %s %s %s %s %s\n",
                        ecx_flags & (1 << 0) ? "SSE3" : "-",
                        ecx_flags & (1 << 3) ? "MONITOR" : "-",
@@ -5086,8 +5310,10 @@ void process_cpuid()
                        edx_flags & (1 << 28) ? "HT" : "-",
                        edx_flags & (1 << 29) ? "TM" : "-");
        }
-       if (genuine_intel)
+       if (genuine_intel) {
+               model_orig = model;
                model = intel_model_duplicates(model);
+       }
 
        if (!(edx_flags & (1 << 5)))
                errx(1, "CPUID: no MSR");
@@ -5265,7 +5491,7 @@ void process_cpuid()
                BIC_NOT_PRESENT(BIC_Pkgpc7);
                use_c1_residency_msr = 1;
        }
-       if (is_skx(family, model)) {
+       if (is_skx(family, model) || is_icx(family, model)) {
                BIC_NOT_PRESENT(BIC_CPU_c3);
                BIC_NOT_PRESENT(BIC_Pkgpc3);
                BIC_NOT_PRESENT(BIC_CPU_c7);
@@ -5307,6 +5533,8 @@ void process_cpuid()
        perf_limit_reasons_probe(family, model);
        automatic_cstate_conversion_probe(family, model);
 
+       check_tcc_offset(model_orig);
+
        if (!quiet)
                dump_cstate_pstate_config_info(family, model);
 
@@ -5317,7 +5545,7 @@ void process_cpuid()
        if (!quiet)
                dump_sysfs_pstate_config();
 
-       if (has_skl_msrs(family, model))
+       if (has_skl_msrs(family, model) || is_ehl(family, model))
                calculate_tsc_tweak();
 
        if (!access("/sys/class/drm/card0/power/rc6_residency_ms", R_OK))
@@ -5642,6 +5870,7 @@ void turbostat_init()
        check_dev_msr();
        check_permissions();
        process_cpuid();
+       linux_perf_init();
 
 
        if (!quiet)
@@ -5658,6 +5887,9 @@ void turbostat_init()
 
        for_all_cpus(set_temperature_target, ODD_COUNTERS);
 
+       for_all_cpus(get_cpu_type, ODD_COUNTERS);
+       for_all_cpus(get_cpu_type, EVEN_COUNTERS);
+
        if (!quiet)
                for_all_cpus(print_thermal, ODD_COUNTERS);
 
@@ -5739,7 +5971,7 @@ int get_and_dump_counters(void)
 }
 
 void print_version() {
-       fprintf(outf, "turbostat version 20.09.30"
+       fprintf(outf, "turbostat version 21.03.12"
                " - Len Brown <lenb@kernel.org>\n");
 }
 
@@ -6087,6 +6319,7 @@ void cmdline(int argc, char **argv)
                {"debug",       no_argument,            0, 'd'},        /* internal, not documented */
                {"enable",      required_argument,      0, 'e'},
                {"interval",    required_argument,      0, 'i'},
+               {"IPC", no_argument,                    0, 'I'},
                {"num_iterations",      required_argument,      0, 'n'},
                {"help",        no_argument,            0, 'h'},
                {"hide",        required_argument,      0, 'H'},        // meh, -h taken by --help