perf stat: Support new per thread TopDown metrics

author Andi Kleen <ak@linux.intel.com>

Fri, 11 Sep 2020 14:48:07 +0000 (07:48 -0700)

committer Arnaldo Carvalho de Melo <acme@redhat.com>

Thu, 17 Sep 2020 18:48:08 +0000 (15:48 -0300)
author Andi Kleen <ak@linux.intel.com>
Fri, 11 Sep 2020 14:48:07 +0000 (07:48 -0700)
committer Arnaldo Carvalho de Melo <acme@redhat.com>
Thu, 17 Sep 2020 18:48:08 +0000 (15:48 -0300)
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt

index 7d18694..5bf3d7a 100644 (file)
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -363,6 +363,11 @@ if the workload is actually bound by the CPU and not by something else.
  For best results it is usually a good idea to use it with interval
  mode like -I 1000, as the bottleneck of workloads can change often.
  
+This enables --metric-only, unless overridden with --no-metric-only.
+
+On newer CPUs (IceLake and later) TopDown can be collected for any thread.
+The following restrictions only apply to older Intel CPUs and Atom:
+
  The top down metrics are collected per core instead of per
  CPU thread. Per core mode is automatically enabled
  and -a (global monitoring) is needed, requiring root rights or
@@ -374,8 +379,6 @@ echo 0 > /proc/sys/kernel/nmi_watchdog
  for best results. Otherwise the bottlenecks may be inconsistent
  on workload with changing phases.
  
-This enables --metric-only, unless overridden with --no-metric-only.
-
  To interpret the results it is usually needed to know on which
  CPUs the workload runs on. If needed the CPUs can be forced using
  taskset.
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c

index a39bb28..17fbbd0 100644 (file)
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -128,6 +128,15 @@ static const char * topdown_attrs[] = {
         NULL,
  };
  
+static const char *topdown_metric_attrs[] = { /* passed to topdown_filter_events() */
+       "slots",
+       "topdown-retiring",
+       "topdown-bad-spec",
+       "topdown-fe-bound",
+       "topdown-be-bound",
+       NULL,
+};
+
  static const char *smi_cost_attrs = {
         "{"
         "msr/aperf/,"
@@ -1677,6 +1686,24 @@ static int add_default_attributes(void)
                 char *str = NULL;
                 bool warn = false;
  
+               if (!force_metric_only)
+                       stat_config.metric_only = true;
+
+               if (topdown_filter_events(topdown_metric_attrs, &str, 1) < 0) {
+                       pr_err("Out of memory\n");
+                       return -1;
+               }
+               if (topdown_metric_attrs[0] && str) {
+                       if (!stat_config.interval && !stat_config.metric_only) {
+                               fprintf(stat_config.output,
+                                       "Topdown accuracy may decrease when measuring long periods.\n"
+                                       "Please print the result regularly, e.g. -I1000\n");
+                       }
+                       goto setup_metrics;
+               }
+
+               zfree(&str);
+
                 if (stat_config.aggr_mode != AGGR_GLOBAL &&
                     stat_config.aggr_mode != AGGR_CORE) {
                         pr_err("top down event configuration requires --per-core mode\n");
@@ -1688,8 +1715,6 @@ static int add_default_attributes(void)
                         return -1;
                 }
  
-               if (!force_metric_only)
-                       stat_config.metric_only = true;
                 if (topdown_filter_events(topdown_attrs, &str,
                                 arch_topdown_check_group(&warn)) < 0) {
                         pr_err("Out of memory\n");
@@ -1698,6 +1723,7 @@ static int add_default_attributes(void)
                 if (topdown_attrs[0] && str) {
                         if (warn)
                                 arch_topdown_group_warn();
+setup_metrics:
                         err = parse_events(evsel_list, str, &errinfo);
                         if (err) {
                                 fprintf(stderr,
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c

index 924b54d..9012651 100644 (file)
--- a/tools/perf/util/stat-shadow.c
+++ b/tools/perf/util/stat-shadow.c
@@ -241,6 +241,18 @@ void perf_stat__update_shadow_stats(struct evsel *counter, u64 count,
         else if (perf_stat_evsel__is(counter, TOPDOWN_RECOVERY_BUBBLES))
                 update_runtime_stat(st, STAT_TOPDOWN_RECOVERY_BUBBLES,
                                     ctx, cpu, count);
+       else if (perf_stat_evsel__is(counter, TOPDOWN_RETIRING))
+               update_runtime_stat(st, STAT_TOPDOWN_RETIRING,
+                                   ctx, cpu, count);
+       else if (perf_stat_evsel__is(counter, TOPDOWN_BAD_SPEC))
+               update_runtime_stat(st, STAT_TOPDOWN_BAD_SPEC,
+                                   ctx, cpu, count);
+       else if (perf_stat_evsel__is(counter, TOPDOWN_FE_BOUND))
+               update_runtime_stat(st, STAT_TOPDOWN_FE_BOUND,
+                                   ctx, cpu, count);
+       else if (perf_stat_evsel__is(counter, TOPDOWN_BE_BOUND))
+               update_runtime_stat(st, STAT_TOPDOWN_BE_BOUND,
+                                   ctx, cpu, count);
         else if (evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
                 update_runtime_stat(st, STAT_STALLED_CYCLES_FRONT,
                                     ctx, cpu, count);
@@ -705,6 +717,47 @@ static double td_be_bound(int ctx, int cpu, struct runtime_stat *st)
         return sanitize_val(1.0 - sum);
  }
  
+/*
+ * Kernel reports metrics multiplied with slots. To get back the ratio
+ * for @type, divide it by the sum of all four metrics (0 if the sum is 0).
+ */
+
+static double td_metric_ratio(int ctx, int cpu,
+                             enum stat_type type,
+                             struct runtime_stat *stat)
+{
+       double sum = runtime_stat_avg(stat, STAT_TOPDOWN_RETIRING, ctx, cpu) +
+               runtime_stat_avg(stat, STAT_TOPDOWN_FE_BOUND, ctx, cpu) +
+               runtime_stat_avg(stat, STAT_TOPDOWN_BE_BOUND, ctx, cpu) +
+               runtime_stat_avg(stat, STAT_TOPDOWN_BAD_SPEC, ctx, cpu);
+       double d = runtime_stat_avg(stat, type, ctx, cpu);
+
+       if (sum)
+               return d / sum;
+       return 0;
+}
+
+/*
+ * Report whether enough TopDown metric values are available: at least
+ * two of the four must be above zero (i.e. up to two may be missing).
+ */
+
+static bool full_td(int ctx, int cpu,
+                   struct runtime_stat *stat)
+{
+       int c = 0;
+
+       if (runtime_stat_avg(stat, STAT_TOPDOWN_RETIRING, ctx, cpu) > 0)
+               c++;
+       if (runtime_stat_avg(stat, STAT_TOPDOWN_BE_BOUND, ctx, cpu) > 0)
+               c++;
+       if (runtime_stat_avg(stat, STAT_TOPDOWN_FE_BOUND, ctx, cpu) > 0)
+               c++;
+       if (runtime_stat_avg(stat, STAT_TOPDOWN_BAD_SPEC, ctx, cpu) > 0)
+               c++;
+       return c >= 2;
+}
+
  static void print_smi_cost(struct perf_stat_config *config,
                            int cpu, struct evsel *evsel,
                            struct perf_stat_output_ctx *out,
@@ -1073,6 +1126,42 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
                                         be_bound * 100.);
                 else
                         print_metric(config, ctxp, NULL, NULL, name, 0);
+       } else if (perf_stat_evsel__is(evsel, TOPDOWN_RETIRING) &&
+                       full_td(ctx, cpu, st)) {
+               double retiring = td_metric_ratio(ctx, cpu,
+                                                 STAT_TOPDOWN_RETIRING, st);
+
+               if (retiring > 0.7)
+                       color = PERF_COLOR_GREEN;
+               print_metric(config, ctxp, color, "%8.1f%%", "retiring",
+                               retiring * 100.);
+       } else if (perf_stat_evsel__is(evsel, TOPDOWN_FE_BOUND) &&
+                       full_td(ctx, cpu, st)) {
+               double fe_bound = td_metric_ratio(ctx, cpu,
+                                                 STAT_TOPDOWN_FE_BOUND, st);
+
+               if (fe_bound > 0.2)
+                       color = PERF_COLOR_RED;
+               print_metric(config, ctxp, color, "%8.1f%%", "frontend bound",
+                               fe_bound * 100.);
+       } else if (perf_stat_evsel__is(evsel, TOPDOWN_BE_BOUND) &&
+                       full_td(ctx, cpu, st)) {
+               double be_bound = td_metric_ratio(ctx, cpu,
+                                                 STAT_TOPDOWN_BE_BOUND, st);
+
+               if (be_bound > 0.2)
+                       color = PERF_COLOR_RED;
+               print_metric(config, ctxp, color, "%8.1f%%", "backend bound",
+                               be_bound * 100.);
+       } else if (perf_stat_evsel__is(evsel, TOPDOWN_BAD_SPEC) &&
+                       full_td(ctx, cpu, st)) {
+               double bad_spec = td_metric_ratio(ctx, cpu,
+                                                 STAT_TOPDOWN_BAD_SPEC, st);
+
+               if (bad_spec > 0.1)
+                       color = PERF_COLOR_RED;
+               print_metric(config, ctxp, color, "%8.1f%%", "bad speculation",
+                               bad_spec * 100.);
         } else if (evsel->metric_expr) {
                 generic_metric(config, evsel->metric_expr, evsel->metric_events, NULL,
                                 evsel->name, evsel->metric_name, NULL, 1, cpu, out, st);
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c

index cdb1543..bd0decd 100644 (file)
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -95,6 +95,10 @@ static const char *id_str[PERF_STAT_EVSEL_ID__MAX] = {
         ID(TOPDOWN_SLOTS_RETIRED, topdown-slots-retired),
         ID(TOPDOWN_FETCH_BUBBLES, topdown-fetch-bubbles),
         ID(TOPDOWN_RECOVERY_BUBBLES, topdown-recovery-bubbles),
+       ID(TOPDOWN_RETIRING, topdown-retiring),
+       ID(TOPDOWN_BAD_SPEC, topdown-bad-spec),
+       ID(TOPDOWN_FE_BOUND, topdown-fe-bound),
+       ID(TOPDOWN_BE_BOUND, topdown-be-bound),
         ID(SMI_NUM, msr/smi/),
         ID(APERF, msr/aperf/),
  };
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h

index 9911fc6..f36c8c9 100644 (file)
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -28,6 +28,10 @@ enum perf_stat_evsel_id {
         PERF_STAT_EVSEL_ID__TOPDOWN_SLOTS_RETIRED,
         PERF_STAT_EVSEL_ID__TOPDOWN_FETCH_BUBBLES,
         PERF_STAT_EVSEL_ID__TOPDOWN_RECOVERY_BUBBLES,
+       PERF_STAT_EVSEL_ID__TOPDOWN_RETIRING,
+       PERF_STAT_EVSEL_ID__TOPDOWN_BAD_SPEC,
+       PERF_STAT_EVSEL_ID__TOPDOWN_FE_BOUND,
+       PERF_STAT_EVSEL_ID__TOPDOWN_BE_BOUND,
         PERF_STAT_EVSEL_ID__SMI_NUM,
         PERF_STAT_EVSEL_ID__APERF,
         PERF_STAT_EVSEL_ID__MAX,
@@ -82,6 +86,10 @@ enum stat_type {
         STAT_TOPDOWN_SLOTS_RETIRED,
         STAT_TOPDOWN_FETCH_BUBBLES,
         STAT_TOPDOWN_RECOVERY_BUBBLES,
+       STAT_TOPDOWN_RETIRING,
+       STAT_TOPDOWN_BAD_SPEC,
+       STAT_TOPDOWN_FE_BOUND,
+       STAT_TOPDOWN_BE_BOUND,
         STAT_SMI_NUM,
         STAT_APERF,
         STAT_MAX
author	Andi Kleen <ak@linux.intel.com>
	Fri, 11 Sep 2020 14:48:07 +0000 (07:48 -0700)
committer	Arnaldo Carvalho de Melo <acme@redhat.com>
	Thu, 17 Sep 2020 18:48:08 +0000 (15:48 -0300)
tools/perf/Documentation/perf-stat.txt		patch \| blob \| history
tools/perf/builtin-stat.c		patch \| blob \| history
tools/perf/util/stat-shadow.c		patch \| blob \| history
tools/perf/util/stat.c		patch \| blob \| history
tools/perf/util/stat.h		patch \| blob \| history