// SPDX-License-Identifier: GPL-2.0
#include <stdio.h>
#include "evsel.h"
#include "stat.h"
#include "color.h"
#include "pmu.h"
#include "rblist.h"
#include "evlist.h"
#include "expr.h"
#include "metricgroup.h"
#include <linux/zalloc.h>

/*
 * AGGR_GLOBAL: Use CPU 0
 * AGGR_SOCKET: Use first CPU of socket
 * AGGR_DIE: Use first CPU of die
 * AGGR_CORE: Use first CPU of core
 * AGGR_NONE: Use matching CPU
 * AGGR_THREAD: Not supported?
 */

struct runtime_stat rt_stat;
struct stats walltime_nsecs_stats;

struct saved_value {
        struct rb_node rb_node;
        struct evsel *evsel;            /* key: NULL for shadow stats */
        enum stat_type type;            /* key: shadow stat type */
        int ctx;                        /* key: exclude_* context bits */
        int cpu;                        /* key: CPU the value was measured on */
        struct runtime_stat *stat;      /* key: owning runtime_stat */
        struct stats stats;
        u64 metric_total;               /* combined count over merged events */
        int metric_other;               /* number of merged non-leader events */
};

static int saved_value_cmp(struct rb_node *rb_node, const void *entry)
{
        struct saved_value *a = container_of(rb_node,
                                             struct saved_value,
                                             rb_node);
        const struct saved_value *b = entry;

        if (a->cpu != b->cpu)
                return a->cpu - b->cpu;

        /*
         * Previously the rbtree was used to link generic metrics.
         * The keys were evsel/cpu. Now the rbtree is extended to support
         * per-thread shadow stats. In the shadow stats case the keys
         * are cpu/type/ctx/stat (evsel is NULL). In the generic metrics
         * case the keys are still evsel/cpu (type/ctx/stat are 0 or NULL).
         */
        if (a->type != b->type)
                return a->type - b->type;

        if (a->ctx != b->ctx)
                return a->ctx - b->ctx;

        if (a->evsel == NULL && b->evsel == NULL) {
                if (a->stat == b->stat)
                        return 0;

                if ((char *)a->stat < (char *)b->stat)
                        return -1;

                return 1;
        }

        if (a->evsel == b->evsel)
                return 0;
        if ((char *)a->evsel < (char *)b->evsel)
                return -1;
        return +1;
}

static struct rb_node *saved_value_new(struct rblist *rblist __maybe_unused,
                                       const void *entry)
{
        struct saved_value *nd = malloc(sizeof(struct saved_value));

        if (!nd)
                return NULL;
        memcpy(nd, entry, sizeof(struct saved_value));
        return &nd->rb_node;
}

static void saved_value_delete(struct rblist *rblist __maybe_unused,
                               struct rb_node *rb_node)
{
        struct saved_value *v;

        BUG_ON(!rb_node);
        v = container_of(rb_node, struct saved_value, rb_node);
        free(v);
}

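/*
 * Look up (and optionally create) the saved_value for the given key.
 * rblist__add_node() inserts a copy made by saved_value_new(), so after
 * adding we look the node up again to return the copy that actually
 * lives in the tree.
 */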
static struct saved_value *saved_value_lookup(struct evsel *evsel,
                                              int cpu,
                                              bool create,
                                              enum stat_type type,
                                              int ctx,
                                              struct runtime_stat *st)
{
        struct rblist *rblist;
        struct rb_node *nd;
        struct saved_value dm = {
                .cpu = cpu,
                .evsel = evsel,
                .type = type,
                .ctx = ctx,
                .stat = st,
        };

        rblist = &st->value_list;

        nd = rblist__find(rblist, &dm);
        if (nd)
                return container_of(nd, struct saved_value, rb_node);
        if (create) {
                rblist__add_node(rblist, &dm);
                nd = rblist__find(rblist, &dm);
                if (nd)
                        return container_of(nd, struct saved_value, rb_node);
        }
        return NULL;
}

void runtime_stat__init(struct runtime_stat *st)
{
        struct rblist *rblist = &st->value_list;

        rblist__init(rblist);
        rblist->node_cmp = saved_value_cmp;
        rblist->node_new = saved_value_new;
        rblist->node_delete = saved_value_delete;
}

void runtime_stat__exit(struct runtime_stat *st)
{
        rblist__exit(&st->value_list);
}

void perf_stat__init_shadow_stats(void)
{
        runtime_stat__init(&rt_stat);
}

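/*
 * Encode the exclude_* attributes of an event as a context bitmask, so
 * that shadow stats are only matched against events counted in the same
 * contexts. For example, an event with exclude_kernel and exclude_hv set
 * gets ctx = CTX_BIT_KERNEL | CTX_BIT_HV.
 */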
static int evsel_context(struct evsel *evsel)
{
        int ctx = 0;

        if (evsel->core.attr.exclude_kernel)
                ctx |= CTX_BIT_KERNEL;
        if (evsel->core.attr.exclude_user)
                ctx |= CTX_BIT_USER;
        if (evsel->core.attr.exclude_hv)
                ctx |= CTX_BIT_HV;
        if (evsel->core.attr.exclude_host)
                ctx |= CTX_BIT_HOST;
        if (evsel->core.attr.exclude_idle)
                ctx |= CTX_BIT_IDLE;

        return ctx;
}

static void reset_stat(struct runtime_stat *st)
{
        struct rblist *rblist;
        struct rb_node *pos, *next;

        rblist = &st->value_list;
        next = rb_first_cached(&rblist->entries);
        while (next) {
                pos = next;
                next = rb_next(pos);
                memset(&container_of(pos, struct saved_value, rb_node)->stats,
                       0,
                       sizeof(struct stats));
        }
}

void perf_stat__reset_shadow_stats(void)
{
        reset_stat(&rt_stat);
        memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats));
}

void perf_stat__reset_shadow_per_stat(struct runtime_stat *st)
{
        reset_stat(st);
}

static void update_runtime_stat(struct runtime_stat *st,
                                enum stat_type type,
                                int ctx, int cpu, u64 count)
{
        struct saved_value *v = saved_value_lookup(NULL, cpu, true,
                                                   type, ctx, st);

        if (v)
                update_stats(&v->stats, count);
}

/*
 * Update various tracking values we maintain to print
 * more semantic information such as miss/hit ratios,
 * instruction rates, etc:
 */
void perf_stat__update_shadow_stats(struct evsel *counter, u64 count,
                                    int cpu, struct runtime_stat *st)
{
        int ctx = evsel_context(counter);
        u64 count_ns = count;
        struct saved_value *v;

        count *= counter->scale;

        if (evsel__is_clock(counter))
                update_runtime_stat(st, STAT_NSECS, 0, cpu, count_ns);
        else if (evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
                update_runtime_stat(st, STAT_CYCLES, ctx, cpu, count);
        else if (perf_stat_evsel__is(counter, CYCLES_IN_TX))
                update_runtime_stat(st, STAT_CYCLES_IN_TX, ctx, cpu, count);
        else if (perf_stat_evsel__is(counter, TRANSACTION_START))
                update_runtime_stat(st, STAT_TRANSACTION, ctx, cpu, count);
        else if (perf_stat_evsel__is(counter, ELISION_START))
                update_runtime_stat(st, STAT_ELISION, ctx, cpu, count);
        else if (perf_stat_evsel__is(counter, TOPDOWN_TOTAL_SLOTS))
                update_runtime_stat(st, STAT_TOPDOWN_TOTAL_SLOTS,
                                    ctx, cpu, count);
        else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_ISSUED))
                update_runtime_stat(st, STAT_TOPDOWN_SLOTS_ISSUED,
                                    ctx, cpu, count);
        else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_RETIRED))
                update_runtime_stat(st, STAT_TOPDOWN_SLOTS_RETIRED,
                                    ctx, cpu, count);
        else if (perf_stat_evsel__is(counter, TOPDOWN_FETCH_BUBBLES))
                update_runtime_stat(st, STAT_TOPDOWN_FETCH_BUBBLES,
                                    ctx, cpu, count);
        else if (perf_stat_evsel__is(counter, TOPDOWN_RECOVERY_BUBBLES))
                update_runtime_stat(st, STAT_TOPDOWN_RECOVERY_BUBBLES,
                                    ctx, cpu, count);
        else if (perf_stat_evsel__is(counter, TOPDOWN_RETIRING))
                update_runtime_stat(st, STAT_TOPDOWN_RETIRING,
                                    ctx, cpu, count);
        else if (perf_stat_evsel__is(counter, TOPDOWN_BAD_SPEC))
                update_runtime_stat(st, STAT_TOPDOWN_BAD_SPEC,
                                    ctx, cpu, count);
        else if (perf_stat_evsel__is(counter, TOPDOWN_FE_BOUND))
                update_runtime_stat(st, STAT_TOPDOWN_FE_BOUND,
                                    ctx, cpu, count);
        else if (perf_stat_evsel__is(counter, TOPDOWN_BE_BOUND))
                update_runtime_stat(st, STAT_TOPDOWN_BE_BOUND,
                                    ctx, cpu, count);
        else if (evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
                update_runtime_stat(st, STAT_STALLED_CYCLES_FRONT,
                                    ctx, cpu, count);
        else if (evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
                update_runtime_stat(st, STAT_STALLED_CYCLES_BACK,
                                    ctx, cpu, count);
        else if (evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
                update_runtime_stat(st, STAT_BRANCHES, ctx, cpu, count);
        else if (evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES))
                update_runtime_stat(st, STAT_CACHEREFS, ctx, cpu, count);
        else if (evsel__match(counter, HW_CACHE, HW_CACHE_L1D))
                update_runtime_stat(st, STAT_L1_DCACHE, ctx, cpu, count);
        else if (evsel__match(counter, HW_CACHE, HW_CACHE_L1I))
                update_runtime_stat(st, STAT_L1_ICACHE, ctx, cpu, count);
        else if (evsel__match(counter, HW_CACHE, HW_CACHE_LL))
                update_runtime_stat(st, STAT_LL_CACHE, ctx, cpu, count);
        else if (evsel__match(counter, HW_CACHE, HW_CACHE_DTLB))
                update_runtime_stat(st, STAT_DTLB_CACHE, ctx, cpu, count);
        else if (evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
                update_runtime_stat(st, STAT_ITLB_CACHE, ctx, cpu, count);
        else if (perf_stat_evsel__is(counter, SMI_NUM))
                update_runtime_stat(st, STAT_SMI_NUM, ctx, cpu, count);
        else if (perf_stat_evsel__is(counter, APERF))
                update_runtime_stat(st, STAT_APERF, ctx, cpu, count);

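        /*
         * Events referenced by a MetricExpr have collect_stat set; their
         * counts are saved so the expression can be evaluated later. For
         * merged events (e.g. several instances of an uncore PMU), the
         * combined total is accumulated on the metric leader.
         */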
        if (counter->collect_stat) {
                v = saved_value_lookup(counter, cpu, true, STAT_NONE, 0, st);
                update_stats(&v->stats, count);
                if (counter->metric_leader)
                        v->metric_total += count;
        } else if (counter->metric_leader) {
                v = saved_value_lookup(counter->metric_leader,
                                       cpu, true, STAT_NONE, 0, st);
                v->metric_total += count;
                v->metric_other++;
        }
}

/* used for get_ratio_color() */
enum grc_type {
        GRC_STALLED_CYCLES_FE,
        GRC_STALLED_CYCLES_BE,
        GRC_CACHE_MISSES,
        GRC_MAX_NR
};

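/*
 * Map a ratio (in percent) to a color: above the first threshold is red,
 * above the second magenta, above the third yellow. For example, a 55%
 * frontend-stall ratio exceeds 50.0 and is printed in red.
 */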
static const char *get_ratio_color(enum grc_type type, double ratio)
{
        static const double grc_table[GRC_MAX_NR][3] = {
                [GRC_STALLED_CYCLES_FE] = { 50.0, 30.0, 10.0 },
                [GRC_STALLED_CYCLES_BE] = { 75.0, 50.0, 20.0 },
                [GRC_CACHE_MISSES]      = { 20.0, 10.0, 5.0 },
        };
        const char *color = PERF_COLOR_NORMAL;

        if (ratio > grc_table[type][0])
                color = PERF_COLOR_RED;
        else if (ratio > grc_table[type][1])
                color = PERF_COLOR_MAGENTA;
        else if (ratio > grc_table[type][2])
                color = PERF_COLOR_YELLOW;

        return color;
}

static struct evsel *perf_stat__find_event(struct evlist *evsel_list,
                                           const char *name)
{
        struct evsel *c2;

        evlist__for_each_entry(evsel_list, c2) {
                if (!strcasecmp(c2->name, name) && !c2->collect_stat)
                        return c2;
        }
        return NULL;
}

/*
 * Mark the events referenced by each MetricExpr and link the metric
 * event to them.
 */
void perf_stat__collect_metric_expr(struct evlist *evsel_list)
{
        struct evsel *counter, *leader, **metric_events, *oc;
        bool found;
        struct expr_parse_ctx ctx;
        struct hashmap_entry *cur;
        size_t bkt;
        int i;

        expr__ctx_init(&ctx);
        evlist__for_each_entry(evsel_list, counter) {
                bool invalid = false;

                leader = counter->leader;
                if (!counter->metric_expr)
                        continue;

                expr__ctx_clear(&ctx);
                metric_events = counter->metric_events;
                if (!metric_events) {
                        if (expr__find_other(counter->metric_expr,
                                             counter->name,
                                             &ctx, 1) < 0)
                                continue;

                        metric_events = calloc(hashmap__size(&ctx.ids) + 1,
                                               sizeof(struct evsel *));
                        if (!metric_events) {
                                expr__ctx_clear(&ctx);
                                return;
                        }
                        counter->metric_events = metric_events;
                }

                i = 0;
                hashmap__for_each_entry((&ctx.ids), cur, bkt) {
                        const char *metric_name = (const char *)cur->key;

                        found = false;
                        if (leader) {
                                /* Search in group */
                                for_each_group_member (oc, leader) {
                                        if (!strcasecmp(oc->name,
                                                        metric_name) &&
                                                !oc->collect_stat) {
                                                found = true;
                                                break;
                                        }
                                }
                        }
                        if (!found) {
                                /* Search ignoring groups */
                                oc = perf_stat__find_event(evsel_list,
                                                           metric_name);
                        }
                        if (!oc) {
                                /* De-duplicating one event is enough to handle duplicated PMUs. */
                                static char *printed;

                                /*
                                 * Adding events automatically would be difficult, because
                                 * it would risk creating groups that are not schedulable.
                                 * perf stat doesn't understand all the scheduling constraints
                                 * of events. So we ask the user instead to add the missing
                                 * events.
                                 */
                                if (!printed ||
                                    strcasecmp(printed, metric_name)) {
                                        fprintf(stderr,
                                                "Add %s event to groups to get metric expression for %s\n",
                                                metric_name,
                                                counter->name);
                                        printed = strdup(metric_name);
                                }
                                invalid = true;
                                continue;
                        }
                        metric_events[i++] = oc;
                        oc->collect_stat = true;
                }
                metric_events[i] = NULL;
                if (invalid) {
                        free(metric_events);
                        counter->metric_events = NULL;
                        counter->metric_expr = NULL;
                }
        }
        expr__ctx_clear(&ctx);
}

static double runtime_stat_avg(struct runtime_stat *st,
                               enum stat_type type, int ctx, int cpu)
{
        struct saved_value *v;

        v = saved_value_lookup(NULL, cpu, false, type, ctx, st);
        if (!v)
                return 0.0;

        return avg_stats(&v->stats);
}

static double runtime_stat_n(struct runtime_stat *st,
                             enum stat_type type, int ctx, int cpu)
{
        struct saved_value *v;

        v = saved_value_lookup(NULL, cpu, false, type, ctx, st);
        if (!v)
                return 0.0;

        return v->stats.n;
}

static void print_stalled_cycles_frontend(struct perf_stat_config *config,
                                          int cpu,
                                          struct evsel *evsel, double avg,
                                          struct perf_stat_output_ctx *out,
                                          struct runtime_stat *st)
{
        double total, ratio = 0.0;
        const char *color;
        int ctx = evsel_context(evsel);

        total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);

        if (total)
                ratio = avg / total * 100.0;

        color = get_ratio_color(GRC_STALLED_CYCLES_FE, ratio);

        if (ratio)
                out->print_metric(config, out->ctx, color, "%7.2f%%", "frontend cycles idle",
                                  ratio);
        else
                out->print_metric(config, out->ctx, NULL, NULL, "frontend cycles idle", 0);
}

static void print_stalled_cycles_backend(struct perf_stat_config *config,
                                         int cpu,
                                         struct evsel *evsel, double avg,
                                         struct perf_stat_output_ctx *out,
                                         struct runtime_stat *st)
{
        double total, ratio = 0.0;
        const char *color;
        int ctx = evsel_context(evsel);

        total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);

        if (total)
                ratio = avg / total * 100.0;

        color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio);

        out->print_metric(config, out->ctx, color, "%7.2f%%", "backend cycles idle", ratio);
}

static void print_branch_misses(struct perf_stat_config *config,
                                int cpu,
                                struct evsel *evsel,
                                double avg,
                                struct perf_stat_output_ctx *out,
                                struct runtime_stat *st)
{
        double total, ratio = 0.0;
        const char *color;
        int ctx = evsel_context(evsel);

        total = runtime_stat_avg(st, STAT_BRANCHES, ctx, cpu);

        if (total)
                ratio = avg / total * 100.0;

        color = get_ratio_color(GRC_CACHE_MISSES, ratio);

        out->print_metric(config, out->ctx, color, "%7.2f%%", "of all branches", ratio);
}

static void print_l1_dcache_misses(struct perf_stat_config *config,
                                   int cpu,
                                   struct evsel *evsel,
                                   double avg,
                                   struct perf_stat_output_ctx *out,
                                   struct runtime_stat *st)
{
        double total, ratio = 0.0;
        const char *color;
        int ctx = evsel_context(evsel);

        total = runtime_stat_avg(st, STAT_L1_DCACHE, ctx, cpu);

        if (total)
                ratio = avg / total * 100.0;

        color = get_ratio_color(GRC_CACHE_MISSES, ratio);

        out->print_metric(config, out->ctx, color, "%7.2f%%", "of all L1-dcache accesses", ratio);
}

static void print_l1_icache_misses(struct perf_stat_config *config,
                                   int cpu,
                                   struct evsel *evsel,
                                   double avg,
                                   struct perf_stat_output_ctx *out,
                                   struct runtime_stat *st)
{
        double total, ratio = 0.0;
        const char *color;
        int ctx = evsel_context(evsel);

        total = runtime_stat_avg(st, STAT_L1_ICACHE, ctx, cpu);

        if (total)
                ratio = avg / total * 100.0;

        color = get_ratio_color(GRC_CACHE_MISSES, ratio);
        out->print_metric(config, out->ctx, color, "%7.2f%%", "of all L1-icache accesses", ratio);
}

static void print_dtlb_cache_misses(struct perf_stat_config *config,
                                    int cpu,
                                    struct evsel *evsel,
                                    double avg,
                                    struct perf_stat_output_ctx *out,
                                    struct runtime_stat *st)
{
        double total, ratio = 0.0;
        const char *color;
        int ctx = evsel_context(evsel);

        total = runtime_stat_avg(st, STAT_DTLB_CACHE, ctx, cpu);

        if (total)
                ratio = avg / total * 100.0;

        color = get_ratio_color(GRC_CACHE_MISSES, ratio);
        out->print_metric(config, out->ctx, color, "%7.2f%%", "of all dTLB cache accesses", ratio);
}

static void print_itlb_cache_misses(struct perf_stat_config *config,
                                    int cpu,
                                    struct evsel *evsel,
                                    double avg,
                                    struct perf_stat_output_ctx *out,
                                    struct runtime_stat *st)
{
        double total, ratio = 0.0;
        const char *color;
        int ctx = evsel_context(evsel);

        total = runtime_stat_avg(st, STAT_ITLB_CACHE, ctx, cpu);

        if (total)
                ratio = avg / total * 100.0;

        color = get_ratio_color(GRC_CACHE_MISSES, ratio);
        out->print_metric(config, out->ctx, color, "%7.2f%%", "of all iTLB cache accesses", ratio);
}

static void print_ll_cache_misses(struct perf_stat_config *config,
                                  int cpu,
                                  struct evsel *evsel,
                                  double avg,
                                  struct perf_stat_output_ctx *out,
                                  struct runtime_stat *st)
{
        double total, ratio = 0.0;
        const char *color;
        int ctx = evsel_context(evsel);

        total = runtime_stat_avg(st, STAT_LL_CACHE, ctx, cpu);

        if (total)
                ratio = avg / total * 100.0;

        color = get_ratio_color(GRC_CACHE_MISSES, ratio);
        out->print_metric(config, out->ctx, color, "%7.2f%%", "of all LL-cache accesses", ratio);
}

/*
 * High level "TopDown" CPU core pipeline bottleneck breakdown.
 *
 * Basic concept following
 * Yasin, "A Top-Down Method for Performance Analysis and Counters
 * Architecture", ISPASS 2014.
 *
 * The CPU pipeline is divided into 4 areas that can be bottlenecks:
 *
 * Frontend -> Backend -> Retiring
 * BadSpeculation in addition means out of order execution that is thrown away
 * (for example branch mispredictions)
 * Frontend is instruction decoding.
 * Backend is execution, like computation and accessing data in memory
 * Retiring is good execution that is not directly bottlenecked
 *
 * The formulas are computed in slots.
 * A slot is one pipeline issue opportunity per cycle, one for each unit
 * of the pipeline width (for example a 4-wide pipeline has 4 slots for
 * each cycle)
 *
 * Formulas:
 * BadSpeculation = ((SlotsIssued - SlotsRetired) + RecoveryBubbles) /
 *                      TotalSlots
 * Retiring = SlotsRetired / TotalSlots
 * FrontendBound = FetchBubbles / TotalSlots
 * BackendBound = 1.0 - BadSpeculation - Retiring - FrontendBound
 *
 * The kernel provides the mapping to the low level CPU events and any scaling
 * needed for the CPU pipeline width, for example:
 *
 * TotalSlots = Cycles * 4
 *
 * The scaling factor is communicated in the sysfs unit.
 *
 * In some cases the CPU may not be able to measure all the formulas due to
 * missing events. In this case multiple formulas are combined, where possible.
 *
 * Full TopDown supports more levels to sub-divide each area: for example
 * BackendBound into computing bound and memory bound. For now we only
 * support Level 1 TopDown.
 */
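
/*
 * Worked example with made-up numbers: on a 4-wide CPU that ran for
 * 1000 cycles, TotalSlots = 4000. If SlotsRetired = 2000,
 * SlotsIssued = 2400 and RecoveryBubbles = 0, then
 * Retiring = 2000 / 4000 = 0.5 and
 * BadSpeculation = (2400 - 2000 + 0) / 4000 = 0.1.
 */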

/*
 * Clamp tiny negative values (e.g. from counter multiplexing and
 * measurement noise) to zero; anything below -0.02 is passed through
 * unchanged as a real error.
 */
static double sanitize_val(double x)
{
        if (x < 0 && x >= -0.02)
                return 0.0;
        return x;
}

static double td_total_slots(int ctx, int cpu, struct runtime_stat *st)
{
        return runtime_stat_avg(st, STAT_TOPDOWN_TOTAL_SLOTS, ctx, cpu);
}

static double td_bad_spec(int ctx, int cpu, struct runtime_stat *st)
{
        double bad_spec = 0;
        double total_slots;
        double total;

        total = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_ISSUED, ctx, cpu) -
                runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED, ctx, cpu) +
                runtime_stat_avg(st, STAT_TOPDOWN_RECOVERY_BUBBLES, ctx, cpu);

        total_slots = td_total_slots(ctx, cpu, st);
        if (total_slots)
                bad_spec = total / total_slots;
        return sanitize_val(bad_spec);
}

static double td_retiring(int ctx, int cpu, struct runtime_stat *st)
{
        double retiring = 0;
        double total_slots = td_total_slots(ctx, cpu, st);
        double ret_slots = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED,
                                            ctx, cpu);

        if (total_slots)
                retiring = ret_slots / total_slots;
        return retiring;
}

static double td_fe_bound(int ctx, int cpu, struct runtime_stat *st)
{
        double fe_bound = 0;
        double total_slots = td_total_slots(ctx, cpu, st);
        double fetch_bub = runtime_stat_avg(st, STAT_TOPDOWN_FETCH_BUBBLES,
                                            ctx, cpu);

        if (total_slots)
                fe_bound = fetch_bub / total_slots;
        return fe_bound;
}

static double td_be_bound(int ctx, int cpu, struct runtime_stat *st)
{
        double sum = (td_fe_bound(ctx, cpu, st) +
                      td_bad_spec(ctx, cpu, st) +
                      td_retiring(ctx, cpu, st));
        if (sum == 0)
                return 0;
        return sanitize_val(1.0 - sum);
}

/*
 * The kernel reports metrics multiplied by slots. To get back
 * the ratios we need to recreate the sum.
 */
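
/*
 * For example (made-up counts): with retiring = 600, fe_bound = 200,
 * be_bound = 150 and bad_spec = 50 slots, the retiring ratio is
 * 600 / (600 + 200 + 150 + 50) = 0.6.
 */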

static double td_metric_ratio(int ctx, int cpu,
                              enum stat_type type,
                              struct runtime_stat *stat)
{
        double sum = runtime_stat_avg(stat, STAT_TOPDOWN_RETIRING, ctx, cpu) +
                runtime_stat_avg(stat, STAT_TOPDOWN_FE_BOUND, ctx, cpu) +
                runtime_stat_avg(stat, STAT_TOPDOWN_BE_BOUND, ctx, cpu) +
                runtime_stat_avg(stat, STAT_TOPDOWN_BAD_SPEC, ctx, cpu);
        double d = runtime_stat_avg(stat, type, ctx, cpu);

        if (sum)
                return d / sum;
        return 0;
}

/*
 * ... but only if most of the values are actually available.
 * We allow two missing.
 */

static bool full_td(int ctx, int cpu,
                    struct runtime_stat *stat)
{
        int c = 0;

        if (runtime_stat_avg(stat, STAT_TOPDOWN_RETIRING, ctx, cpu) > 0)
                c++;
        if (runtime_stat_avg(stat, STAT_TOPDOWN_BE_BOUND, ctx, cpu) > 0)
                c++;
        if (runtime_stat_avg(stat, STAT_TOPDOWN_FE_BOUND, ctx, cpu) > 0)
                c++;
        if (runtime_stat_avg(stat, STAT_TOPDOWN_BAD_SPEC, ctx, cpu) > 0)
                c++;
        return c >= 2;
}

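/*
 * With msr/aperf/ and the freeze_on_smi sysfs setting used by
 * perf stat --smi-cost, the cycles counter freezes during System
 * Management Interrupts while APERF keeps counting, so
 * (aperf - cycles) / aperf approximates the fraction of cycles
 * spent in SMIs.
 */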
static void print_smi_cost(struct perf_stat_config *config,
                           int cpu, struct evsel *evsel,
                           struct perf_stat_output_ctx *out,
                           struct runtime_stat *st)
{
        double smi_num, aperf, cycles, cost = 0.0;
        int ctx = evsel_context(evsel);
        const char *color = NULL;

        smi_num = runtime_stat_avg(st, STAT_SMI_NUM, ctx, cpu);
        aperf = runtime_stat_avg(st, STAT_APERF, ctx, cpu);
        cycles = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);

        if ((cycles == 0) || (aperf == 0))
                return;

        if (smi_num)
                cost = (aperf - cycles) / aperf * 100.00;

        if (cost > 10)
                color = PERF_COLOR_RED;
        out->print_metric(config, out->ctx, color, "%8.1f%%", "SMI cycles%", cost);
        out->print_metric(config, out->ctx, NULL, "%4.0f", "SMI#", smi_num);
}

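/*
 * Fill @pctx with one identifier per metric event, mapped to its measured
 * average (walltime for the synthetic "duration_time" event, the combined
 * total for merged events). Returns the number of events added, or a
 * negative error code.
 */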
static int prepare_metric(struct evsel **metric_events,
                          struct metric_ref *metric_refs,
                          struct expr_parse_ctx *pctx,
                          int cpu,
                          struct runtime_stat *st)
{
        double scale;
        char *n, *pn;
        int i, j, ret;

        expr__ctx_init(pctx);
        for (i = 0; metric_events[i]; i++) {
                struct saved_value *v;
                struct stats *stats;
                u64 metric_total = 0;

                if (!strcmp(metric_events[i]->name, "duration_time")) {
                        stats = &walltime_nsecs_stats;
                        scale = 1e-9;
                } else {
                        v = saved_value_lookup(metric_events[i], cpu, false,
                                               STAT_NONE, 0, st);
                        if (!v)
                                break;
                        stats = &v->stats;
                        scale = 1.0;

                        if (v->metric_other)
                                metric_total = v->metric_total;
                }

                n = strdup(metric_events[i]->name);
                if (!n)
                        return -ENOMEM;
                /*
                 * The display code with --no-merge adds [cpu] suffixes.
                 * These are not supported by the parser, so remove
                 * everything after the space.
                 */
                pn = strchr(n, ' ');
                if (pn)
                        *pn = 0;

                if (metric_total)
                        expr__add_id_val(pctx, n, metric_total);
                else
                        expr__add_id_val(pctx, n, avg_stats(stats) * scale);
        }

        for (j = 0; metric_refs && metric_refs[j].metric_name; j++) {
                ret = expr__add_ref(pctx, &metric_refs[j]);
                if (ret)
                        return ret;
        }

        return i;
}

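/*
 * Evaluate a MetricExpr for one CPU and print the result. If any event
 * needed by the expression has no saved value yet, print an empty metric
 * instead.
 */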
static void generic_metric(struct perf_stat_config *config,
                           const char *metric_expr,
                           struct evsel **metric_events,
                           struct metric_ref *metric_refs,
                           char *name,
                           const char *metric_name,
                           const char *metric_unit,
                           int runtime,
                           int cpu,
                           struct perf_stat_output_ctx *out,
                           struct runtime_stat *st)
{
        print_metric_t print_metric = out->print_metric;
        struct expr_parse_ctx pctx;
        double ratio, scale;
        int i;
        void *ctxp = out->ctx;

        i = prepare_metric(metric_events, metric_refs, &pctx, cpu, st);
        if (i < 0)
                return;

        if (!metric_events[i]) {
                if (expr__parse(&ratio, &pctx, metric_expr, runtime) == 0) {
                        char *unit;
                        char metric_bf[64];

                        if (metric_unit && metric_name) {
                                if (perf_pmu__convert_scale(metric_unit,
                                        &unit, &scale) >= 0) {
                                        ratio *= scale;
                                }
                                if (strstr(metric_expr, "?"))
                                        scnprintf(metric_bf, sizeof(metric_bf),
                                          "%s  %s_%d", unit, metric_name, runtime);
                                else
                                        scnprintf(metric_bf, sizeof(metric_bf),
                                          "%s  %s", unit, metric_name);

                                print_metric(config, ctxp, NULL, "%8.1f",
                                             metric_bf, ratio);
                        } else {
                                print_metric(config, ctxp, NULL, "%8.2f",
                                        metric_name ?
                                        metric_name :
                                        out->force_header ? name : "",
                                        ratio);
                        }
                } else {
                        print_metric(config, ctxp, NULL, NULL,
                                     out->force_header ?
                                     (metric_name ? metric_name : name) : "", 0);
                }
        } else {
                print_metric(config, ctxp, NULL, NULL,
                             out->force_header ?
                             (metric_name ? metric_name : name) : "", 0);
        }

        expr__ctx_clear(&pctx);
}

double test_generic_metric(struct metric_expr *mexp, int cpu, struct runtime_stat *st)
{
        struct expr_parse_ctx pctx;
        double ratio = 0.0;

        if (prepare_metric(mexp->metric_events, mexp->metric_refs, &pctx, cpu, st) < 0)
                goto out;

        if (expr__parse(&ratio, &pctx, mexp->metric_expr, 1))
                ratio = 0.0;

out:
        expr__ctx_clear(&pctx);
        return ratio;
}

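/*
 * Print the shadow metric matching @evsel (IPC, miss ratios, GHz,
 * TopDown percentages, ...), then any generic metrics attached via
 * @metric_events. Prints an empty metric when nothing applies, which
 * keeps the output columns aligned.
 */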
void perf_stat__print_shadow_stats(struct perf_stat_config *config,
                                   struct evsel *evsel,
                                   double avg, int cpu,
                                   struct perf_stat_output_ctx *out,
                                   struct rblist *metric_events,
                                   struct runtime_stat *st)
{
        void *ctxp = out->ctx;
        print_metric_t print_metric = out->print_metric;
        double total, ratio = 0.0, total2;
        const char *color = NULL;
        int ctx = evsel_context(evsel);
        struct metric_event *me;
        int num = 1;

        if (evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
                total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);

                if (total) {
                        ratio = avg / total;
                        print_metric(config, ctxp, NULL, "%7.2f ",
                                        "insn per cycle", ratio);
                } else {
                        print_metric(config, ctxp, NULL, NULL, "insn per cycle", 0);
                }

                total = runtime_stat_avg(st, STAT_STALLED_CYCLES_FRONT,
                                         ctx, cpu);

                total = max(total, runtime_stat_avg(st,
                                                    STAT_STALLED_CYCLES_BACK,
                                                    ctx, cpu));

                if (total && avg) {
                        out->new_line(config, ctxp);
                        ratio = total / avg;
                        print_metric(config, ctxp, NULL, "%7.2f ",
                                        "stalled cycles per insn",
                                        ratio);
                }
        } else if (evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) {
                if (runtime_stat_n(st, STAT_BRANCHES, ctx, cpu) != 0)
                        print_branch_misses(config, cpu, evsel, avg, out, st);
                else
                        print_metric(config, ctxp, NULL, NULL, "of all branches", 0);
        } else if (
                evsel->core.attr.type == PERF_TYPE_HW_CACHE &&
                evsel->core.attr.config == (PERF_COUNT_HW_CACHE_L1D |
                                            ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
                                            ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {

                if (runtime_stat_n(st, STAT_L1_DCACHE, ctx, cpu) != 0)
                        print_l1_dcache_misses(config, cpu, evsel, avg, out, st);
                else
                        print_metric(config, ctxp, NULL, NULL, "of all L1-dcache accesses", 0);
        } else if (
                evsel->core.attr.type == PERF_TYPE_HW_CACHE &&
                evsel->core.attr.config == (PERF_COUNT_HW_CACHE_L1I |
                                            ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
                                            ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {

                if (runtime_stat_n(st, STAT_L1_ICACHE, ctx, cpu) != 0)
                        print_l1_icache_misses(config, cpu, evsel, avg, out, st);
                else
                        print_metric(config, ctxp, NULL, NULL, "of all L1-icache accesses", 0);
        } else if (
                evsel->core.attr.type == PERF_TYPE_HW_CACHE &&
                evsel->core.attr.config == (PERF_COUNT_HW_CACHE_DTLB |
                                            ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
                                            ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {

                if (runtime_stat_n(st, STAT_DTLB_CACHE, ctx, cpu) != 0)
                        print_dtlb_cache_misses(config, cpu, evsel, avg, out, st);
                else
                        print_metric(config, ctxp, NULL, NULL, "of all dTLB cache accesses", 0);
        } else if (
                evsel->core.attr.type == PERF_TYPE_HW_CACHE &&
                evsel->core.attr.config == (PERF_COUNT_HW_CACHE_ITLB |
                                            ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
                                            ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {

                if (runtime_stat_n(st, STAT_ITLB_CACHE, ctx, cpu) != 0)
                        print_itlb_cache_misses(config, cpu, evsel, avg, out, st);
                else
                        print_metric(config, ctxp, NULL, NULL, "of all iTLB cache accesses", 0);
        } else if (
                evsel->core.attr.type == PERF_TYPE_HW_CACHE &&
                evsel->core.attr.config == (PERF_COUNT_HW_CACHE_LL |
                                            ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
                                            ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {

                if (runtime_stat_n(st, STAT_LL_CACHE, ctx, cpu) != 0)
                        print_ll_cache_misses(config, cpu, evsel, avg, out, st);
                else
                        print_metric(config, ctxp, NULL, NULL, "of all LL-cache accesses", 0);
        } else if (evsel__match(evsel, HARDWARE, HW_CACHE_MISSES)) {
                total = runtime_stat_avg(st, STAT_CACHEREFS, ctx, cpu);

                if (total)
                        ratio = avg * 100 / total;

                if (runtime_stat_n(st, STAT_CACHEREFS, ctx, cpu) != 0)
                        print_metric(config, ctxp, NULL, "%8.3f %%",
                                     "of all cache refs", ratio);
                else
                        print_metric(config, ctxp, NULL, NULL, "of all cache refs", 0);
        } else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) {
                print_stalled_cycles_frontend(config, cpu, evsel, avg, out, st);
        } else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) {
                print_stalled_cycles_backend(config, cpu, evsel, avg, out, st);
        } else if (evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
                total = runtime_stat_avg(st, STAT_NSECS, 0, cpu);

                if (total) {
                        ratio = avg / total;
                        print_metric(config, ctxp, NULL, "%8.3f", "GHz", ratio);
                } else {
                        print_metric(config, ctxp, NULL, NULL, "GHz", 0);
                }
        } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX)) {
                total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);

                if (total)
                        print_metric(config, ctxp, NULL,
                                        "%7.2f%%", "transactional cycles",
                                        100.0 * (avg / total));
                else
                        print_metric(config, ctxp, NULL, NULL, "transactional cycles",
                                     0);
        } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) {
                total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);
                total2 = runtime_stat_avg(st, STAT_CYCLES_IN_TX, ctx, cpu);

                if (total2 < avg)
                        total2 = avg;
                if (total)
                        print_metric(config, ctxp, NULL, "%7.2f%%", "aborted cycles",
                                100.0 * ((total2 - avg) / total));
                else
                        print_metric(config, ctxp, NULL, NULL, "aborted cycles", 0);
        } else if (perf_stat_evsel__is(evsel, TRANSACTION_START)) {
                total = runtime_stat_avg(st, STAT_CYCLES_IN_TX,
                                         ctx, cpu);

                if (avg)
                        ratio = total / avg;

                if (runtime_stat_n(st, STAT_CYCLES_IN_TX, ctx, cpu) != 0)
                        print_metric(config, ctxp, NULL, "%8.0f",
                                     "cycles / transaction", ratio);
                else
                        print_metric(config, ctxp, NULL, NULL, "cycles / transaction",
                                      0);
        } else if (perf_stat_evsel__is(evsel, ELISION_START)) {
                total = runtime_stat_avg(st, STAT_CYCLES_IN_TX,
                                         ctx, cpu);

                if (avg)
                        ratio = total / avg;

                print_metric(config, ctxp, NULL, "%8.0f", "cycles / elision", ratio);
        } else if (evsel__is_clock(evsel)) {
                if ((ratio = avg_stats(&walltime_nsecs_stats)) != 0)
                        print_metric(config, ctxp, NULL, "%8.3f", "CPUs utilized",
                                     avg / (ratio * evsel->scale));
                else
                        print_metric(config, ctxp, NULL, NULL, "CPUs utilized", 0);
        } else if (perf_stat_evsel__is(evsel, TOPDOWN_FETCH_BUBBLES)) {
                double fe_bound = td_fe_bound(ctx, cpu, st);

                if (fe_bound > 0.2)
                        color = PERF_COLOR_RED;
                print_metric(config, ctxp, color, "%8.1f%%", "frontend bound",
                                fe_bound * 100.);
        } else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_RETIRED)) {
                double retiring = td_retiring(ctx, cpu, st);

                if (retiring > 0.7)
                        color = PERF_COLOR_GREEN;
                print_metric(config, ctxp, color, "%8.1f%%", "retiring",
                                retiring * 100.);
        } else if (perf_stat_evsel__is(evsel, TOPDOWN_RECOVERY_BUBBLES)) {
                double bad_spec = td_bad_spec(ctx, cpu, st);

                if (bad_spec > 0.1)
                        color = PERF_COLOR_RED;
                print_metric(config, ctxp, color, "%8.1f%%", "bad speculation",
                                bad_spec * 100.);
        } else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_ISSUED)) {
                double be_bound = td_be_bound(ctx, cpu, st);
                const char *name = "backend bound";
                static int have_recovery_bubbles = -1;

                /* In case the CPU does not support topdown-recovery-bubbles */
                if (have_recovery_bubbles < 0)
                        have_recovery_bubbles = pmu_have_event("cpu",
                                        "topdown-recovery-bubbles");
                if (!have_recovery_bubbles)
                        name = "backend bound/bad spec";

                if (be_bound > 0.2)
                        color = PERF_COLOR_RED;
                if (td_total_slots(ctx, cpu, st) > 0)
                        print_metric(config, ctxp, color, "%8.1f%%", name,
                                        be_bound * 100.);
                else
                        print_metric(config, ctxp, NULL, NULL, name, 0);
        } else if (perf_stat_evsel__is(evsel, TOPDOWN_RETIRING) &&
                        full_td(ctx, cpu, st)) {
                double retiring = td_metric_ratio(ctx, cpu,
                                                  STAT_TOPDOWN_RETIRING, st);

                if (retiring > 0.7)
                        color = PERF_COLOR_GREEN;
                print_metric(config, ctxp, color, "%8.1f%%", "retiring",
                                retiring * 100.);
        } else if (perf_stat_evsel__is(evsel, TOPDOWN_FE_BOUND) &&
                        full_td(ctx, cpu, st)) {
                double fe_bound = td_metric_ratio(ctx, cpu,
                                                  STAT_TOPDOWN_FE_BOUND, st);

                if (fe_bound > 0.2)
                        color = PERF_COLOR_RED;
                print_metric(config, ctxp, color, "%8.1f%%", "frontend bound",
                                fe_bound * 100.);
        } else if (perf_stat_evsel__is(evsel, TOPDOWN_BE_BOUND) &&
                        full_td(ctx, cpu, st)) {
                double be_bound = td_metric_ratio(ctx, cpu,
                                                  STAT_TOPDOWN_BE_BOUND, st);

                if (be_bound > 0.2)
                        color = PERF_COLOR_RED;
                print_metric(config, ctxp, color, "%8.1f%%", "backend bound",
                                be_bound * 100.);
        } else if (perf_stat_evsel__is(evsel, TOPDOWN_BAD_SPEC) &&
                        full_td(ctx, cpu, st)) {
                double bad_spec = td_metric_ratio(ctx, cpu,
                                                  STAT_TOPDOWN_BAD_SPEC, st);

                if (bad_spec > 0.1)
                        color = PERF_COLOR_RED;
                print_metric(config, ctxp, color, "%8.1f%%", "bad speculation",
                                bad_spec * 100.);
        } else if (evsel->metric_expr) {
                generic_metric(config, evsel->metric_expr, evsel->metric_events, NULL,
                                evsel->name, evsel->metric_name, NULL, 1, cpu, out, st);
        } else if (runtime_stat_n(st, STAT_NSECS, 0, cpu) != 0) {
                char unit = 'M';
                char unit_buf[10];

                total = runtime_stat_avg(st, STAT_NSECS, 0, cpu);

                if (total)
                        ratio = 1000.0 * avg / total;
                if (ratio < 0.001) {
                        ratio *= 1000;
                        unit = 'K';
                }
                snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit);
                print_metric(config, ctxp, NULL, "%8.3f", unit_buf, ratio);
        } else if (perf_stat_evsel__is(evsel, SMI_NUM)) {
                print_smi_cost(config, cpu, evsel, out, st);
        } else {
                num = 0;
        }

        if ((me = metricgroup__lookup(metric_events, evsel, false)) != NULL) {
                struct metric_expr *mexp;

                list_for_each_entry (mexp, &me->head, nd) {
                        if (num++ > 0)
                                out->new_line(config, ctxp);
                        generic_metric(config, mexp->metric_expr, mexp->metric_events,
                                        mexp->metric_refs, evsel->name, mexp->metric_name,
                                        mexp->metric_unit, mexp->runtime, cpu, out, st);
                }
        }
        if (num == 0)
                print_metric(config, ctxp, NULL, NULL, NULL, 0);
}