/* tools/perf/util/stat-shadow.c */
#include <stdio.h>
#include "evsel.h"
#include "stat.h"
#include "color.h"
#include "pmu.h"
#include "rblist.h"
#include "evlist.h"
#include "expr.h"

enum {
        CTX_BIT_USER    = 1 << 0,
        CTX_BIT_KERNEL  = 1 << 1,
        CTX_BIT_HV      = 1 << 2,
        CTX_BIT_HOST    = 1 << 3,
        CTX_BIT_IDLE    = 1 << 4,
        CTX_BIT_MAX     = 1 << 5,
};

#define NUM_CTX CTX_BIT_MAX

/*
 * AGGR_GLOBAL: Use CPU 0
 * AGGR_SOCKET: Use first CPU of socket
 * AGGR_CORE: Use first CPU of core
 * AGGR_NONE: Use matching CPU
 * AGGR_THREAD: Not supported?
 */
static struct stats runtime_nsecs_stats[MAX_NR_CPUS];
static struct stats runtime_cycles_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_stalled_cycles_front_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_stalled_cycles_back_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_branches_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_cacherefs_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_l1_dcache_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_l1_icache_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_ll_cache_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_itlb_cache_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_dtlb_cache_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_cycles_in_tx_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_transaction_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_elision_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_topdown_total_slots[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_topdown_slots_issued[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_topdown_slots_retired[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_topdown_fetch_bubbles[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_topdown_recovery_bubbles[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_smi_num_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_aperf_stats[NUM_CTX][MAX_NR_CPUS];
static struct rblist runtime_saved_values;
static bool have_frontend_stalled;

struct stats walltime_nsecs_stats;

/* Per-(event, CPU, context) saved counter statistics, kept in an rbtree. */
struct saved_value {
        struct rb_node rb_node;
        struct perf_evsel *evsel;
        int cpu;
        int ctx;
        struct stats stats;
};

static int saved_value_cmp(struct rb_node *rb_node, const void *entry)
{
        struct saved_value *a = container_of(rb_node,
                                             struct saved_value,
                                             rb_node);
        const struct saved_value *b = entry;

        if (a->ctx != b->ctx)
                return a->ctx - b->ctx;
        if (a->cpu != b->cpu)
                return a->cpu - b->cpu;
        if (a->evsel == b->evsel)
                return 0;
        if ((char *)a->evsel < (char *)b->evsel)
                return -1;
        return +1;
}

static struct rb_node *saved_value_new(struct rblist *rblist __maybe_unused,
                                       const void *entry)
{
        struct saved_value *nd = malloc(sizeof(struct saved_value));

        if (!nd)
                return NULL;
        memcpy(nd, entry, sizeof(struct saved_value));
        return &nd->rb_node;
}

static struct saved_value *saved_value_lookup(struct perf_evsel *evsel,
                                              int cpu, int ctx,
                                              bool create)
{
        struct rb_node *nd;
        struct saved_value dm = {
                .cpu = cpu,
                .ctx = ctx,
                .evsel = evsel,
        };
        nd = rblist__find(&runtime_saved_values, &dm);
        if (nd)
                return container_of(nd, struct saved_value, rb_node);
        if (create) {
                rblist__add_node(&runtime_saved_values, &dm);
                nd = rblist__find(&runtime_saved_values, &dm);
                if (nd)
                        return container_of(nd, struct saved_value, rb_node);
        }
        return NULL;
}
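
/*
 * How the rbtree is used in this file: the update side calls
 * saved_value_lookup(evsel, cpu, ctx, true) and folds each new count into
 * the node with update_stats(&v->stats, count); the metric-printing side
 * calls saved_value_lookup(evsel, cpu, ctx, false) and reads the running
 * average back with avg_stats(&v->stats).
 */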

void perf_stat__init_shadow_stats(void)
{
        have_frontend_stalled = pmu_have_event("cpu", "stalled-cycles-frontend");
        rblist__init(&runtime_saved_values);
        runtime_saved_values.node_cmp = saved_value_cmp;
        runtime_saved_values.node_new = saved_value_new;
        /* No delete for now */
}

static int evsel_context(struct perf_evsel *evsel)
{
        int ctx = 0;

        if (evsel->attr.exclude_kernel)
                ctx |= CTX_BIT_KERNEL;
        if (evsel->attr.exclude_user)
                ctx |= CTX_BIT_USER;
        if (evsel->attr.exclude_hv)
                ctx |= CTX_BIT_HV;
        if (evsel->attr.exclude_host)
                ctx |= CTX_BIT_HOST;
        if (evsel->attr.exclude_idle)
                ctx |= CTX_BIT_IDLE;

        return ctx;
}

void perf_stat__reset_shadow_stats(void)
{
        struct rb_node *pos, *next;

        memset(runtime_nsecs_stats, 0, sizeof(runtime_nsecs_stats));
        memset(runtime_cycles_stats, 0, sizeof(runtime_cycles_stats));
        memset(runtime_stalled_cycles_front_stats, 0, sizeof(runtime_stalled_cycles_front_stats));
        memset(runtime_stalled_cycles_back_stats, 0, sizeof(runtime_stalled_cycles_back_stats));
        memset(runtime_branches_stats, 0, sizeof(runtime_branches_stats));
        memset(runtime_cacherefs_stats, 0, sizeof(runtime_cacherefs_stats));
        memset(runtime_l1_dcache_stats, 0, sizeof(runtime_l1_dcache_stats));
        memset(runtime_l1_icache_stats, 0, sizeof(runtime_l1_icache_stats));
        memset(runtime_ll_cache_stats, 0, sizeof(runtime_ll_cache_stats));
        memset(runtime_itlb_cache_stats, 0, sizeof(runtime_itlb_cache_stats));
        memset(runtime_dtlb_cache_stats, 0, sizeof(runtime_dtlb_cache_stats));
        memset(runtime_cycles_in_tx_stats, 0,
               sizeof(runtime_cycles_in_tx_stats));
        memset(runtime_transaction_stats, 0,
               sizeof(runtime_transaction_stats));
        memset(runtime_elision_stats, 0, sizeof(runtime_elision_stats));
        memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats));
        memset(runtime_topdown_total_slots, 0, sizeof(runtime_topdown_total_slots));
        memset(runtime_topdown_slots_retired, 0, sizeof(runtime_topdown_slots_retired));
        memset(runtime_topdown_slots_issued, 0, sizeof(runtime_topdown_slots_issued));
        memset(runtime_topdown_fetch_bubbles, 0, sizeof(runtime_topdown_fetch_bubbles));
        memset(runtime_topdown_recovery_bubbles, 0, sizeof(runtime_topdown_recovery_bubbles));
        memset(runtime_smi_num_stats, 0, sizeof(runtime_smi_num_stats));
        memset(runtime_aperf_stats, 0, sizeof(runtime_aperf_stats));

        next = rb_first(&runtime_saved_values.entries);
        while (next) {
                pos = next;
                next = rb_next(pos);
                memset(&container_of(pos, struct saved_value, rb_node)->stats,
                       0,
                       sizeof(struct stats));
        }
}

/*
 * Update various tracking values we maintain to print
 * more semantic information such as miss/hit ratios,
 * instruction rates, etc.
 */
void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count,
                                    int cpu)
{
        int ctx = evsel_context(counter);

        if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK) ||
            perf_evsel__match(counter, SOFTWARE, SW_CPU_CLOCK))
                update_stats(&runtime_nsecs_stats[cpu], count[0]);
        else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
                update_stats(&runtime_cycles_stats[ctx][cpu], count[0]);
        else if (perf_stat_evsel__is(counter, CYCLES_IN_TX))
                update_stats(&runtime_cycles_in_tx_stats[ctx][cpu], count[0]);
        else if (perf_stat_evsel__is(counter, TRANSACTION_START))
                update_stats(&runtime_transaction_stats[ctx][cpu], count[0]);
        else if (perf_stat_evsel__is(counter, ELISION_START))
                update_stats(&runtime_elision_stats[ctx][cpu], count[0]);
        else if (perf_stat_evsel__is(counter, TOPDOWN_TOTAL_SLOTS))
                update_stats(&runtime_topdown_total_slots[ctx][cpu], count[0]);
        else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_ISSUED))
                update_stats(&runtime_topdown_slots_issued[ctx][cpu], count[0]);
        else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_RETIRED))
                update_stats(&runtime_topdown_slots_retired[ctx][cpu], count[0]);
        else if (perf_stat_evsel__is(counter, TOPDOWN_FETCH_BUBBLES))
                update_stats(&runtime_topdown_fetch_bubbles[ctx][cpu], count[0]);
        else if (perf_stat_evsel__is(counter, TOPDOWN_RECOVERY_BUBBLES))
                update_stats(&runtime_topdown_recovery_bubbles[ctx][cpu], count[0]);
        else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
                update_stats(&runtime_stalled_cycles_front_stats[ctx][cpu], count[0]);
        else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
                update_stats(&runtime_stalled_cycles_back_stats[ctx][cpu], count[0]);
        else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
                update_stats(&runtime_branches_stats[ctx][cpu], count[0]);
        else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES))
                update_stats(&runtime_cacherefs_stats[ctx][cpu], count[0]);
        else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D))
                update_stats(&runtime_l1_dcache_stats[ctx][cpu], count[0]);
        else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I))
                update_stats(&runtime_l1_icache_stats[ctx][cpu], count[0]);
        else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL))
                update_stats(&runtime_ll_cache_stats[ctx][cpu], count[0]);
        else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB))
                update_stats(&runtime_dtlb_cache_stats[ctx][cpu], count[0]);
        else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
                update_stats(&runtime_itlb_cache_stats[ctx][cpu], count[0]);
        else if (perf_stat_evsel__is(counter, SMI_NUM))
                update_stats(&runtime_smi_num_stats[ctx][cpu], count[0]);
        else if (perf_stat_evsel__is(counter, APERF))
                update_stats(&runtime_aperf_stats[ctx][cpu], count[0]);

        if (counter->collect_stat) {
                struct saved_value *v = saved_value_lookup(counter, cpu, ctx,
                                                           true);
                update_stats(&v->stats, count[0]);
        }
}

/* used for get_ratio_color() */
enum grc_type {
        GRC_STALLED_CYCLES_FE,
        GRC_STALLED_CYCLES_BE,
        GRC_CACHE_MISSES,
        GRC_MAX_NR
};

static const char *get_ratio_color(enum grc_type type, double ratio)
{
        static const double grc_table[GRC_MAX_NR][3] = {
                [GRC_STALLED_CYCLES_FE] = { 50.0, 30.0, 10.0 },
                [GRC_STALLED_CYCLES_BE] = { 75.0, 50.0, 20.0 },
                [GRC_CACHE_MISSES]      = { 20.0, 10.0, 5.0 },
        };
        const char *color = PERF_COLOR_NORMAL;

        if (ratio > grc_table[type][0])
                color = PERF_COLOR_RED;
        else if (ratio > grc_table[type][1])
                color = PERF_COLOR_MAGENTA;
        else if (ratio > grc_table[type][2])
                color = PERF_COLOR_YELLOW;

        return color;
}

static struct perf_evsel *perf_stat__find_event(struct perf_evlist *evsel_list,
                                                const char *name)
{
        struct perf_evsel *c2;

        evlist__for_each_entry (evsel_list, c2) {
                if (!strcasecmp(c2->name, name))
                        return c2;
        }
        return NULL;
}

/*
 * Find the events referenced by each MetricExpr, mark them for stat
 * collection and link them to the event that carries the expression.
 */
void perf_stat__collect_metric_expr(struct perf_evlist *evsel_list)
{
        struct perf_evsel *counter, *leader, **metric_events, *oc;
        bool found;
        const char **metric_names;
        int i;
        int num_metric_names;

        evlist__for_each_entry(evsel_list, counter) {
                bool invalid = false;

                leader = counter->leader;
                if (!counter->metric_expr)
                        continue;
                metric_events = counter->metric_events;
                if (!metric_events) {
                        if (expr__find_other(counter->metric_expr, counter->name,
                                                &metric_names, &num_metric_names) < 0)
                                continue;

                        metric_events = calloc(num_metric_names + 1,
                                               sizeof(struct perf_evsel *));
                        if (!metric_events)
                                return;
                        counter->metric_events = metric_events;
                }

                for (i = 0; i < num_metric_names; i++) {
                        found = false;
                        if (leader) {
                                /* Search in group */
                                for_each_group_member (oc, leader) {
                                        if (!strcasecmp(oc->name, metric_names[i])) {
                                                found = true;
                                                break;
                                        }
                                }
                        }
                        if (!found) {
                                /* Search ignoring groups */
                                oc = perf_stat__find_event(evsel_list, metric_names[i]);
                        }
                        if (!oc) {
                                /* Deduping one is good enough to handle duplicated PMUs. */
                                static char *printed;

                                /*
                                 * Adding events automatically would be difficult, because
                                 * it would risk creating groups that are not schedulable.
                                 * perf stat doesn't understand all the scheduling constraints
                                 * of events. So we ask the user instead to add the missing
                                 * events.
                                 */
                                if (!printed || strcasecmp(printed, metric_names[i])) {
                                        fprintf(stderr,
                                                "Add %s event to groups to get metric expression for %s\n",
                                                metric_names[i],
                                                counter->name);
                                        printed = strdup(metric_names[i]);
                                }
                                invalid = true;
                                continue;
                        }
                        metric_events[i] = oc;
                        oc->collect_stat = true;
                }
                metric_events[i] = NULL;
                free(metric_names);
                if (invalid) {
                        free(metric_events);
                        counter->metric_events = NULL;
                        counter->metric_expr = NULL;
                }
        }
}
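
/*
 * Example (hypothetical event names): if an evsel named "insn_retired"
 * carries the MetricExpr "insn_retired / cycles_any", the pass above looks
 * up the event named "cycles_any" (first within the same group, then
 * anywhere in the evlist), sets its ->collect_stat, and stores it in the
 * NULL-terminated metric_events array of "insn_retired", to be evaluated
 * at print time by perf_stat__print_shadow_stats().
 */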

static void print_stalled_cycles_frontend(int cpu,
                                          struct perf_evsel *evsel, double avg,
                                          struct perf_stat_output_ctx *out)
{
        double total, ratio = 0.0;
        const char *color;
        int ctx = evsel_context(evsel);

        total = avg_stats(&runtime_cycles_stats[ctx][cpu]);

        if (total)
                ratio = avg / total * 100.0;

        color = get_ratio_color(GRC_STALLED_CYCLES_FE, ratio);

        if (ratio)
                out->print_metric(out->ctx, color, "%7.2f%%", "frontend cycles idle",
                                  ratio);
        else
                out->print_metric(out->ctx, NULL, NULL, "frontend cycles idle", 0);
}

static void print_stalled_cycles_backend(int cpu,
                                         struct perf_evsel *evsel, double avg,
                                         struct perf_stat_output_ctx *out)
{
        double total, ratio = 0.0;
        const char *color;
        int ctx = evsel_context(evsel);

        total = avg_stats(&runtime_cycles_stats[ctx][cpu]);

        if (total)
                ratio = avg / total * 100.0;

        color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio);

        out->print_metric(out->ctx, color, "%7.2f%%", "backend cycles idle", ratio);
}

static void print_branch_misses(int cpu,
                                struct perf_evsel *evsel,
                                double avg,
                                struct perf_stat_output_ctx *out)
{
        double total, ratio = 0.0;
        const char *color;
        int ctx = evsel_context(evsel);

        total = avg_stats(&runtime_branches_stats[ctx][cpu]);

        if (total)
                ratio = avg / total * 100.0;

        color = get_ratio_color(GRC_CACHE_MISSES, ratio);

        out->print_metric(out->ctx, color, "%7.2f%%", "of all branches", ratio);
}

static void print_l1_dcache_misses(int cpu,
                                   struct perf_evsel *evsel,
                                   double avg,
                                   struct perf_stat_output_ctx *out)
{
        double total, ratio = 0.0;
        const char *color;
        int ctx = evsel_context(evsel);

        total = avg_stats(&runtime_l1_dcache_stats[ctx][cpu]);

        if (total)
                ratio = avg / total * 100.0;

        color = get_ratio_color(GRC_CACHE_MISSES, ratio);

        out->print_metric(out->ctx, color, "%7.2f%%", "of all L1-dcache accesses", ratio);
}

static void print_l1_icache_misses(int cpu,
                                   struct perf_evsel *evsel,
                                   double avg,
                                   struct perf_stat_output_ctx *out)
{
        double total, ratio = 0.0;
        const char *color;
        int ctx = evsel_context(evsel);

        total = avg_stats(&runtime_l1_icache_stats[ctx][cpu]);

        if (total)
                ratio = avg / total * 100.0;

        color = get_ratio_color(GRC_CACHE_MISSES, ratio);
        out->print_metric(out->ctx, color, "%7.2f%%", "of all L1-icache accesses", ratio);
}

static void print_dtlb_cache_misses(int cpu,
                                    struct perf_evsel *evsel,
                                    double avg,
                                    struct perf_stat_output_ctx *out)
{
        double total, ratio = 0.0;
        const char *color;
        int ctx = evsel_context(evsel);

        total = avg_stats(&runtime_dtlb_cache_stats[ctx][cpu]);

        if (total)
                ratio = avg / total * 100.0;

        color = get_ratio_color(GRC_CACHE_MISSES, ratio);
        out->print_metric(out->ctx, color, "%7.2f%%", "of all dTLB cache accesses", ratio);
}

static void print_itlb_cache_misses(int cpu,
                                    struct perf_evsel *evsel,
                                    double avg,
                                    struct perf_stat_output_ctx *out)
{
        double total, ratio = 0.0;
        const char *color;
        int ctx = evsel_context(evsel);

        total = avg_stats(&runtime_itlb_cache_stats[ctx][cpu]);

        if (total)
                ratio = avg / total * 100.0;

        color = get_ratio_color(GRC_CACHE_MISSES, ratio);
        out->print_metric(out->ctx, color, "%7.2f%%", "of all iTLB cache accesses", ratio);
}

static void print_ll_cache_misses(int cpu,
                                  struct perf_evsel *evsel,
                                  double avg,
                                  struct perf_stat_output_ctx *out)
{
        double total, ratio = 0.0;
        const char *color;
        int ctx = evsel_context(evsel);

        total = avg_stats(&runtime_ll_cache_stats[ctx][cpu]);

        if (total)
                ratio = avg / total * 100.0;

        color = get_ratio_color(GRC_CACHE_MISSES, ratio);
        out->print_metric(out->ctx, color, "%7.2f%%", "of all LL-cache accesses", ratio);
}

/*
 * High level "TopDown" CPU core pipeline bottleneck break down.
 *
 * Basic concept following
 * Ahmad Yasin, "A Top-Down Method for Performance Analysis and Counters
 * Architecture", ISPASS 2014.
 *
 * The CPU pipeline is divided into four areas that can be bottlenecks:
 *
 * Frontend -> Backend -> Retiring
 * BadSpeculation in addition covers out-of-order execution that is thrown
 * away (for example on branch mispredictions).
 * Frontend is instruction decoding.
 * Backend is execution, like computation and accessing data in memory.
 * Retiring is good execution that is not directly bottlenecked.
 *
 * The formulas are computed in slots.
 * A slot is an entry in the pipeline, one per unit of pipeline width
 * (for example a 4-wide pipeline has 4 slots per cycle).
 *
 * Formulas:
 * BadSpeculation = ((SlotsIssued - SlotsRetired) + RecoveryBubbles) /
 *			TotalSlots
 * Retiring = SlotsRetired / TotalSlots
 * FrontendBound = FetchBubbles / TotalSlots
 * BackendBound = 1.0 - BadSpeculation - Retiring - FrontendBound
 *
 * The kernel provides the mapping to the low level CPU events and any
 * scaling needed for the CPU pipeline width, for example:
 *
 * TotalSlots = Cycles * 4
 *
 * The scaling factor is communicated in the sysfs unit.
 *
 * In some cases the CPU may not be able to measure all the formulas due to
 * missing events. In that case multiple formulas are combined where
 * possible.
 *
 * Full TopDown supports more levels to sub-divide each area: for example
 * BackendBound into compute bound and memory bound. For now we only
 * support Level 1 TopDown.
 */
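
/*
 * Worked example (hypothetical counts, 4-wide pipeline): over 1,000,000
 * cycles, TotalSlots = 4,000,000. With SlotsIssued = 3,000,000,
 * SlotsRetired = 2,500,000, RecoveryBubbles = 100,000 and
 * FetchBubbles = 600,000:
 *
 *   BadSpeculation = ((3,000,000 - 2,500,000) + 100,000) / 4,000,000 = 15%
 *   Retiring       = 2,500,000 / 4,000,000                           = 62.5%
 *   FrontendBound  = 600,000 / 4,000,000                             = 15%
 *   BackendBound   = 1.0 - 0.15 - 0.625 - 0.15                       = 7.5%
 */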

/* Clamp small negative rounding errors from the formulas above to zero. */
static double sanitize_val(double x)
{
        if (x < 0 && x >= -0.02)
                return 0.0;
        return x;
}

static double td_total_slots(int ctx, int cpu)
{
        return avg_stats(&runtime_topdown_total_slots[ctx][cpu]);
}

static double td_bad_spec(int ctx, int cpu)
{
        double bad_spec = 0;
        double total_slots;
        double total;

        total = avg_stats(&runtime_topdown_slots_issued[ctx][cpu]) -
                avg_stats(&runtime_topdown_slots_retired[ctx][cpu]) +
                avg_stats(&runtime_topdown_recovery_bubbles[ctx][cpu]);
        total_slots = td_total_slots(ctx, cpu);
        if (total_slots)
                bad_spec = total / total_slots;
        return sanitize_val(bad_spec);
}

static double td_retiring(int ctx, int cpu)
{
        double retiring = 0;
        double total_slots = td_total_slots(ctx, cpu);
        double ret_slots = avg_stats(&runtime_topdown_slots_retired[ctx][cpu]);

        if (total_slots)
                retiring = ret_slots / total_slots;
        return retiring;
}

static double td_fe_bound(int ctx, int cpu)
{
        double fe_bound = 0;
        double total_slots = td_total_slots(ctx, cpu);
        double fetch_bub = avg_stats(&runtime_topdown_fetch_bubbles[ctx][cpu]);

        if (total_slots)
                fe_bound = fetch_bub / total_slots;
        return fe_bound;
}

static double td_be_bound(int ctx, int cpu)
{
        double sum = (td_fe_bound(ctx, cpu) +
                      td_bad_spec(ctx, cpu) +
                      td_retiring(ctx, cpu));
        if (sum == 0)
                return 0;
        return sanitize_val(1.0 - sum);
}

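/*
 * Worked example (hypothetical numbers, assuming the counters were set up
 * by perf stat --smi-cost, where the freeze_on_smi knob makes the cycles
 * counter freeze during SMIs while msr/aperf/ keeps counting): with
 * aperf = 1,000,000, cycles = 950,000 and smi_num > 0,
 * cost = (1,000,000 - 950,000) / 1,000,000 * 100 = 5.0%.
 */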
static void print_smi_cost(int cpu, struct perf_evsel *evsel,
                           struct perf_stat_output_ctx *out)
{
        double smi_num, aperf, cycles, cost = 0.0;
        int ctx = evsel_context(evsel);
        const char *color = NULL;

        smi_num = avg_stats(&runtime_smi_num_stats[ctx][cpu]);
        aperf = avg_stats(&runtime_aperf_stats[ctx][cpu]);
        cycles = avg_stats(&runtime_cycles_stats[ctx][cpu]);

        if ((cycles == 0) || (aperf == 0))
                return;

        if (smi_num)
                cost = (aperf - cycles) / aperf * 100.00;

        if (cost > 10)
                color = PERF_COLOR_RED;
        out->print_metric(out->ctx, color, "%8.1f%%", "SMI cycles%", cost);
        out->print_metric(out->ctx, NULL, "%4.0f", "SMI#", smi_num);
}

void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
                                   double avg, int cpu,
                                   struct perf_stat_output_ctx *out)
{
        void *ctxp = out->ctx;
        print_metric_t print_metric = out->print_metric;
        double total, ratio = 0.0, total2;
        const char *color = NULL;
        int ctx = evsel_context(evsel);

        if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
                total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
                if (total) {
                        ratio = avg / total;
                        print_metric(ctxp, NULL, "%7.2f ",
                                        "insn per cycle", ratio);
                } else {
                        print_metric(ctxp, NULL, NULL, "insn per cycle", 0);
                }
                total = avg_stats(&runtime_stalled_cycles_front_stats[ctx][cpu]);
                total = max(total, avg_stats(&runtime_stalled_cycles_back_stats[ctx][cpu]));

                if (total && avg) {
                        out->new_line(ctxp);
                        ratio = total / avg;
                        print_metric(ctxp, NULL, "%7.2f ",
                                        "stalled cycles per insn",
                                        ratio);
                } else if (have_frontend_stalled) {
                        print_metric(ctxp, NULL, NULL,
                                     "stalled cycles per insn", 0);
                }
        } else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) {
                if (runtime_branches_stats[ctx][cpu].n != 0)
                        print_branch_misses(cpu, evsel, avg, out);
                else
                        print_metric(ctxp, NULL, NULL, "of all branches", 0);
        } else if (
                evsel->attr.type == PERF_TYPE_HW_CACHE &&
                evsel->attr.config == (PERF_COUNT_HW_CACHE_L1D |
                                       ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
                                       ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
                if (runtime_l1_dcache_stats[ctx][cpu].n != 0)
                        print_l1_dcache_misses(cpu, evsel, avg, out);
                else
                        print_metric(ctxp, NULL, NULL, "of all L1-dcache accesses", 0);
        } else if (
                evsel->attr.type == PERF_TYPE_HW_CACHE &&
                evsel->attr.config == (PERF_COUNT_HW_CACHE_L1I |
                                       ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
                                       ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
                if (runtime_l1_icache_stats[ctx][cpu].n != 0)
                        print_l1_icache_misses(cpu, evsel, avg, out);
                else
                        print_metric(ctxp, NULL, NULL, "of all L1-icache accesses", 0);
        } else if (
                evsel->attr.type == PERF_TYPE_HW_CACHE &&
                evsel->attr.config == (PERF_COUNT_HW_CACHE_DTLB |
                                       ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
                                       ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
                if (runtime_dtlb_cache_stats[ctx][cpu].n != 0)
                        print_dtlb_cache_misses(cpu, evsel, avg, out);
                else
                        print_metric(ctxp, NULL, NULL, "of all dTLB cache accesses", 0);
        } else if (
                evsel->attr.type == PERF_TYPE_HW_CACHE &&
                evsel->attr.config == (PERF_COUNT_HW_CACHE_ITLB |
                                       ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
                                       ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
                if (runtime_itlb_cache_stats[ctx][cpu].n != 0)
                        print_itlb_cache_misses(cpu, evsel, avg, out);
                else
                        print_metric(ctxp, NULL, NULL, "of all iTLB cache accesses", 0);
        } else if (
                evsel->attr.type == PERF_TYPE_HW_CACHE &&
                evsel->attr.config == (PERF_COUNT_HW_CACHE_LL |
                                       ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
                                       ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
                if (runtime_ll_cache_stats[ctx][cpu].n != 0)
                        print_ll_cache_misses(cpu, evsel, avg, out);
                else
                        print_metric(ctxp, NULL, NULL, "of all LL-cache accesses", 0);
        } else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES)) {
                total = avg_stats(&runtime_cacherefs_stats[ctx][cpu]);

                if (total)
                        ratio = avg * 100 / total;

                if (runtime_cacherefs_stats[ctx][cpu].n != 0)
                        print_metric(ctxp, NULL, "%8.3f %%",
                                     "of all cache refs", ratio);
                else
                        print_metric(ctxp, NULL, NULL, "of all cache refs", 0);
        } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) {
                print_stalled_cycles_frontend(cpu, evsel, avg, out);
        } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) {
                print_stalled_cycles_backend(cpu, evsel, avg, out);
        } else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
                total = avg_stats(&runtime_nsecs_stats[cpu]);

                if (total) {
                        ratio = avg / total;
                        print_metric(ctxp, NULL, "%8.3f", "GHz", ratio);
                } else {
                        print_metric(ctxp, NULL, NULL, "GHz", 0);
                }
        } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX)) {
                total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
                if (total)
                        print_metric(ctxp, NULL,
                                        "%7.2f%%", "transactional cycles",
                                        100.0 * (avg / total));
                else
                        print_metric(ctxp, NULL, NULL, "transactional cycles",
                                     0);
        } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) {
                total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
                total2 = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]);
                if (total2 < avg)
                        total2 = avg;
                if (total)
                        print_metric(ctxp, NULL, "%7.2f%%", "aborted cycles",
                                100.0 * ((total2 - avg) / total));
                else
                        print_metric(ctxp, NULL, NULL, "aborted cycles", 0);
        } else if (perf_stat_evsel__is(evsel, TRANSACTION_START)) {
                total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]);

                if (avg)
                        ratio = total / avg;

                if (runtime_cycles_in_tx_stats[ctx][cpu].n != 0)
                        print_metric(ctxp, NULL, "%8.0f",
                                     "cycles / transaction", ratio);
                else
                        print_metric(ctxp, NULL, NULL, "cycles / transaction",
                                     0);
        } else if (perf_stat_evsel__is(evsel, ELISION_START)) {
                total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]);

                if (avg)
                        ratio = total / avg;

                print_metric(ctxp, NULL, "%8.0f", "cycles / elision", ratio);
        } else if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK) ||
                   perf_evsel__match(evsel, SOFTWARE, SW_CPU_CLOCK)) {
                if ((ratio = avg_stats(&walltime_nsecs_stats)) != 0)
                        print_metric(ctxp, NULL, "%8.3f", "CPUs utilized",
                                     avg / ratio);
                else
                        print_metric(ctxp, NULL, NULL, "CPUs utilized", 0);
        } else if (perf_stat_evsel__is(evsel, TOPDOWN_FETCH_BUBBLES)) {
                double fe_bound = td_fe_bound(ctx, cpu);

                if (fe_bound > 0.2)
                        color = PERF_COLOR_RED;
                print_metric(ctxp, color, "%8.1f%%", "frontend bound",
                                fe_bound * 100.);
        } else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_RETIRED)) {
                double retiring = td_retiring(ctx, cpu);

                if (retiring > 0.7)
                        color = PERF_COLOR_GREEN;
                print_metric(ctxp, color, "%8.1f%%", "retiring",
                                retiring * 100.);
        } else if (perf_stat_evsel__is(evsel, TOPDOWN_RECOVERY_BUBBLES)) {
                double bad_spec = td_bad_spec(ctx, cpu);

                if (bad_spec > 0.1)
                        color = PERF_COLOR_RED;
                print_metric(ctxp, color, "%8.1f%%", "bad speculation",
                                bad_spec * 100.);
        } else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_ISSUED)) {
                double be_bound = td_be_bound(ctx, cpu);
                const char *name = "backend bound";
                static int have_recovery_bubbles = -1;

                /* In case the CPU does not support topdown-recovery-bubbles */
                if (have_recovery_bubbles < 0)
                        have_recovery_bubbles = pmu_have_event("cpu",
                                        "topdown-recovery-bubbles");
                if (!have_recovery_bubbles)
                        name = "backend bound/bad spec";

                if (be_bound > 0.2)
                        color = PERF_COLOR_RED;
                if (td_total_slots(ctx, cpu) > 0)
                        print_metric(ctxp, color, "%8.1f%%", name,
                                        be_bound * 100.);
                else
                        print_metric(ctxp, NULL, NULL, name, 0);
        } else if (evsel->metric_expr) {
                struct parse_ctx pctx;
                int i;

                expr__ctx_init(&pctx);
                expr__add_id(&pctx, evsel->name, avg);
                for (i = 0; evsel->metric_events[i]; i++) {
                        struct saved_value *v;

                        v = saved_value_lookup(evsel->metric_events[i], cpu, ctx, false);
                        if (!v)
                                break;
                        expr__add_id(&pctx, evsel->metric_events[i]->name,
                                     avg_stats(&v->stats));
                }
                if (!evsel->metric_events[i]) {
                        const char *p = evsel->metric_expr;

                        if (expr__parse(&ratio, &pctx, &p) == 0)
                                print_metric(ctxp, NULL, "%8.1f",
                                        evsel->metric_name ?
                                        evsel->metric_name :
                                        out->force_header ? evsel->name : "",
                                        ratio);
                        else
                                print_metric(ctxp, NULL, NULL, "", 0);
                } else
                        print_metric(ctxp, NULL, NULL, "", 0);
        } else if (runtime_nsecs_stats[cpu].n != 0) {
                char unit = 'M';
                char unit_buf[10];

                total = avg_stats(&runtime_nsecs_stats[cpu]);

                if (total)
                        ratio = 1000.0 * avg / total;
                if (ratio < 0.001) {
                        ratio *= 1000;
                        unit = 'K';
                }
                snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit);
                print_metric(ctxp, NULL, "%8.3f", unit_buf, ratio);
        } else if (perf_stat_evsel__is(evsel, SMI_NUM)) {
                print_smi_cost(cpu, evsel, out);
        } else {
                print_metric(ctxp, NULL, NULL, NULL, 0);
        }
}