tools/perf/builtin-stat.c
1 /*
2  * builtin-stat.c
3  *
4  * Builtin stat command: Give a precise performance counters summary
5  * overview about any workload, CPU or specific PID.
6  *
7  * Sample output:
8
9    $ perf stat ./hackbench 10
10
11   Time: 0.118
12
13   Performance counter stats for './hackbench 10':
14
15        1708.761321 task-clock                #   11.037 CPUs utilized
16             41,190 context-switches          #    0.024 M/sec
17              6,735 CPU-migrations            #    0.004 M/sec
18             17,318 page-faults               #    0.010 M/sec
19      5,205,202,243 cycles                    #    3.046 GHz
20      3,856,436,920 stalled-cycles-frontend   #   74.09% frontend cycles idle
21      1,600,790,871 stalled-cycles-backend    #   30.75% backend  cycles idle
22      2,603,501,247 instructions              #    0.50  insns per cycle
23                                              #    1.48  stalled cycles per insn
24        484,357,498 branches                  #  283.455 M/sec
25          6,388,934 branch-misses             #    1.32% of all branches
26
27         0.154822978  seconds time elapsed
28
29  *
30  * Copyright (C) 2008-2011, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
31  *
32  * Improvements and fixes by:
33  *
34  *   Arjan van de Ven <arjan@linux.intel.com>
35  *   Yanmin Zhang <yanmin.zhang@intel.com>
36  *   Wu Fengguang <fengguang.wu@intel.com>
37  *   Mike Galbraith <efault@gmx.de>
38  *   Paul Mackerras <paulus@samba.org>
39  *   Jaswinder Singh Rajput <jaswinder@kernel.org>
40  *
41  * Released under the GPL v2. (and only v2, not any later version)
42  */
43
44 #include "perf.h"
45 #include "builtin.h"
46 #include "util/cgroup.h"
47 #include "util/util.h"
48 #include <subcmd/parse-options.h>
49 #include "util/parse-events.h"
50 #include "util/pmu.h"
51 #include "util/event.h"
52 #include "util/evlist.h"
53 #include "util/evsel.h"
54 #include "util/debug.h"
55 #include "util/drv_configs.h"
56 #include "util/color.h"
57 #include "util/stat.h"
58 #include "util/header.h"
59 #include "util/cpumap.h"
60 #include "util/thread.h"
61 #include "util/thread_map.h"
62 #include "util/counts.h"
63 #include "util/group.h"
64 #include "util/session.h"
65 #include "util/tool.h"
66 #include "util/string2.h"
67 #include "util/metricgroup.h"
68 #include "util/top.h"
69 #include "asm/bug.h"
70
71 #include <linux/time64.h>
72 #include <api/fs/fs.h>
73 #include <errno.h>
74 #include <signal.h>
75 #include <stdlib.h>
76 #include <sys/prctl.h>
77 #include <inttypes.h>
78 #include <locale.h>
79 #include <math.h>
80 #include <sys/types.h>
81 #include <sys/stat.h>
82 #include <sys/wait.h>
83 #include <unistd.h>
84 #include <sys/time.h>
85 #include <sys/resource.h>
87
88 #include "sane_ctype.h"
89
90 #define DEFAULT_SEPARATOR       " "
91 #define FREEZE_ON_SMI_PATH      "devices/cpu/freeze_on_smi"
92
93 static void print_counters(struct timespec *ts, int argc, const char **argv);
94
95 /* Default events used for perf stat -T */
96 static const char *transaction_attrs = {
97         "task-clock,"
98         "{"
99         "instructions,"
100         "cycles,"
101         "cpu/cycles-t/,"
102         "cpu/tx-start/,"
103         "cpu/el-start/,"
104         "cpu/cycles-ct/"
105         "}"
106 };
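/*
 * Note (illustrative, not in the original source): the adjacent string
 * literals above concatenate into the single event spec
 * "task-clock,{instructions,cycles,cpu/cycles-t/,cpu/tx-start/,cpu/el-start/,cpu/cycles-ct/}",
 * i.e. task-clock plus one group of transactional events, so the grouped
 * counters are scheduled onto the PMU together.
 */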
107
108 /* More limited version when the CPU does not have all events. */
109 static const char *transaction_limited_attrs = {
110         "task-clock,"
111         "{"
112         "instructions,"
113         "cycles,"
114         "cpu/cycles-t/,"
115         "cpu/tx-start/"
116         "}"
117 };
118
119 static const char *topdown_attrs[] = {
120         "topdown-total-slots",
121         "topdown-slots-retired",
122         "topdown-recovery-bubbles",
123         "topdown-fetch-bubbles",
124         "topdown-slots-issued",
125         NULL,
126 };
127
128 static const char *smi_cost_attrs = {
129         "{"
130         "msr/aperf/,"
131         "msr/smi/,"
132         "cycles"
133         "}"
134 };
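/*
 * As above, these literals concatenate to "{msr/aperf/,msr/smi/,cycles}" -
 * a single group, so SMI counts and cycles are measured over the same
 * scheduling window as aperf (illustrative note, not from the original).
 */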
135
136 static struct perf_evlist       *evsel_list;
137
138 static struct target target = {
139         .uid    = UINT_MAX,
140 };
141
142 #define METRIC_ONLY_LEN 20
143
144 static volatile pid_t           child_pid                       = -1;
145 static int                      detailed_run                    =  0;
146 static bool                     transaction_run;
147 static bool                     topdown_run                     = false;
148 static bool                     smi_cost                        = false;
149 static bool                     smi_reset                       = false;
150 static int                      big_num_opt                     =  -1;
151 static bool                     group                           = false;
152 static const char               *pre_cmd                        = NULL;
153 static const char               *post_cmd                       = NULL;
154 static bool                     sync_run                        = false;
155 static bool                     forever                         = false;
156 static bool                     force_metric_only               = false;
157 static struct timespec          ref_time;
158 static bool                     append_file;
159 static bool                     interval_count;
160 static const char               *output_name;
161 static int                      output_fd;
162
163 struct perf_stat {
164         bool                     record;
165         struct perf_data         data;
166         struct perf_session     *session;
167         u64                      bytes_written;
168         struct perf_tool         tool;
169         bool                     maps_allocated;
170         struct cpu_map          *cpus;
171         struct thread_map       *threads;
172         enum aggr_mode           aggr_mode;
173 };
174
175 static struct perf_stat         perf_stat;
176 #define STAT_RECORD             perf_stat.record
177
178 static volatile int done = 0;
179
180 static struct perf_stat_config stat_config = {
181         .aggr_mode              = AGGR_GLOBAL,
182         .scale                  = true,
183         .unit_width             = 4, /* strlen("unit") */
184         .run_count              = 1,
185         .metric_only_len        = METRIC_ONLY_LEN,
186         .walltime_nsecs_stats   = &walltime_nsecs_stats,
187         .big_num                = true,
188 };
189
190 static inline void diff_timespec(struct timespec *r, struct timespec *a,
191                                  struct timespec *b)
192 {
193         r->tv_sec = a->tv_sec - b->tv_sec;
194         if (a->tv_nsec < b->tv_nsec) {
195                 r->tv_nsec = a->tv_nsec + NSEC_PER_SEC - b->tv_nsec;
196                 r->tv_sec--;
197         } else {
198                 r->tv_nsec = a->tv_nsec - b->tv_nsec;
199         }
200 }
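/*
 * Worked example (illustrative): for a = { .tv_sec = 2, .tv_nsec = 100 }
 * and b = { .tv_sec = 1, .tv_nsec = 900 }, the nanosecond field borrows
 * one second, giving r = { .tv_sec = 0, .tv_nsec = NSEC_PER_SEC - 800 }.
 */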
201
202 static void perf_stat__reset_stats(void)
203 {
204         int i;
205
206         perf_evlist__reset_stats(evsel_list);
207         perf_stat__reset_shadow_stats();
208
209         for (i = 0; i < stat_config.stats_num; i++)
210                 perf_stat__reset_shadow_per_stat(&stat_config.stats[i]);
211 }
212
213 static int process_synthesized_event(struct perf_tool *tool __maybe_unused,
214                                      union perf_event *event,
215                                      struct perf_sample *sample __maybe_unused,
216                                      struct machine *machine __maybe_unused)
217 {
218         if (perf_data__write(&perf_stat.data, event, event->header.size) < 0) {
219                 pr_err("failed to write perf data, error: %m\n");
220                 return -1;
221         }
222
223         perf_stat.bytes_written += event->header.size;
224         return 0;
225 }
226
227 static int write_stat_round_event(u64 tm, u64 type)
228 {
229         return perf_event__synthesize_stat_round(NULL, tm, type,
230                                                  process_synthesized_event,
231                                                  NULL);
232 }
233
234 #define WRITE_STAT_ROUND_EVENT(time, interval) \
235         write_stat_round_event(time, PERF_STAT_ROUND_TYPE__ ## interval)
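/*
 * Token-pasting example (illustrative): WRITE_STAT_ROUND_EVENT(ns, FINAL)
 * expands to write_stat_round_event(ns, PERF_STAT_ROUND_TYPE__FINAL).
 */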
236
237 #define SID(e, x, y) xyarray__entry(e->sample_id, x, y)
238
239 static int
240 perf_evsel__write_stat_event(struct perf_evsel *counter, u32 cpu, u32 thread,
241                              struct perf_counts_values *count)
242 {
243         struct perf_sample_id *sid = SID(counter, cpu, thread);
244
245         return perf_event__synthesize_stat(NULL, cpu, thread, sid->id, count,
246                                            process_synthesized_event, NULL);
247 }
248
249 /*
250  * Read out the results of a single counter:
251  * do not aggregate counts across CPUs in system-wide mode
252  */
253 static int read_counter(struct perf_evsel *counter)
254 {
255         int nthreads = thread_map__nr(evsel_list->threads);
256         int ncpus, cpu, thread;
257
258         if (target__has_cpu(&target) && !target__has_per_thread(&target))
259                 ncpus = perf_evsel__nr_cpus(counter);
260         else
261                 ncpus = 1;
262
263         if (!counter->supported)
264                 return -ENOENT;
265
266         if (counter->system_wide)
267                 nthreads = 1;
268
269         for (thread = 0; thread < nthreads; thread++) {
270                 for (cpu = 0; cpu < ncpus; cpu++) {
271                         struct perf_counts_values *count;
272
273                         count = perf_counts(counter->counts, cpu, thread);
274
275                         /*
276                          * The leader's group read loads data into its group members
277                          * (via perf_evsel__read_counter) and sets their count->loaded.
278                          */
279                         if (!count->loaded &&
280                             perf_evsel__read_counter(counter, cpu, thread)) {
281                                 counter->counts->scaled = -1;
282                                 perf_counts(counter->counts, cpu, thread)->ena = 0;
283                                 perf_counts(counter->counts, cpu, thread)->run = 0;
284                                 return -1;
285                         }
286
287                         count->loaded = false;
288
289                         if (STAT_RECORD) {
290                                 if (perf_evsel__write_stat_event(counter, cpu, thread, count)) {
291                                         pr_err("failed to write stat event\n");
292                                         return -1;
293                                 }
294                         }
295
296                         if (verbose > 1) {
297                                 fprintf(stat_config.output,
298                                         "%s: %d: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
299                                                 perf_evsel__name(counter),
300                                                 cpu,
301                                                 count->val, count->ena, count->run);
302                         }
303                 }
304         }
305
306         return 0;
307 }
308
309 static void read_counters(void)
310 {
311         struct perf_evsel *counter;
312         int ret;
313
314         evlist__for_each_entry(evsel_list, counter) {
315                 ret = read_counter(counter);
316                 if (ret)
317                         pr_debug("failed to read counter %s\n", counter->name);
318
319                 if (ret == 0 && perf_stat_process_counter(&stat_config, counter))
320                         pr_warning("failed to process counter %s\n", counter->name);
321         }
322 }
323
324 static void process_interval(void)
325 {
326         struct timespec ts, rs;
327
328         read_counters();
329
330         clock_gettime(CLOCK_MONOTONIC, &ts);
331         diff_timespec(&rs, &ts, &ref_time);
332
333         if (STAT_RECORD) {
334                 if (WRITE_STAT_ROUND_EVENT(rs.tv_sec * NSEC_PER_SEC + rs.tv_nsec, INTERVAL))
335                         pr_err("failed to write stat round event\n");
336         }
337
338         init_stats(&walltime_nsecs_stats);
339         update_stats(&walltime_nsecs_stats, stat_config.interval * NSEC_PER_MSEC);
340         print_counters(&rs, 0, NULL);
341 }
342
343 static void enable_counters(void)
344 {
345         if (stat_config.initial_delay)
346                 usleep(stat_config.initial_delay * USEC_PER_MSEC);
347
348         /*
349          * We need to enable counters explicitly only if either we are not
350          * forking a workload (we attached to an existing task or CPU), or
351          * an initial delay was configured (counters were created disabled).
352          */
353         if (!target__none(&target) || stat_config.initial_delay)
354                 perf_evlist__enable(evsel_list);
355 }
356
357 static void disable_counters(void)
358 {
359         /*
360          * If we attached to an existing task or CPU, counters may still be
361          * running. To get accurate group ratios, we must stop groups from
362          * counting before reading their constituent counters.
363          */
364         if (!target__none(&target))
365                 perf_evlist__disable(evsel_list);
366 }
367
368 static volatile int workload_exec_errno;
369
370 /*
371  * perf_evlist__prepare_workload will send a SIGUSR1
372  * if the fork fails, since we requested it by setting its
373  * want_signal to true.
374  */
375 static void workload_exec_failed_signal(int signo __maybe_unused, siginfo_t *info,
376                                         void *ucontext __maybe_unused)
377 {
378         workload_exec_errno = info->si_value.sival_int;
379 }
380
381 static bool perf_evsel__should_store_id(struct perf_evsel *counter)
382 {
383         return STAT_RECORD || counter->attr.read_format & PERF_FORMAT_ID;
384 }
385
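/*
 * The target is considered alive if at least one of its threads still has
 * a /proc/<pid> entry; this only applies when monitoring existing tasks.
 */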
386 static bool is_target_alive(struct target *_target,
387                             struct thread_map *threads)
388 {
389         struct stat st;
390         int i;
391
392         if (!target__has_task(_target))
393                 return true;
394
395         for (i = 0; i < threads->nr; i++) {
396                 char path[PATH_MAX];
397
398                 scnprintf(path, PATH_MAX, "%s/%d", procfs__mountpoint(),
399                           threads->map[i].pid);
400
401                 if (!stat(path, &st))
402                         return true;
403         }
404
405         return false;
406 }
407
408 static int __run_perf_stat(int argc, const char **argv, int run_idx)
409 {
410         int interval = stat_config.interval;
411         int times = stat_config.times;
412         int timeout = stat_config.timeout;
413         char msg[BUFSIZ];
414         unsigned long long t0, t1;
415         struct perf_evsel *counter;
416         struct timespec ts;
417         size_t l;
418         int status = 0;
419         const bool forks = (argc > 0);
420         bool is_pipe = STAT_RECORD ? perf_stat.data.is_pipe : false;
421         struct perf_evsel_config_term *err_term;
422
423         if (interval) {
424                 ts.tv_sec  = interval / MSEC_PER_SEC;
425                 ts.tv_nsec = (interval % MSEC_PER_SEC) * NSEC_PER_MSEC;
426         } else if (timeout) {
427                 ts.tv_sec  = timeout / MSEC_PER_SEC;
428                 ts.tv_nsec = (timeout % MSEC_PER_SEC) * NSEC_PER_MSEC;
429         } else {
430                 ts.tv_sec  = 1;
431                 ts.tv_nsec = 0;
432         }
433
434         if (forks) {
435                 if (perf_evlist__prepare_workload(evsel_list, &target, argv, is_pipe,
436                                                   workload_exec_failed_signal) < 0) {
437                         perror("failed to prepare workload");
438                         return -1;
439                 }
440                 child_pid = evsel_list->workload.pid;
441         }
442
443         if (group)
444                 perf_evlist__set_leader(evsel_list);
445
446         evlist__for_each_entry(evsel_list, counter) {
447 try_again:
448                 if (create_perf_stat_counter(counter, &stat_config, &target) < 0) {
449
450                         /* Weak group failed. Reset the group. */
451                         if ((errno == EINVAL || errno == EBADF) &&
452                             counter->leader != counter &&
453                             counter->weak_group) {
454                                 counter = perf_evlist__reset_weak_group(evsel_list, counter);
455                                 goto try_again;
456                         }
457
458                         /*
459                          * PPC returns ENXIO for HW counters until 2.6.37
460                          * (behavior changed with commit b0a873e).
461                          */
462                         if (errno == EINVAL || errno == ENOSYS ||
463                             errno == ENOENT || errno == EOPNOTSUPP ||
464                             errno == ENXIO) {
465                                 if (verbose > 0)
466                                         ui__warning("%s event is not supported by the kernel.\n",
467                                                     perf_evsel__name(counter));
468                                 counter->supported = false;
469
470                 if ((counter->leader != counter) ||
471                     (counter->leader->nr_members <= 1))
472                         continue;
473                         } else if (perf_evsel__fallback(counter, errno, msg, sizeof(msg))) {
474                                 if (verbose > 0)
475                                         ui__warning("%s\n", msg);
476                                 goto try_again;
477                         } else if (target__has_per_thread(&target) &&
478                                    evsel_list->threads &&
479                                    evsel_list->threads->err_thread != -1) {
480                                 /*
481                                  * For global --per-thread case, skip current
482                                  * error thread.
483                                  */
484                                 if (!thread_map__remove(evsel_list->threads,
485                                                         evsel_list->threads->err_thread)) {
486                                         evsel_list->threads->err_thread = -1;
487                                         goto try_again;
488                                 }
489                         }
490
491                         perf_evsel__open_strerror(counter, &target,
492                                                   errno, msg, sizeof(msg));
493                         ui__error("%s\n", msg);
494
495                         if (child_pid != -1)
496                                 kill(child_pid, SIGTERM);
497
498                         return -1;
499                 }
500                 counter->supported = true;
501
502                 l = strlen(counter->unit);
503                 if (l > stat_config.unit_width)
504                         stat_config.unit_width = l;
505
506                 if (perf_evsel__should_store_id(counter) &&
507                     perf_evsel__store_ids(counter, evsel_list))
508                         return -1;
509         }
510
511         if (perf_evlist__apply_filters(evsel_list, &counter)) {
512                 pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
513                         counter->filter, perf_evsel__name(counter), errno,
514                         str_error_r(errno, msg, sizeof(msg)));
515                 return -1;
516         }
517
518         if (perf_evlist__apply_drv_configs(evsel_list, &counter, &err_term)) {
519                 pr_err("failed to set config \"%s\" on event %s with %d (%s)\n",
520                       err_term->val.drv_cfg, perf_evsel__name(counter), errno,
521                       str_error_r(errno, msg, sizeof(msg)));
522                 return -1;
523         }
524
525         if (STAT_RECORD) {
526                 int err, fd = perf_data__fd(&perf_stat.data);
527
528                 if (is_pipe) {
529                         err = perf_header__write_pipe(perf_data__fd(&perf_stat.data));
530                 } else {
531                         err = perf_session__write_header(perf_stat.session, evsel_list,
532                                                          fd, false);
533                 }
534
535                 if (err < 0)
536                         return err;
537
538                 err = perf_stat_synthesize_config(&stat_config, NULL, evsel_list,
539                                                   process_synthesized_event, is_pipe);
540                 if (err < 0)
541                         return err;
542         }
543
544         /*
545          * Enable counters and exec the command:
546          */
547         t0 = rdclock();
548         clock_gettime(CLOCK_MONOTONIC, &ref_time);
549
550         if (forks) {
551                 perf_evlist__start_workload(evsel_list);
552                 enable_counters();
553
554                 if (interval || timeout) {
555                         while (!waitpid(child_pid, &status, WNOHANG)) {
556                                 nanosleep(&ts, NULL);
557                                 if (timeout)
558                                         break;
559                                 process_interval();
560                                 if (interval_count && !(--times))
561                                         break;
562                         }
563                 }
564                 if (child_pid != -1)
565                         wait4(child_pid, &status, 0, &stat_config.ru_data);
566
567                 if (workload_exec_errno) {
568                         const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
569                         pr_err("Workload failed: %s\n", emsg);
570                         return -1;
571                 }
572
573                 if (WIFSIGNALED(status))
574                         psignal(WTERMSIG(status), argv[0]);
575         } else {
576                 enable_counters();
577                 while (!done) {
578                         nanosleep(&ts, NULL);
579                         if (!is_target_alive(&target, evsel_list->threads))
580                                 break;
581                         if (timeout)
582                                 break;
583                         if (interval) {
584                                 process_interval();
585                                 if (interval_count && !(--times))
586                                         break;
587                         }
588                 }
589         }
590
591         disable_counters();
592
593         t1 = rdclock();
594
595         if (stat_config.walltime_run_table)
596                 stat_config.walltime_run[run_idx] = t1 - t0;
597
598         update_stats(&walltime_nsecs_stats, t1 - t0);
599
600         /*
601          * Closing a group leader splits the group, and as we only disable
602          * group leaders, this results in the remaining events becoming
603          * enabled. To avoid arbitrary skew, we must read all counters
604          * before closing any group leaders.
605          */
606         read_counters();
607         perf_evlist__close(evsel_list);
608
609         return WEXITSTATUS(status);
610 }
611
612 static int run_perf_stat(int argc, const char **argv, int run_idx)
613 {
614         int ret;
615
616         if (pre_cmd) {
617                 ret = system(pre_cmd);
618                 if (ret)
619                         return ret;
620         }
621
622         if (sync_run)
623                 sync();
624
625         ret = __run_perf_stat(argc, argv, run_idx);
626         if (ret)
627                 return ret;
628
629         if (post_cmd) {
630                 ret = system(post_cmd);
631                 if (ret)
632                         return ret;
633         }
634
635         return ret;
636 }
637
638 static void print_counters(struct timespec *ts, int argc, const char **argv)
639 {
640         /* Do not print anything if we record to the pipe. */
641         if (STAT_RECORD && perf_stat.data.is_pipe)
642                 return;
643
644         perf_evlist__print_counters(evsel_list, &stat_config, &target,
645                                     ts, argc, argv);
646 }
647
648 static volatile int signr = -1;
649
650 static void skip_signal(int signo)
651 {
652         if ((child_pid == -1) || stat_config.interval)
653                 done = 1;
654
655         signr = signo;
656         /*
657          * Render child_pid harmless so we
658          * won't send SIGTERM to a random
659          * process in case of a race condition
660          * and fast PID recycling.
661          */
662         child_pid = -1;
663 }
664
665 static void sig_atexit(void)
666 {
667         sigset_t set, oset;
668
669         /*
670          * Avoid a race condition with the SIGCHLD
671          * handler in skip_signal(), which modifies
672          * child_pid; the goal is to avoid sending
673          * SIGTERM to a random process.
674          */
675         sigemptyset(&set);
676         sigaddset(&set, SIGCHLD);
677         sigprocmask(SIG_BLOCK, &set, &oset);
678
679         if (child_pid != -1)
680                 kill(child_pid, SIGTERM);
681
682         sigprocmask(SIG_SETMASK, &oset, NULL);
683
684         if (signr == -1)
685                 return;
686
687         signal(signr, SIG_DFL);
688         kill(getpid(), signr);
689 }
690
691 static int stat__set_big_num(const struct option *opt __maybe_unused,
692                              const char *s __maybe_unused, int unset)
693 {
694         big_num_opt = unset ? 0 : 1;
695         return 0;
696 }
697
698 static int enable_metric_only(const struct option *opt __maybe_unused,
699                               const char *s __maybe_unused, int unset)
700 {
701         force_metric_only = true;
702         stat_config.metric_only = !unset;
703         return 0;
704 }
705
706 static int parse_metric_groups(const struct option *opt,
707                                const char *str,
708                                int unset __maybe_unused)
709 {
710         return metricgroup__parse_groups(opt, str, &stat_config.metric_events);
711 }
712
713 static struct option stat_options[] = {
714         OPT_BOOLEAN('T', "transaction", &transaction_run,
715                     "hardware transaction statistics"),
716         OPT_CALLBACK('e', "event", &evsel_list, "event",
717                      "event selector. use 'perf list' to list available events",
718                      parse_events_option),
719         OPT_CALLBACK(0, "filter", &evsel_list, "filter",
720                      "event filter", parse_filter),
721         OPT_BOOLEAN('i', "no-inherit", &stat_config.no_inherit,
722                     "child tasks do not inherit counters"),
723         OPT_STRING('p', "pid", &target.pid, "pid",
724                    "stat events on existing process id"),
725         OPT_STRING('t', "tid", &target.tid, "tid",
726                    "stat events on existing thread id"),
727         OPT_BOOLEAN('a', "all-cpus", &target.system_wide,
728                     "system-wide collection from all CPUs"),
729         OPT_BOOLEAN('g', "group", &group,
730                     "put the counters into a counter group"),
731         OPT_BOOLEAN('c', "scale", &stat_config.scale, "scale/normalize counters"),
732         OPT_INCR('v', "verbose", &verbose,
733                     "be more verbose (show counter open errors, etc)"),
734         OPT_INTEGER('r', "repeat", &stat_config.run_count,
735                     "repeat command and print average + stddev (max: 100, forever: 0)"),
736         OPT_BOOLEAN(0, "table", &stat_config.walltime_run_table,
737                     "display details about each run (only with -r option)"),
738         OPT_BOOLEAN('n', "null", &stat_config.null_run,
739                     "null run - don't start any counters"),
740         OPT_INCR('d', "detailed", &detailed_run,
741                     "detailed run - start a lot of events"),
742         OPT_BOOLEAN('S', "sync", &sync_run,
743                     "call sync() before starting a run"),
744         OPT_CALLBACK_NOOPT('B', "big-num", NULL, NULL,
745                            "print large numbers with thousands\' separators",
746                            stat__set_big_num),
747         OPT_STRING('C', "cpu", &target.cpu_list, "cpu",
748                     "list of cpus to monitor in system-wide mode"),
749         OPT_SET_UINT('A', "no-aggr", &stat_config.aggr_mode,
750                     "disable CPU count aggregation", AGGR_NONE),
751         OPT_BOOLEAN(0, "no-merge", &stat_config.no_merge, "Do not merge identical named events"),
752         OPT_STRING('x', "field-separator", &stat_config.csv_sep, "separator",
753                    "print counts with custom separator"),
754         OPT_CALLBACK('G', "cgroup", &evsel_list, "name",
755                      "monitor event in cgroup name only", parse_cgroups),
756         OPT_STRING('o', "output", &output_name, "file", "output file name"),
757         OPT_BOOLEAN(0, "append", &append_file, "append to the output file"),
758         OPT_INTEGER(0, "log-fd", &output_fd,
759                     "log output to fd, instead of stderr"),
760         OPT_STRING(0, "pre", &pre_cmd, "command",
761                         "command to run prior to the measured command"),
762         OPT_STRING(0, "post", &post_cmd, "command",
763                         "command to run after the measured command"),
764         OPT_UINTEGER('I', "interval-print", &stat_config.interval,
765                     "print counts at regular interval in ms "
766                     "(overhead is possible for values <= 100ms)"),
767         OPT_INTEGER(0, "interval-count", &stat_config.times,
768                     "print counts for fixed number of times"),
769         OPT_BOOLEAN(0, "interval-clear", &stat_config.interval_clear,
770                     "clear screen between intervals"),
771         OPT_UINTEGER(0, "timeout", &stat_config.timeout,
772                     "stop workload and print counts after a timeout period in ms (>= 10ms)"),
773         OPT_SET_UINT(0, "per-socket", &stat_config.aggr_mode,
774                      "aggregate counts per processor socket", AGGR_SOCKET),
775         OPT_SET_UINT(0, "per-core", &stat_config.aggr_mode,
776                      "aggregate counts per physical processor core", AGGR_CORE),
777         OPT_SET_UINT(0, "per-thread", &stat_config.aggr_mode,
778                      "aggregate counts per thread", AGGR_THREAD),
779         OPT_UINTEGER('D', "delay", &stat_config.initial_delay,
780                      "ms to wait before starting measurement after program start"),
781         OPT_CALLBACK_NOOPT(0, "metric-only", &stat_config.metric_only, NULL,
782                         "Only print computed metrics. No raw values", enable_metric_only),
783         OPT_BOOLEAN(0, "topdown", &topdown_run,
784                         "measure topdown level 1 statistics"),
785         OPT_BOOLEAN(0, "smi-cost", &smi_cost,
786                         "measure SMI cost"),
787         OPT_CALLBACK('M', "metrics", &evsel_list, "metric/metric group list",
788                      "monitor specified metrics or metric groups (separated by ,)",
789                      parse_metric_groups),
790         OPT_END()
791 };
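/*
 * Example invocations exercising the options above (illustrative;
 * "./my_workload" is a placeholder):
 *
 *   perf stat -e cycles,instructions -r 5 -- ./my_workload
 *   perf stat -a -I 1000 --interval-count 10
 *   perf stat -a --per-core -- sleep 5
 */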
792
793 static int perf_stat__get_socket(struct perf_stat_config *config __maybe_unused,
794                                  struct cpu_map *map, int cpu)
795 {
796         return cpu_map__get_socket(map, cpu, NULL);
797 }
798
799 static int perf_stat__get_core(struct perf_stat_config *config __maybe_unused,
800                                struct cpu_map *map, int cpu)
801 {
802         return cpu_map__get_core(map, cpu, NULL);
803 }
804
805 static int cpu_map__get_max(struct cpu_map *map)
806 {
807         int i, max = -1;
808
809         for (i = 0; i < map->nr; i++) {
810                 if (map->map[i] > max)
811                         max = map->map[i];
812         }
813
814         return max;
815 }
816
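/*
 * perf_stat__get_aggr() below caches the aggregate id (e.g. socket or
 * core) per CPU on first lookup, so repeated per-interval lookups avoid
 * recomputing the topology mapping.
 */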
817 static int perf_stat__get_aggr(struct perf_stat_config *config,
818                                aggr_get_id_t get_id, struct cpu_map *map, int idx)
819 {
820         int cpu;
821
822         if (idx >= map->nr)
823                 return -1;
824
825         cpu = map->map[idx];
826
827         if (config->cpus_aggr_map->map[cpu] == -1)
828                 config->cpus_aggr_map->map[cpu] = get_id(config, map, idx);
829
830         return config->cpus_aggr_map->map[cpu];
831 }
832
833 static int perf_stat__get_socket_cached(struct perf_stat_config *config,
834                                         struct cpu_map *map, int idx)
835 {
836         return perf_stat__get_aggr(config, perf_stat__get_socket, map, idx);
837 }
838
839 static int perf_stat__get_core_cached(struct perf_stat_config *config,
840                                       struct cpu_map *map, int idx)
841 {
842         return perf_stat__get_aggr(config, perf_stat__get_core, map, idx);
843 }
844
845 static int perf_stat_init_aggr_mode(void)
846 {
847         int nr;
848
849         switch (stat_config.aggr_mode) {
850         case AGGR_SOCKET:
851                 if (cpu_map__build_socket_map(evsel_list->cpus, &stat_config.aggr_map)) {
852                         perror("cannot build socket map");
853                         return -1;
854                 }
855                 stat_config.aggr_get_id = perf_stat__get_socket_cached;
856                 break;
857         case AGGR_CORE:
858                 if (cpu_map__build_core_map(evsel_list->cpus, &stat_config.aggr_map)) {
859                         perror("cannot build core map");
860                         return -1;
861                 }
862                 stat_config.aggr_get_id = perf_stat__get_core_cached;
863                 break;
864         case AGGR_NONE:
865         case AGGR_GLOBAL:
866         case AGGR_THREAD:
867         case AGGR_UNSET:
868         default:
869                 break;
870         }
871
872         /*
873          * The evsel_list->cpus is the base we operate on,
874          * taking the highest cpu number to be the size of
875          * the aggregation translate cpumap.
876          */
877         nr = cpu_map__get_max(evsel_list->cpus);
878         stat_config.cpus_aggr_map = cpu_map__empty_new(nr + 1);
879         return stat_config.cpus_aggr_map ? 0 : -ENOMEM;
880 }
881
882 static void perf_stat__exit_aggr_mode(void)
883 {
884         cpu_map__put(stat_config.aggr_map);
885         cpu_map__put(stat_config.cpus_aggr_map);
886         stat_config.aggr_map = NULL;
887         stat_config.cpus_aggr_map = NULL;
888 }
889
890 static inline int perf_env__get_cpu(struct perf_env *env, struct cpu_map *map, int idx)
891 {
892         int cpu;
893
894         if (idx >= map->nr)
895                 return -1;
896
897         cpu = map->map[idx];
898
899         if (cpu >= env->nr_cpus_avail)
900                 return -1;
901
902         return cpu;
903 }
904
905 static int perf_env__get_socket(struct cpu_map *map, int idx, void *data)
906 {
907         struct perf_env *env = data;
908         int cpu = perf_env__get_cpu(env, map, idx);
909
910         return cpu == -1 ? -1 : env->cpu[cpu].socket_id;
911 }
912
913 static int perf_env__get_core(struct cpu_map *map, int idx, void *data)
914 {
915         struct perf_env *env = data;
916         int core = -1, cpu = perf_env__get_cpu(env, map, idx);
917
918         if (cpu != -1) {
919                 int socket_id = env->cpu[cpu].socket_id;
920
921                 /*
922                  * Encode the socket in the upper 16 bits:
923                  * core_id is relative to its socket, and
924                  * we need a global id, so we combine
925                  * socket id and core id.
926                  */
927                 core = (socket_id << 16) | (env->cpu[cpu].core_id & 0xffff);
928         }
929
930         return core;
931 }
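/*
 * Worked example (illustrative): socket_id 1, core_id 2 encodes to
 * (1 << 16) | 2 == 0x10002, a system-wide unique core identifier.
 */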
932
933 static int perf_env__build_socket_map(struct perf_env *env, struct cpu_map *cpus,
934                                       struct cpu_map **sockp)
935 {
936         return cpu_map__build_map(cpus, sockp, perf_env__get_socket, env);
937 }
938
939 static int perf_env__build_core_map(struct perf_env *env, struct cpu_map *cpus,
940                                     struct cpu_map **corep)
941 {
942         return cpu_map__build_map(cpus, corep, perf_env__get_core, env);
943 }
944
945 static int perf_stat__get_socket_file(struct perf_stat_config *config __maybe_unused,
946                                       struct cpu_map *map, int idx)
947 {
948         return perf_env__get_socket(map, idx, &perf_stat.session->header.env);
949 }
950
951 static int perf_stat__get_core_file(struct perf_stat_config *config __maybe_unused,
952                                     struct cpu_map *map, int idx)
953 {
954         return perf_env__get_core(map, idx, &perf_stat.session->header.env);
955 }
956
957 static int perf_stat_init_aggr_mode_file(struct perf_stat *st)
958 {
959         struct perf_env *env = &st->session->header.env;
960
961         switch (stat_config.aggr_mode) {
962         case AGGR_SOCKET:
963                 if (perf_env__build_socket_map(env, evsel_list->cpus, &stat_config.aggr_map)) {
964                         perror("cannot build socket map");
965                         return -1;
966                 }
967                 stat_config.aggr_get_id = perf_stat__get_socket_file;
968                 break;
969         case AGGR_CORE:
970                 if (perf_env__build_core_map(env, evsel_list->cpus, &stat_config.aggr_map)) {
971                         perror("cannot build core map");
972                         return -1;
973                 }
974                 stat_config.aggr_get_id = perf_stat__get_core_file;
975                 break;
976         case AGGR_NONE:
977         case AGGR_GLOBAL:
978         case AGGR_THREAD:
979         case AGGR_UNSET:
980         default:
981                 break;
982         }
983
984         return 0;
985 }
986
987 static int topdown_filter_events(const char **attr, char **str, bool use_group)
988 {
989         int off = 0;
990         int i;
991         int len = 0;
992         char *s;
993
994         for (i = 0; attr[i]; i++) {
995                 if (pmu_have_event("cpu", attr[i])) {
996                         len += strlen(attr[i]) + 1;
997                         attr[i - off] = attr[i];
998                 } else
999                         off++;
1000         }
1001         attr[i - off] = NULL;
1002
1003         *str = malloc(len + 1 + 2);
1004         if (!*str)
1005                 return -1;
1006         s = *str;
1007         if (i - off == 0) {
1008                 *s = 0;
1009                 return 0;
1010         }
1011         if (use_group)
1012                 *s++ = '{';
1013         for (i = 0; attr[i]; i++) {
1014                 strcpy(s, attr[i]);
1015                 s += strlen(s);
1016                 *s++ = ',';
1017         }
1018         if (use_group) {
1019                 s[-1] = '}';
1020                 *s = 0;
1021         } else
1022                 s[-1] = 0;
1023         return 0;
1024 }
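/*
 * Example (illustrative): if the PMU only has "topdown-total-slots" and
 * "topdown-slots-retired" and use_group is true, *str becomes
 * "{topdown-total-slots,topdown-slots-retired}".
 */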
1025
1026 __weak bool arch_topdown_check_group(bool *warn)
1027 {
1028         *warn = false;
1029         return false;
1030 }
1031
1032 __weak void arch_topdown_group_warn(void)
1033 {
1034 }
1035
1036 /*
1037  * Add default attributes, if there were no attributes specified or
1038  * if -d/--detailed, -d -d or -d -d -d is used:
1039  */
1040 static int add_default_attributes(void)
1041 {
1042         int err;
1043         struct perf_event_attr default_attrs0[] = {
1044
1045   { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK              },
1046   { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES        },
1047   { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS          },
1048   { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS             },
1049
1050   { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES              },
1051 };
1052         struct perf_event_attr frontend_attrs[] = {
1053   { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND },
1054 };
1055         struct perf_event_attr backend_attrs[] = {
1056   { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_BACKEND  },
1057 };
1058         struct perf_event_attr default_attrs1[] = {
1059   { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS            },
1060   { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS     },
1061   { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES           },
1062
1063 };
1064
1065 /*
1066  * Detailed stats (-d), covering the L1 and last level data caches:
1067  */
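/*
 * PERF_TYPE_HW_CACHE configs pack (cache_id | op << 8 | result << 16).
 * Worked example (illustrative): L1D read misses encode as
 * PERF_COUNT_HW_CACHE_L1D | (PERF_COUNT_HW_CACHE_OP_READ << 8) |
 * (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) == 0x10000.
 */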
1068         struct perf_event_attr detailed_attrs[] = {
1069
1070   { .type = PERF_TYPE_HW_CACHE,
1071     .config =
1072          PERF_COUNT_HW_CACHE_L1D                <<  0  |
1073         (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
1074         (PERF_COUNT_HW_CACHE_RESULT_ACCESS      << 16)                          },
1075
1076   { .type = PERF_TYPE_HW_CACHE,
1077     .config =
1078          PERF_COUNT_HW_CACHE_L1D                <<  0  |
1079         (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
1080         (PERF_COUNT_HW_CACHE_RESULT_MISS        << 16)                          },
1081
1082   { .type = PERF_TYPE_HW_CACHE,
1083     .config =
1084          PERF_COUNT_HW_CACHE_LL                 <<  0  |
1085         (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
1086         (PERF_COUNT_HW_CACHE_RESULT_ACCESS      << 16)                          },
1087
1088   { .type = PERF_TYPE_HW_CACHE,
1089     .config =
1090          PERF_COUNT_HW_CACHE_LL                 <<  0  |
1091         (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
1092         (PERF_COUNT_HW_CACHE_RESULT_MISS        << 16)                          },
1093 };
1094
1095 /*
1096  * Very detailed stats (-d -d), covering the instruction cache and the TLB caches:
1097  */
1098         struct perf_event_attr very_detailed_attrs[] = {
1099
1100   { .type = PERF_TYPE_HW_CACHE,
1101     .config =
1102          PERF_COUNT_HW_CACHE_L1I                <<  0  |
1103         (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
1104         (PERF_COUNT_HW_CACHE_RESULT_ACCESS      << 16)                          },
1105
1106   { .type = PERF_TYPE_HW_CACHE,
1107     .config =
1108          PERF_COUNT_HW_CACHE_L1I                <<  0  |
1109         (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
1110         (PERF_COUNT_HW_CACHE_RESULT_MISS        << 16)                          },
1111
1112   { .type = PERF_TYPE_HW_CACHE,
1113     .config =
1114          PERF_COUNT_HW_CACHE_DTLB               <<  0  |
1115         (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
1116         (PERF_COUNT_HW_CACHE_RESULT_ACCESS      << 16)                          },
1117
1118   { .type = PERF_TYPE_HW_CACHE,
1119     .config =
1120          PERF_COUNT_HW_CACHE_DTLB               <<  0  |
1121         (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
1122         (PERF_COUNT_HW_CACHE_RESULT_MISS        << 16)                          },
1123
1124   { .type = PERF_TYPE_HW_CACHE,
1125     .config =
1126          PERF_COUNT_HW_CACHE_ITLB               <<  0  |
1127         (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
1128         (PERF_COUNT_HW_CACHE_RESULT_ACCESS      << 16)                          },
1129
1130   { .type = PERF_TYPE_HW_CACHE,
1131     .config =
1132          PERF_COUNT_HW_CACHE_ITLB               <<  0  |
1133         (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
1134         (PERF_COUNT_HW_CACHE_RESULT_MISS        << 16)                          },
1135
1136 };
1137
1138 /*
1139  * Very, very detailed stats (-d -d -d), adding prefetch events:
1140  */
1141         struct perf_event_attr very_very_detailed_attrs[] = {
1142
1143   { .type = PERF_TYPE_HW_CACHE,
1144     .config =
1145          PERF_COUNT_HW_CACHE_L1D                <<  0  |
1146         (PERF_COUNT_HW_CACHE_OP_PREFETCH        <<  8) |
1147         (PERF_COUNT_HW_CACHE_RESULT_ACCESS      << 16)                          },
1148
1149   { .type = PERF_TYPE_HW_CACHE,
1150     .config =
1151          PERF_COUNT_HW_CACHE_L1D                <<  0  |
1152         (PERF_COUNT_HW_CACHE_OP_PREFETCH        <<  8) |
1153         (PERF_COUNT_HW_CACHE_RESULT_MISS        << 16)                          },
1154 };
1155         struct parse_events_error errinfo;
1156
1157         /* Set attrs if no event is selected and !null_run: */
1158         if (stat_config.null_run)
1159                 return 0;
1160
1161         if (transaction_run) {
1162                 /* Handle -T as -M transaction. Once platform specific metrics
1163                  * support has been added to the json files, all architectures
1164                  * will use this approach. To determine transaction support
1165                  * on an architecture, test for such a metric name.
1166                  */
1167                 if (metricgroup__has_metric("transaction")) {
1168                         struct option opt = { .value = &evsel_list };
1169
1170                         return metricgroup__parse_groups(&opt, "transaction",
1171                                                          &stat_config.metric_events);
1172                 }
1173
1174                 if (pmu_have_event("cpu", "cycles-ct") &&
1175                     pmu_have_event("cpu", "el-start"))
1176                         err = parse_events(evsel_list, transaction_attrs,
1177                                            &errinfo);
1178                 else
1179                         err = parse_events(evsel_list,
1180                                            transaction_limited_attrs,
1181                                            &errinfo);
1182                 if (err) {
1183                         fprintf(stderr, "Cannot set up transaction events\n");
1184                         parse_events_print_error(&errinfo, transaction_attrs);
1185                         return -1;
1186                 }
1187                 return 0;
1188         }
1189
1190         if (smi_cost) {
1191                 int smi;
1192
1193                 if (sysfs__read_int(FREEZE_ON_SMI_PATH, &smi) < 0) {
1194                         fprintf(stderr, "freeze_on_smi is not supported.\n");
1195                         return -1;
1196                 }
1197
1198                 if (!smi) {
1199                         if (sysfs__write_int(FREEZE_ON_SMI_PATH, 1) < 0) {
1200                                 fprintf(stderr, "Failed to set freeze_on_smi.\n");
1201                                 return -1;
1202                         }
1203                         smi_reset = true;
1204                 }
1205
1206                 if (pmu_have_event("msr", "aperf") &&
1207                     pmu_have_event("msr", "smi")) {
1208                         if (!force_metric_only)
1209                                 stat_config.metric_only = true;
1210                         err = parse_events(evsel_list, smi_cost_attrs, &errinfo);
1211                 } else {
1212                         fprintf(stderr, "To measure SMI cost, it needs "
1213                                 "msr/aperf/, msr/smi/ and cpu/cycles/ support\n");
1214                         return -1;
1215                 }
1216                 if (err) {
1217                         parse_events_print_error(&errinfo, smi_cost_attrs);
1218                         fprintf(stderr, "Cannot set up SMI cost events\n");
1219                         return -1;
1220                 }
1221                 return 0;
1222         }
1223
1224         if (topdown_run) {
1225                 char *str = NULL;
1226                 bool warn = false;
1227
1228                 if (stat_config.aggr_mode != AGGR_GLOBAL &&
1229                     stat_config.aggr_mode != AGGR_CORE) {
1230                         pr_err("top down event configuration requires --per-core mode\n");
1231                         return -1;
1232                 }
1233                 stat_config.aggr_mode = AGGR_CORE;
1234                 if (nr_cgroups || !target__has_cpu(&target)) {
1235                         pr_err("top down event configuration requires system-wide mode (-a)\n");
1236                         return -1;
1237                 }
1238
1239                 if (!force_metric_only)
1240                         stat_config.metric_only = true;
1241                 if (topdown_filter_events(topdown_attrs, &str,
1242                                 arch_topdown_check_group(&warn)) < 0) {
1243                         pr_err("Out of memory\n");
1244                         return -1;
1245                 }
1246                 if (topdown_attrs[0] && str) {
1247                         if (warn)
1248                                 arch_topdown_group_warn();
1249                         err = parse_events(evsel_list, str, &errinfo);
1250                         if (err) {
1251                                 fprintf(stderr,
1252                                         "Cannot set up top down events %s: %d\n",
1253                                         str, err);
1254                                 free(str);
1255                                 parse_events_print_error(&errinfo, str);
1256                                 return -1;
1257                         }
1258                 } else {
1259                         fprintf(stderr, "System does not support topdown\n");
1260                         return -1;
1261                 }
1262                 free(str);
1263         }
1264
1265         if (!evsel_list->nr_entries) {
1266                 if (target__has_cpu(&target))
1267                         default_attrs0[0].config = PERF_COUNT_SW_CPU_CLOCK;
1268
1269                 if (perf_evlist__add_default_attrs(evsel_list, default_attrs0) < 0)
1270                         return -1;
1271                 if (pmu_have_event("cpu", "stalled-cycles-frontend")) {
1272                         if (perf_evlist__add_default_attrs(evsel_list,
1273                                                 frontend_attrs) < 0)
1274                                 return -1;
1275                 }
1276                 if (pmu_have_event("cpu", "stalled-cycles-backend")) {
1277                         if (perf_evlist__add_default_attrs(evsel_list,
1278                                                 backend_attrs) < 0)
1279                                 return -1;
1280                 }
1281                 if (perf_evlist__add_default_attrs(evsel_list, default_attrs1) < 0)
1282                         return -1;
1283         }
1284
1285         /* Detailed events get appended to the event list: */
1286
1287         if (detailed_run < 1)
1288                 return 0;
1289
1290         /* Append detailed run extra attributes: */
1291         if (perf_evlist__add_default_attrs(evsel_list, detailed_attrs) < 0)
1292                 return -1;
1293
1294         if (detailed_run < 2)
1295                 return 0;
1296
1297         /* Append very detailed run extra attributes: */
1298         if (perf_evlist__add_default_attrs(evsel_list, very_detailed_attrs) < 0)
1299                 return -1;
1300
1301         if (detailed_run < 3)
1302                 return 0;
1303
1304         /* Append very, very detailed run extra attributes: */
1305         return perf_evlist__add_default_attrs(evsel_list, very_very_detailed_attrs);
1306 }
1307
1308 static const char * const stat_record_usage[] = {
1309         "perf stat record [<options>]",
1310         NULL,
1311 };
1312
1313 static void init_features(struct perf_session *session)
1314 {
1315         int feat;
1316
1317         for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
1318                 perf_header__set_feat(&session->header, feat);
1319
1320         perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
1321         perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
1322         perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
1323         perf_header__clear_feat(&session->header, HEADER_AUXTRACE);
1324 }
1325
1326 static int __cmd_record(int argc, const char **argv)
1327 {
1328         struct perf_session *session;
1329         struct perf_data *data = &perf_stat.data;
1330
1331         argc = parse_options(argc, argv, stat_options, stat_record_usage,
1332                              PARSE_OPT_STOP_AT_NON_OPTION);
1333
1334         if (output_name)
1335                 data->file.path = output_name;
1336
1337         if (stat_config.run_count != 1 || forever) {
1338                 pr_err("Cannot use -r option with perf stat record.\n");
1339                 return -1;
1340         }
1341
1342         session = perf_session__new(data, false, NULL);
1343         if (session == NULL) {
1344                 pr_err("Perf session creation failed.\n");
1345                 return -1;
1346         }
1347
1348         init_features(session);
1349
1350         session->evlist   = evsel_list;
1351         perf_stat.session = session;
1352         perf_stat.record  = true;
1353         return argc;
1354 }
1355
1356 static int process_stat_round_event(struct perf_session *session,
1357                                     union perf_event *event)
1358 {
1359         struct stat_round_event *stat_round = &event->stat_round;
1360         struct perf_evsel *counter;
1361         struct timespec tsh, *ts = NULL;
1362         const char **argv = session->header.env.cmdline_argv;
1363         int argc = session->header.env.nr_cmdline;
1364
1365         evlist__for_each_entry(evsel_list, counter)
1366                 perf_stat_process_counter(&stat_config, counter);
1367
1368         if (stat_round->type == PERF_STAT_ROUND_TYPE__FINAL)
1369                 update_stats(&walltime_nsecs_stats, stat_round->time);
1370
1371         if (stat_config.interval && stat_round->time) {
1372                 tsh.tv_sec  = stat_round->time / NSEC_PER_SEC;
1373                 tsh.tv_nsec = stat_round->time % NSEC_PER_SEC;
1374                 ts = &tsh;
1375         }
1376
1377         print_counters(ts, argc, argv);
1378         return 0;
1379 }
1380
1381 static
1382 int process_stat_config_event(struct perf_session *session,
1383                               union perf_event *event)
1384 {
1385         struct perf_tool *tool = session->tool;
1386         struct perf_stat *st = container_of(tool, struct perf_stat, tool);
1387
1388         perf_event__read_stat_config(&stat_config, &event->stat_config);
1389
1390         if (cpu_map__empty(st->cpus)) {
1391                 if (st->aggr_mode != AGGR_UNSET)
1392                         pr_warning("warning: processing task data, aggregation mode not set\n");
1393                 return 0;
1394         }
1395
1396         if (st->aggr_mode != AGGR_UNSET)
1397                 stat_config.aggr_mode = st->aggr_mode;
1398
1399         if (perf_stat.data.is_pipe)
1400                 perf_stat_init_aggr_mode();
1401         else
1402                 perf_stat_init_aggr_mode_file(st);
1403
1404         return 0;
1405 }
1406
1407 static int set_maps(struct perf_stat *st)
1408 {
1409         if (!st->cpus || !st->threads)
1410                 return 0;
1411
1412         if (WARN_ONCE(st->maps_allocated, "stats double allocation\n"))
1413                 return -EINVAL;
1414
1415         perf_evlist__set_maps(evsel_list, st->cpus, st->threads);
1416
1417         if (perf_evlist__alloc_stats(evsel_list, true))
1418                 return -ENOMEM;
1419
1420         st->maps_allocated = true;
1421         return 0;
1422 }
1423
1424 static
1425 int process_thread_map_event(struct perf_session *session,
1426                              union perf_event *event)
1427 {
1428         struct perf_tool *tool = session->tool;
1429         struct perf_stat *st = container_of(tool, struct perf_stat, tool);
1430
1431         if (st->threads) {
1432                 pr_warning("Extra thread map event, ignoring.\n");
1433                 return 0;
1434         }
1435
1436         st->threads = thread_map__new_event(&event->thread_map);
1437         if (!st->threads)
1438                 return -ENOMEM;
1439
1440         return set_maps(st);
1441 }
1442
1443 static
1444 int process_cpu_map_event(struct perf_session *session,
1445                           union perf_event *event)
1446 {
1447         struct perf_tool *tool = session->tool;
1448         struct perf_stat *st = container_of(tool, struct perf_stat, tool);
1449         struct cpu_map *cpus;
1450
1451         if (st->cpus) {
1452                 pr_warning("Extra cpu map event, ignoring.\n");
1453                 return 0;
1454         }
1455
1456         cpus = cpu_map__new_data(&event->cpu_map.data);
1457         if (!cpus)
1458                 return -ENOMEM;
1459
1460         st->cpus = cpus;
1461         return set_maps(st);
1462 }
1463
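/*
 * With --per-thread on a system-wide target every thread gets its own
 * runtime_stat, so derived metrics are computed from that thread's counts
 * alone; the array is sized by the number of threads in the thread map at
 * setup time.
 */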
1464 static int runtime_stat_new(struct perf_stat_config *config, int nthreads)
1465 {
1466         int i;
1467
1468         config->stats = calloc(nthreads, sizeof(struct runtime_stat));
1469         if (!config->stats)
1470                 return -1;
1471
1472         config->stats_num = nthreads;
1473
1474         for (i = 0; i < nthreads; i++)
1475                 runtime_stat__init(&config->stats[i]);
1476
1477         return 0;
1478 }
1479
1480 static void runtime_stat_delete(struct perf_stat_config *config)
1481 {
1482         int i;
1483
1484         if (!config->stats)
1485                 return;
1486
1487         for (i = 0; i < config->stats_num; i++)
1488                 runtime_stat__exit(&config->stats[i]);
1489         free(config->stats);
1490         config->stats = NULL;
1491 }
1492
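/*
 * Sample report session (hypothetical workload and data):
 *
 *   $ perf stat record -a sleep 1
 *   $ perf stat report --per-core
 *
 * which re-aggregates the counts recorded in perf.data per physical core.
 */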
1493 static const char * const stat_report_usage[] = {
1494         "perf stat report [<options>]",
1495         NULL,
1496 };
1497
1498 static struct perf_stat perf_stat = {
1499         .tool = {
1500                 .attr           = perf_event__process_attr,
1501                 .event_update   = perf_event__process_event_update,
1502                 .thread_map     = process_thread_map_event,
1503                 .cpu_map        = process_cpu_map_event,
1504                 .stat_config    = process_stat_config_event,
1505                 .stat           = perf_event__process_stat_event,
1506                 .stat_round     = process_stat_round_event,
1507         },
1508         .aggr_mode = AGGR_UNSET,
1509 };
1510
1511 static int __cmd_report(int argc, const char **argv)
1512 {
1513         struct perf_session *session;
1514         const struct option options[] = {
1515         OPT_STRING('i', "input", &input_name, "file", "input file name"),
1516         OPT_SET_UINT(0, "per-socket", &perf_stat.aggr_mode,
1517                      "aggregate counts per processor socket", AGGR_SOCKET),
1518         OPT_SET_UINT(0, "per-core", &perf_stat.aggr_mode,
1519                      "aggregate counts per physical processor core", AGGR_CORE),
1520         OPT_SET_UINT('A', "no-aggr", &perf_stat.aggr_mode,
1521                      "disable CPU count aggregation", AGGR_NONE),
1522         OPT_END()
1523         };
1524         struct stat st;
1525         int ret;
1526
1527         argc = parse_options(argc, argv, options, stat_report_usage, 0);
1528
1529         if (!input_name || !strlen(input_name)) {
1530                 if (!fstat(STDIN_FILENO, &st) && S_ISFIFO(st.st_mode))
1531                         input_name = "-";
1532                 else
1533                         input_name = "perf.data";
1534         }
1535
1536         perf_stat.data.file.path = input_name;
1537         perf_stat.data.mode      = PERF_DATA_MODE_READ;
1538
1539         session = perf_session__new(&perf_stat.data, false, &perf_stat.tool);
1540         if (session == NULL)
1541                 return -1;
1542
1543         perf_stat.session  = session;
1544         stat_config.output = stderr;
1545         evsel_list         = session->evlist;
1546
1547         ret = perf_session__process_events(session);
1548
1549         /* Delete the session even on error so it isn't leaked. */
1550         perf_session__delete(session);
1551
1552         return ret;
1553 }
1554
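/*
 * Example of the fallback below: a plain 'perf stat sleep 1' keeps the
 * default per-task target, but if every requested event is a system-wide
 * one (uncore events, for instance), counting proceeds as if -a had been
 * passed even though a workload was given.
 */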
1555 static void setup_system_wide(int forks)
1556 {
1557         /*
1558          * Make system wide (-a) the default target if
1559          * no target was specified and one of the following
1560          * conditions is met:
1561          *
1562          *   - there's no workload specified
1563          *   - there is a workload specified but all requested
1564          *     events are system-wide events
1565          */
1566         if (!target__none(&target))
1567                 return;
1568
1569         if (!forks)
1570                 target.system_wide = true;
1571         else {
1572                 struct perf_evsel *counter;
1573
1574                 evlist__for_each_entry(evsel_list, counter) {
1575                         if (!counter->system_wide)
1576                                 return;
1577                 }
1578
1579                 if (evsel_list->nr_entries)
1580                         target.system_wide = true;
1581         }
1582 }
1583
1584 int cmd_stat(int argc, const char **argv)
1585 {
1586         const char * const stat_usage[] = {
1587                 "perf stat [<options>] [<command>]",
1588                 NULL
1589         };
1590         int status = -EINVAL, run_idx;
1591         const char *mode;
1592         FILE *output = stderr;
1593         unsigned int interval, timeout;
1594         const char * const stat_subcommands[] = { "record", "report" };
1595
1596         setlocale(LC_ALL, "");
1597
1598         evsel_list = perf_evlist__new();
1599         if (evsel_list == NULL)
1600                 return -ENOMEM;
1601
1602         parse_events__shrink_config_terms();
1603
1604         /* String-parsing callback-based options would segfault when negated */
1605         set_option_flag(stat_options, 'e', "event", PARSE_OPT_NONEG);
1606         set_option_flag(stat_options, 'M', "metrics", PARSE_OPT_NONEG);
1607         set_option_flag(stat_options, 'G', "cgroup", PARSE_OPT_NONEG);
1608
1609         argc = parse_options_subcommand(argc, argv, stat_options, stat_subcommands,
1610                                         (const char **) stat_usage,
1611                                         PARSE_OPT_STOP_AT_NON_OPTION);
1612         perf_stat__collect_metric_expr(evsel_list);
1613         perf_stat__init_shadow_stats();
1614
1615         if (stat_config.csv_sep) {
1616                 stat_config.csv_output = true;
1617                 if (!strcmp(stat_config.csv_sep, "\\t"))
1618                         stat_config.csv_sep = "\t";
1619         } else
1620                 stat_config.csv_sep = DEFAULT_SEPARATOR;
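
        /*
         * e.g. 'perf stat -x, ...' prints each counter on one line with
         * the count, unit and event name as comma separated fields, while
         * -x '\t' (escaped on the command line) selects a real tab.
         */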
1621
1622         if (argc && !strncmp(argv[0], "rec", 3)) {
1623                 argc = __cmd_record(argc, argv);
1624                 if (argc < 0)
1625                         return -1;
1626         } else if (argc && !strncmp(argv[0], "rep", 3))
1627                 return __cmd_report(argc, argv);
1628
1629         interval = stat_config.interval;
1630         timeout = stat_config.timeout;
1631
1632         /*
1633          * For the record command, -o has already been taken care of.
1634          */
1635         if (!STAT_RECORD && output_name && strcmp(output_name, "-"))
1636                 output = NULL;
1637
1638         if (output_name && output_fd) {
1639                 fprintf(stderr, "cannot use both --output and --log-fd\n");
1640                 parse_options_usage(stat_usage, stat_options, "o", 1);
1641                 parse_options_usage(NULL, stat_options, "log-fd", 0);
1642                 goto out;
1643         }
1644
1645         if (stat_config.metric_only && stat_config.aggr_mode == AGGR_THREAD) {
1646                 fprintf(stderr, "--metric-only is not supported with --per-thread\n");
1647                 goto out;
1648         }
1649
1650         if (stat_config.metric_only && stat_config.run_count > 1) {
1651                 fprintf(stderr, "--metric-only is not supported with -r\n");
1652                 goto out;
1653         }
1654
1655         if (stat_config.walltime_run_table && stat_config.run_count <= 1) {
1656                 fprintf(stderr, "--table is only supported with -r\n");
1657                 parse_options_usage(stat_usage, stat_options, "r", 1);
1658                 parse_options_usage(NULL, stat_options, "table", 0);
1659                 goto out;
1660         }
1661
1662         if (output_fd < 0) {
1663                 fprintf(stderr, "argument to --log-fd must be > 0\n");
1664                 parse_options_usage(stat_usage, stat_options, "log-fd", 0);
1665                 goto out;
1666         }
1667
1668         if (!output) {
1669                 struct timespec tm;
1670                 mode = append_file ? "a" : "w";
1671
1672                 output = fopen(output_name, mode);
1673                 if (!output) {
1674                         perror("failed to create output file");
1675                         return -1;
1676                 }
1677                 clock_gettime(CLOCK_REALTIME, &tm);
1678                 fprintf(output, "# started on %s\n", ctime(&tm.tv_sec));
1679         } else if (output_fd > 0) {
1680                 mode = append_file ? "a" : "w";
1681                 output = fdopen(output_fd, mode);
1682                 if (!output) {
1683                         perror("Failed opening logfd");
1684                         return -errno;
1685                 }
1686         }
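
        /*
         * A wrapper script can combine --log-fd with a shell redirection,
         * e.g. 'perf stat --log-fd 3 -- true 3>counts', to keep the counter
         * output separate from the workload's stdout/stderr.
         */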
1687
1688         stat_config.output = output;
1689
1690         /*
1691          * let the spreadsheet do the pretty-printing
1692          */
1693         if (stat_config.csv_output) {
1694                 /* User explicitly passed -B? */
1695                 if (big_num_opt == 1) {
1696                         fprintf(stderr, "-B option not supported with -x\n");
1697                         parse_options_usage(stat_usage, stat_options, "B", 1);
1698                         parse_options_usage(NULL, stat_options, "x", 1);
1699                         goto out;
1700                 } else /* Nope, so disable big number formatting */
1701                         stat_config.big_num = false;
1702         } else if (big_num_opt == 0) /* User passed --no-big-num */
1703                 stat_config.big_num = false;
1704
1705         setup_system_wide(argc);
1706
1707         /*
1708          * Display user/system times only for a single
1709          * run and when there's a specified tracee.
1710          */
1711         if ((stat_config.run_count == 1) && target__none(&target))
1712                 stat_config.ru_display = true;
1713
1714         if (stat_config.run_count < 0) {
1715                 pr_err("Run count must be a positive number\n");
1716                 parse_options_usage(stat_usage, stat_options, "r", 1);
1717                 goto out;
1718         } else if (stat_config.run_count == 0) {
1719                 forever = true;
1720                 stat_config.run_count = 1;
1721         }
1722
1723         if (stat_config.walltime_run_table) {
1724                 stat_config.walltime_run = zalloc(stat_config.run_count * sizeof(stat_config.walltime_run[0]));
1725                 if (!stat_config.walltime_run) {
1726                         pr_err("failed to setup -r option\n");
1727                         goto out;
1728                 }
1729         }
1730
1731         if ((stat_config.aggr_mode == AGGR_THREAD) &&
1732                 !target__has_task(&target)) {
1733                 if (!target.system_wide || target.cpu_list) {
1734                         fprintf(stderr, "The --per-thread option is only "
1735                                 "available when monitoring via the -p, -t or -a "
1736                                 "options, or with --per-thread alone.\n");
1737                         parse_options_usage(NULL, stat_options, "p", 1);
1738                         parse_options_usage(NULL, stat_options, "t", 1);
1739                         goto out;
1740                 }
1741         }
1742
1743         /*
1744          * no_aggr and cgroup are for system-wide only;
1745          * --per-thread is aggregated per thread, we don't mix it with cpu mode
1746          */
1747         if (((stat_config.aggr_mode != AGGR_GLOBAL &&
1748               stat_config.aggr_mode != AGGR_THREAD) || nr_cgroups) &&
1749             !target__has_cpu(&target)) {
1750                 fprintf(stderr, "both cgroup and no-aggregation "
1751                         "modes are only available in system-wide mode\n");
1752
1753                 parse_options_usage(stat_usage, stat_options, "G", 1);
1754                 parse_options_usage(NULL, stat_options, "A", 1);
1755                 parse_options_usage(NULL, stat_options, "a", 1);
1756                 goto out;
1757         }
1758
1759         if (add_default_attributes())
1760                 goto out;
1761
1762         target__validate(&target);
1763
1764         if ((stat_config.aggr_mode == AGGR_THREAD) && (target.system_wide))
1765                 target.per_thread = true;
1766
1767         if (perf_evlist__create_maps(evsel_list, &target) < 0) {
1768                 if (target__has_task(&target)) {
1769                         pr_err("Problems finding threads to monitor\n");
1770                         parse_options_usage(stat_usage, stat_options, "p", 1);
1771                         parse_options_usage(NULL, stat_options, "t", 1);
1772                 } else if (target__has_cpu(&target)) {
1773                         perror("failed to parse CPUs map");
1774                         parse_options_usage(stat_usage, stat_options, "C", 1);
1775                         parse_options_usage(NULL, stat_options, "a", 1);
1776                 }
1777                 goto out;
1778         }
1779
1780         /*
1781          * Initialize thread_map with comm names,
1782          * so we can print them out in the output.
1783          */
1784         if (stat_config.aggr_mode == AGGR_THREAD) {
1785                 thread_map__read_comms(evsel_list->threads);
1786                 if (target.system_wide) {
1787                         if (runtime_stat_new(&stat_config,
1788                                 thread_map__nr(evsel_list->threads))) {
1789                                 goto out;
1790                         }
1791                 }
1792         }
1793
1794         if (stat_config.times && interval)
1795                 interval_count = true;
1796         else if (stat_config.times && !interval) {
1797                 pr_err("interval-count option should be used together with "
1798                                 "interval-print.\n");
1799                 parse_options_usage(stat_usage, stat_options, "interval-count", 0);
1800                 parse_options_usage(stat_usage, stat_options, "I", 1);
1801                 goto out;
1802         }
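
        /*
         * e.g. 'perf stat -I 1000 --interval-count 2 -a' prints two 1000ms
         * rounds and then stops, while -I 1000 alone keeps printing until
         * the workload exits or the user interrupts.
         */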
1803
1804         if (timeout && timeout < 100) {
1805                 if (timeout < 10) {
1806                         pr_err("timeout must be >= 10ms.\n");
1807                         parse_options_usage(stat_usage, stat_options, "timeout", 0);
1808                         goto out;
1809                 } else
1810                         pr_warning("timeout < 100ms. "
1811                                    "The overhead percentage could be high in some cases. "
1812                                    "Please proceed with caution.\n");
1813         }
1814         if (timeout && interval) {
1815                 pr_err("timeout option is not supported with interval-print.\n");
1816                 parse_options_usage(stat_usage, stat_options, "timeout", 0);
1817                 parse_options_usage(stat_usage, stat_options, "I", 1);
1818                 goto out;
1819         }
1820
1821         if (perf_evlist__alloc_stats(evsel_list, interval))
1822                 goto out;
1823
1824         if (perf_stat_init_aggr_mode())
1825                 goto out;
1826
1827         /*
1828          * Set sample_type to PERF_SAMPLE_IDENTIFIER, which should be harmless
1829          * while avoiding confusing messages from older tools.
1830          *
1831          * However for pipe sessions we need to keep it zero,
1832          * because perf script's perf_evsel__check_attr is triggered
1833          * by attr->sample_type != 0, and we can't run it on
1834          * stat sessions.
1835          */
1836         stat_config.identifier = !(STAT_RECORD && perf_stat.data.is_pipe);
1837
1838         /*
1839          * We don't want to block the signals - that would cause
1840          * child tasks to inherit that and Ctrl-C would not work.
1841          * What we want is for Ctrl-C to work in the exec()-ed
1842          * task, but being ignored by perf stat itself:
1843          */
1844         atexit(sig_atexit);
1845         if (!forever)
1846                 signal(SIGINT,  skip_signal);
1847         signal(SIGCHLD, skip_signal);
1848         signal(SIGALRM, skip_signal);
1849         signal(SIGABRT, skip_signal);
1850
1851         status = 0;
1852         for (run_idx = 0; forever || run_idx < stat_config.run_count; run_idx++) {
1853                 if (stat_config.run_count != 1 && verbose > 0)
1854                         fprintf(output, "[ perf stat: executing run #%d ... ]\n",
1855                                 run_idx + 1);
1856
1857                 status = run_perf_stat(argc, argv, run_idx);
1858                 if (forever && status != -1) {
1859                         print_counters(NULL, argc, argv);
1860                         perf_stat__reset_stats();
1861                 }
1862         }
1863
1864         if (!forever && status != -1 && !interval)
1865                 print_counters(NULL, argc, argv);
1866
1867         if (STAT_RECORD) {
1868                 /*
1869                  * We synthesize the kernel mmap record just so that older tools
1870                  * don't emit warnings about not being able to resolve symbols
1871                  * due to /proc/sys/kernel/kptr_restrict settings and instead provide
1872                  * a saner message about no samples being in the perf.data file.
1873                  *
1874                  * This also serves to suppress a warning about f_header.data.size == 0
1875                  * in header.c at the moment 'perf stat record' gets introduced, which
1876                  * is not really needed once we start adding the stat specific PERF_RECORD_
1877                  * records, but the need to suppress the kptr_restrict messages in older
1878                  * tools remains. -acme
1879                  */
1880                 int fd = perf_data__fd(&perf_stat.data);
1881                 int err = perf_event__synthesize_kernel_mmap((void *)&perf_stat,
1882                                                              process_synthesized_event,
1883                                                              &perf_stat.session->machines.host);
1884                 if (err) {
1885                         pr_warning("Couldn't synthesize the kernel mmap record, harmless, "
1886                                    "older tools may produce warnings about this file.\n");
1887                 }
1888
1889                 if (!interval) {
1890                         if (WRITE_STAT_ROUND_EVENT(walltime_nsecs_stats.max, FINAL))
1891                                 pr_err("failed to write stat round event\n");
1892                 }
1893
1894                 if (!perf_stat.data.is_pipe) {
1895                         perf_stat.session->header.data_size += perf_stat.bytes_written;
1896                         perf_session__write_header(perf_stat.session, evsel_list, fd, true);
1897                 }
1898
1899                 perf_session__delete(perf_stat.session);
1900         }
1901
1902         perf_stat__exit_aggr_mode();
1903         perf_evlist__free_stats(evsel_list);
1904 out:
1905         free(stat_config.walltime_run);
1906
1907         if (smi_cost && smi_reset)
1908                 sysfs__write_int(FREEZE_ON_SMI_PATH, 0);
1909
1910         perf_evlist__delete(evsel_list);
1911
1912         runtime_stat_delete(&stat_config);
1913
1914         return status;
1915 }