Merge tag 'for-linus-20171120' of git://git.infradead.org/linux-mtd
[linux-2.6-microblaze.git] / tools / perf / builtin-record.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * builtin-record.c
4  *
5  * Builtin record command: Record the profile of a workload
6  * (or a CPU, or a PID) into the perf.data output file - for
7  * later analysis via perf report.
8  */
9 #include "builtin.h"
10
11 #include "perf.h"
12
13 #include "util/build-id.h"
14 #include "util/util.h"
15 #include <subcmd/parse-options.h>
16 #include "util/parse-events.h"
17 #include "util/config.h"
18
19 #include "util/callchain.h"
20 #include "util/cgroup.h"
21 #include "util/header.h"
22 #include "util/event.h"
23 #include "util/evlist.h"
24 #include "util/evsel.h"
25 #include "util/debug.h"
26 #include "util/drv_configs.h"
27 #include "util/session.h"
28 #include "util/tool.h"
29 #include "util/symbol.h"
30 #include "util/cpumap.h"
31 #include "util/thread_map.h"
32 #include "util/data.h"
33 #include "util/perf_regs.h"
34 #include "util/auxtrace.h"
35 #include "util/tsc.h"
36 #include "util/parse-branch-options.h"
37 #include "util/parse-regs-options.h"
38 #include "util/llvm-utils.h"
39 #include "util/bpf-loader.h"
40 #include "util/trigger.h"
41 #include "util/perf-hooks.h"
42 #include "util/time-utils.h"
43 #include "util/units.h"
44 #include "asm/bug.h"
45
46 #include <errno.h>
47 #include <inttypes.h>
48 #include <poll.h>
49 #include <unistd.h>
50 #include <sched.h>
51 #include <signal.h>
52 #include <sys/mman.h>
53 #include <sys/wait.h>
54 #include <asm/bug.h>
55 #include <linux/time64.h>
56
/*
 * State for the --switch-output option: rotate the perf.data output
 * file on SIGUSR2, after a number of bytes written, or periodically.
 */
struct switch_output {
	bool		 enabled;	/* some switch-output mode is active */
	bool		 signal;	/* rotate on SIGUSR2 */
	unsigned long	 size;		/* rotate after this many bytes (0 = off) */
	unsigned long	 time;		/* time-based rotation period (0 = off) */
	const char	*str;		/* raw option argument from the command line */
	bool		 set;		/* option was given on the command line */
};
65
/*
 * Per-invocation state of 'perf record': embeds the perf_tool callbacks
 * plus everything the record session needs (options, output file,
 * evlist, session, byte accounting for --switch-output, etc.).
 */
struct record {
	struct perf_tool	tool;		/* must stay first: container_of() is used on it */
	struct record_opts	opts;
	u64			bytes_written;	/* payload bytes written so far (drives switch_output_size()) */
	struct perf_data	data;		/* output perf.data handle */
	struct auxtrace_record	*itr;		/* AUX area tracing context, if any */
	struct perf_evlist	*evlist;
	struct perf_session	*session;
	const char		*progname;
	int			realtime_prio;	/* nonzero: SCHED_FIFO priority to request */
	bool			no_buildid;
	bool			no_buildid_set;
	bool			no_buildid_cache;
	bool			no_buildid_cache_set;
	bool			buildid_all;	/* mark all DSOs, skip per-sample processing */
	bool			timestamp_filename;
	struct switch_output	switch_output;	/* --switch-output rotation state */
	unsigned long long	samples;
};
85
86 static volatile int auxtrace_record__snapshot_started;
87 static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
88 static DEFINE_TRIGGER(switch_output_trigger);
89
90 static bool switch_output_signal(struct record *rec)
91 {
92         return rec->switch_output.signal &&
93                trigger_is_ready(&switch_output_trigger);
94 }
95
96 static bool switch_output_size(struct record *rec)
97 {
98         return rec->switch_output.size &&
99                trigger_is_ready(&switch_output_trigger) &&
100                (rec->bytes_written >= rec->switch_output.size);
101 }
102
103 static bool switch_output_time(struct record *rec)
104 {
105         return rec->switch_output.time &&
106                trigger_is_ready(&switch_output_trigger);
107 }
108
109 static int record__write(struct record *rec, void *bf, size_t size)
110 {
111         if (perf_data__write(rec->session->data, bf, size) < 0) {
112                 pr_err("failed to write perf data, error: %m\n");
113                 return -1;
114         }
115
116         rec->bytes_written += size;
117
118         if (switch_output_size(rec))
119                 trigger_hit(&switch_output_trigger);
120
121         return 0;
122 }
123
124 static int process_synthesized_event(struct perf_tool *tool,
125                                      union perf_event *event,
126                                      struct perf_sample *sample __maybe_unused,
127                                      struct machine *machine __maybe_unused)
128 {
129         struct record *rec = container_of(tool, struct record, tool);
130         return record__write(rec, event, event->header.size);
131 }
132
133 static int record__pushfn(void *to, void *bf, size_t size)
134 {
135         struct record *rec = to;
136
137         rec->samples++;
138         return record__write(rec, bf, size);
139 }
140
141 static volatile int done;
142 static volatile int signr = -1;
143 static volatile int child_finished;
144
145 static void sig_handler(int sig)
146 {
147         if (sig == SIGCHLD)
148                 child_finished = 1;
149         else
150                 signr = sig;
151
152         done = 1;
153 }
154
/*
 * SIGSEGV handler: give registered perf hooks a chance to recover/clean
 * up, then dump a stack trace via the common signal helper.
 */
static void sigsegv_handler(int sig)
{
	perf_hooks__recover();
	sighandler_dump_stack(sig);
}
160
161 static void record__sig_exit(void)
162 {
163         if (signr == -1)
164                 return;
165
166         signal(signr, SIG_DFL);
167         raise(signr);
168 }
169
170 #ifdef HAVE_AUXTRACE_SUPPORT
171
172 static int record__process_auxtrace(struct perf_tool *tool,
173                                     union perf_event *event, void *data1,
174                                     size_t len1, void *data2, size_t len2)
175 {
176         struct record *rec = container_of(tool, struct record, tool);
177         struct perf_data *data = &rec->data;
178         size_t padding;
179         u8 pad[8] = {0};
180
181         if (!perf_data__is_pipe(data)) {
182                 off_t file_offset;
183                 int fd = perf_data__fd(data);
184                 int err;
185
186                 file_offset = lseek(fd, 0, SEEK_CUR);
187                 if (file_offset == -1)
188                         return -1;
189                 err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
190                                                      event, file_offset);
191                 if (err)
192                         return err;
193         }
194
195         /* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
196         padding = (len1 + len2) & 7;
197         if (padding)
198                 padding = 8 - padding;
199
200         record__write(rec, event, event->header.size);
201         record__write(rec, data1, len1);
202         if (len2)
203                 record__write(rec, data2, len2);
204         record__write(rec, &pad, padding);
205
206         return 0;
207 }
208
209 static int record__auxtrace_mmap_read(struct record *rec,
210                                       struct auxtrace_mmap *mm)
211 {
212         int ret;
213
214         ret = auxtrace_mmap__read(mm, rec->itr, &rec->tool,
215                                   record__process_auxtrace);
216         if (ret < 0)
217                 return ret;
218
219         if (ret)
220                 rec->samples++;
221
222         return 0;
223 }
224
225 static int record__auxtrace_mmap_read_snapshot(struct record *rec,
226                                                struct auxtrace_mmap *mm)
227 {
228         int ret;
229
230         ret = auxtrace_mmap__read_snapshot(mm, rec->itr, &rec->tool,
231                                            record__process_auxtrace,
232                                            rec->opts.auxtrace_snapshot_size);
233         if (ret < 0)
234                 return ret;
235
236         if (ret)
237                 rec->samples++;
238
239         return 0;
240 }
241
242 static int record__auxtrace_read_snapshot_all(struct record *rec)
243 {
244         int i;
245         int rc = 0;
246
247         for (i = 0; i < rec->evlist->nr_mmaps; i++) {
248                 struct auxtrace_mmap *mm =
249                                 &rec->evlist->mmap[i].auxtrace_mmap;
250
251                 if (!mm->base)
252                         continue;
253
254                 if (record__auxtrace_mmap_read_snapshot(rec, mm) != 0) {
255                         rc = -1;
256                         goto out;
257                 }
258         }
259 out:
260         return rc;
261 }
262
263 static void record__read_auxtrace_snapshot(struct record *rec)
264 {
265         pr_debug("Recording AUX area tracing snapshot\n");
266         if (record__auxtrace_read_snapshot_all(rec) < 0) {
267                 trigger_error(&auxtrace_snapshot_trigger);
268         } else {
269                 if (auxtrace_record__snapshot_finish(rec->itr))
270                         trigger_error(&auxtrace_snapshot_trigger);
271                 else
272                         trigger_ready(&auxtrace_snapshot_trigger);
273         }
274 }
275
#else

/* !HAVE_AUXTRACE_SUPPORT: no-op stubs so callers need no #ifdefs. */

static inline
int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
			       struct auxtrace_mmap *mm __maybe_unused)
{
	return 0;
}

static inline
void record__read_auxtrace_snapshot(struct record *rec __maybe_unused)
{
}

static inline
int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
{
	return 0;
}

#endif
297
298 static int record__mmap_evlist(struct record *rec,
299                                struct perf_evlist *evlist)
300 {
301         struct record_opts *opts = &rec->opts;
302         char msg[512];
303
304         if (perf_evlist__mmap_ex(evlist, opts->mmap_pages, false,
305                                  opts->auxtrace_mmap_pages,
306                                  opts->auxtrace_snapshot_mode) < 0) {
307                 if (errno == EPERM) {
308                         pr_err("Permission error mapping pages.\n"
309                                "Consider increasing "
310                                "/proc/sys/kernel/perf_event_mlock_kb,\n"
311                                "or try again with a smaller value of -m/--mmap_pages.\n"
312                                "(current value: %u,%u)\n",
313                                opts->mmap_pages, opts->auxtrace_mmap_pages);
314                         return -errno;
315                 } else {
316                         pr_err("failed to mmap with %d (%s)\n", errno,
317                                 str_error_r(errno, msg, sizeof(msg)));
318                         if (errno)
319                                 return -errno;
320                         else
321                                 return -EINVAL;
322                 }
323         }
324         return 0;
325 }
326
/* Convenience wrapper: mmap the record session's own evlist. */
static int record__mmap(struct record *rec)
{
	return record__mmap_evlist(rec, rec->evlist);
}
331
/*
 * Open every event in the evlist on its CPUs/threads, apply event
 * filters and PMU driver configs, then mmap the ring buffers.
 * Emits a user-visible error message on any failure and returns 0 on
 * success or a negative value.
 */
static int record__open(struct record *rec)
{
	char msg[BUFSIZ];
	struct perf_evsel *pos;
	struct perf_evlist *evlist = rec->evlist;
	struct perf_session *session = rec->session;
	struct record_opts *opts = &rec->opts;
	struct perf_evsel_config_term *err_term;
	int rc = 0;

	perf_evlist__config(evlist, opts, &callchain_param);

	evlist__for_each_entry(evlist, pos) {
try_again:
		if (perf_evsel__open(pos, pos->cpus, pos->threads) < 0) {
			/*
			 * Let perf_evsel__fallback() adjust the event
			 * config and retry; give up only when no
			 * fallback remains.
			 */
			if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
				if (verbose > 0)
					ui__warning("%s\n", msg);
				goto try_again;
			}

			/* Capture errno before further calls can clobber it. */
			rc = -errno;
			perf_evsel__open_strerror(pos, &opts->target,
						  errno, msg, sizeof(msg));
			ui__error("%s\n", msg);
			goto out;
		}
	}

	/* On failure 'pos' points at the evsel whose filter was rejected. */
	if (perf_evlist__apply_filters(evlist, &pos)) {
		pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
			pos->filter, perf_evsel__name(pos), errno,
			str_error_r(errno, msg, sizeof(msg)));
		rc = -1;
		goto out;
	}

	/* Likewise, 'pos'/'err_term' identify the failing driver config term. */
	if (perf_evlist__apply_drv_configs(evlist, &pos, &err_term)) {
		pr_err("failed to set config \"%s\" on event %s with %d (%s)\n",
		      err_term->val.drv_cfg, perf_evsel__name(pos), errno,
		      str_error_r(errno, msg, sizeof(msg)));
		rc = -1;
		goto out;
	}

	rc = record__mmap(rec);
	if (rc)
		goto out;

	session->evlist = evlist;
	perf_session__set_id_hdr_size(session);
out:
	return rc;
}
386
/*
 * Sample callback used while post-processing the recorded data for
 * build-ids: count the sample and mark the DSO it hit.
 */
static int process_sample_event(struct perf_tool *tool,
				union perf_event *event,
				struct perf_sample *sample,
				struct perf_evsel *evsel,
				struct machine *machine)
{
	struct record *rec = container_of(tool, struct record, tool);

	rec->samples++;

	return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
}
399
/*
 * Re-read the freshly written data file to collect build-ids for the
 * DSOs that were hit.  No-op for an empty file.  Returns whatever
 * perf_session__process_events() returns.
 */
static int process_buildids(struct record *rec)
{
	struct perf_data *data = &rec->data;
	struct perf_session *session = rec->session;

	if (data->size == 0)
		return 0;

	/*
	 * During this process, it'll load kernel map and replace the
	 * dso->long_name to a real pathname it found.  In this case
	 * we prefer the vmlinux path like
	 *   /lib/modules/3.16.4/build/vmlinux
	 *
	 * rather than build-id path (in debug directory).
	 *   $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
	 */
	symbol_conf.ignore_vmlinux_buildid = true;

	/*
	 * If --buildid-all is given, it marks all DSO regardless of hits,
	 * so no need to process samples.
	 */
	if (rec->buildid_all)
		rec->tool.sample = NULL;

	return perf_session__process_events(session);
}
428
429 static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
430 {
431         int err;
432         struct perf_tool *tool = data;
433         /*
434          *As for guest kernel when processing subcommand record&report,
435          *we arrange module mmap prior to guest kernel mmap and trigger
436          *a preload dso because default guest module symbols are loaded
437          *from guest kallsyms instead of /lib/modules/XXX/XXX. This
438          *method is used to avoid symbol missing when the first addr is
439          *in module instead of in guest kernel.
440          */
441         err = perf_event__synthesize_modules(tool, process_synthesized_event,
442                                              machine);
443         if (err < 0)
444                 pr_err("Couldn't record guest kernel [%d]'s reference"
445                        " relocation symbol.\n", machine->pid);
446
447         /*
448          * We use _stext for guest kernel because guest kernel's /proc/kallsyms
449          * have no _text sometimes.
450          */
451         err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
452                                                  machine);
453         if (err < 0)
454                 pr_err("Couldn't record guest kernel [%d]'s reference"
455                        " relocation symbol.\n", machine->pid);
456 }
457
/*
 * Bare header-only PERF_RECORD_FINISHED_ROUND marker, appended after a
 * pass over the mmaps that produced data (see record__mmap_read_evlist()).
 */
static struct perf_event_header finished_round_event = {
	.size = sizeof(struct perf_event_header),
	.type = PERF_RECORD_FINISHED_ROUND,
};
462
/*
 * Drain one set of mmap ring buffers — the regular ones, or the
 * backward ones when @backward is true — into the output file,
 * including AUX area data when not in snapshot mode.  If anything was
 * written this round, append a PERF_RECORD_FINISHED_ROUND marker.
 * Returns 0 on success (including "nothing to do"), -1 on error.
 */
static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evlist,
				    bool backward)
{
	u64 bytes_written = rec->bytes_written;
	int i;
	int rc = 0;
	struct perf_mmap *maps;

	if (!evlist)
		return 0;

	maps = backward ? evlist->backward_mmap : evlist->mmap;
	if (!maps)
		return 0;

	/* Backward buffers are only drained when data is pending. */
	if (backward && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
		return 0;

	for (i = 0; i < evlist->nr_mmaps; i++) {
		struct auxtrace_mmap *mm = &maps[i].auxtrace_mmap;

		if (maps[i].base) {
			if (perf_mmap__push(&maps[i], evlist->overwrite, backward, rec, record__pushfn) != 0) {
				rc = -1;
				goto out;
			}
		}

		if (mm->base && !rec->opts.auxtrace_snapshot_mode &&
		    record__auxtrace_mmap_read(rec, mm) != 0) {
			rc = -1;
			goto out;
		}
	}

	/*
	 * Mark the round finished in case we wrote
	 * at least one event.
	 */
	if (bytes_written != rec->bytes_written)
		rc = record__write(rec, &finished_round_event, sizeof(finished_round_event));

	if (backward)
		perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
out:
	return rc;
}
510
511 static int record__mmap_read_all(struct record *rec)
512 {
513         int err;
514
515         err = record__mmap_read_evlist(rec, rec->evlist, false);
516         if (err)
517                 return err;
518
519         return record__mmap_read_evlist(rec, rec->evlist, true);
520 }
521
/*
 * Enable every header feature by default, then clear the ones that do
 * not apply to this particular record session.
 */
static void record__init_features(struct record *rec)
{
	struct perf_session *session = rec->session;
	int feat;

	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
		perf_header__set_feat(&session->header, feat);

	if (rec->no_buildid)
		perf_header__clear_feat(&session->header, HEADER_BUILD_ID);

	if (!have_tracepoints(&rec->evlist->entries))
		perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);

	if (!rec->opts.branch_stack)
		perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);

	if (!rec->opts.full_auxtrace)
		perf_header__clear_feat(&session->header, HEADER_AUXTRACE);

	/* HEADER_STAT belongs to 'perf stat record', never 'perf record'. */
	perf_header__clear_feat(&session->header, HEADER_STAT);
}
544
545 static void
546 record__finish_output(struct record *rec)
547 {
548         struct perf_data *data = &rec->data;
549         int fd = perf_data__fd(data);
550
551         if (data->is_pipe)
552                 return;
553
554         rec->session->header.data_size += rec->bytes_written;
555         data->size = lseek(perf_data__fd(data), 0, SEEK_CUR);
556
557         if (!rec->no_buildid) {
558                 process_buildids(rec);
559
560                 if (rec->buildid_all)
561                         dsos__hit_all(rec->session);
562         }
563         perf_session__write_header(rec->session, rec->evlist, fd, true);
564
565         return;
566 }
567
568 static int record__synthesize_workload(struct record *rec, bool tail)
569 {
570         int err;
571         struct thread_map *thread_map;
572
573         if (rec->opts.tail_synthesize != tail)
574                 return 0;
575
576         thread_map = thread_map__new_by_tid(rec->evlist->workload.pid);
577         if (thread_map == NULL)
578                 return -1;
579
580         err = perf_event__synthesize_thread_map(&rec->tool, thread_map,
581                                                  process_synthesized_event,
582                                                  &rec->session->machines.host,
583                                                  rec->opts.sample_address,
584                                                  rec->opts.proc_map_timeout);
585         thread_map__put(thread_map);
586         return err;
587 }
588
589 static int record__synthesize(struct record *rec, bool tail);
590
/*
 * Rotate the output file (--switch-output): synthesize any tail
 * events, finalize the current file, switch perf_data to a new
 * timestamped file, and re-synthesize tracking events for the new file
 * unless we are rotating at exit.  Returns the new fd (>= 0) or a
 * negative error.
 */
static int
record__switch_output(struct record *rec, bool at_exit)
{
	struct perf_data *data = &rec->data;
	int fd, err;

	/* Same Size:      "2015122520103046"*/
	char timestamp[] = "InvalidTimestamp";

	record__synthesize(rec, true);
	if (target__none(&rec->opts.target))
		record__synthesize_workload(rec, true);

	rec->samples = 0;
	record__finish_output(rec);
	err = fetch_current_timestamp(timestamp, sizeof(timestamp));
	if (err) {
		pr_err("Failed to get current timestamp\n");
		return -EINVAL;
	}

	fd = perf_data__switch(data, timestamp,
				    rec->session->header.data_offset,
				    at_exit);
	/* Reset the byte accounting only when recording continues. */
	if (fd >= 0 && !at_exit) {
		rec->bytes_written = 0;
		rec->session->header.data_size = 0;
	}

	if (!quiet)
		fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
			data->file.path, timestamp);

	/* Output tracking events */
	if (!at_exit) {
		record__synthesize(rec, false);

		/*
		 * In 'perf record --switch-output' without -a,
		 * record__synthesize() in record__switch_output() won't
		 * generate tracking events because there's no thread_map
		 * in evlist. Which causes newly created perf.data doesn't
		 * contain map and comm information.
		 * Create a fake thread_map and directly call
		 * perf_event__synthesize_thread_map() for those events.
		 */
		if (target__none(&rec->opts.target))
			record__synthesize_workload(rec, false);
	}
	return fd;
}
642
643 static volatile int workload_exec_errno;
644
/*
 * perf_evlist__prepare_workload will send a SIGUSR1
 * if the fork fails, since we asked by setting its
 * want_signal to true.
 */
static void workload_exec_failed_signal(int signo __maybe_unused,
					siginfo_t *info,
					void *ucontext __maybe_unused)
{
	/* The exec errno is delivered in the signal's value payload. */
	workload_exec_errno = info->si_value.sival_int;
	done = 1;
	child_finished = 1;
}
658
659 static void snapshot_sig_handler(int sig);
660 static void alarm_sig_handler(int sig);
661
/*
 * Weak default for architectures that do not synthesize a time
 * conversion event: does nothing, arch code may override it.
 */
int __weak
perf_event__synth_time_conv(const struct perf_event_mmap_page *pc __maybe_unused,
			    struct perf_tool *tool __maybe_unused,
			    perf_event__handler_t process __maybe_unused,
			    struct machine *machine __maybe_unused)
{
	return 0;
}
670
671 static const struct perf_event_mmap_page *
672 perf_evlist__pick_pc(struct perf_evlist *evlist)
673 {
674         if (evlist) {
675                 if (evlist->mmap && evlist->mmap[0].base)
676                         return evlist->mmap[0].base;
677                 if (evlist->backward_mmap && evlist->backward_mmap[0].base)
678                         return evlist->backward_mmap[0].base;
679         }
680         return NULL;
681 }
682
683 static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
684 {
685         const struct perf_event_mmap_page *pc;
686
687         pc = perf_evlist__pick_pc(rec->evlist);
688         if (pc)
689                 return pc;
690         return NULL;
691 }
692
/*
 * Synthesize all the non-sample events analysis tools need: for pipe
 * output, the features/attrs/tracing-data that would otherwise live in
 * the file header; then time conversion, AUX trace info, kernel and
 * module mmaps, guest OS events, and the thread maps of the target.
 * Only runs when the requested phase (@tail) matches
 * opts.tail_synthesize.  Returns 0 or a negative error.
 */
static int record__synthesize(struct record *rec, bool tail)
{
	struct perf_session *session = rec->session;
	struct machine *machine = &session->machines.host;
	struct perf_data *data = &rec->data;
	struct record_opts *opts = &rec->opts;
	struct perf_tool *tool = &rec->tool;
	int fd = perf_data__fd(data);
	int err = 0;

	if (rec->opts.tail_synthesize != tail)
		return 0;

	if (data->is_pipe) {
		/* Pipe output has no seekable header: stream it as events. */
		err = perf_event__synthesize_features(
			tool, session, rec->evlist, process_synthesized_event);
		if (err < 0) {
			pr_err("Couldn't synthesize features.\n");
			return err;
		}

		err = perf_event__synthesize_attrs(tool, session,
						   process_synthesized_event);
		if (err < 0) {
			pr_err("Couldn't synthesize attrs.\n");
			goto out;
		}

		if (have_tracepoints(&rec->evlist->entries)) {
			/*
			 * FIXME err <= 0 here actually means that
			 * there were no tracepoints so its not really
			 * an error, just that we don't need to
			 * synthesize anything.  We really have to
			 * return this more properly and also
			 * propagate errors that now are calling die()
			 */
			err = perf_event__synthesize_tracing_data(tool, fd, rec->evlist,
								  process_synthesized_event);
			if (err <= 0) {
				pr_err("Couldn't record tracing data.\n");
				goto out;
			}
			/* On success err is the number of bytes written. */
			rec->bytes_written += err;
		}
	}

	err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
					  process_synthesized_event, machine);
	if (err)
		goto out;

	if (rec->opts.full_auxtrace) {
		err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
					session, process_synthesized_event);
		if (err)
			goto out;
	}

	/* Kernel/module mmap failures only warn; recording continues. */
	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
						 machine);
	WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
			   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
			   "Check /proc/kallsyms permission or run as root.\n");

	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
			   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
			   "Check /proc/modules permission or run as root.\n");

	if (perf_guest) {
		machines__process_guests(&session->machines,
					 perf_event__synthesize_guest_os, tool);
	}

	err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
					    process_synthesized_event, opts->sample_address,
					    opts->proc_map_timeout, 1);
out:
	return err;
}
775
776 static int __cmd_record(struct record *rec, int argc, const char **argv)
777 {
778         int err;
779         int status = 0;
780         unsigned long waking = 0;
781         const bool forks = argc > 0;
782         struct machine *machine;
783         struct perf_tool *tool = &rec->tool;
784         struct record_opts *opts = &rec->opts;
785         struct perf_data *data = &rec->data;
786         struct perf_session *session;
787         bool disabled = false, draining = false;
788         int fd;
789
790         rec->progname = argv[0];
791
792         atexit(record__sig_exit);
793         signal(SIGCHLD, sig_handler);
794         signal(SIGINT, sig_handler);
795         signal(SIGTERM, sig_handler);
796         signal(SIGSEGV, sigsegv_handler);
797
798         if (rec->opts.record_namespaces)
799                 tool->namespace_events = true;
800
801         if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) {
802                 signal(SIGUSR2, snapshot_sig_handler);
803                 if (rec->opts.auxtrace_snapshot_mode)
804                         trigger_on(&auxtrace_snapshot_trigger);
805                 if (rec->switch_output.enabled)
806                         trigger_on(&switch_output_trigger);
807         } else {
808                 signal(SIGUSR2, SIG_IGN);
809         }
810
811         session = perf_session__new(data, false, tool);
812         if (session == NULL) {
813                 pr_err("Perf session creation failed.\n");
814                 return -1;
815         }
816
817         fd = perf_data__fd(data);
818         rec->session = session;
819
820         record__init_features(rec);
821
822         if (forks) {
823                 err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
824                                                     argv, data->is_pipe,
825                                                     workload_exec_failed_signal);
826                 if (err < 0) {
827                         pr_err("Couldn't run the workload!\n");
828                         status = err;
829                         goto out_delete_session;
830                 }
831         }
832
833         if (record__open(rec) != 0) {
834                 err = -1;
835                 goto out_child;
836         }
837
838         err = bpf__apply_obj_config();
839         if (err) {
840                 char errbuf[BUFSIZ];
841
842                 bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
843                 pr_err("ERROR: Apply config to BPF failed: %s\n",
844                          errbuf);
845                 goto out_child;
846         }
847
848         /*
849          * Normally perf_session__new would do this, but it doesn't have the
850          * evlist.
851          */
852         if (rec->tool.ordered_events && !perf_evlist__sample_id_all(rec->evlist)) {
853                 pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
854                 rec->tool.ordered_events = false;
855         }
856
857         if (!rec->evlist->nr_groups)
858                 perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);
859
860         if (data->is_pipe) {
861                 err = perf_header__write_pipe(fd);
862                 if (err < 0)
863                         goto out_child;
864         } else {
865                 err = perf_session__write_header(session, rec->evlist, fd, false);
866                 if (err < 0)
867                         goto out_child;
868         }
869
870         if (!rec->no_buildid
871             && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
872                 pr_err("Couldn't generate buildids. "
873                        "Use --no-buildid to profile anyway.\n");
874                 err = -1;
875                 goto out_child;
876         }
877
878         machine = &session->machines.host;
879
880         err = record__synthesize(rec, false);
881         if (err < 0)
882                 goto out_child;
883
884         if (rec->realtime_prio) {
885                 struct sched_param param;
886
887                 param.sched_priority = rec->realtime_prio;
888                 if (sched_setscheduler(0, SCHED_FIFO, &param)) {
889                         pr_err("Could not set realtime priority.\n");
890                         err = -1;
891                         goto out_child;
892                 }
893         }
894
895         /*
896          * When perf is starting the traced process, all the events
897          * (apart from group members) have enable_on_exec=1 set,
898          * so don't spoil it by prematurely enabling them.
899          */
900         if (!target__none(&opts->target) && !opts->initial_delay)
901                 perf_evlist__enable(rec->evlist);
902
903         /*
904          * Let the child rip
905          */
906         if (forks) {
907                 union perf_event *event;
908                 pid_t tgid;
909
910                 event = malloc(sizeof(event->comm) + machine->id_hdr_size);
911                 if (event == NULL) {
912                         err = -ENOMEM;
913                         goto out_child;
914                 }
915
916                 /*
917                  * Some H/W events are generated before COMM event
918                  * which is emitted during exec(), so perf script
919                  * cannot see a correct process name for those events.
920                  * Synthesize COMM event to prevent it.
921                  */
922                 tgid = perf_event__synthesize_comm(tool, event,
923                                                    rec->evlist->workload.pid,
924                                                    process_synthesized_event,
925                                                    machine);
926                 free(event);
927
928                 if (tgid == -1)
929                         goto out_child;
930
931                 event = malloc(sizeof(event->namespaces) +
932                                (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
933                                machine->id_hdr_size);
934                 if (event == NULL) {
935                         err = -ENOMEM;
936                         goto out_child;
937                 }
938
939                 /*
940                  * Synthesize NAMESPACES event for the command specified.
941                  */
942                 perf_event__synthesize_namespaces(tool, event,
943                                                   rec->evlist->workload.pid,
944                                                   tgid, process_synthesized_event,
945                                                   machine);
946                 free(event);
947
948                 perf_evlist__start_workload(rec->evlist);
949         }
950
951         if (opts->initial_delay) {
952                 usleep(opts->initial_delay * USEC_PER_MSEC);
953                 perf_evlist__enable(rec->evlist);
954         }
955
956         trigger_ready(&auxtrace_snapshot_trigger);
957         trigger_ready(&switch_output_trigger);
958         perf_hooks__invoke_record_start();
959         for (;;) {
960                 unsigned long long hits = rec->samples;
961
962                 /*
963                  * rec->evlist->bkw_mmap_state is possible to be
964                  * BKW_MMAP_EMPTY here: when done == true and
965                  * hits != rec->samples in previous round.
966                  *
967                  * perf_evlist__toggle_bkw_mmap ensure we never
968                  * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING.
969                  */
970                 if (trigger_is_hit(&switch_output_trigger) || done || draining)
971                         perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING);
972
973                 if (record__mmap_read_all(rec) < 0) {
974                         trigger_error(&auxtrace_snapshot_trigger);
975                         trigger_error(&switch_output_trigger);
976                         err = -1;
977                         goto out_child;
978                 }
979
980                 if (auxtrace_record__snapshot_started) {
981                         auxtrace_record__snapshot_started = 0;
982                         if (!trigger_is_error(&auxtrace_snapshot_trigger))
983                                 record__read_auxtrace_snapshot(rec);
984                         if (trigger_is_error(&auxtrace_snapshot_trigger)) {
985                                 pr_err("AUX area tracing snapshot failed\n");
986                                 err = -1;
987                                 goto out_child;
988                         }
989                 }
990
991                 if (trigger_is_hit(&switch_output_trigger)) {
992                         /*
993                          * If switch_output_trigger is hit, the data in
994                          * overwritable ring buffer should have been collected,
995                          * so bkw_mmap_state should be set to BKW_MMAP_EMPTY.
996                          *
997                          * If SIGUSR2 raise after or during record__mmap_read_all(),
998                          * record__mmap_read_all() didn't collect data from
999                          * overwritable ring buffer. Read again.
1000                          */
1001                         if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING)
1002                                 continue;
1003                         trigger_ready(&switch_output_trigger);
1004
1005                         /*
1006                          * Reenable events in overwrite ring buffer after
1007                          * record__mmap_read_all(): we should have collected
1008                          * data from it.
1009                          */
1010                         perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING);
1011
1012                         if (!quiet)
1013                                 fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
1014                                         waking);
1015                         waking = 0;
1016                         fd = record__switch_output(rec, false);
1017                         if (fd < 0) {
1018                                 pr_err("Failed to switch to new file\n");
1019                                 trigger_error(&switch_output_trigger);
1020                                 err = fd;
1021                                 goto out_child;
1022                         }
1023
1024                         /* re-arm the alarm */
1025                         if (rec->switch_output.time)
1026                                 alarm(rec->switch_output.time);
1027                 }
1028
1029                 if (hits == rec->samples) {
1030                         if (done || draining)
1031                                 break;
1032                         err = perf_evlist__poll(rec->evlist, -1);
1033                         /*
1034                          * Propagate error, only if there's any. Ignore positive
1035                          * number of returned events and interrupt error.
1036                          */
1037                         if (err > 0 || (err < 0 && errno == EINTR))
1038                                 err = 0;
1039                         waking++;
1040
1041                         if (perf_evlist__filter_pollfd(rec->evlist, POLLERR | POLLHUP) == 0)
1042                                 draining = true;
1043                 }
1044
1045                 /*
1046                  * When perf is starting the traced process, at the end events
1047                  * die with the process and we wait for that. Thus no need to
1048                  * disable events in this case.
1049                  */
1050                 if (done && !disabled && !target__none(&opts->target)) {
1051                         trigger_off(&auxtrace_snapshot_trigger);
1052                         perf_evlist__disable(rec->evlist);
1053                         disabled = true;
1054                 }
1055         }
1056         trigger_off(&auxtrace_snapshot_trigger);
1057         trigger_off(&switch_output_trigger);
1058
1059         if (forks && workload_exec_errno) {
1060                 char msg[STRERR_BUFSIZE];
1061                 const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
1062                 pr_err("Workload failed: %s\n", emsg);
1063                 err = -1;
1064                 goto out_child;
1065         }
1066
1067         if (!quiet)
1068                 fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);
1069
1070         if (target__none(&rec->opts.target))
1071                 record__synthesize_workload(rec, true);
1072
1073 out_child:
1074         if (forks) {
1075                 int exit_status;
1076
1077                 if (!child_finished)
1078                         kill(rec->evlist->workload.pid, SIGTERM);
1079
1080                 wait(&exit_status);
1081
1082                 if (err < 0)
1083                         status = err;
1084                 else if (WIFEXITED(exit_status))
1085                         status = WEXITSTATUS(exit_status);
1086                 else if (WIFSIGNALED(exit_status))
1087                         signr = WTERMSIG(exit_status);
1088         } else
1089                 status = err;
1090
1091         record__synthesize(rec, true);
1092         /* this will be recalculated during process_buildids() */
1093         rec->samples = 0;
1094
1095         if (!err) {
1096                 if (!rec->timestamp_filename) {
1097                         record__finish_output(rec);
1098                 } else {
1099                         fd = record__switch_output(rec, true);
1100                         if (fd < 0) {
1101                                 status = fd;
1102                                 goto out_delete_session;
1103                         }
1104                 }
1105         }
1106
1107         perf_hooks__invoke_record_end();
1108
1109         if (!err && !quiet) {
1110                 char samples[128];
1111                 const char *postfix = rec->timestamp_filename ?
1112                                         ".<timestamp>" : "";
1113
1114                 if (rec->samples && !rec->opts.full_auxtrace)
1115                         scnprintf(samples, sizeof(samples),
1116                                   " (%" PRIu64 " samples)", rec->samples);
1117                 else
1118                         samples[0] = '\0';
1119
1120                 fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s ]\n",
1121                         perf_data__size(data) / 1024.0 / 1024.0,
1122                         data->file.path, postfix, samples);
1123         }
1124
1125 out_delete_session:
1126         perf_session__delete(session);
1127         return status;
1128 }
1129
1130 static void callchain_debug(struct callchain_param *callchain)
1131 {
1132         static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };
1133
1134         pr_debug("callchain: type %s\n", str[callchain->record_mode]);
1135
1136         if (callchain->record_mode == CALLCHAIN_DWARF)
1137                 pr_debug("callchain: stack dump size %d\n",
1138                          callchain->dump_size);
1139 }
1140
1141 int record_opts__parse_callchain(struct record_opts *record,
1142                                  struct callchain_param *callchain,
1143                                  const char *arg, bool unset)
1144 {
1145         int ret;
1146         callchain->enabled = !unset;
1147
1148         /* --no-call-graph */
1149         if (unset) {
1150                 callchain->record_mode = CALLCHAIN_NONE;
1151                 pr_debug("callchain: disabled\n");
1152                 return 0;
1153         }
1154
1155         ret = parse_callchain_record_opt(arg, callchain);
1156         if (!ret) {
1157                 /* Enable data address sampling for DWARF unwind. */
1158                 if (callchain->record_mode == CALLCHAIN_DWARF)
1159                         record->sample_address = true;
1160                 callchain_debug(callchain);
1161         }
1162
1163         return ret;
1164 }
1165
1166 int record_parse_callchain_opt(const struct option *opt,
1167                                const char *arg,
1168                                int unset)
1169 {
1170         return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
1171 }
1172
1173 int record_callchain_opt(const struct option *opt,
1174                          const char *arg __maybe_unused,
1175                          int unset __maybe_unused)
1176 {
1177         struct callchain_param *callchain = opt->value;
1178
1179         callchain->enabled = true;
1180
1181         if (callchain->record_mode == CALLCHAIN_NONE)
1182                 callchain->record_mode = CALLCHAIN_FP;
1183
1184         callchain_debug(callchain);
1185         return 0;
1186 }
1187
1188 static int perf_record_config(const char *var, const char *value, void *cb)
1189 {
1190         struct record *rec = cb;
1191
1192         if (!strcmp(var, "record.build-id")) {
1193                 if (!strcmp(value, "cache"))
1194                         rec->no_buildid_cache = false;
1195                 else if (!strcmp(value, "no-cache"))
1196                         rec->no_buildid_cache = true;
1197                 else if (!strcmp(value, "skip"))
1198                         rec->no_buildid = true;
1199                 else
1200                         return -1;
1201                 return 0;
1202         }
1203         if (!strcmp(var, "record.call-graph"))
1204                 var = "call-graph.record-mode"; /* fall-through */
1205
1206         return perf_default_config(var, value, cb);
1207 }
1208
/* Maps a user-visible clock name onto the clockid passed to the kernel. */
struct clockid_map {
	const char *name;
	int clockid;
};

/* Build one clockid_map table entry. */
#define CLOCKID_MAP(n, c)	\
	{ .name = n, .clockid = (c), }

/* Sentinel entry terminating the clockids[] table below. */
#define CLOCKID_END	{ .name = NULL, }


/*
 * Add the missing ones, we need to build on many distros...
 */
#ifndef CLOCK_MONOTONIC_RAW
#define CLOCK_MONOTONIC_RAW 4
#endif
#ifndef CLOCK_BOOTTIME
#define CLOCK_BOOTTIME 7
#endif
#ifndef CLOCK_TAI
#define CLOCK_TAI 11
#endif

/* Name table searched case-insensitively by parse_clockid() below. */
static const struct clockid_map clockids[] = {
	/* available for all events, NMI safe */
	CLOCKID_MAP("monotonic", CLOCK_MONOTONIC),
	CLOCKID_MAP("monotonic_raw", CLOCK_MONOTONIC_RAW),

	/* available for some events */
	CLOCKID_MAP("realtime", CLOCK_REALTIME),
	CLOCKID_MAP("boottime", CLOCK_BOOTTIME),
	CLOCKID_MAP("tai", CLOCK_TAI),

	/* available for the lazy */
	CLOCKID_MAP("mono", CLOCK_MONOTONIC),
	CLOCKID_MAP("raw", CLOCK_MONOTONIC_RAW),
	CLOCKID_MAP("real", CLOCK_REALTIME),
	CLOCKID_MAP("boot", CLOCK_BOOTTIME),

	CLOCKID_END,
};
1251
1252 static int parse_clockid(const struct option *opt, const char *str, int unset)
1253 {
1254         struct record_opts *opts = (struct record_opts *)opt->value;
1255         const struct clockid_map *cm;
1256         const char *ostr = str;
1257
1258         if (unset) {
1259                 opts->use_clockid = 0;
1260                 return 0;
1261         }
1262
1263         /* no arg passed */
1264         if (!str)
1265                 return 0;
1266
1267         /* no setting it twice */
1268         if (opts->use_clockid)
1269                 return -1;
1270
1271         opts->use_clockid = true;
1272
1273         /* if its a number, we're done */
1274         if (sscanf(str, "%d", &opts->clockid) == 1)
1275                 return 0;
1276
1277         /* allow a "CLOCK_" prefix to the name */
1278         if (!strncasecmp(str, "CLOCK_", 6))
1279                 str += 6;
1280
1281         for (cm = clockids; cm->name; cm++) {
1282                 if (!strcasecmp(str, cm->name)) {
1283                         opts->clockid = cm->clockid;
1284                         return 0;
1285                 }
1286         }
1287
1288         opts->use_clockid = false;
1289         ui__warning("unknown clockid %s, check man page\n", ostr);
1290         return -1;
1291 }
1292
1293 static int record__parse_mmap_pages(const struct option *opt,
1294                                     const char *str,
1295                                     int unset __maybe_unused)
1296 {
1297         struct record_opts *opts = opt->value;
1298         char *s, *p;
1299         unsigned int mmap_pages;
1300         int ret;
1301
1302         if (!str)
1303                 return -EINVAL;
1304
1305         s = strdup(str);
1306         if (!s)
1307                 return -ENOMEM;
1308
1309         p = strchr(s, ',');
1310         if (p)
1311                 *p = '\0';
1312
1313         if (*s) {
1314                 ret = __perf_evlist__parse_mmap_pages(&mmap_pages, s);
1315                 if (ret)
1316                         goto out_free;
1317                 opts->mmap_pages = mmap_pages;
1318         }
1319
1320         if (!p) {
1321                 ret = 0;
1322                 goto out_free;
1323         }
1324
1325         ret = __perf_evlist__parse_mmap_pages(&mmap_pages, p + 1);
1326         if (ret)
1327                 goto out_free;
1328
1329         opts->auxtrace_mmap_pages = mmap_pages;
1330
1331 out_free:
1332         free(s);
1333         return ret;
1334 }
1335
1336 static void switch_output_size_warn(struct record *rec)
1337 {
1338         u64 wakeup_size = perf_evlist__mmap_size(rec->opts.mmap_pages);
1339         struct switch_output *s = &rec->switch_output;
1340
1341         wakeup_size /= 2;
1342
1343         if (s->size < wakeup_size) {
1344                 char buf[100];
1345
1346                 unit_number__scnprintf(buf, sizeof(buf), wakeup_size);
1347                 pr_warning("WARNING: switch-output data size lower than "
1348                            "wakeup kernel buffer size (%s) "
1349                            "expect bigger perf.data sizes\n", buf);
1350         }
1351 }
1352
1353 static int switch_output_setup(struct record *rec)
1354 {
1355         struct switch_output *s = &rec->switch_output;
1356         static struct parse_tag tags_size[] = {
1357                 { .tag  = 'B', .mult = 1       },
1358                 { .tag  = 'K', .mult = 1 << 10 },
1359                 { .tag  = 'M', .mult = 1 << 20 },
1360                 { .tag  = 'G', .mult = 1 << 30 },
1361                 { .tag  = 0 },
1362         };
1363         static struct parse_tag tags_time[] = {
1364                 { .tag  = 's', .mult = 1        },
1365                 { .tag  = 'm', .mult = 60       },
1366                 { .tag  = 'h', .mult = 60*60    },
1367                 { .tag  = 'd', .mult = 60*60*24 },
1368                 { .tag  = 0 },
1369         };
1370         unsigned long val;
1371
1372         if (!s->set)
1373                 return 0;
1374
1375         if (!strcmp(s->str, "signal")) {
1376                 s->signal = true;
1377                 pr_debug("switch-output with SIGUSR2 signal\n");
1378                 goto enabled;
1379         }
1380
1381         val = parse_tag_value(s->str, tags_size);
1382         if (val != (unsigned long) -1) {
1383                 s->size = val;
1384                 pr_debug("switch-output with %s size threshold\n", s->str);
1385                 goto enabled;
1386         }
1387
1388         val = parse_tag_value(s->str, tags_time);
1389         if (val != (unsigned long) -1) {
1390                 s->time = val;
1391                 pr_debug("switch-output with %s time threshold (%lu seconds)\n",
1392                          s->str, s->time);
1393                 goto enabled;
1394         }
1395
1396         return -1;
1397
1398 enabled:
1399         rec->timestamp_filename = true;
1400         s->enabled              = true;
1401
1402         if (s->size && !rec->opts.no_buffering)
1403                 switch_output_size_warn(rec);
1404
1405         return 0;
1406 }
1407
/* Usage strings shown by parse_options() for 'perf record'. */
static const char * const __record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL
};
/* Non-static so it can be referenced from other builtins. */
const char * const *record_usage = __record_usage;
1414
/*
 * XXX Ideally would be local to cmd_record() and passed to a record__new
 * because we need to have access to it in record__exit, that is called
 * after cmd_record() exits, but since record_options need to be accessible to
 * builtin-script, leave it here.
 *
 * At least we don't ouch it in all the other functions here directly.
 *
 * Just say no to tons of global variables, sigh.
 */
static struct record record = {
	.opts = {
		.sample_time	     = true,
		/*
		 * NOTE(review): UINT_MAX/ULLONG_MAX appear to act as
		 * "not set by the user" sentinels — confirm against the
		 * record_opts consumers before relying on this.
		 */
		.mmap_pages	     = UINT_MAX,
		.user_freq	     = UINT_MAX,
		.user_interval	     = ULLONG_MAX,
		.freq		     = 4000,
		.target		     = {
			.uses_mmap   = true,
			.default_per_cpu = true,
		},
		.proc_map_timeout     = 500,
	},
	/* Event delivery callbacks used while processing the recorded data. */
	.tool = {
		.sample		= process_sample_event,
		.fork		= perf_event__process_fork,
		.exit		= perf_event__process_exit,
		.comm		= perf_event__process_comm,
		.namespaces	= perf_event__process_namespaces,
		.mmap		= perf_event__process_mmap,
		.mmap2		= perf_event__process_mmap2,
		.ordered_events = true,
	},
};
1449
/* Help text for -g/--call-graph, built from the shared callchain help. */
const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
	"\n\t\t\t\tDefault: fp";

/* --dry-run: parse options then exit without recording. */
static bool dry_run;
1454
1455 /*
1456  * XXX Will stay a global variable till we fix builtin-script.c to stop messing
1457  * with it and switch to use the library functions in perf_evlist that came
1458  * from builtin-record.c, i.e. use record_opts,
1459  * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
1460  * using pipes, etc.
1461  */
1462 static struct option __record_options[] = {
1463         OPT_CALLBACK('e', "event", &record.evlist, "event",
1464                      "event selector. use 'perf list' to list available events",
1465                      parse_events_option),
1466         OPT_CALLBACK(0, "filter", &record.evlist, "filter",
1467                      "event filter", parse_filter),
1468         OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
1469                            NULL, "don't record events from perf itself",
1470                            exclude_perf),
1471         OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
1472                     "record events on existing process id"),
1473         OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
1474                     "record events on existing thread id"),
1475         OPT_INTEGER('r', "realtime", &record.realtime_prio,
1476                     "collect data with this RT SCHED_FIFO priority"),
1477         OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
1478                     "collect data without buffering"),
1479         OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
1480                     "collect raw sample records from all opened counters"),
1481         OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
1482                             "system-wide collection from all CPUs"),
1483         OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
1484                     "list of cpus to monitor"),
1485         OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
1486         OPT_STRING('o', "output", &record.data.file.path, "file",
1487                     "output file name"),
1488         OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
1489                         &record.opts.no_inherit_set,
1490                         "child tasks do not inherit counters"),
1491         OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize,
1492                     "synthesize non-sample events at the end of output"),
1493         OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"),
1494         OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
1495         OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
1496                      "number of mmap data pages and AUX area tracing mmap pages",
1497                      record__parse_mmap_pages),
1498         OPT_BOOLEAN(0, "group", &record.opts.group,
1499                     "put the counters into a counter group"),
1500         OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
1501                            NULL, "enables call-graph recording" ,
1502                            &record_callchain_opt),
1503         OPT_CALLBACK(0, "call-graph", &record.opts,
1504                      "record_mode[,record_size]", record_callchain_help,
1505                      &record_parse_callchain_opt),
1506         OPT_INCR('v', "verbose", &verbose,
1507                     "be more verbose (show counter open errors, etc)"),
1508         OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
1509         OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
1510                     "per thread counts"),
1511         OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
1512         OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr,
1513                     "Record the sample physical addresses"),
1514         OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"),
1515         OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
1516                         &record.opts.sample_time_set,
1517                         "Record the sample timestamps"),
1518         OPT_BOOLEAN('P', "period", &record.opts.period, "Record the sample period"),
1519         OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
1520                     "don't sample"),
1521         OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache,
1522                         &record.no_buildid_cache_set,
1523                         "do not update the buildid cache"),
1524         OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid,
1525                         &record.no_buildid_set,
1526                         "do not collect buildids in perf.data"),
1527         OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
1528                      "monitor event in cgroup name only",
1529                      parse_cgroups),
1530         OPT_UINTEGER('D', "delay", &record.opts.initial_delay,
1531                   "ms to wait before starting measurement after program start"),
1532         OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
1533                    "user to profile"),
1534
1535         OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
1536                      "branch any", "sample any taken branches",
1537                      parse_branch_stack),
1538
1539         OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
1540                      "branch filter mask", "branch stack filter modes",
1541                      parse_branch_stack),
1542         OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
1543                     "sample by weight (on special events only)"),
1544         OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
1545                     "sample transaction flags (special events only)"),
1546         OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
1547                     "use per-thread mmaps"),
1548         OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
1549                     "sample selected machine registers on interrupt,"
1550                     " use -I ? to list register names", parse_regs),
1551         OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register",
1552                     "sample selected machine registers on interrupt,"
1553                     " use -I ? to list register names", parse_regs),
1554         OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
1555                     "Record running/enabled time of read (:S) events"),
1556         OPT_CALLBACK('k', "clockid", &record.opts,
1557         "clockid", "clockid to use for events, see clock_gettime()",
1558         parse_clockid),
1559         OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
1560                           "opts", "AUX area tracing Snapshot Mode", ""),
1561         OPT_UINTEGER(0, "proc-map-timeout", &record.opts.proc_map_timeout,
1562                         "per thread proc mmap processing timeout in ms"),
1563         OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces,
1564                     "Record namespaces events"),
1565         OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events,
1566                     "Record context switch events"),
1567         OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
1568                          "Configure all used events to run in kernel space.",
1569                          PARSE_OPT_EXCLUSIVE),
1570         OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user,
1571                          "Configure all used events to run in user space.",
1572                          PARSE_OPT_EXCLUSIVE),
1573         OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path",
1574                    "clang binary to use for compiling BPF scriptlets"),
1575         OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options",
1576                    "options passed to clang when compiling BPF scriptlets"),
1577         OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
1578                    "file", "vmlinux pathname"),
1579         OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
1580                     "Record build-id of all DSOs regardless of hits"),
1581         OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
1582                     "append timestamp to output filename"),
1583         OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str,
1584                           &record.switch_output.set, "signal,size,time",
1585                           "Switch output when receive SIGUSR2 or cross size,time threshold",
1586                           "signal"),
1587         OPT_BOOLEAN(0, "dry-run", &dry_run,
1588                     "Parse options then exit"),
1589         OPT_END()
1590 };
1591
1592 struct option *record_options = __record_options;
1593
/*
 * Entry point for 'perf record'.  Parses command-line options, validates
 * the target/event configuration, sets up auxtrace, BPF and build-id
 * handling, then hands off to __cmd_record() for the actual recording.
 * Returns 0 on success or a negative error code.
 */
int cmd_record(int argc, const char **argv)
{
	int err;
	struct record *rec = &record;
	char errbuf[BUFSIZ];

	/* Mark BPF-related options as unavailable when libbpf is not built in. */
#ifndef HAVE_LIBBPF_SUPPORT
# define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c)
	set_nobuild('\0', "clang-path", true);
	set_nobuild('\0', "clang-opt", true);
# undef set_nobuild
#endif

	/*
	 * Without BPF prologue support, --vmlinux is disabled too; pick the
	 * most specific build-time reason to report to the user.
	 */
#ifndef HAVE_BPF_PROLOGUE
# if !defined (HAVE_DWARF_SUPPORT)
#  define REASON  "NO_DWARF=1"
# elif !defined (HAVE_LIBBPF_SUPPORT)
#  define REASON  "NO_LIBBPF=1"
# else
#  define REASON  "this architecture doesn't support BPF prologue"
# endif
# define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c)
	set_nobuild('\0', "vmlinux", true);
# undef set_nobuild
# undef REASON
#endif

	rec->evlist = perf_evlist__new();
	if (rec->evlist == NULL)
		return -ENOMEM;

	/* Apply the user's perfconfig settings before parsing the command line. */
	err = perf_config(perf_record_config, rec);
	if (err)
		return err;

	argc = parse_options(argc, argv, record_options, record_usage,
			    PARSE_OPT_STOP_AT_NON_OPTION);
	if (quiet)
		perf_quiet_option();

	/* Make system wide (-a) the default target. */
	if (!argc && target__none(&rec->opts.target))
		rec->opts.target.system_wide = true;

	if (nr_cgroups && !rec->opts.target.system_wide) {
		usage_with_options_msg(record_usage, record_options,
			"cgroup monitoring only available in system-wide mode");

	}
	if (rec->opts.record_switch_events &&
	    !perf_can_record_switch_events()) {
		ui__error("kernel does not support recording context switch events\n");
		parse_options_usage(record_usage, record_options, "switch-events", 0);
		return -EINVAL;
	}

	if (switch_output_setup(rec)) {
		parse_options_usage(record_usage, record_options, "switch-output", 0);
		return -EINVAL;
	}

	/* Arm the periodic alarm used by --switch-output=<time>. */
	if (rec->switch_output.time) {
		signal(SIGALRM, alarm_sig_handler);
		alarm(rec->switch_output.time);
	}

	if (!rec->itr) {
		rec->itr = auxtrace_record__init(rec->evlist, &err);
		if (err)
			goto out;
	}

	err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
					      rec->opts.auxtrace_snapshot_opts);
	if (err)
		goto out;

	/*
	 * Allow aliases to facilitate the lookup of symbols for address
	 * filters. Refer to auxtrace_parse_filters().
	 */
	symbol_conf.allow_aliases = true;

	symbol__init(NULL);

	err = auxtrace_parse_filters(rec->evlist);
	if (err)
		goto out;

	/* --dry-run: the user only wanted option/filter parsing; stop here. */
	if (dry_run)
		goto out;

	err = bpf__setup_stdout(rec->evlist);
	if (err) {
		bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf));
		pr_err("ERROR: Setup BPF stdout failed: %s\n",
			 errbuf);
		goto out;
	}

	/* Default error for the allocation-style failures that follow. */
	err = -ENOMEM;

	if (symbol_conf.kptr_restrict)
		pr_warning(
"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
"check /proc/sys/kernel/kptr_restrict.\n\n"
"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
"file is not found in the buildid cache or in the vmlinux path.\n\n"
"Samples in kernel modules won't be resolved at all.\n\n"
"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
"even with a suitable vmlinux or kallsyms file.\n\n");

	if (rec->no_buildid_cache || rec->no_buildid) {
		disable_buildid_cache();
	} else if (rec->switch_output.enabled) {
		/*
		 * In 'perf record --switch-output', disable buildid
		 * generation by default to reduce data file switching
		 * overhead. Still generate buildid if they are required
		 * explicitly using
		 *
		 *  perf record --switch-output --no-no-buildid \
		 *              --no-no-buildid-cache
		 *
		 * Following code equals to:
		 *
		 * if ((rec->no_buildid || !rec->no_buildid_set) &&
		 *     (rec->no_buildid_cache || !rec->no_buildid_cache_set))
		 *         disable_buildid_cache();
		 */
		bool disable = true;

		if (rec->no_buildid_set && !rec->no_buildid)
			disable = false;
		if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
			disable = false;
		if (disable) {
			rec->no_buildid = true;
			rec->no_buildid_cache = true;
			disable_buildid_cache();
		}
	}

	/* Overwrite (flight-recorder) mode implies synthesizing at the tail. */
	if (record.opts.overwrite)
		record.opts.tail_synthesize = true;

	/* No events specified on the command line: fall back to the default. */
	if (rec->evlist->nr_entries == 0 &&
	    __perf_evlist__add_default(rec->evlist, !record.opts.no_samples) < 0) {
		pr_err("Not enough memory for event selector list\n");
		goto out;
	}

	if (rec->opts.target.tid && !rec->opts.no_inherit_set)
		rec->opts.no_inherit = true;

	/* Target validation only warns; a bad uid below is fatal. */
	err = target__validate(&rec->opts.target);
	if (err) {
		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__warning("%s", errbuf);
	}

	err = target__parse_uid(&rec->opts.target);
	if (err) {
		/* Snapshot errno before the strerror/UI calls can clobber it. */
		int saved_errno = errno;

		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__error("%s", errbuf);

		err = -saved_errno;
		goto out;
	}

	/* Enable ignoring missing threads when -u option is defined. */
	rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX;

	err = -ENOMEM;
	if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
		usage_with_options(record_usage, record_options);

	err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
	if (err)
		goto out;

	/*
	 * We take all buildids when the file contains
	 * AUX area tracing data because we do not decode the
	 * trace because it would take too long.
	 */
	if (rec->opts.full_auxtrace)
		rec->buildid_all = true;

	if (record_opts__config(&rec->opts)) {
		err = -EINVAL;
		goto out;
	}

	err = __cmd_record(&record, argc, argv);
out:
	/* Common teardown for both the success and every error path above. */
	perf_evlist__delete(rec->evlist);
	symbol__exit();
	auxtrace_record__free(rec->itr);
	return err;
}
1797
1798 static void snapshot_sig_handler(int sig __maybe_unused)
1799 {
1800         struct record *rec = &record;
1801
1802         if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
1803                 trigger_hit(&auxtrace_snapshot_trigger);
1804                 auxtrace_record__snapshot_started = 1;
1805                 if (auxtrace_record__snapshot_start(record.itr))
1806                         trigger_error(&auxtrace_snapshot_trigger);
1807         }
1808
1809         if (switch_output_signal(rec))
1810                 trigger_hit(&switch_output_trigger);
1811 }
1812
1813 static void alarm_sig_handler(int sig __maybe_unused)
1814 {
1815         struct record *rec = &record;
1816
1817         if (switch_output_time(rec))
1818                 trigger_hit(&switch_output_trigger);
1819 }