// SPDX-License-Identifier: GPL-2.0
/*
 * builtin-record.c
 *
 * Builtin record command: Record the profile of a workload
 * (or a CPU, or a PID) into the perf.data output file - for
 * later analysis via perf report.
 */
#include "builtin.h"

#include "perf.h"

#include "util/build-id.h"
#include "util/util.h"
#include <subcmd/parse-options.h>
#include "util/parse-events.h"
#include "util/config.h"

#include "util/callchain.h"
#include "util/cgroup.h"
#include "util/header.h"
#include "util/event.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/debug.h"
#include "util/session.h"
#include "util/tool.h"
#include "util/symbol.h"
#include "util/cpumap.h"
#include "util/thread_map.h"
#include "util/data.h"
#include "util/perf_regs.h"
#include "util/auxtrace.h"
#include "util/tsc.h"
#include "util/parse-branch-options.h"
#include "util/parse-regs-options.h"
#include "util/llvm-utils.h"
#include "util/bpf-loader.h"
#include "util/trigger.h"
#include "util/perf-hooks.h"
#include "util/cpu-set-sched.h"
#include "util/time-utils.h"
#include "util/units.h"
#include "util/bpf-event.h"
#include "asm/bug.h"

#include <errno.h>
#include <inttypes.h>
#include <locale.h>
#include <poll.h>
#include <unistd.h>
#include <sched.h>
#include <signal.h>
#include <sys/mman.h>
#include <sys/wait.h>
#include <linux/time64.h>

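/*
 * State for --switch-output style output rotation (see
 * record__switch_output() below). When num_files is set,
 * filenames/cur_file form a ring: the oldest dump is removed
 * before its slot is reused for the newest one, so at most
 * num_files output files are kept around.
 */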
struct switch_output {
        bool             enabled;
        bool             signal;
        unsigned long    size;
        unsigned long    time;
        const char      *str;
        bool             set;
        char             **filenames;
        int              num_files;
        int              cur_file;
};

struct record {
        struct perf_tool        tool;
        struct record_opts      opts;
        u64                     bytes_written;
        struct perf_data        data;
        struct auxtrace_record  *itr;
        struct perf_evlist      *evlist;
        struct perf_session     *session;
        int                     realtime_prio;
        bool                    no_buildid;
        bool                    no_buildid_set;
        bool                    no_buildid_cache;
        bool                    no_buildid_cache_set;
        bool                    buildid_all;
        bool                    timestamp_filename;
        bool                    timestamp_boundary;
        struct switch_output    switch_output;
        unsigned long long      samples;
        cpu_set_t               affinity_mask;
};

static volatile int auxtrace_record__snapshot_started;
static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
static DEFINE_TRIGGER(switch_output_trigger);

static const char *affinity_tags[PERF_AFFINITY_MAX] = {
        "SYS", "NODE", "CPU"
};

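/*
 * An output switch can be requested in three ways: by SIGUSR2, once the
 * amount of data written since the last switch crosses a size threshold,
 * or periodically by a timer. Each predicate below also requires that
 * switch_output_trigger is armed (ready), so the conditions only fire
 * while output switching is enabled and not already in progress.
 */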
static bool switch_output_signal(struct record *rec)
{
        return rec->switch_output.signal &&
               trigger_is_ready(&switch_output_trigger);
}

static bool switch_output_size(struct record *rec)
{
        return rec->switch_output.size &&
               trigger_is_ready(&switch_output_trigger) &&
               (rec->bytes_written >= rec->switch_output.size);
}

static bool switch_output_time(struct record *rec)
{
        return rec->switch_output.time &&
               trigger_is_ready(&switch_output_trigger);
}

static int record__write(struct record *rec, struct perf_mmap *map __maybe_unused,
                         void *bf, size_t size)
{
        struct perf_data_file *file = &rec->session->data->file;

        if (perf_data_file__write(file, bf, size) < 0) {
                pr_err("failed to write perf data, error: %m\n");
                return -1;
        }

        rec->bytes_written += size;

        if (switch_output_size(rec))
                trigger_hit(&switch_output_trigger);

        return 0;
}

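/*
 * Asynchronous trace writing, compiled in with HAVE_AIO_SUPPORT. When
 * nr_cblocks > 0 (set via record__aio_parse(), presumably by an --aio
 * style option), data copied out of the kernel ring buffers is handed to
 * POSIX AIO via aio_write() instead of being written synchronously, so
 * the tool can keep draining the mmaps while earlier writes complete in
 * the background.
 */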
#ifdef HAVE_AIO_SUPPORT
static int record__aio_write(struct aiocb *cblock, int trace_fd,
                void *buf, size_t size, off_t off)
{
        int rc;

        cblock->aio_fildes = trace_fd;
        cblock->aio_buf    = buf;
        cblock->aio_nbytes = size;
        cblock->aio_offset = off;
        cblock->aio_sigevent.sigev_notify = SIGEV_NONE;

        do {
                rc = aio_write(cblock);
                if (rc == 0) {
                        break;
                } else if (errno != EAGAIN) {
                        cblock->aio_fildes = -1;
                        pr_err("failed to queue perf data, error: %m\n");
                        break;
                }
        } while (1);

        return rc;
}

static int record__aio_complete(struct perf_mmap *md, struct aiocb *cblock)
{
        void *rem_buf;
        off_t rem_off;
        size_t rem_size;
        int rc, aio_errno;
        ssize_t aio_ret, written;

        aio_errno = aio_error(cblock);
        if (aio_errno == EINPROGRESS)
                return 0;

        written = aio_ret = aio_return(cblock);
        if (aio_ret < 0) {
                if (aio_errno != EINTR)
                        pr_err("failed to write perf data, error: %m\n");
                written = 0;
        }

        rem_size = cblock->aio_nbytes - written;

        if (rem_size == 0) {
                cblock->aio_fildes = -1;
                /*
                 * md->refcount is incremented in perf_mmap__push() for
                 * every enqueued aio write request so decrement it because
                 * the request is now complete.
                 */
                perf_mmap__put(md);
                rc = 1;
        } else {
                /*
                 * The aio write request may need to be restarted with the
                 * remainder if the kernel didn't write the whole chunk
                 * at once.
                 */
                rem_off = cblock->aio_offset + written;
                rem_buf = (void *)(cblock->aio_buf + written);
                record__aio_write(cblock, cblock->aio_fildes,
                                rem_buf, rem_size, rem_off);
                rc = 0;
        }

        return rc;
}

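/*
 * Reap completed aio writes. With sync_all == false this returns the
 * index of the first control block that is free for reuse, blocking in
 * aio_suspend() until one becomes available. With sync_all == true it
 * waits until every in-flight request has finished and then returns -1.
 */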
static int record__aio_sync(struct perf_mmap *md, bool sync_all)
{
        struct aiocb **aiocb = md->aio.aiocb;
        struct aiocb *cblocks = md->aio.cblocks;
        struct timespec timeout = { 0, 1000 * 1000 * 1 }; /* 1ms */
        int i, do_suspend;

        do {
                do_suspend = 0;
                for (i = 0; i < md->aio.nr_cblocks; ++i) {
                        if (cblocks[i].aio_fildes == -1 || record__aio_complete(md, &cblocks[i])) {
                                if (sync_all)
                                        aiocb[i] = NULL;
                                else
                                        return i;
                        } else {
                                /*
                                 * The started aio write is not complete yet,
                                 * so it has to be waited on before the slot
                                 * can be allocated again.
                                 */
                                aiocb[i] = &cblocks[i];
                                do_suspend = 1;
                        }
                }
                if (!do_suspend)
                        return -1;

                while (aio_suspend((const struct aiocb **)aiocb, md->aio.nr_cblocks, &timeout)) {
                        if (!(errno == EAGAIN || errno == EINTR))
                                pr_err("failed to sync perf data, error: %m\n");
                }
        } while (1);
}

static int record__aio_pushfn(void *to, struct aiocb *cblock, void *bf, size_t size, off_t off)
{
        struct record *rec = to;
        int ret, trace_fd = rec->session->data->file.fd;

        rec->samples++;

        ret = record__aio_write(cblock, trace_fd, bf, size, off);
        if (!ret) {
                rec->bytes_written += size;
                if (switch_output_size(rec))
                        trigger_hit(&switch_output_trigger);
        }

        return ret;
}

static off_t record__aio_get_pos(int trace_fd)
{
        return lseek(trace_fd, 0, SEEK_CUR);
}

static void record__aio_set_pos(int trace_fd, off_t pos)
{
        lseek(trace_fd, pos, SEEK_SET);
}

static void record__aio_mmap_read_sync(struct record *rec)
{
        int i;
        struct perf_evlist *evlist = rec->evlist;
        struct perf_mmap *maps = evlist->mmap;

        if (!rec->opts.nr_cblocks)
                return;

        for (i = 0; i < evlist->nr_mmaps; i++) {
                struct perf_mmap *map = &maps[i];

                if (map->base)
                        record__aio_sync(map, true);
        }
}

static int nr_cblocks_default = 1;
static int nr_cblocks_max = 4;

static int record__aio_parse(const struct option *opt,
                             const char *str,
                             int unset)
{
        struct record_opts *opts = (struct record_opts *)opt->value;

        if (unset) {
                opts->nr_cblocks = 0;
        } else {
                if (str)
                        opts->nr_cblocks = strtol(str, NULL, 0);
                if (!opts->nr_cblocks)
                        opts->nr_cblocks = nr_cblocks_default;
        }

        return 0;
}
#else /* HAVE_AIO_SUPPORT */
static int nr_cblocks_max = 0;

static int record__aio_sync(struct perf_mmap *md __maybe_unused, bool sync_all __maybe_unused)
{
        return -1;
}

static int record__aio_pushfn(void *to __maybe_unused, struct aiocb *cblock __maybe_unused,
                void *bf __maybe_unused, size_t size __maybe_unused, off_t off __maybe_unused)
{
        return -1;
}

static off_t record__aio_get_pos(int trace_fd __maybe_unused)
{
        return -1;
}

static void record__aio_set_pos(int trace_fd __maybe_unused, off_t pos __maybe_unused)
{
}

static void record__aio_mmap_read_sync(struct record *rec __maybe_unused)
{
}
#endif

static int record__aio_enabled(struct record *rec)
{
        return rec->opts.nr_cblocks > 0;
}

static int process_synthesized_event(struct perf_tool *tool,
                                     union perf_event *event,
                                     struct perf_sample *sample __maybe_unused,
                                     struct machine *machine __maybe_unused)
{
        struct record *rec = container_of(tool, struct record, tool);
        return record__write(rec, NULL, event, event->header.size);
}

static int record__pushfn(struct perf_mmap *map, void *to, void *bf, size_t size)
{
        struct record *rec = to;

        rec->samples++;
        return record__write(rec, map, bf, size);
}

static volatile int done;
static volatile int signr = -1;
static volatile int child_finished;

static void sig_handler(int sig)
{
        if (sig == SIGCHLD)
                child_finished = 1;
        else
                signr = sig;

        done = 1;
}

static void sigsegv_handler(int sig)
{
        perf_hooks__recover();
        sighandler_dump_stack(sig);
}

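/*
 * atexit() handler implementing the usual re-raise idiom: restore the
 * default disposition for the fatal signal we caught and raise it again,
 * so the parent sees perf as terminated by that signal rather than as a
 * normal exit.
 */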
static void record__sig_exit(void)
{
        if (signr == -1)
                return;

        signal(signr, SIG_DFL);
        raise(signr);
}

#ifdef HAVE_AUXTRACE_SUPPORT

static int record__process_auxtrace(struct perf_tool *tool,
                                    struct perf_mmap *map,
                                    union perf_event *event, void *data1,
                                    size_t len1, void *data2, size_t len2)
{
        struct record *rec = container_of(tool, struct record, tool);
        struct perf_data *data = &rec->data;
        size_t padding;
        u8 pad[8] = {0};

        if (!perf_data__is_pipe(data) && !perf_data__is_dir(data)) {
                off_t file_offset;
                int fd = perf_data__fd(data);
                int err;

                file_offset = lseek(fd, 0, SEEK_CUR);
                if (file_offset == -1)
                        return -1;
                err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
                                                     event, file_offset);
                if (err)
                        return err;
        }

        /* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
        padding = (len1 + len2) & 7;
        if (padding)
                padding = 8 - padding;

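        /* e.g. len1 + len2 == 13: 13 & 7 == 5, padding == 3, so 16 bytes reach the file */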
        record__write(rec, map, event, event->header.size);
        record__write(rec, map, data1, len1);
        if (len2)
                record__write(rec, map, data2, len2);
        record__write(rec, map, &pad, padding);

        return 0;
}

static int record__auxtrace_mmap_read(struct record *rec,
                                      struct perf_mmap *map)
{
        int ret;

        ret = auxtrace_mmap__read(map, rec->itr, &rec->tool,
                                  record__process_auxtrace);
        if (ret < 0)
                return ret;

        if (ret)
                rec->samples++;

        return 0;
}

static int record__auxtrace_mmap_read_snapshot(struct record *rec,
                                               struct perf_mmap *map)
{
        int ret;

        ret = auxtrace_mmap__read_snapshot(map, rec->itr, &rec->tool,
                                           record__process_auxtrace,
                                           rec->opts.auxtrace_snapshot_size);
        if (ret < 0)
                return ret;

        if (ret)
                rec->samples++;

        return 0;
}

static int record__auxtrace_read_snapshot_all(struct record *rec)
{
        int i;
        int rc = 0;

        for (i = 0; i < rec->evlist->nr_mmaps; i++) {
                struct perf_mmap *map = &rec->evlist->mmap[i];

                if (!map->auxtrace_mmap.base)
                        continue;

                if (record__auxtrace_mmap_read_snapshot(rec, map) != 0) {
                        rc = -1;
                        goto out;
                }
        }
out:
        return rc;
}

static void record__read_auxtrace_snapshot(struct record *rec)
{
        pr_debug("Recording AUX area tracing snapshot\n");
        if (record__auxtrace_read_snapshot_all(rec) < 0) {
                trigger_error(&auxtrace_snapshot_trigger);
        } else {
                if (auxtrace_record__snapshot_finish(rec->itr))
                        trigger_error(&auxtrace_snapshot_trigger);
                else
                        trigger_ready(&auxtrace_snapshot_trigger);
        }
}

static int record__auxtrace_init(struct record *rec)
{
        int err;

        if (!rec->itr) {
                rec->itr = auxtrace_record__init(rec->evlist, &err);
                if (err)
                        return err;
        }

        err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
                                              rec->opts.auxtrace_snapshot_opts);
        if (err)
                return err;

        return auxtrace_parse_filters(rec->evlist);
}

#else

static inline
int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
                               struct perf_mmap *map __maybe_unused)
{
        return 0;
}

static inline
void record__read_auxtrace_snapshot(struct record *rec __maybe_unused)
{
}

static inline
int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
{
        return 0;
}

static int record__auxtrace_init(struct record *rec __maybe_unused)
{
        return 0;
}

#endif

static int record__mmap_evlist(struct record *rec,
                               struct perf_evlist *evlist)
{
        struct record_opts *opts = &rec->opts;
        char msg[512];

        if (opts->affinity != PERF_AFFINITY_SYS)
                cpu__setup_cpunode_map();

        if (perf_evlist__mmap_ex(evlist, opts->mmap_pages,
                                 opts->auxtrace_mmap_pages,
                                 opts->auxtrace_snapshot_mode,
                                 opts->nr_cblocks, opts->affinity) < 0) {
                if (errno == EPERM) {
                        pr_err("Permission error mapping pages.\n"
                               "Consider increasing "
                               "/proc/sys/kernel/perf_event_mlock_kb,\n"
                               "or try again with a smaller value of -m/--mmap_pages.\n"
                               "(current value: %u,%u)\n",
                               opts->mmap_pages, opts->auxtrace_mmap_pages);
                        return -errno;
                } else {
                        pr_err("failed to mmap with %d (%s)\n", errno,
                                str_error_r(errno, msg, sizeof(msg)));
                        if (errno)
                                return -errno;
                        else
                                return -EINVAL;
                }
        }
        return 0;
}

static int record__mmap(struct record *rec)
{
        return record__mmap_evlist(rec, rec->evlist);
}

static int record__open(struct record *rec)
{
        char msg[BUFSIZ];
        struct perf_evsel *pos;
        struct perf_evlist *evlist = rec->evlist;
        struct perf_session *session = rec->session;
        struct record_opts *opts = &rec->opts;
        int rc = 0;

        /*
         * For initial_delay we need to add a dummy event so that we can track
         * PERF_RECORD_MMAP while we wait for the initial delay to enable the
         * real events, the ones requested by the user.
         */
        if (opts->initial_delay) {
                if (perf_evlist__add_dummy(evlist))
                        return -ENOMEM;

                pos = perf_evlist__first(evlist);
                pos->tracking = 0;
                pos = perf_evlist__last(evlist);
                pos->tracking = 1;
                pos->attr.enable_on_exec = 1;
        }

        perf_evlist__config(evlist, opts, &callchain_param);

        evlist__for_each_entry(evlist, pos) {
try_again:
                if (perf_evsel__open(pos, pos->cpus, pos->threads) < 0) {
                        if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
                                if (verbose > 0)
                                        ui__warning("%s\n", msg);
                                goto try_again;
                        }
                        if ((errno == EINVAL || errno == EBADF) &&
                            pos->leader != pos &&
                            pos->weak_group) {
                                pos = perf_evlist__reset_weak_group(evlist, pos);
                                goto try_again;
                        }
                        rc = -errno;
                        perf_evsel__open_strerror(pos, &opts->target,
                                                  errno, msg, sizeof(msg));
                        ui__error("%s\n", msg);
                        goto out;
                }

                pos->supported = true;
        }

        if (perf_evlist__apply_filters(evlist, &pos)) {
                pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
                        pos->filter, perf_evsel__name(pos), errno,
                        str_error_r(errno, msg, sizeof(msg)));
                rc = -1;
                goto out;
        }

        rc = record__mmap(rec);
        if (rc)
                goto out;

        session->evlist = evlist;
        perf_session__set_id_hdr_size(session);
out:
        return rc;
}

static int process_sample_event(struct perf_tool *tool,
                                union perf_event *event,
                                struct perf_sample *sample,
                                struct perf_evsel *evsel,
                                struct machine *machine)
{
        struct record *rec = container_of(tool, struct record, tool);

        if (rec->evlist->first_sample_time == 0)
                rec->evlist->first_sample_time = sample->time;

        rec->evlist->last_sample_time = sample->time;

        if (rec->buildid_all)
                return 0;

        rec->samples++;
        return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
}

static int process_buildids(struct record *rec)
{
        struct perf_session *session = rec->session;

        if (perf_data__size(&rec->data) == 0)
                return 0;

        /*
         * During this process, it'll load the kernel map and replace
         * dso->long_name with the real pathname it found.  In this case
         * we prefer the vmlinux path like
         *   /lib/modules/3.16.4/build/vmlinux
         *
         * rather than the build-id path (in the debug directory):
         *   $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
         */
        symbol_conf.ignore_vmlinux_buildid = true;

        /*
         * If --buildid-all is given, it marks all DSOs regardless of hits,
         * so there is no need to process samples. But if timestamp_boundary
         * is enabled, it still needs to walk all samples to get the
         * timestamps of the first/last samples.
         */
        if (rec->buildid_all && !rec->timestamp_boundary)
                rec->tool.sample = NULL;

        return perf_session__process_events(session);
}

static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
{
        int err;
        struct perf_tool *tool = data;
        /*
         * As for the guest kernel, when processing the record and report
         * subcommands we arrange the module mmaps prior to the guest kernel
         * mmap and trigger a preload of the dso, because by default guest
         * module symbols are loaded from guest kallsyms instead of
         * /lib/modules/XXX/XXX. This avoids missing symbols when the first
         * address is in a module instead of in the guest kernel.
         */
        err = perf_event__synthesize_modules(tool, process_synthesized_event,
                                             machine);
        if (err < 0)
                pr_err("Couldn't record guest kernel [%d]'s reference"
                       " relocation symbol.\n", machine->pid);

        /*
         * We use _stext for the guest kernel because the guest kernel's
         * /proc/kallsyms sometimes has no _text.
         */
        err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
                                                 machine);
        if (err < 0)
                pr_err("Couldn't record guest kernel [%d]'s reference"
                       " relocation symbol.\n", machine->pid);
}

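/*
 * PERF_RECORD_FINISHED_ROUND is written after each full pass over the
 * mmaps (see record__mmap_read_evlist()). It tells the processing side
 * that everything buffered so far can be sorted and flushed, which keeps
 * the reordering window needed for timestamp-ordered processing bounded.
 */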
static struct perf_event_header finished_round_event = {
        .size = sizeof(struct perf_event_header),
        .type = PERF_RECORD_FINISHED_ROUND,
};

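/*
 * With an affinity mode other than the default SYS (NODE or CPU, per
 * affinity_tags above), migrate the recording thread next to the ring
 * buffer it is about to drain, so the copies do not cross NUMA nodes.
 */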
static void record__adjust_affinity(struct record *rec, struct perf_mmap *map)
{
        if (rec->opts.affinity != PERF_AFFINITY_SYS &&
            !CPU_EQUAL(&rec->affinity_mask, &map->affinity_mask)) {
                CPU_ZERO(&rec->affinity_mask);
                CPU_OR(&rec->affinity_mask, &rec->affinity_mask, &map->affinity_mask);
                sched_setaffinity(0, sizeof(rec->affinity_mask), &rec->affinity_mask);
        }
}

static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evlist,
                                    bool overwrite)
{
        u64 bytes_written = rec->bytes_written;
        int i;
        int rc = 0;
        struct perf_mmap *maps;
        int trace_fd = rec->data.file.fd;
        off_t off;

        if (!evlist)
                return 0;

        maps = overwrite ? evlist->overwrite_mmap : evlist->mmap;
        if (!maps)
                return 0;

        if (overwrite && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
                return 0;

        if (record__aio_enabled(rec))
                off = record__aio_get_pos(trace_fd);

        for (i = 0; i < evlist->nr_mmaps; i++) {
                struct perf_mmap *map = &maps[i];

                if (map->base) {
                        record__adjust_affinity(rec, map);
                        if (!record__aio_enabled(rec)) {
                                if (perf_mmap__push(map, rec, record__pushfn) != 0) {
                                        rc = -1;
                                        goto out;
                                }
                        } else {
                                int idx;
                                /*
                                 * Call record__aio_sync() to wait till map->data buffer
                                 * becomes available after previous aio write request.
                                 */
                                idx = record__aio_sync(map, false);
                                if (perf_mmap__aio_push(map, rec, idx, record__aio_pushfn, &off) != 0) {
                                        record__aio_set_pos(trace_fd, off);
                                        rc = -1;
                                        goto out;
                                }
                        }
                }

                if (map->auxtrace_mmap.base && !rec->opts.auxtrace_snapshot_mode &&
                    record__auxtrace_mmap_read(rec, map) != 0) {
                        rc = -1;
                        goto out;
                }
        }

        if (record__aio_enabled(rec))
                record__aio_set_pos(trace_fd, off);

        /*
         * Mark the round finished in case we wrote
         * at least one event.
         */
        if (bytes_written != rec->bytes_written)
                rc = record__write(rec, NULL, &finished_round_event, sizeof(finished_round_event));

        if (overwrite)
                perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
out:
        return rc;
}

static int record__mmap_read_all(struct record *rec)
{
        int err;

        err = record__mmap_read_evlist(rec, rec->evlist, false);
        if (err)
                return err;

        return record__mmap_read_evlist(rec, rec->evlist, true);
}

static void record__init_features(struct record *rec)
{
        struct perf_session *session = rec->session;
        int feat;

        for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
                perf_header__set_feat(&session->header, feat);

        if (rec->no_buildid)
                perf_header__clear_feat(&session->header, HEADER_BUILD_ID);

        if (!have_tracepoints(&rec->evlist->entries))
                perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);

        if (!rec->opts.branch_stack)
                perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);

        if (!rec->opts.full_auxtrace)
                perf_header__clear_feat(&session->header, HEADER_AUXTRACE);

        if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns))
                perf_header__clear_feat(&session->header, HEADER_CLOCKID);

        perf_header__clear_feat(&session->header, HEADER_DIR_FORMAT);

        perf_header__clear_feat(&session->header, HEADER_STAT);
}

static void
record__finish_output(struct record *rec)
{
        struct perf_data *data = &rec->data;
        int fd = perf_data__fd(data);

        if (data->is_pipe)
                return;

        rec->session->header.data_size += rec->bytes_written;
        data->file.size = lseek(perf_data__fd(data), 0, SEEK_CUR);

        if (!rec->no_buildid) {
                process_buildids(rec);

                if (rec->buildid_all)
                        dsos__hit_all(rec->session);
        }
        perf_session__write_header(rec->session, rec->evlist, fd, true);

        return;
}

static int record__synthesize_workload(struct record *rec, bool tail)
{
        int err;
        struct thread_map *thread_map;

        if (rec->opts.tail_synthesize != tail)
                return 0;

        thread_map = thread_map__new_by_tid(rec->evlist->workload.pid);
        if (thread_map == NULL)
                return -1;

        err = perf_event__synthesize_thread_map(&rec->tool, thread_map,
                                                 process_synthesized_event,
                                                 &rec->session->machines.host,
                                                 rec->opts.sample_address);
        thread_map__put(thread_map);
        return err;
}

static int record__synthesize(struct record *rec, bool tail);

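/*
 * Rotate the output: flush pending data, finalize the current file, then
 * have perf_data__switch() move it aside under a timestamped name and
 * reopen a fresh one. Unless called at exit, tracking events (and, for a
 * forked workload, a synthesized thread map) are emitted again so the
 * new file is self-contained.
 */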
static int
record__switch_output(struct record *rec, bool at_exit)
{
        struct perf_data *data = &rec->data;
        int fd, err;
        char *new_filename;

        /* "InvalidTimestamp" is the same size as a real timestamp, e.g. "2015122520103046" */
        char timestamp[] = "InvalidTimestamp";

        record__aio_mmap_read_sync(rec);

        record__synthesize(rec, true);
        if (target__none(&rec->opts.target))
                record__synthesize_workload(rec, true);

        rec->samples = 0;
        record__finish_output(rec);
        err = fetch_current_timestamp(timestamp, sizeof(timestamp));
        if (err) {
                pr_err("Failed to get current timestamp\n");
                return -EINVAL;
        }

        fd = perf_data__switch(data, timestamp,
                                    rec->session->header.data_offset,
                                    at_exit, &new_filename);
        if (fd >= 0 && !at_exit) {
                rec->bytes_written = 0;
                rec->session->header.data_size = 0;
        }

        if (!quiet)
                fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
                        data->path, timestamp);

        if (rec->switch_output.num_files) {
                int n = rec->switch_output.cur_file + 1;

                if (n >= rec->switch_output.num_files)
                        n = 0;
                rec->switch_output.cur_file = n;
                if (rec->switch_output.filenames[n]) {
                        remove(rec->switch_output.filenames[n]);
                        free(rec->switch_output.filenames[n]);
                }
                rec->switch_output.filenames[n] = new_filename;
        } else {
                free(new_filename);
        }

        /* Output tracking events */
        if (!at_exit) {
                record__synthesize(rec, false);

                /*
                 * In 'perf record --switch-output' without -a,
                 * record__synthesize() in record__switch_output() won't
                 * generate tracking events because there's no thread_map
                 * in evlist, which causes the newly created perf.data to
                 * contain no map and comm information.
                 * Create a fake thread_map and directly call
                 * perf_event__synthesize_thread_map() for those events.
                 */
                if (target__none(&rec->opts.target))
                        record__synthesize_workload(rec, false);
        }
        return fd;
}

static volatile int workload_exec_errno;

/*
 * perf_evlist__prepare_workload will send a SIGUSR1 if the fork fails,
 * since we asked for it by setting want_signal to true.
 */
static void workload_exec_failed_signal(int signo __maybe_unused,
                                        siginfo_t *info,
                                        void *ucontext __maybe_unused)
{
        workload_exec_errno = info->si_value.sival_int;
        done = 1;
        child_finished = 1;
}

static void snapshot_sig_handler(int sig);
static void alarm_sig_handler(int sig);

int __weak
perf_event__synth_time_conv(const struct perf_event_mmap_page *pc __maybe_unused,
                            struct perf_tool *tool __maybe_unused,
                            perf_event__handler_t process __maybe_unused,
                            struct machine *machine __maybe_unused)
{
        return 0;
}

static const struct perf_event_mmap_page *
perf_evlist__pick_pc(struct perf_evlist *evlist)
{
        if (evlist) {
                if (evlist->mmap && evlist->mmap[0].base)
                        return evlist->mmap[0].base;
                if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].base)
                        return evlist->overwrite_mmap[0].base;
        }
        return NULL;
}

static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
{
        const struct perf_event_mmap_page *pc;

        pc = perf_evlist__pick_pc(rec->evlist);
        if (pc)
                return pc;
        return NULL;
}

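/*
 * Synthesize PERF_RECORD_* events for state that predates the start of
 * recording (event attributes, tracing data, kernel/module maps, thread
 * and cpu maps, BPF programs, ...), since the kernel only emits events
 * for what happens after the counters are enabled. Runs at startup and,
 * when tail_synthesize is set, again at the end of the session.
 */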
static int record__synthesize(struct record *rec, bool tail)
{
        struct perf_session *session = rec->session;
        struct machine *machine = &session->machines.host;
        struct perf_data *data = &rec->data;
        struct record_opts *opts = &rec->opts;
        struct perf_tool *tool = &rec->tool;
        int fd = perf_data__fd(data);
        int err = 0;

        if (rec->opts.tail_synthesize != tail)
                return 0;

        if (data->is_pipe) {
                /*
                 * We need to synthesize events first, because some
                 * features work on top of them (on the report side).
                 */
                err = perf_event__synthesize_attrs(tool, rec->evlist,
                                                   process_synthesized_event);
                if (err < 0) {
                        pr_err("Couldn't synthesize attrs.\n");
                        goto out;
                }

                err = perf_event__synthesize_features(tool, session, rec->evlist,
                                                      process_synthesized_event);
                if (err < 0) {
                        pr_err("Couldn't synthesize features.\n");
                        return err;
                }

                if (have_tracepoints(&rec->evlist->entries)) {
                        /*
                         * FIXME: err <= 0 here actually means that
                         * there were no tracepoints, so it's not really
                         * an error, just that we don't need to
                         * synthesize anything.  We really have to
                         * return this more properly and also
                         * propagate errors that now are calling die()
                         */
                        err = perf_event__synthesize_tracing_data(tool, fd, rec->evlist,
                                                                  process_synthesized_event);
                        if (err <= 0) {
                                pr_err("Couldn't record tracing data.\n");
                                goto out;
                        }
                        rec->bytes_written += err;
                }
        }

        err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
                                          process_synthesized_event, machine);
        if (err)
                goto out;

        if (rec->opts.full_auxtrace) {
                err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
                                        session, process_synthesized_event);
                if (err)
                        goto out;
        }

        if (!perf_evlist__exclude_kernel(rec->evlist)) {
                err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
                                                         machine);
                WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
                                   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
                                   "Check /proc/kallsyms permission or run as root.\n");

                err = perf_event__synthesize_modules(tool, process_synthesized_event,
                                                     machine);
                WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
                                   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
                                   "Check /proc/modules permission or run as root.\n");
        }

        if (perf_guest) {
                machines__process_guests(&session->machines,
                                         perf_event__synthesize_guest_os, tool);
        }

        err = perf_event__synthesize_extra_attr(&rec->tool,
                                                rec->evlist,
                                                process_synthesized_event,
                                                data->is_pipe);
        if (err)
                goto out;

        err = perf_event__synthesize_thread_map2(&rec->tool, rec->evlist->threads,
                                                 process_synthesized_event,
                                                 NULL);
        if (err < 0) {
                pr_err("Couldn't synthesize thread map.\n");
                return err;
        }

        err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->cpus,
                                             process_synthesized_event, NULL);
        if (err < 0) {
                pr_err("Couldn't synthesize cpu map.\n");
                return err;
        }

        err = perf_event__synthesize_bpf_events(session, process_synthesized_event,
                                                machine, opts);
        if (err < 0)
                pr_warning("Couldn't synthesize bpf events.\n");

        err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
                                            process_synthesized_event, opts->sample_address,
                                            1);
out:
        return err;
}

static int __cmd_record(struct record *rec, int argc, const char **argv)
{
        int err;
        int status = 0;
        unsigned long waking = 0;
        const bool forks = argc > 0;
        struct perf_tool *tool = &rec->tool;
        struct record_opts *opts = &rec->opts;
        struct perf_data *data = &rec->data;
        struct perf_session *session;
        bool disabled = false, draining = false;
        struct perf_evlist *sb_evlist = NULL;
        int fd;

        atexit(record__sig_exit);
        signal(SIGCHLD, sig_handler);
        signal(SIGINT, sig_handler);
        signal(SIGTERM, sig_handler);
        signal(SIGSEGV, sigsegv_handler);

        if (rec->opts.record_namespaces)
                tool->namespace_events = true;

        if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) {
                signal(SIGUSR2, snapshot_sig_handler);
                if (rec->opts.auxtrace_snapshot_mode)
                        trigger_on(&auxtrace_snapshot_trigger);
                if (rec->switch_output.enabled)
                        trigger_on(&switch_output_trigger);
        } else {
                signal(SIGUSR2, SIG_IGN);
        }

        session = perf_session__new(data, false, tool);
        if (session == NULL) {
                pr_err("Perf session creation failed.\n");
                return -1;
        }

        fd = perf_data__fd(data);
        rec->session = session;

        record__init_features(rec);

        if (rec->opts.use_clockid && rec->opts.clockid_res_ns)
                session->header.env.clockid_res_ns = rec->opts.clockid_res_ns;

        if (forks) {
                err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
                                                    argv, data->is_pipe,
                                                    workload_exec_failed_signal);
                if (err < 0) {
                        pr_err("Couldn't run the workload!\n");
                        status = err;
                        goto out_delete_session;
                }
        }

        /*
         * If we have just a single event and are sending data through a
         * pipe, we need to force sample id allocation, because we
         * synthesize the event name through the pipe and need the id
         * for that.
         */
        if (data->is_pipe && rec->evlist->nr_entries == 1)
                rec->opts.sample_id = true;

        if (record__open(rec) != 0) {
                err = -1;
                goto out_child;
        }

        err = bpf__apply_obj_config();
        if (err) {
                char errbuf[BUFSIZ];

                bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
                pr_err("ERROR: Apply config to BPF failed: %s\n",
                         errbuf);
                goto out_child;
        }

        /*
         * Normally perf_session__new would do this, but it doesn't have the
         * evlist.
         */
        if (rec->tool.ordered_events && !perf_evlist__sample_id_all(rec->evlist)) {
                pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
                rec->tool.ordered_events = false;
        }

        if (!rec->evlist->nr_groups)
                perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);

        if (data->is_pipe) {
                err = perf_header__write_pipe(fd);
                if (err < 0)
                        goto out_child;
        } else {
                err = perf_session__write_header(session, rec->evlist, fd, false);
                if (err < 0)
                        goto out_child;
        }

        if (!rec->no_buildid
            && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
                pr_err("Couldn't generate buildids. "
                       "Use --no-buildid to profile anyway.\n");
                err = -1;
                goto out_child;
        }

        if (!opts->no_bpf_event)
                bpf_event__add_sb_event(&sb_evlist, &session->header.env);

        if (perf_evlist__start_sb_thread(sb_evlist, &rec->opts.target)) {
                pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n");
                opts->no_bpf_event = true;
        }

        err = record__synthesize(rec, false);
        if (err < 0)
                goto out_child;

        if (rec->realtime_prio) {
                struct sched_param param;

                param.sched_priority = rec->realtime_prio;
                if (sched_setscheduler(0, SCHED_FIFO, &param)) {
                        pr_err("Could not set realtime priority.\n");
                        err = -1;
                        goto out_child;
                }
        }

        /*
         * When perf is starting the traced process, all the events
         * (apart from group members) have enable_on_exec=1 set,
         * so don't spoil it by prematurely enabling them.
         */
        if (!target__none(&opts->target) && !opts->initial_delay)
                perf_evlist__enable(rec->evlist);

        /*
         * Let the child rip
         */
        if (forks) {
                struct machine *machine = &session->machines.host;
                union perf_event *event;
                pid_t tgid;

                event = malloc(sizeof(event->comm) + machine->id_hdr_size);
                if (event == NULL) {
                        err = -ENOMEM;
                        goto out_child;
                }

                /*
                 * Some H/W events are generated before COMM event
                 * which is emitted during exec(), so perf script
                 * cannot see a correct process name for those events.
                 * Synthesize COMM event to prevent it.
                 */
                tgid = perf_event__synthesize_comm(tool, event,
                                                   rec->evlist->workload.pid,
                                                   process_synthesized_event,
                                                   machine);
                free(event);

                if (tgid == -1)
                        goto out_child;

                event = malloc(sizeof(event->namespaces) +
                               (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
                               machine->id_hdr_size);
                if (event == NULL) {
                        err = -ENOMEM;
                        goto out_child;
                }

                /*
                 * Synthesize NAMESPACES event for the command specified.
                 */
                perf_event__synthesize_namespaces(tool, event,
                                                  rec->evlist->workload.pid,
                                                  tgid, process_synthesized_event,
                                                  machine);
                free(event);

                perf_evlist__start_workload(rec->evlist);
        }

        if (opts->initial_delay) {
                usleep(opts->initial_delay * USEC_PER_MSEC);
                perf_evlist__enable(rec->evlist);
        }

        trigger_ready(&auxtrace_snapshot_trigger);
        trigger_ready(&switch_output_trigger);
        perf_hooks__invoke_record_start();
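
        /*
         * Main event loop: drain all mmaps, service the auxtrace-snapshot
         * and switch-output triggers, and poll for more data when a pass
         * found nothing new, until the workload exits or recording is
         * interrupted.
         */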
        for (;;) {
                unsigned long long hits = rec->samples;

                /*
                 * rec->evlist->bkw_mmap_state can be BKW_MMAP_EMPTY here:
                 * when done == true and hits != rec->samples in the
                 * previous round.
                 *
                 * perf_evlist__toggle_bkw_mmap ensures we never convert
                 * BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING.
                 */
                if (trigger_is_hit(&switch_output_trigger) || done || draining)
                        perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING);

                if (record__mmap_read_all(rec) < 0) {
                        trigger_error(&auxtrace_snapshot_trigger);
                        trigger_error(&switch_output_trigger);
                        err = -1;
                        goto out_child;
                }

                if (auxtrace_record__snapshot_started) {
                        auxtrace_record__snapshot_started = 0;
                        if (!trigger_is_error(&auxtrace_snapshot_trigger))
                                record__read_auxtrace_snapshot(rec);
                        if (trigger_is_error(&auxtrace_snapshot_trigger)) {
                                pr_err("AUX area tracing snapshot failed\n");
                                err = -1;
                                goto out_child;
                        }
                }

                if (trigger_is_hit(&switch_output_trigger)) {
                        /*
                         * If switch_output_trigger is hit, the data in the
                         * overwritable ring buffer should have been collected,
                         * so bkw_mmap_state should be set to BKW_MMAP_EMPTY.
                         *
                         * If SIGUSR2 was raised after or during
                         * record__mmap_read_all(), it didn't collect data
                         * from the overwritable ring buffer. Read again.
                         */
                        if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING)
                                continue;
                        trigger_ready(&switch_output_trigger);

                        /*
                         * Reenable events in overwrite ring buffer after
                         * record__mmap_read_all(): we should have collected
                         * data from it.
                         */
                        perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING);

                        if (!quiet)
                                fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
                                        waking);
                        waking = 0;
                        fd = record__switch_output(rec, false);
                        if (fd < 0) {
                                pr_err("Failed to switch to new file\n");
                                trigger_error(&switch_output_trigger);
                                err = fd;
                                goto out_child;
                        }

                        /* re-arm the alarm */
                        if (rec->switch_output.time)
                                alarm(rec->switch_output.time);
                }

                if (hits == rec->samples) {
                        if (done || draining)
                                break;
                        err = perf_evlist__poll(rec->evlist, -1);
                        /*
                         * Propagate the error only if there is one; a
                         * positive number of returned events and
                         * interruption (EINTR) are ignored.
                         */
1407                         if (err > 0 || (err < 0 && errno == EINTR))
1408                                 err = 0;
1409                         waking++;
1410
1411                         if (perf_evlist__filter_pollfd(rec->evlist, POLLERR | POLLHUP) == 0)
1412                                 draining = true;
1413                 }
1414
1415                 /*
1416                  * When perf is starting the traced process, at the end events
1417                  * die with the process and we wait for that. Thus no need to
1418                  * disable events in this case.
1419                  */
1420                 if (done && !disabled && !target__none(&opts->target)) {
1421                         trigger_off(&auxtrace_snapshot_trigger);
1422                         perf_evlist__disable(rec->evlist);
1423                         disabled = true;
1424                 }
1425         }
1426         trigger_off(&auxtrace_snapshot_trigger);
1427         trigger_off(&switch_output_trigger);
1428
1429         if (forks && workload_exec_errno) {
1430                 char msg[STRERR_BUFSIZE];
1431                 const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
1432                 pr_err("Workload failed: %s\n", emsg);
1433                 err = -1;
1434                 goto out_child;
1435         }
1436
1437         if (!quiet)
1438                 fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);
1439
1440         if (target__none(&rec->opts.target))
1441                 record__synthesize_workload(rec, true);
1442
1443 out_child:
1444         record__aio_mmap_read_sync(rec);
1445
1446         if (forks) {
1447                 int exit_status;
1448
1449                 if (!child_finished)
1450                         kill(rec->evlist->workload.pid, SIGTERM);
1451
1452                 wait(&exit_status);
1453
1454                 if (err < 0)
1455                         status = err;
1456                 else if (WIFEXITED(exit_status))
1457                         status = WEXITSTATUS(exit_status);
1458                 else if (WIFSIGNALED(exit_status))
1459                         signr = WTERMSIG(exit_status);
1460         } else
1461                 status = err;
1462
1463         record__synthesize(rec, true);
1464         /* this will be recalculated during process_buildids() */
1465         rec->samples = 0;
1466
1467         if (!err) {
1468                 if (!rec->timestamp_filename) {
1469                         record__finish_output(rec);
1470                 } else {
1471                         fd = record__switch_output(rec, true);
1472                         if (fd < 0) {
1473                                 status = fd;
1474                                 goto out_delete_session;
1475                         }
1476                 }
1477         }
1478
1479         perf_hooks__invoke_record_end();
1480
1481         if (!err && !quiet) {
1482                 char samples[128];
1483                 const char *postfix = rec->timestamp_filename ?
1484                                         ".<timestamp>" : "";
1485
1486                 if (rec->samples && !rec->opts.full_auxtrace)
1487                         scnprintf(samples, sizeof(samples),
1488                                   " (%" PRIu64 " samples)", rec->samples);
1489                 else
1490                         samples[0] = '\0';
1491
1492                 fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s ]\n",
1493                         perf_data__size(data) / 1024.0 / 1024.0,
1494                         data->path, postfix, samples);
1495         }
1496
1497 out_delete_session:
1498         perf_session__delete(session);
1499
1500         if (!opts->no_bpf_event)
1501                 perf_evlist__stop_sb_thread(sb_evlist);
1502         return status;
1503 }
1504
1505 static void callchain_debug(struct callchain_param *callchain)
1506 {
1507         static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };
1508
1509         pr_debug("callchain: type %s\n", str[callchain->record_mode]);
1510
1511         if (callchain->record_mode == CALLCHAIN_DWARF)
1512                 pr_debug("callchain: stack dump size %d\n",
1513                          callchain->dump_size);
1514 }
1515
1516 int record_opts__parse_callchain(struct record_opts *record,
1517                                  struct callchain_param *callchain,
1518                                  const char *arg, bool unset)
1519 {
1520         int ret;
1521         callchain->enabled = !unset;
1522
1523         /* --no-call-graph */
1524         if (unset) {
1525                 callchain->record_mode = CALLCHAIN_NONE;
1526                 pr_debug("callchain: disabled\n");
1527                 return 0;
1528         }
1529
1530         ret = parse_callchain_record_opt(arg, callchain);
1531         if (!ret) {
1532                 /* Enable data address sampling for DWARF unwind. */
1533                 if (callchain->record_mode == CALLCHAIN_DWARF)
1534                         record->sample_address = true;
1535                 callchain_debug(callchain);
1536         }
1537
1538         return ret;
1539 }
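/*
 * Illustrative only (not part of the original source): arguments this
 * parser accepts, per the CALLCHAIN_* modes listed in callchain_debug()
 * and the optional record size handled by parse_callchain_record_opt():
 *
 *   perf record --call-graph fp ...          # frame-pointer unwinding
 *   perf record --call-graph dwarf,8192 ...  # DWARF unwind, 8kB stack dumps
 *   perf record --call-graph lbr ...         # last branch record
 *
 * Note that the DWARF case also flips on record->sample_address above.
 */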
1540
1541 int record_parse_callchain_opt(const struct option *opt,
1542                                const char *arg,
1543                                int unset)
1544 {
1545         return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
1546 }
1547
1548 int record_callchain_opt(const struct option *opt,
1549                          const char *arg __maybe_unused,
1550                          int unset __maybe_unused)
1551 {
1552         struct callchain_param *callchain = opt->value;
1553
1554         callchain->enabled = true;
1555
1556         if (callchain->record_mode == CALLCHAIN_NONE)
1557                 callchain->record_mode = CALLCHAIN_FP;
1558
1559         callchain_debug(callchain);
1560         return 0;
1561 }
1562
1563 static int perf_record_config(const char *var, const char *value, void *cb)
1564 {
1565         struct record *rec = cb;
1566
1567         if (!strcmp(var, "record.build-id")) {
1568                 if (!strcmp(value, "cache"))
1569                         rec->no_buildid_cache = false;
1570                 else if (!strcmp(value, "no-cache"))
1571                         rec->no_buildid_cache = true;
1572                 else if (!strcmp(value, "skip"))
1573                         rec->no_buildid = true;
1574                 else
1575                         return -1;
1576                 return 0;
1577         }
1578         if (!strcmp(var, "record.call-graph")) {
1579                 var = "call-graph.record-mode";
1580                 return perf_default_config(var, value, cb);
1581         }
1582 #ifdef HAVE_AIO_SUPPORT
1583         if (!strcmp(var, "record.aio")) {
1584                 rec->opts.nr_cblocks = strtol(value, NULL, 0);
1585                 if (!rec->opts.nr_cblocks)
1586                         rec->opts.nr_cblocks = nr_cblocks_default;
1587         }
1588 #endif
1589
1590         return 0;
1591 }
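/*
 * A hypothetical ~/.perfconfig fragment exercising the keys handled by
 * perf_record_config() above (perf_config() passes "section.key" as var):
 *
 *   [record]
 *           build-id = no-cache   # sets rec->no_buildid_cache
 *           call-graph = dwarf    # rewritten to call-graph.record-mode
 *           aio = 2               # rec->opts.nr_cblocks, with HAVE_AIO_SUPPORT
 */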
1592
1593 struct clockid_map {
1594         const char *name;
1595         int clockid;
1596 };
1597
1598 #define CLOCKID_MAP(n, c)       \
1599         { .name = n, .clockid = (c), }
1600
1601 #define CLOCKID_END     { .name = NULL, }
1602
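/*
 * For illustration: CLOCKID_MAP("monotonic", CLOCK_MONOTONIC) expands to
 * the designated initializer
 *
 *   { .name = "monotonic", .clockid = (CLOCK_MONOTONIC), },
 *
 * and CLOCKID_END terminates the table with a NULL name for the lookup
 * loop in parse_clockid() below.
 */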
1603
1604 /*
1605  * Define any clockids missing from older headers; we need to build on many distros...
1606  */
1607 #ifndef CLOCK_MONOTONIC_RAW
1608 #define CLOCK_MONOTONIC_RAW 4
1609 #endif
1610 #ifndef CLOCK_BOOTTIME
1611 #define CLOCK_BOOTTIME 7
1612 #endif
1613 #ifndef CLOCK_TAI
1614 #define CLOCK_TAI 11
1615 #endif
1616
1617 static const struct clockid_map clockids[] = {
1618         /* available for all events, NMI safe */
1619         CLOCKID_MAP("monotonic", CLOCK_MONOTONIC),
1620         CLOCKID_MAP("monotonic_raw", CLOCK_MONOTONIC_RAW),
1621
1622         /* available for some events */
1623         CLOCKID_MAP("realtime", CLOCK_REALTIME),
1624         CLOCKID_MAP("boottime", CLOCK_BOOTTIME),
1625         CLOCKID_MAP("tai", CLOCK_TAI),
1626
1627         /* available for the lazy */
1628         CLOCKID_MAP("mono", CLOCK_MONOTONIC),
1629         CLOCKID_MAP("raw", CLOCK_MONOTONIC_RAW),
1630         CLOCKID_MAP("real", CLOCK_REALTIME),
1631         CLOCKID_MAP("boot", CLOCK_BOOTTIME),
1632
1633         CLOCKID_END,
1634 };
1635
1636 static int get_clockid_res(clockid_t clk_id, u64 *res_ns)
1637 {
1638         struct timespec res;
1639
1640         *res_ns = 0;
1641         if (!clock_getres(clk_id, &res))
1642                 *res_ns = res.tv_nsec + res.tv_sec * NSEC_PER_SEC;
1643         else
1644                 pr_warning("WARNING: Failed to determine specified clock resolution.\n");
1645
1646         return 0;
1647 }
1648
1649 static int parse_clockid(const struct option *opt, const char *str, int unset)
1650 {
1651         struct record_opts *opts = (struct record_opts *)opt->value;
1652         const struct clockid_map *cm;
1653         const char *ostr = str;
1654
1655         if (unset) {
1656                 opts->use_clockid = 0;
1657                 return 0;
1658         }
1659
1660         /* no arg passed */
1661         if (!str)
1662                 return 0;
1663
1664         /* don't allow setting it twice */
1665         if (opts->use_clockid)
1666                 return -1;
1667
1668         opts->use_clockid = true;
1669
1670         /* if it's a number, we're done */
1671         if (sscanf(str, "%d", &opts->clockid) == 1)
1672                 return get_clockid_res(opts->clockid, &opts->clockid_res_ns);
1673
1674         /* allow a "CLOCK_" prefix to the name */
1675         if (!strncasecmp(str, "CLOCK_", 6))
1676                 str += 6;
1677
1678         for (cm = clockids; cm->name; cm++) {
1679                 if (!strcasecmp(str, cm->name)) {
1680                         opts->clockid = cm->clockid;
1681                         return get_clockid_res(opts->clockid,
1682                                                &opts->clockid_res_ns);
1683                 }
1684         }
1685
1686         opts->use_clockid = false;
1687         ui__warning("unknown clockid %s, check man page\n", ostr);
1688         return -1;
1689 }
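/*
 * Illustrative invocations accepted by parse_clockid() above; a raw
 * clockid number, a bare name from the clockids[] table, or the same
 * name with a "CLOCK_" prefix all resolve to the same clock:
 *
 *   perf record -k 4 ...
 *   perf record -k monotonic_raw ...
 *   perf record -k CLOCK_MONOTONIC_RAW ...
 */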
1690
1691 static int record__parse_affinity(const struct option *opt, const char *str, int unset)
1692 {
1693         struct record_opts *opts = (struct record_opts *)opt->value;
1694
1695         if (unset || !str)
1696                 return 0;
1697
1698         if (!strcasecmp(str, "node"))
1699                 opts->affinity = PERF_AFFINITY_NODE;
1700         else if (!strcasecmp(str, "cpu"))
1701                 opts->affinity = PERF_AFFINITY_CPU;
1702
1703         return 0;
1704 }
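/*
 * Illustrative values for the --affinity option parsed above (any other
 * string silently keeps the default, PERF_AFFINITY_SYS):
 *
 *   perf record --affinity=node ...   # bind reads to the buffer's NUMA node
 *   perf record --affinity=cpu ...    # bind reads to the buffer's cpu
 */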
1705
1706 static int record__parse_mmap_pages(const struct option *opt,
1707                                     const char *str,
1708                                     int unset __maybe_unused)
1709 {
1710         struct record_opts *opts = opt->value;
1711         char *s, *p;
1712         unsigned int mmap_pages;
1713         int ret;
1714
1715         if (!str)
1716                 return -EINVAL;
1717
1718         s = strdup(str);
1719         if (!s)
1720                 return -ENOMEM;
1721
1722         p = strchr(s, ',');
1723         if (p)
1724                 *p = '\0';
1725
1726         if (*s) {
1727                 ret = __perf_evlist__parse_mmap_pages(&mmap_pages, s);
1728                 if (ret)
1729                         goto out_free;
1730                 opts->mmap_pages = mmap_pages;
1731         }
1732
1733         if (!p) {
1734                 ret = 0;
1735                 goto out_free;
1736         }
1737
1738         ret = __perf_evlist__parse_mmap_pages(&mmap_pages, p + 1);
1739         if (ret)
1740                 goto out_free;
1741
1742         opts->auxtrace_mmap_pages = mmap_pages;
1743
1744 out_free:
1745         free(s);
1746         return ret;
1747 }
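/*
 * A sketch of the syntax parsed above: the value before the comma sets
 * the data mmap size and an optional value after it sets the AUX area
 * tracing mmap size. Either part may be left empty:
 *
 *   perf record -m 512 ...        # 512 data pages
 *   perf record -m 512,1024 ...   # 512 data pages, 1024 AUX pages
 *   perf record -m ,1024 ...      # default data pages, 1024 AUX pages
 */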
1748
1749 static void switch_output_size_warn(struct record *rec)
1750 {
1751         u64 wakeup_size = perf_evlist__mmap_size(rec->opts.mmap_pages);
1752         struct switch_output *s = &rec->switch_output;
1753
1754         wakeup_size /= 2;
1755
1756         if (s->size < wakeup_size) {
1757                 char buf[100];
1758
1759                 unit_number__scnprintf(buf, sizeof(buf), wakeup_size);
1760                 pr_warning("WARNING: switch-output data size is lower than the "
1761                            "wakeup kernel buffer size (%s); "
1762                            "expect bigger perf.data sizes\n", buf);
1763         }
1764 }
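/*
 * A worked example of the check above (illustrative numbers): with a
 * 4MiB mmap data buffer, wakeup_size is 2MiB, so --switch-output=1M
 * triggers the warning, since perf may be woken up holding more buffered
 * data than a single output file was supposed to contain.
 */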
1765
1766 static int switch_output_setup(struct record *rec)
1767 {
1768         struct switch_output *s = &rec->switch_output;
1769         static struct parse_tag tags_size[] = {
1770                 { .tag  = 'B', .mult = 1       },
1771                 { .tag  = 'K', .mult = 1 << 10 },
1772                 { .tag  = 'M', .mult = 1 << 20 },
1773                 { .tag  = 'G', .mult = 1 << 30 },
1774                 { .tag  = 0 },
1775         };
1776         static struct parse_tag tags_time[] = {
1777                 { .tag  = 's', .mult = 1        },
1778                 { .tag  = 'm', .mult = 60       },
1779                 { .tag  = 'h', .mult = 60*60    },
1780                 { .tag  = 'd', .mult = 60*60*24 },
1781                 { .tag  = 0 },
1782         };
1783         unsigned long val;
1784
1785         if (!s->set)
1786                 return 0;
1787
1788         if (!strcmp(s->str, "signal")) {
1789                 s->signal = true;
1790                 pr_debug("switch-output with SIGUSR2 signal\n");
1791                 goto enabled;
1792         }
1793
1794         val = parse_tag_value(s->str, tags_size);
1795         if (val != (unsigned long) -1) {
1796                 s->size = val;
1797                 pr_debug("switch-output with %s size threshold\n", s->str);
1798                 goto enabled;
1799         }
1800
1801         val = parse_tag_value(s->str, tags_time);
1802         if (val != (unsigned long) -1) {
1803                 s->time = val;
1804                 pr_debug("switch-output with %s time threshold (%lu seconds)\n",
1805                          s->str, s->time);
1806                 goto enabled;
1807         }
1808
1809         return -1;
1810
1811 enabled:
1812         rec->timestamp_filename = true;
1813         s->enabled              = true;
1814
1815         if (s->size && !rec->opts.no_buffering)
1816                 switch_output_size_warn(rec);
1817
1818         return 0;
1819 }
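/*
 * Examples (illustrative, not from the original source) of the three
 * forms switch_output_setup() accepts, per the tag tables above:
 *
 *   perf record --switch-output=signal ...   # rotate on SIGUSR2
 *   perf record --switch-output=2G ...       # rotate after ~2GB written
 *   perf record --switch-output=30m ...      # rotate every 30 minutes
 *
 * All three imply timestamp_filename, so each output gets a unique name.
 */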
1820
1821 static const char * const __record_usage[] = {
1822         "perf record [<options>] [<command>]",
1823         "perf record [<options>] -- <command> [<options>]",
1824         NULL
1825 };
1826 const char * const *record_usage = __record_usage;
1827
1828 /*
1829  * XXX Ideally this would be local to cmd_record() and passed to a record__new(),
1830  * because we need to have access to it in record__exit(), which is called
1831  * after cmd_record() exits. But since record_options needs to be accessible
1832  * to builtin-script, leave it here.
1833  *
1834  * At least we don't touch it in all the other functions here directly.
1835  *
1836  * Just say no to tons of global variables, sigh.
1837  */
1838 static struct record record = {
1839         .opts = {
1840                 .sample_time         = true,
1841                 .mmap_pages          = UINT_MAX,
1842                 .user_freq           = UINT_MAX,
1843                 .user_interval       = ULLONG_MAX,
1844                 .freq                = 4000,
1845                 .target              = {
1846                         .uses_mmap   = true,
1847                         .default_per_cpu = true,
1848                 },
1849         },
1850         .tool = {
1851                 .sample         = process_sample_event,
1852                 .fork           = perf_event__process_fork,
1853                 .exit           = perf_event__process_exit,
1854                 .comm           = perf_event__process_comm,
1855                 .namespaces     = perf_event__process_namespaces,
1856                 .mmap           = perf_event__process_mmap,
1857                 .mmap2          = perf_event__process_mmap2,
1858                 .ordered_events = true,
1859         },
1860 };
1861
1862 const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
1863         "\n\t\t\t\tDefault: fp";
1864
1865 static bool dry_run;
1866
1867 /*
1868  * XXX This will stay a global variable until we fix builtin-script.c to stop
1869  * messing with it and switch to using the library functions in perf_evlist
1870  * that came from builtin-record.c, i.e. use record_opts,
1871  * perf_evlist__prepare_workload, etc. instead of fork+exec'ing 'perf record'
1872  * via pipes, etc.
1873  */
1874 static struct option __record_options[] = {
1875         OPT_CALLBACK('e', "event", &record.evlist, "event",
1876                      "event selector. use 'perf list' to list available events",
1877                      parse_events_option),
1878         OPT_CALLBACK(0, "filter", &record.evlist, "filter",
1879                      "event filter", parse_filter),
1880         OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
1881                            NULL, "don't record events from perf itself",
1882                            exclude_perf),
1883         OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
1884                     "record events on existing process id"),
1885         OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
1886                     "record events on existing thread id"),
1887         OPT_INTEGER('r', "realtime", &record.realtime_prio,
1888                     "collect data with this RT SCHED_FIFO priority"),
1889         OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
1890                     "collect data without buffering"),
1891         OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
1892                     "collect raw sample records from all opened counters"),
1893         OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
1894                             "system-wide collection from all CPUs"),
1895         OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
1896                     "list of cpus to monitor"),
1897         OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
1898         OPT_STRING('o', "output", &record.data.path, "file",
1899                     "output file name"),
1900         OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
1901                         &record.opts.no_inherit_set,
1902                         "child tasks do not inherit counters"),
1903         OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize,
1904                     "synthesize non-sample events at the end of output"),
1905         OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"),
1906         OPT_BOOLEAN(0, "no-bpf-event", &record.opts.no_bpf_event, "do not record bpf events"),
1907         OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq,
1908                     "Fail if the specified frequency can't be used"),
1909         OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'",
1910                      "profile at this frequency",
1911                       record__parse_freq),
1912         OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
1913                      "number of mmap data pages and AUX area tracing mmap pages",
1914                      record__parse_mmap_pages),
1915         OPT_BOOLEAN(0, "group", &record.opts.group,
1916                     "put the counters into a counter group"),
1917         OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
1918                            NULL, "enables call-graph recording",
1919                            &record_callchain_opt),
1920         OPT_CALLBACK(0, "call-graph", &record.opts,
1921                      "record_mode[,record_size]", record_callchain_help,
1922                      &record_parse_callchain_opt),
1923         OPT_INCR('v', "verbose", &verbose,
1924                     "be more verbose (show counter open errors, etc)"),
1925         OPT_BOOLEAN('q', "quiet", &quiet, "don't print any messages"),
1926         OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
1927                     "per thread counts"),
1928         OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
1929         OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr,
1930                     "Record the sample physical addresses"),
1931         OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"),
1932         OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
1933                         &record.opts.sample_time_set,
1934                         "Record the sample timestamps"),
1935         OPT_BOOLEAN_SET('P', "period", &record.opts.period, &record.opts.period_set,
1936                         "Record the sample period"),
1937         OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
1938                     "don't sample"),
1939         OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache,
1940                         &record.no_buildid_cache_set,
1941                         "do not update the buildid cache"),
1942         OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid,
1943                         &record.no_buildid_set,
1944                         "do not collect buildids in perf.data"),
1945         OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
1946                      "monitor event in cgroup name only",
1947                      parse_cgroups),
1948         OPT_UINTEGER('D', "delay", &record.opts.initial_delay,
1949                   "ms to wait before starting measurement after program start"),
1950         OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
1951                    "user to profile"),
1952
1953         OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
1954                      "branch any", "sample any taken branches",
1955                      parse_branch_stack),
1956
1957         OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
1958                      "branch filter mask", "branch stack filter modes",
1959                      parse_branch_stack),
1960         OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
1961                     "sample by weight (on special events only)"),
1962         OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
1963                     "sample transaction flags (special events only)"),
1964         OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
1965                     "use per-thread mmaps"),
1966         OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
1967                     "sample selected machine registers on interrupt,"
1968                     " use -I ? to list register names", parse_regs),
1969         OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register",
1970                     "sample selected machine registers in user space,"
1971                     " use --user-regs ? to list register names", parse_regs),
1972         OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
1973                     "Record running/enabled time of read (:S) events"),
1974         OPT_CALLBACK('k', "clockid", &record.opts,
1975                      "clockid", "clockid to use for events, see clock_gettime()",
1976                      parse_clockid),
1977         OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
1978                           "opts", "AUX area tracing Snapshot Mode", ""),
1979         OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout,
1980                         "per thread proc mmap processing timeout in ms"),
1981         OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces,
1982                     "Record namespaces events"),
1983         OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events,
1984                     "Record context switch events"),
1985         OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
1986                          "Configure all used events to run in kernel space.",
1987                          PARSE_OPT_EXCLUSIVE),
1988         OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user,
1989                          "Configure all used events to run in user space.",
1990                          PARSE_OPT_EXCLUSIVE),
1991         OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path",
1992                    "clang binary to use for compiling BPF scriptlets"),
1993         OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options",
1994                    "options passed to clang when compiling BPF scriptlets"),
1995         OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
1996                    "file", "vmlinux pathname"),
1997         OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
1998                     "Record build-id of all DSOs regardless of hits"),
1999         OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
2000                     "append timestamp to output filename"),
2001         OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary,
2002                     "Record timestamp boundary (time of first/last samples)"),
2003         OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str,
2004                           &record.switch_output.set, "signal or size[BKMG] or time[smhd]",
2005                           "Switch output when receiving SIGUSR2 (signal) or when crossing a size or time threshold",
2006                           "signal"),
2007         OPT_INTEGER(0, "switch-max-files", &record.switch_output.num_files,
2008                    "Limit the number of files generated by switch output"),
2009         OPT_BOOLEAN(0, "dry-run", &dry_run,
2010                     "Parse options then exit"),
2011 #ifdef HAVE_AIO_SUPPORT
2012         OPT_CALLBACK_OPTARG(0, "aio", &record.opts,
2013                      &nr_cblocks_default, "n", "Use <n> control blocks in asynchronous trace writing mode (default: 1, max: 4)",
2014                      record__aio_parse),
2015 #endif
2016         OPT_CALLBACK(0, "affinity", &record.opts, "node|cpu",
2017                      "Set the affinity mask of the trace reading thread to the NUMA node cpu mask or the cpu of the processed mmap buffer",
2018                      record__parse_affinity),
2019         OPT_END()
2020 };
2021
2022 struct option *record_options = __record_options;
2023
2024 int cmd_record(int argc, const char **argv)
2025 {
2026         int err;
2027         struct record *rec = &record;
2028         char errbuf[BUFSIZ];
2029
2030         setlocale(LC_ALL, "");
2031
2032 #ifndef HAVE_LIBBPF_SUPPORT
2033 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c)
2034         set_nobuild('\0', "clang-path", true);
2035         set_nobuild('\0', "clang-opt", true);
2036 # undef set_nobuild
2037 #endif
2038
2039 #ifndef HAVE_BPF_PROLOGUE
2040 # if !defined (HAVE_DWARF_SUPPORT)
2041 #  define REASON  "NO_DWARF=1"
2042 # elif !defined (HAVE_LIBBPF_SUPPORT)
2043 #  define REASON  "NO_LIBBPF=1"
2044 # else
2045 #  define REASON  "this architecture doesn't support BPF prologue"
2046 # endif
2047 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c)
2048         set_nobuild('\0', "vmlinux", true);
2049 # undef set_nobuild
2050 # undef REASON
2051 #endif
2052
2053         CPU_ZERO(&rec->affinity_mask);
2054         rec->opts.affinity = PERF_AFFINITY_SYS;
2055
2056         rec->evlist = perf_evlist__new();
2057         if (rec->evlist == NULL)
2058                 return -ENOMEM;
2059
2060         err = perf_config(perf_record_config, rec);
2061         if (err)
2062                 return err;
2063
2064         argc = parse_options(argc, argv, record_options, record_usage,
2065                             PARSE_OPT_STOP_AT_NON_OPTION);
2066         if (quiet)
2067                 perf_quiet_option();
2068
2069         /* Make system wide (-a) the default target. */
2070         if (!argc && target__none(&rec->opts.target))
2071                 rec->opts.target.system_wide = true;
2072
2073         if (nr_cgroups && !rec->opts.target.system_wide) {
2074                 usage_with_options_msg(record_usage, record_options,
2075                         "cgroup monitoring only available in system-wide mode");
2076         }
2077
2078         if (rec->opts.record_switch_events &&
2079             !perf_can_record_switch_events()) {
2080                 ui__error("kernel does not support recording context switch events\n");
2081                 parse_options_usage(record_usage, record_options, "switch-events", 0);
2082                 return -EINVAL;
2083         }
2084
2085         if (switch_output_setup(rec)) {
2086                 parse_options_usage(record_usage, record_options, "switch-output", 0);
2087                 return -EINVAL;
2088         }
2089
2090         if (rec->switch_output.time) {
2091                 signal(SIGALRM, alarm_sig_handler);
2092                 alarm(rec->switch_output.time);
2093         }
2094
2095         if (rec->switch_output.num_files) {
2096                 rec->switch_output.filenames = calloc(rec->switch_output.num_files,
2097                                                       sizeof(char *));
2098                 if (!rec->switch_output.filenames)
2099                         return -ENOMEM;
2100         }
2101
2102         /*
2103          * Allow aliases to facilitate the lookup of symbols for address
2104          * filters. Refer to auxtrace_parse_filters().
2105          */
2106         symbol_conf.allow_aliases = true;
2107
2108         symbol__init(NULL);
2109
2110         err = record__auxtrace_init(rec);
2111         if (err)
2112                 goto out;
2113
2114         if (dry_run)
2115                 goto out;
2116
2117         err = bpf__setup_stdout(rec->evlist);
2118         if (err) {
2119                 bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf));
2120                 pr_err("ERROR: Setup BPF stdout failed: %s\n",
2121                        errbuf);
2122                 goto out;
2123         }
2124
2125         err = -ENOMEM;
2126
2127         if (symbol_conf.kptr_restrict && !perf_evlist__exclude_kernel(rec->evlist))
2128                 pr_warning(
2129 "WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
2130 "check /proc/sys/kernel/kptr_restrict.\n\n"
2131 "Samples in kernel functions may not be resolved if a suitable vmlinux\n"
2132 "file is not found in the buildid cache or in the vmlinux path.\n\n"
2133 "Samples in kernel modules won't be resolved at all.\n\n"
2134 "If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
2135 "even with a suitable vmlinux or kallsyms file.\n\n");
2136
2137         if (rec->no_buildid_cache || rec->no_buildid) {
2138                 disable_buildid_cache();
2139         } else if (rec->switch_output.enabled) {
2140                 /*
2141                  * In 'perf record --switch-output', disable buildid
2142                  * generation by default to reduce data file switching
2143                  * overhead. Still generate buildid if they are required
2144                  * overhead. Still generate buildids if they are explicitly
2145                  * required using:
2146                  *  perf record --switch-output --no-no-buildid \
2147                  *              --no-no-buildid-cache
2148                  *
2149                  * The following code is equivalent to:
2150                  *
2151                  * if ((rec->no_buildid || !rec->no_buildid_set) &&
2152                  *     (rec->no_buildid_cache || !rec->no_buildid_cache_set))
2153                  *         disable_buildid_cache();
2154                  */
2155                 bool disable = true;
2156
2157                 if (rec->no_buildid_set && !rec->no_buildid)
2158                         disable = false;
2159                 if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
2160                         disable = false;
2161                 if (disable) {
2162                         rec->no_buildid = true;
2163                         rec->no_buildid_cache = true;
2164                         disable_buildid_cache();
2165                 }
2166         }
2167
2168         if (record.opts.overwrite)
2169                 record.opts.tail_synthesize = true;
2170
2171         if (rec->evlist->nr_entries == 0 &&
2172             __perf_evlist__add_default(rec->evlist, !record.opts.no_samples) < 0) {
2173                 pr_err("Not enough memory for event selector list\n");
2174                 goto out;
2175         }
2176
2177         if (rec->opts.target.tid && !rec->opts.no_inherit_set)
2178                 rec->opts.no_inherit = true;
2179
2180         err = target__validate(&rec->opts.target);
2181         if (err) {
2182                 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
2183                 ui__warning("%s\n", errbuf);
2184         }
2185
2186         err = target__parse_uid(&rec->opts.target);
2187         if (err) {
2188                 int saved_errno = errno;
2189
2190                 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
2191                 ui__error("%s", errbuf);
2192
2193                 err = -saved_errno;
2194                 goto out;
2195         }
2196
2197         /* Enable ignoring missing threads when -u/-p option is defined. */
2198         rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX || rec->opts.target.pid;
2199
2200         err = -ENOMEM;
2201         if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
2202                 usage_with_options(record_usage, record_options);
2203
2204         err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
2205         if (err)
2206                 goto out;
2207
2208         /*
2209          * We take all buildids when the file contains
2210          * AUX area tracing data, because we do not decode the
2211          * trace: doing so would take too long.
2212          */
2213         if (rec->opts.full_auxtrace)
2214                 rec->buildid_all = true;
2215
2216         if (record_opts__config(&rec->opts)) {
2217                 err = -EINVAL;
2218                 goto out;
2219         }
2220
2221         if (rec->opts.nr_cblocks > nr_cblocks_max)
2222                 rec->opts.nr_cblocks = nr_cblocks_max;
2223         if (verbose > 0)
2224                 pr_info("nr_cblocks: %d\n", rec->opts.nr_cblocks);
2225
2226         pr_debug("affinity: %s\n", affinity_tags[rec->opts.affinity]);
2227
2228         err = __cmd_record(&record, argc, argv);
2229 out:
2230         perf_evlist__delete(rec->evlist);
2231         symbol__exit();
2232         auxtrace_record__free(rec->itr);
2233         return err;
2234 }
2235
2236 static void snapshot_sig_handler(int sig __maybe_unused)
2237 {
2238         struct record *rec = &record;
2239
2240         if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
2241                 trigger_hit(&auxtrace_snapshot_trigger);
2242                 auxtrace_record__snapshot_started = 1;
2243                 if (auxtrace_record__snapshot_start(record.itr))
2244                         trigger_error(&auxtrace_snapshot_trigger);
2245         }
2246
2247         if (switch_output_signal(rec))
2248                 trigger_hit(&switch_output_trigger);
2249 }
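/*
 * Illustrative usage, assuming SIGUSR2 is wired to snapshot_sig_handler()
 * during __cmd_record(): an AUX area snapshot and/or an output switch can
 * be requested from a shell while perf record runs, e.g.:
 *
 *   kill -USR2 $(pgrep -f 'perf record')
 */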
2250
2251 static void alarm_sig_handler(int sig __maybe_unused)
2252 {
2253         struct record *rec = &record;
2254
2255         if (switch_output_time(rec))
2256                 trigger_hit(&switch_output_trigger);
2257 }