1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * builtin-record.c
4  *
5  * Builtin record command: Record the profile of a workload
6  * (or a CPU, or a PID) into the perf.data output file - for
7  * later analysis via perf report.
8  */
9 #include "builtin.h"
10
11 #include "perf.h"
12
13 #include "util/build-id.h"
14 #include "util/util.h"
15 #include <subcmd/parse-options.h>
16 #include "util/parse-events.h"
17 #include "util/config.h"
18
19 #include "util/callchain.h"
20 #include "util/cgroup.h"
21 #include "util/header.h"
22 #include "util/event.h"
23 #include "util/evlist.h"
24 #include "util/evsel.h"
25 #include "util/debug.h"
26 #include "util/session.h"
27 #include "util/tool.h"
28 #include "util/symbol.h"
29 #include "util/cpumap.h"
30 #include "util/thread_map.h"
31 #include "util/data.h"
32 #include "util/perf_regs.h"
33 #include "util/auxtrace.h"
34 #include "util/tsc.h"
35 #include "util/parse-branch-options.h"
36 #include "util/parse-regs-options.h"
37 #include "util/llvm-utils.h"
38 #include "util/bpf-loader.h"
39 #include "util/trigger.h"
40 #include "util/perf-hooks.h"
41 #include "util/cpu-set-sched.h"
42 #include "util/time-utils.h"
43 #include "util/units.h"
44 #include "util/bpf-event.h"
45 #include "asm/bug.h"
46
47 #include <errno.h>
48 #include <inttypes.h>
49 #include <locale.h>
50 #include <poll.h>
51 #include <unistd.h>
52 #include <sched.h>
53 #include <signal.h>
54 #include <sys/mman.h>
55 #include <sys/wait.h>
56 #include <linux/time64.h>
57
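/*
 * Controls switching (rotating) of the output file: on SIGUSR2, after a
 * given amount of data has been written, or after a given time.  The
 * filenames/num_files/cur_file members track the ring of file names that
 * are kept when only a limited number of output files is requested.
 */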
58 struct switch_output {
59         bool             enabled;
60         bool             signal;
61         unsigned long    size;
62         unsigned long    time;
63         const char      *str;
64         bool             set;
65         char             **filenames;
66         int              num_files;
67         int              cur_file;
68 };
69
70 struct record {
71         struct perf_tool        tool;
72         struct record_opts      opts;
73         u64                     bytes_written;
74         struct perf_data        data;
75         struct auxtrace_record  *itr;
76         struct perf_evlist      *evlist;
77         struct perf_session     *session;
78         int                     realtime_prio;
79         bool                    no_buildid;
80         bool                    no_buildid_set;
81         bool                    no_buildid_cache;
82         bool                    no_buildid_cache_set;
83         bool                    buildid_all;
84         bool                    timestamp_filename;
85         bool                    timestamp_boundary;
86         struct switch_output    switch_output;
87         unsigned long long      samples;
88         cpu_set_t               affinity_mask;
89 };
90
91 static volatile int auxtrace_record__snapshot_started;
92 static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
93 static DEFINE_TRIGGER(switch_output_trigger);
94
95 static const char *affinity_tags[PERF_AFFINITY_MAX] = {
96         "SYS", "NODE", "CPU"
97 };
98
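/*
 * The switch_output_*() helpers report whether a new output file should be
 * started: on SIGUSR2 (signal), once enough bytes have been written (size),
 * or on a timer (time).  All of them also require the switch-output trigger
 * to be ready.
 */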
99 static bool switch_output_signal(struct record *rec)
100 {
101         return rec->switch_output.signal &&
102                trigger_is_ready(&switch_output_trigger);
103 }
104
105 static bool switch_output_size(struct record *rec)
106 {
107         return rec->switch_output.size &&
108                trigger_is_ready(&switch_output_trigger) &&
109                (rec->bytes_written >= rec->switch_output.size);
110 }
111
112 static bool switch_output_time(struct record *rec)
113 {
114         return rec->switch_output.time &&
115                trigger_is_ready(&switch_output_trigger);
116 }
117
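/*
 * Write a block of data to the perf.data file, account for the bytes
 * written, and fire the switch-output trigger once the configured size
 * threshold has been reached.
 */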
118 static int record__write(struct record *rec, struct perf_mmap *map __maybe_unused,
119                          void *bf, size_t size)
120 {
121         struct perf_data_file *file = &rec->session->data->file;
122
123         if (perf_data_file__write(file, bf, size) < 0) {
124                 pr_err("failed to write perf data, error: %m\n");
125                 return -1;
126         }
127
128         rec->bytes_written += size;
129
130         if (switch_output_size(rec))
131                 trigger_hit(&switch_output_trigger);
132
133         return 0;
134 }
135
136 #ifdef HAVE_AIO_SUPPORT
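/*
 * Queue an asynchronous write of the given buffer at the given file offset,
 * retrying while aio_write() fails with EAGAIN.
 */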
137 static int record__aio_write(struct aiocb *cblock, int trace_fd,
138                 void *buf, size_t size, off_t off)
139 {
140         int rc;
141
142         cblock->aio_fildes = trace_fd;
143         cblock->aio_buf    = buf;
144         cblock->aio_nbytes = size;
145         cblock->aio_offset = off;
146         cblock->aio_sigevent.sigev_notify = SIGEV_NONE;
147
148         do {
149                 rc = aio_write(cblock);
150                 if (rc == 0) {
151                         break;
152                 } else if (errno != EAGAIN) {
153                         cblock->aio_fildes = -1;
154                         pr_err("failed to queue perf data, error: %m\n");
155                         break;
156                 }
157         } while (1);
158
159         return rc;
160 }
161
162 static int record__aio_complete(struct perf_mmap *md, struct aiocb *cblock)
163 {
164         void *rem_buf;
165         off_t rem_off;
166         size_t rem_size;
167         int rc, aio_errno;
168         ssize_t aio_ret, written;
169
170         aio_errno = aio_error(cblock);
171         if (aio_errno == EINPROGRESS)
172                 return 0;
173
174         written = aio_ret = aio_return(cblock);
175         if (aio_ret < 0) {
176                 if (aio_errno != EINTR)
177                         pr_err("failed to write perf data, error: %m\n");
178                 written = 0;
179         }
180
181         rem_size = cblock->aio_nbytes - written;
182
183         if (rem_size == 0) {
184                 cblock->aio_fildes = -1;
185                 /*
186                  * md->refcount is incremented in perf_mmap__push() for
187                  * every enqueued aio write request so decrement it because
188                  * the request is now complete.
189                  */
190                 perf_mmap__put(md);
191                 rc = 1;
192         } else {
193                 /*
194                  * The aio write request may need to be restarted with the
195                  * remainder if the kernel didn't write the whole
196                  * chunk at once.
197                  */
198                 rem_off = cblock->aio_offset + written;
199                 rem_buf = (void *)(cblock->aio_buf + written);
200                 record__aio_write(cblock, cblock->aio_fildes,
201                                 rem_buf, rem_size, rem_off);
202                 rc = 0;
203         }
204
205         return rc;
206 }
207
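/*
 * Wait for in-flight aio writes on this mmap.  With sync_all == false,
 * return the index of the first free control block; with sync_all == true,
 * wait until all outstanding writes have completed and return -1.
 */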
208 static int record__aio_sync(struct perf_mmap *md, bool sync_all)
209 {
210         struct aiocb **aiocb = md->aio.aiocb;
211         struct aiocb *cblocks = md->aio.cblocks;
212         struct timespec timeout = { 0, 1000 * 1000  * 1 }; /* 1ms */
213         int i, do_suspend;
214
215         do {
216                 do_suspend = 0;
217                 for (i = 0; i < md->aio.nr_cblocks; ++i) {
218                         if (cblocks[i].aio_fildes == -1 || record__aio_complete(md, &cblocks[i])) {
219                                 if (sync_all)
220                                         aiocb[i] = NULL;
221                                 else
222                                         return i;
223                         } else {
224                                 /*
225                                  * The started aio write is not complete yet,
226                                  * so it has to be waited for before the
227                                  * next allocation.
228                                  */
229                                 aiocb[i] = &cblocks[i];
230                                 do_suspend = 1;
231                         }
232                 }
233                 if (!do_suspend)
234                         return -1;
235
236                 while (aio_suspend((const struct aiocb **)aiocb, md->aio.nr_cblocks, &timeout)) {
237                         if (!(errno == EAGAIN || errno == EINTR))
238                                 pr_err("failed to sync perf data, error: %m\n");
239                 }
240         } while (1);
241 }
242
243 static int record__aio_pushfn(void *to, struct aiocb *cblock, void *bf, size_t size, off_t off)
244 {
245         struct record *rec = to;
246         int ret, trace_fd = rec->session->data->file.fd;
247
248         rec->samples++;
249
250         ret = record__aio_write(cblock, trace_fd, bf, size, off);
251         if (!ret) {
252                 rec->bytes_written += size;
253                 if (switch_output_size(rec))
254                         trigger_hit(&switch_output_trigger);
255         }
256
257         return ret;
258 }
259
260 static off_t record__aio_get_pos(int trace_fd)
261 {
262         return lseek(trace_fd, 0, SEEK_CUR);
263 }
264
265 static void record__aio_set_pos(int trace_fd, off_t pos)
266 {
267         lseek(trace_fd, pos, SEEK_SET);
268 }
269
270 static void record__aio_mmap_read_sync(struct record *rec)
271 {
272         int i;
273         struct perf_evlist *evlist = rec->evlist;
274         struct perf_mmap *maps = evlist->mmap;
275
276         if (!rec->opts.nr_cblocks)
277                 return;
278
279         for (i = 0; i < evlist->nr_mmaps; i++) {
280                 struct perf_mmap *map = &maps[i];
281
282                 if (map->base)
283                         record__aio_sync(map, true);
284         }
285 }
286
287 static int nr_cblocks_default = 1;
288 static int nr_cblocks_max = 4;
289
290 static int record__aio_parse(const struct option *opt,
291                              const char *str,
292                              int unset)
293 {
294         struct record_opts *opts = (struct record_opts *)opt->value;
295
296         if (unset) {
297                 opts->nr_cblocks = 0;
298         } else {
299                 if (str)
300                         opts->nr_cblocks = strtol(str, NULL, 0);
301                 if (!opts->nr_cblocks)
302                         opts->nr_cblocks = nr_cblocks_default;
303         }
304
305         return 0;
306 }
307 #else /* HAVE_AIO_SUPPORT */
308 static int nr_cblocks_max = 0;
309
310 static int record__aio_sync(struct perf_mmap *md __maybe_unused, bool sync_all __maybe_unused)
311 {
312         return -1;
313 }
314
315 static int record__aio_pushfn(void *to __maybe_unused, struct aiocb *cblock __maybe_unused,
316                 void *bf __maybe_unused, size_t size __maybe_unused, off_t off __maybe_unused)
317 {
318         return -1;
319 }
320
321 static off_t record__aio_get_pos(int trace_fd __maybe_unused)
322 {
323         return -1;
324 }
325
326 static void record__aio_set_pos(int trace_fd __maybe_unused, off_t pos __maybe_unused)
327 {
328 }
329
330 static void record__aio_mmap_read_sync(struct record *rec __maybe_unused)
331 {
332 }
333 #endif
334
335 static int record__aio_enabled(struct record *rec)
336 {
337         return rec->opts.nr_cblocks > 0;
338 }
339
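/*
 * Parse the minimum number of bytes that must accumulate in an mmap'ed ring
 * buffer before it is flushed to the output file.  The value accepts B/K/M/G
 * suffixes and is capped at a quarter of the mmap buffer size.
 */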
340 #define MMAP_FLUSH_DEFAULT 1
341 static int record__mmap_flush_parse(const struct option *opt,
342                                     const char *str,
343                                     int unset)
344 {
345         int flush_max;
346         struct record_opts *opts = (struct record_opts *)opt->value;
347         static struct parse_tag tags[] = {
348                         { .tag  = 'B', .mult = 1       },
349                         { .tag  = 'K', .mult = 1 << 10 },
350                         { .tag  = 'M', .mult = 1 << 20 },
351                         { .tag  = 'G', .mult = 1 << 30 },
352                         { .tag  = 0 },
353         };
354
355         if (unset)
356                 return 0;
357
358         if (str) {
359                 opts->mmap_flush = parse_tag_value(str, tags);
360                 if (opts->mmap_flush == (int)-1)
361                         opts->mmap_flush = strtol(str, NULL, 0);
362         }
363
364         if (!opts->mmap_flush)
365                 opts->mmap_flush = MMAP_FLUSH_DEFAULT;
366
367         flush_max = perf_evlist__mmap_size(opts->mmap_pages);
368         flush_max /= 4;
369         if (opts->mmap_flush > flush_max)
370                 opts->mmap_flush = flush_max;
371
372         return 0;
373 }
374
375 static int process_synthesized_event(struct perf_tool *tool,
376                                      union perf_event *event,
377                                      struct perf_sample *sample __maybe_unused,
378                                      struct machine *machine __maybe_unused)
379 {
380         struct record *rec = container_of(tool, struct record, tool);
381         return record__write(rec, NULL, event, event->header.size);
382 }
383
384 static int record__pushfn(struct perf_mmap *map, void *to, void *bf, size_t size)
385 {
386         struct record *rec = to;
387
388         rec->samples++;
389         return record__write(rec, map, bf, size);
390 }
391
392 static volatile int done;
393 static volatile int signr = -1;
394 static volatile int child_finished;
395
396 static void sig_handler(int sig)
397 {
398         if (sig == SIGCHLD)
399                 child_finished = 1;
400         else
401                 signr = sig;
402
403         done = 1;
404 }
405
406 static void sigsegv_handler(int sig)
407 {
408         perf_hooks__recover();
409         sighandler_dump_stack(sig);
410 }
411
412 static void record__sig_exit(void)
413 {
414         if (signr == -1)
415                 return;
416
417         signal(signr, SIG_DFL);
418         raise(signr);
419 }
420
421 #ifdef HAVE_AUXTRACE_SUPPORT
422
423 static int record__process_auxtrace(struct perf_tool *tool,
424                                     struct perf_mmap *map,
425                                     union perf_event *event, void *data1,
426                                     size_t len1, void *data2, size_t len2)
427 {
428         struct record *rec = container_of(tool, struct record, tool);
429         struct perf_data *data = &rec->data;
430         size_t padding;
431         u8 pad[8] = {0};
432
433         if (!perf_data__is_pipe(data) && !perf_data__is_dir(data)) {
434                 off_t file_offset;
435                 int fd = perf_data__fd(data);
436                 int err;
437
438                 file_offset = lseek(fd, 0, SEEK_CUR);
439                 if (file_offset == -1)
440                         return -1;
441                 err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
442                                                      event, file_offset);
443                 if (err)
444                         return err;
445         }
446
447         /* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
448         padding = (len1 + len2) & 7;
449         if (padding)
450                 padding = 8 - padding;
451
452         record__write(rec, map, event, event->header.size);
453         record__write(rec, map, data1, len1);
454         if (len2)
455                 record__write(rec, map, data2, len2);
456         record__write(rec, map, &pad, padding);
457
458         return 0;
459 }
460
461 static int record__auxtrace_mmap_read(struct record *rec,
462                                       struct perf_mmap *map)
463 {
464         int ret;
465
466         ret = auxtrace_mmap__read(map, rec->itr, &rec->tool,
467                                   record__process_auxtrace);
468         if (ret < 0)
469                 return ret;
470
471         if (ret)
472                 rec->samples++;
473
474         return 0;
475 }
476
477 static int record__auxtrace_mmap_read_snapshot(struct record *rec,
478                                                struct perf_mmap *map)
479 {
480         int ret;
481
482         ret = auxtrace_mmap__read_snapshot(map, rec->itr, &rec->tool,
483                                            record__process_auxtrace,
484                                            rec->opts.auxtrace_snapshot_size);
485         if (ret < 0)
486                 return ret;
487
488         if (ret)
489                 rec->samples++;
490
491         return 0;
492 }
493
494 static int record__auxtrace_read_snapshot_all(struct record *rec)
495 {
496         int i;
497         int rc = 0;
498
499         for (i = 0; i < rec->evlist->nr_mmaps; i++) {
500                 struct perf_mmap *map = &rec->evlist->mmap[i];
501
502                 if (!map->auxtrace_mmap.base)
503                         continue;
504
505                 if (record__auxtrace_mmap_read_snapshot(rec, map) != 0) {
506                         rc = -1;
507                         goto out;
508                 }
509         }
510 out:
511         return rc;
512 }
513
514 static void record__read_auxtrace_snapshot(struct record *rec)
515 {
516         pr_debug("Recording AUX area tracing snapshot\n");
517         if (record__auxtrace_read_snapshot_all(rec) < 0) {
518                 trigger_error(&auxtrace_snapshot_trigger);
519         } else {
520                 if (auxtrace_record__snapshot_finish(rec->itr))
521                         trigger_error(&auxtrace_snapshot_trigger);
522                 else
523                         trigger_ready(&auxtrace_snapshot_trigger);
524         }
525 }
526
527 static int record__auxtrace_init(struct record *rec)
528 {
529         int err;
530
531         if (!rec->itr) {
532                 rec->itr = auxtrace_record__init(rec->evlist, &err);
533                 if (err)
534                         return err;
535         }
536
537         err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
538                                               rec->opts.auxtrace_snapshot_opts);
539         if (err)
540                 return err;
541
542         return auxtrace_parse_filters(rec->evlist);
543 }
544
545 #else
546
547 static inline
548 int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
549                                struct perf_mmap *map __maybe_unused)
550 {
551         return 0;
552 }
553
554 static inline
555 void record__read_auxtrace_snapshot(struct record *rec __maybe_unused)
556 {
557 }
558
559 static inline
560 int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
561 {
562         return 0;
563 }
564
565 static int record__auxtrace_init(struct record *rec __maybe_unused)
566 {
567         return 0;
568 }
569
570 #endif
571
572 static int record__mmap_evlist(struct record *rec,
573                                struct perf_evlist *evlist)
574 {
575         struct record_opts *opts = &rec->opts;
576         char msg[512];
577
578         if (opts->affinity != PERF_AFFINITY_SYS)
579                 cpu__setup_cpunode_map();
580
581         if (perf_evlist__mmap_ex(evlist, opts->mmap_pages,
582                                  opts->auxtrace_mmap_pages,
583                                  opts->auxtrace_snapshot_mode,
584                                  opts->nr_cblocks, opts->affinity,
585                                  opts->mmap_flush) < 0) {
586                 if (errno == EPERM) {
587                         pr_err("Permission error mapping pages.\n"
588                                "Consider increasing "
589                                "/proc/sys/kernel/perf_event_mlock_kb,\n"
590                                "or try again with a smaller value of -m/--mmap_pages.\n"
591                                "(current value: %u,%u)\n",
592                                opts->mmap_pages, opts->auxtrace_mmap_pages);
593                         return -errno;
594                 } else {
595                         pr_err("failed to mmap with %d (%s)\n", errno,
596                                 str_error_r(errno, msg, sizeof(msg)));
597                         if (errno)
598                                 return -errno;
599                         else
600                                 return -EINVAL;
601                 }
602         }
603         return 0;
604 }
605
606 static int record__mmap(struct record *rec)
607 {
608         return record__mmap_evlist(rec, rec->evlist);
609 }
610
611 static int record__open(struct record *rec)
612 {
613         char msg[BUFSIZ];
614         struct perf_evsel *pos;
615         struct perf_evlist *evlist = rec->evlist;
616         struct perf_session *session = rec->session;
617         struct record_opts *opts = &rec->opts;
618         int rc = 0;
619
620         /*
621          * For initial_delay we need to add a dummy event so that we can track
622          * PERF_RECORD_MMAP while we wait for the initial delay to enable the
623          * real events, the ones asked by the user.
624          */
625         if (opts->initial_delay) {
626                 if (perf_evlist__add_dummy(evlist))
627                         return -ENOMEM;
628
629                 pos = perf_evlist__first(evlist);
630                 pos->tracking = 0;
631                 pos = perf_evlist__last(evlist);
632                 pos->tracking = 1;
633                 pos->attr.enable_on_exec = 1;
634         }
635
636         perf_evlist__config(evlist, opts, &callchain_param);
637
638         evlist__for_each_entry(evlist, pos) {
639 try_again:
640                 if (perf_evsel__open(pos, pos->cpus, pos->threads) < 0) {
641                         if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
642                                 if (verbose > 0)
643                                         ui__warning("%s\n", msg);
644                                 goto try_again;
645                         }
646                         if ((errno == EINVAL || errno == EBADF) &&
647                             pos->leader != pos &&
648                             pos->weak_group) {
649                                 pos = perf_evlist__reset_weak_group(evlist, pos);
650                                 goto try_again;
651                         }
652                         rc = -errno;
653                         perf_evsel__open_strerror(pos, &opts->target,
654                                                   errno, msg, sizeof(msg));
655                         ui__error("%s\n", msg);
656                         goto out;
657                 }
658
659                 pos->supported = true;
660         }
661
662         if (perf_evlist__apply_filters(evlist, &pos)) {
663                 pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
664                         pos->filter, perf_evsel__name(pos), errno,
665                         str_error_r(errno, msg, sizeof(msg)));
666                 rc = -1;
667                 goto out;
668         }
669
670         rc = record__mmap(rec);
671         if (rc)
672                 goto out;
673
674         session->evlist = evlist;
675         perf_session__set_id_hdr_size(session);
676 out:
677         return rc;
678 }
679
680 static int process_sample_event(struct perf_tool *tool,
681                                 union perf_event *event,
682                                 struct perf_sample *sample,
683                                 struct perf_evsel *evsel,
684                                 struct machine *machine)
685 {
686         struct record *rec = container_of(tool, struct record, tool);
687
688         if (rec->evlist->first_sample_time == 0)
689                 rec->evlist->first_sample_time = sample->time;
690
691         rec->evlist->last_sample_time = sample->time;
692
693         if (rec->buildid_all)
694                 return 0;
695
696         rec->samples++;
697         return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
698 }
699
700 static int process_buildids(struct record *rec)
701 {
702         struct perf_session *session = rec->session;
703
704         if (perf_data__size(&rec->data) == 0)
705                 return 0;
706
707         /*
708          * During this process, it'll load kernel map and replace the
709          * dso->long_name to a real pathname it found.  In this case
710          * we prefer the vmlinux path like
711          *   /lib/modules/3.16.4/build/vmlinux
712          *
713          * rather than build-id path (in debug directory).
714          *   $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
715          */
716         symbol_conf.ignore_vmlinux_buildid = true;
717
718         /*
719          * If --buildid-all is given, it marks all DSOs regardless of hits,
720          * so there is no need to process samples. But if timestamp_boundary
721          * is enabled, it still needs to walk all samples to get the
722          * timestamps of the first/last samples.
723          */
724         if (rec->buildid_all && !rec->timestamp_boundary)
725                 rec->tool.sample = NULL;
726
727         return perf_session__process_events(session);
728 }
729
730 static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
731 {
732         int err;
733         struct perf_tool *tool = data;
734         /*
735          * As for the guest kernel, when processing the record & report
736          * subcommands we arrange the module mmaps prior to the guest kernel
737          * mmap and trigger a dso preload, because by default guest module
738          * symbols are loaded from guest kallsyms instead of
739          * /lib/modules/XXX/XXX. This avoids missing symbols when the first
740          * address is in a module instead of in the guest kernel.
741          */
742         err = perf_event__synthesize_modules(tool, process_synthesized_event,
743                                              machine);
744         if (err < 0)
745                 pr_err("Couldn't record guest kernel [%d]'s reference"
746                        " relocation symbol.\n", machine->pid);
747
748         /*
749          * We use _stext for the guest kernel because the guest kernel's
750          * /proc/kallsyms sometimes has no _text.
751          */
752         err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
753                                                  machine);
754         if (err < 0)
755                 pr_err("Couldn't record guest kernel [%d]'s reference"
756                        " relocation symbol.\n", machine->pid);
757 }
758
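/*
 * A PERF_RECORD_FINISHED_ROUND event is emitted after every pass over the
 * mmap buffers so that the ordered-events code on the report side can flush
 * and sort everything buffered up to that point.
 */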
759 static struct perf_event_header finished_round_event = {
760         .size = sizeof(struct perf_event_header),
761         .type = PERF_RECORD_FINISHED_ROUND,
762 };
763
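/*
 * When --affinity is not "sys", move the recording thread onto the CPUs
 * associated with the mmap buffer that is about to be read, so that the
 * buffer is accessed from a local node/CPU.
 */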
764 static void record__adjust_affinity(struct record *rec, struct perf_mmap *map)
765 {
766         if (rec->opts.affinity != PERF_AFFINITY_SYS &&
767             !CPU_EQUAL(&rec->affinity_mask, &map->affinity_mask)) {
768                 CPU_ZERO(&rec->affinity_mask);
769                 CPU_OR(&rec->affinity_mask, &rec->affinity_mask, &map->affinity_mask);
770                 sched_setaffinity(0, sizeof(rec->affinity_mask), &rec->affinity_mask);
771         }
772 }
773
774 static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evlist,
775                                     bool overwrite, bool synch)
776 {
777         u64 bytes_written = rec->bytes_written;
778         int i;
779         int rc = 0;
780         struct perf_mmap *maps;
781         int trace_fd = rec->data.file.fd;
782         off_t off;
783
784         if (!evlist)
785                 return 0;
786
787         maps = overwrite ? evlist->overwrite_mmap : evlist->mmap;
788         if (!maps)
789                 return 0;
790
791         if (overwrite && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
792                 return 0;
793
794         if (record__aio_enabled(rec))
795                 off = record__aio_get_pos(trace_fd);
796
797         for (i = 0; i < evlist->nr_mmaps; i++) {
798                 u64 flush = 0;
799                 struct perf_mmap *map = &maps[i];
800
801                 if (map->base) {
802                         record__adjust_affinity(rec, map);
803                         if (synch) {
804                                 flush = map->flush;
805                                 map->flush = 1;
806                         }
807                         if (!record__aio_enabled(rec)) {
808                                 if (perf_mmap__push(map, rec, record__pushfn) != 0) {
809                                         if (synch)
810                                                 map->flush = flush;
811                                         rc = -1;
812                                         goto out;
813                                 }
814                         } else {
815                                 int idx;
816                                 /*
817                                  * Call record__aio_sync() to wait until the map->data buffer
818                                  * becomes available after the previous aio write request.
819                                  */
820                                 idx = record__aio_sync(map, false);
821                                 if (perf_mmap__aio_push(map, rec, idx, record__aio_pushfn, &off) != 0) {
822                                         record__aio_set_pos(trace_fd, off);
823                                         if (synch)
824                                                 map->flush = flush;
825                                         rc = -1;
826                                         goto out;
827                                 }
828                         }
829                         if (synch)
830                                 map->flush = flush;
831                 }
832
833                 if (map->auxtrace_mmap.base && !rec->opts.auxtrace_snapshot_mode &&
834                     record__auxtrace_mmap_read(rec, map) != 0) {
835                         rc = -1;
836                         goto out;
837                 }
838         }
839
840         if (record__aio_enabled(rec))
841                 record__aio_set_pos(trace_fd, off);
842
843         /*
844          * Mark the round finished in case we wrote
845          * at least one event.
846          */
847         if (bytes_written != rec->bytes_written)
848                 rc = record__write(rec, NULL, &finished_round_event, sizeof(finished_round_event));
849
850         if (overwrite)
851                 perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
852 out:
853         return rc;
854 }
855
856 static int record__mmap_read_all(struct record *rec, bool synch)
857 {
858         int err;
859
860         err = record__mmap_read_evlist(rec, rec->evlist, false, synch);
861         if (err)
862                 return err;
863
864         return record__mmap_read_evlist(rec, rec->evlist, true, synch);
865 }
866
867 static void record__init_features(struct record *rec)
868 {
869         struct perf_session *session = rec->session;
870         int feat;
871
872         for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
873                 perf_header__set_feat(&session->header, feat);
874
875         if (rec->no_buildid)
876                 perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
877
878         if (!have_tracepoints(&rec->evlist->entries))
879                 perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
880
881         if (!rec->opts.branch_stack)
882                 perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
883
884         if (!rec->opts.full_auxtrace)
885                 perf_header__clear_feat(&session->header, HEADER_AUXTRACE);
886
887         if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns))
888                 perf_header__clear_feat(&session->header, HEADER_CLOCKID);
889
890         perf_header__clear_feat(&session->header, HEADER_DIR_FORMAT);
891
892         perf_header__clear_feat(&session->header, HEADER_STAT);
893 }
894
895 static void
896 record__finish_output(struct record *rec)
897 {
898         struct perf_data *data = &rec->data;
899         int fd = perf_data__fd(data);
900
901         if (data->is_pipe)
902                 return;
903
904         rec->session->header.data_size += rec->bytes_written;
905         data->file.size = lseek(perf_data__fd(data), 0, SEEK_CUR);
906
907         if (!rec->no_buildid) {
908                 process_buildids(rec);
909
910                 if (rec->buildid_all)
911                         dsos__hit_all(rec->session);
912         }
913         perf_session__write_header(rec->session, rec->evlist, fd, true);
914
915         return;
916 }
917
918 static int record__synthesize_workload(struct record *rec, bool tail)
919 {
920         int err;
921         struct thread_map *thread_map;
922
923         if (rec->opts.tail_synthesize != tail)
924                 return 0;
925
926         thread_map = thread_map__new_by_tid(rec->evlist->workload.pid);
927         if (thread_map == NULL)
928                 return -1;
929
930         err = perf_event__synthesize_thread_map(&rec->tool, thread_map,
931                                                  process_synthesized_event,
932                                                  &rec->session->machines.host,
933                                                  rec->opts.sample_address);
934         thread_map__put(thread_map);
935         return err;
936 }
937
938 static int record__synthesize(struct record *rec, bool tail);
939
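/*
 * Finish the current output file and switch to a new, timestamped one.
 * Unless called at exit, counters are reset and tracking events are
 * synthesized again into the new file; when a limit on the number of output
 * files is set, the oldest file in the ring is removed.
 */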
940 static int
941 record__switch_output(struct record *rec, bool at_exit)
942 {
943         struct perf_data *data = &rec->data;
944         int fd, err;
945         char *new_filename;
946
947         /* Same size as "2015122520103046" */
948         char timestamp[] = "InvalidTimestamp";
949
950         record__aio_mmap_read_sync(rec);
951
952         record__synthesize(rec, true);
953         if (target__none(&rec->opts.target))
954                 record__synthesize_workload(rec, true);
955
956         rec->samples = 0;
957         record__finish_output(rec);
958         err = fetch_current_timestamp(timestamp, sizeof(timestamp));
959         if (err) {
960                 pr_err("Failed to get current timestamp\n");
961                 return -EINVAL;
962         }
963
964         fd = perf_data__switch(data, timestamp,
965                                     rec->session->header.data_offset,
966                                     at_exit, &new_filename);
967         if (fd >= 0 && !at_exit) {
968                 rec->bytes_written = 0;
969                 rec->session->header.data_size = 0;
970         }
971
972         if (!quiet)
973                 fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
974                         data->path, timestamp);
975
976         if (rec->switch_output.num_files) {
977                 int n = rec->switch_output.cur_file + 1;
978
979                 if (n >= rec->switch_output.num_files)
980                         n = 0;
981                 rec->switch_output.cur_file = n;
982                 if (rec->switch_output.filenames[n]) {
983                         remove(rec->switch_output.filenames[n]);
984                         free(rec->switch_output.filenames[n]);
985                 }
986                 rec->switch_output.filenames[n] = new_filename;
987         } else {
988                 free(new_filename);
989         }
990
991         /* Output tracking events */
992         if (!at_exit) {
993                 record__synthesize(rec, false);
994
995                 /*
996                  * In 'perf record --switch-output' without -a,
997                  * record__synthesize() in record__switch_output() won't
998                  * generate tracking events because there's no thread_map
999                  * in the evlist, so the newly created perf.data won't
1000                  * contain map and comm information.
1001                  * Create a fake thread_map and call
1002                  * perf_event__synthesize_thread_map() directly for those events.
1003                  */
1004                 if (target__none(&rec->opts.target))
1005                         record__synthesize_workload(rec, false);
1006         }
1007         return fd;
1008 }
1009
1010 static volatile int workload_exec_errno;
1011
1012 /*
1013  * perf_evlist__prepare_workload will send a SIGUSR1
1014  * if the fork fails, since we asked by setting its
1015  * want_signal to true.
1016  */
1017 static void workload_exec_failed_signal(int signo __maybe_unused,
1018                                         siginfo_t *info,
1019                                         void *ucontext __maybe_unused)
1020 {
1021         workload_exec_errno = info->si_value.sival_int;
1022         done = 1;
1023         child_finished = 1;
1024 }
1025
1026 static void snapshot_sig_handler(int sig);
1027 static void alarm_sig_handler(int sig);
1028
1029 int __weak
1030 perf_event__synth_time_conv(const struct perf_event_mmap_page *pc __maybe_unused,
1031                             struct perf_tool *tool __maybe_unused,
1032                             perf_event__handler_t process __maybe_unused,
1033                             struct machine *machine __maybe_unused)
1034 {
1035         return 0;
1036 }
1037
1038 static const struct perf_event_mmap_page *
1039 perf_evlist__pick_pc(struct perf_evlist *evlist)
1040 {
1041         if (evlist) {
1042                 if (evlist->mmap && evlist->mmap[0].base)
1043                         return evlist->mmap[0].base;
1044                 if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].base)
1045                         return evlist->overwrite_mmap[0].base;
1046         }
1047         return NULL;
1048 }
1049
1050 static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
1051 {
1052         const struct perf_event_mmap_page *pc;
1053
1054         pc = perf_evlist__pick_pc(rec->evlist);
1055         if (pc)
1056                 return pc;
1057         return NULL;
1058 }
1059
1060 static int record__synthesize(struct record *rec, bool tail)
1061 {
1062         struct perf_session *session = rec->session;
1063         struct machine *machine = &session->machines.host;
1064         struct perf_data *data = &rec->data;
1065         struct record_opts *opts = &rec->opts;
1066         struct perf_tool *tool = &rec->tool;
1067         int fd = perf_data__fd(data);
1068         int err = 0;
1069
1070         if (rec->opts.tail_synthesize != tail)
1071                 return 0;
1072
1073         if (data->is_pipe) {
1074                 /*
1075                  * We need to synthesize events first, because some
1076                  * features work on top of them (on the report side).
1077                  */
1078                 err = perf_event__synthesize_attrs(tool, rec->evlist,
1079                                                    process_synthesized_event);
1080                 if (err < 0) {
1081                         pr_err("Couldn't synthesize attrs.\n");
1082                         goto out;
1083                 }
1084
1085                 err = perf_event__synthesize_features(tool, session, rec->evlist,
1086                                                       process_synthesized_event);
1087                 if (err < 0) {
1088                         pr_err("Couldn't synthesize features.\n");
1089                         return err;
1090                 }
1091
1092                 if (have_tracepoints(&rec->evlist->entries)) {
1093                         /*
1094                          * FIXME err <= 0 here actually means that
1095                          * there were no tracepoints, so it's not really
1096                          * an error, just that we don't need to
1097                          * synthesize anything.  We really have to
1098                          * return this more properly and also
1099                          * propagate the errors that currently call die()
1100                          */
1101                         err = perf_event__synthesize_tracing_data(tool, fd, rec->evlist,
1102                                                                   process_synthesized_event);
1103                         if (err <= 0) {
1104                                 pr_err("Couldn't record tracing data.\n");
1105                                 goto out;
1106                         }
1107                         rec->bytes_written += err;
1108                 }
1109         }
1110
1111         err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
1112                                           process_synthesized_event, machine);
1113         if (err)
1114                 goto out;
1115
1116         if (rec->opts.full_auxtrace) {
1117                 err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
1118                                         session, process_synthesized_event);
1119                 if (err)
1120                         goto out;
1121         }
1122
1123         if (!perf_evlist__exclude_kernel(rec->evlist)) {
1124                 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
1125                                                          machine);
1126                 WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
1127                                    "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
1128                                    "Check /proc/kallsyms permission or run as root.\n");
1129
1130                 err = perf_event__synthesize_modules(tool, process_synthesized_event,
1131                                                      machine);
1132                 WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
1133                                    "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
1134                                    "Check /proc/modules permission or run as root.\n");
1135         }
1136
1137         if (perf_guest) {
1138                 machines__process_guests(&session->machines,
1139                                          perf_event__synthesize_guest_os, tool);
1140         }
1141
1142         err = perf_event__synthesize_extra_attr(&rec->tool,
1143                                                 rec->evlist,
1144                                                 process_synthesized_event,
1145                                                 data->is_pipe);
1146         if (err)
1147                 goto out;
1148
1149         err = perf_event__synthesize_thread_map2(&rec->tool, rec->evlist->threads,
1150                                                  process_synthesized_event,
1151                                                 NULL);
1152         if (err < 0) {
1153                 pr_err("Couldn't synthesize thread map.\n");
1154                 return err;
1155         }
1156
1157         err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->cpus,
1158                                              process_synthesized_event, NULL);
1159         if (err < 0) {
1160                 pr_err("Couldn't synthesize cpu map.\n");
1161                 return err;
1162         }
1163
1164         err = perf_event__synthesize_bpf_events(session, process_synthesized_event,
1165                                                 machine, opts);
1166         if (err < 0)
1167                 pr_warning("Couldn't synthesize bpf events.\n");
1168
1169         err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
1170                                             process_synthesized_event, opts->sample_address,
1171                                             1);
1172 out:
1173         return err;
1174 }
1175
1176 static int __cmd_record(struct record *rec, int argc, const char **argv)
1177 {
1178         int err;
1179         int status = 0;
1180         unsigned long waking = 0;
1181         const bool forks = argc > 0;
1182         struct perf_tool *tool = &rec->tool;
1183         struct record_opts *opts = &rec->opts;
1184         struct perf_data *data = &rec->data;
1185         struct perf_session *session;
1186         bool disabled = false, draining = false;
1187         struct perf_evlist *sb_evlist = NULL;
1188         int fd;
1189
1190         atexit(record__sig_exit);
1191         signal(SIGCHLD, sig_handler);
1192         signal(SIGINT, sig_handler);
1193         signal(SIGTERM, sig_handler);
1194         signal(SIGSEGV, sigsegv_handler);
1195
1196         if (rec->opts.record_namespaces)
1197                 tool->namespace_events = true;
1198
1199         if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) {
1200                 signal(SIGUSR2, snapshot_sig_handler);
1201                 if (rec->opts.auxtrace_snapshot_mode)
1202                         trigger_on(&auxtrace_snapshot_trigger);
1203                 if (rec->switch_output.enabled)
1204                         trigger_on(&switch_output_trigger);
1205         } else {
1206                 signal(SIGUSR2, SIG_IGN);
1207         }
1208
1209         session = perf_session__new(data, false, tool);
1210         if (session == NULL) {
1211                 pr_err("Perf session creation failed.\n");
1212                 return -1;
1213         }
1214
1215         fd = perf_data__fd(data);
1216         rec->session = session;
1217
1218         record__init_features(rec);
1219
1220         if (rec->opts.use_clockid && rec->opts.clockid_res_ns)
1221                 session->header.env.clockid_res_ns = rec->opts.clockid_res_ns;
1222
1223         if (forks) {
1224                 err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
1225                                                     argv, data->is_pipe,
1226                                                     workload_exec_failed_signal);
1227                 if (err < 0) {
1228                         pr_err("Couldn't run the workload!\n");
1229                         status = err;
1230                         goto out_delete_session;
1231                 }
1232         }
1233
1234         /*
1235          * If we have just a single event and are sending data
1236          * through a pipe, we need to force id allocation,
1237          * because we synthesize the event name through the pipe
1238          * and need the id for that.
1239          */
1240         if (data->is_pipe && rec->evlist->nr_entries == 1)
1241                 rec->opts.sample_id = true;
1242
1243         if (record__open(rec) != 0) {
1244                 err = -1;
1245                 goto out_child;
1246         }
1247
1248         err = bpf__apply_obj_config();
1249         if (err) {
1250                 char errbuf[BUFSIZ];
1251
1252                 bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
1253                 pr_err("ERROR: Apply config to BPF failed: %s\n",
1254                          errbuf);
1255                 goto out_child;
1256         }
1257
1258         /*
1259          * Normally perf_session__new would do this, but it doesn't have the
1260          * evlist.
1261          */
1262         if (rec->tool.ordered_events && !perf_evlist__sample_id_all(rec->evlist)) {
1263                 pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
1264                 rec->tool.ordered_events = false;
1265         }
1266
1267         if (!rec->evlist->nr_groups)
1268                 perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);
1269
1270         if (data->is_pipe) {
1271                 err = perf_header__write_pipe(fd);
1272                 if (err < 0)
1273                         goto out_child;
1274         } else {
1275                 err = perf_session__write_header(session, rec->evlist, fd, false);
1276                 if (err < 0)
1277                         goto out_child;
1278         }
1279
1280         if (!rec->no_buildid
1281             && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
1282                 pr_err("Couldn't generate buildids. "
1283                        "Use --no-buildid to profile anyway.\n");
1284                 err = -1;
1285                 goto out_child;
1286         }
1287
1288         if (!opts->no_bpf_event)
1289                 bpf_event__add_sb_event(&sb_evlist, &session->header.env);
1290
1291         if (perf_evlist__start_sb_thread(sb_evlist, &rec->opts.target)) {
1292                 pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n");
1293                 opts->no_bpf_event = true;
1294         }
1295
1296         err = record__synthesize(rec, false);
1297         if (err < 0)
1298                 goto out_child;
1299
1300         if (rec->realtime_prio) {
1301                 struct sched_param param;
1302
1303                 param.sched_priority = rec->realtime_prio;
1304                 if (sched_setscheduler(0, SCHED_FIFO, &param)) {
1305                         pr_err("Could not set realtime priority.\n");
1306                         err = -1;
1307                         goto out_child;
1308                 }
1309         }
1310
1311         /*
1312          * When perf is starting the traced process, all the events
1313          * (apart from group members) have enable_on_exec=1 set,
1314          * so don't spoil it by prematurely enabling them.
1315          */
1316         if (!target__none(&opts->target) && !opts->initial_delay)
1317                 perf_evlist__enable(rec->evlist);
1318
1319         /*
1320          * Let the child rip
1321          */
1322         if (forks) {
1323                 struct machine *machine = &session->machines.host;
1324                 union perf_event *event;
1325                 pid_t tgid;
1326
1327                 event = malloc(sizeof(event->comm) + machine->id_hdr_size);
1328                 if (event == NULL) {
1329                         err = -ENOMEM;
1330                         goto out_child;
1331                 }
1332
1333                 /*
1334                  * Some H/W events are generated before the COMM event,
1335                  * which is emitted during exec(), so perf script
1336                  * cannot see a correct process name for those events.
1337                  * Synthesize a COMM event to prevent it.
1338                  */
1339                 tgid = perf_event__synthesize_comm(tool, event,
1340                                                    rec->evlist->workload.pid,
1341                                                    process_synthesized_event,
1342                                                    machine);
1343                 free(event);
1344
1345                 if (tgid == -1)
1346                         goto out_child;
1347
1348                 event = malloc(sizeof(event->namespaces) +
1349                                (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
1350                                machine->id_hdr_size);
1351                 if (event == NULL) {
1352                         err = -ENOMEM;
1353                         goto out_child;
1354                 }
1355
1356                 /*
1357                  * Synthesize NAMESPACES event for the command specified.
1358                  */
1359                 perf_event__synthesize_namespaces(tool, event,
1360                                                   rec->evlist->workload.pid,
1361                                                   tgid, process_synthesized_event,
1362                                                   machine);
1363                 free(event);
1364
1365                 perf_evlist__start_workload(rec->evlist);
1366         }
1367
1368         if (opts->initial_delay) {
1369                 usleep(opts->initial_delay * USEC_PER_MSEC);
1370                 perf_evlist__enable(rec->evlist);
1371         }
1372
1373         trigger_ready(&auxtrace_snapshot_trigger);
1374         trigger_ready(&switch_output_trigger);
1375         perf_hooks__invoke_record_start();
1376         for (;;) {
1377                 unsigned long long hits = rec->samples;
1378
1379                 /*
1380                  * rec->evlist->bkw_mmap_state may be BKW_MMAP_EMPTY
1381                  * here: when done == true and hits != rec->samples
1382                  * in the previous round.
1383                  *
1384                  * perf_evlist__toggle_bkw_mmap ensures we never
1385                  * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING.
1386                  */
1387                 if (trigger_is_hit(&switch_output_trigger) || done || draining)
1388                         perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING);
1389
1390                 if (record__mmap_read_all(rec, false) < 0) {
1391                         trigger_error(&auxtrace_snapshot_trigger);
1392                         trigger_error(&switch_output_trigger);
1393                         err = -1;
1394                         goto out_child;
1395                 }
1396
1397                 if (auxtrace_record__snapshot_started) {
1398                         auxtrace_record__snapshot_started = 0;
1399                         if (!trigger_is_error(&auxtrace_snapshot_trigger))
1400                                 record__read_auxtrace_snapshot(rec);
1401                         if (trigger_is_error(&auxtrace_snapshot_trigger)) {
1402                                 pr_err("AUX area tracing snapshot failed\n");
1403                                 err = -1;
1404                                 goto out_child;
1405                         }
1406                 }
1407
1408                 if (trigger_is_hit(&switch_output_trigger)) {
1409                         /*
1410                          * If switch_output_trigger is hit, the data in the
1411                          * overwritable ring buffer should have been collected,
1412                          * so bkw_mmap_state should be set to BKW_MMAP_EMPTY.
1413                          *
1414                          * If SIGUSR2 was raised after or during record__mmap_read_all(),
1415                          * it didn't collect data from the
1416                          * overwritable ring buffer. Read again.
1417                          */
1418                         if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING)
1419                                 continue;
1420                         trigger_ready(&switch_output_trigger);
1421
1422                         /*
1423                          * Re-enable events in the overwrite ring buffer after
1424                          * record__mmap_read_all(): we should have collected
1425                          * data from it.
1426                          */
1427                         perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING);
1428
1429                         if (!quiet)
1430                                 fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
1431                                         waking);
1432                         waking = 0;
1433                         fd = record__switch_output(rec, false);
1434                         if (fd < 0) {
1435                                 pr_err("Failed to switch to new file\n");
1436                                 trigger_error(&switch_output_trigger);
1437                                 err = fd;
1438                                 goto out_child;
1439                         }
1440
1441                         /* re-arm the alarm */
1442                         if (rec->switch_output.time)
1443                                 alarm(rec->switch_output.time);
1444                 }
1445
1446                 if (hits == rec->samples) {
1447                         if (done || draining)
1448                                 break;
1449                         err = perf_evlist__poll(rec->evlist, -1);
1450                         /*
1451                          * Propagate the error only if there is one. Ignore a positive
1452                          * number of returned events and interrupt errors.
1453                          */
1454                         if (err > 0 || (err < 0 && errno == EINTR))
1455                                 err = 0;
1456                         waking++;
1457
1458                         if (perf_evlist__filter_pollfd(rec->evlist, POLLERR | POLLHUP) == 0)
1459                                 draining = true;
1460                 }
1461
1462                 /*
1463                  * When perf is starting the traced process, the events die
1464                  * with the process at the end and we wait for that. Thus there
1465                  * is no need to disable events in this case.
1466                  */
1467                 if (done && !disabled && !target__none(&opts->target)) {
1468                         trigger_off(&auxtrace_snapshot_trigger);
1469                         perf_evlist__disable(rec->evlist);
1470                         disabled = true;
1471                 }
1472         }
1473         trigger_off(&auxtrace_snapshot_trigger);
1474         trigger_off(&switch_output_trigger);
1475
1476         if (forks && workload_exec_errno) {
1477                 char msg[STRERR_BUFSIZE];
1478                 const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
1479                 pr_err("Workload failed: %s\n", emsg);
1480                 err = -1;
1481                 goto out_child;
1482         }
1483
1484         if (!quiet)
1485                 fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);
1486
1487         if (target__none(&rec->opts.target))
1488                 record__synthesize_workload(rec, true);
1489
1490 out_child:
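        /*
         * Final pass: drain what is left in the ring buffers and wait for any
         * in-flight asynchronous (AIO) writes to finish.
         */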
1491         record__mmap_read_all(rec, true);
1492         record__aio_mmap_read_sync(rec);
1493
1494         if (forks) {
1495                 int exit_status;
1496
1497                 if (!child_finished)
1498                         kill(rec->evlist->workload.pid, SIGTERM);
1499
1500                 wait(&exit_status);
1501
1502                 if (err < 0)
1503                         status = err;
1504                 else if (WIFEXITED(exit_status))
1505                         status = WEXITSTATUS(exit_status);
1506                 else if (WIFSIGNALED(exit_status))
1507                         signr = WTERMSIG(exit_status);
1508         } else
1509                 status = err;
1510
1511         record__synthesize(rec, true);
1512         /* this will be recalculated during process_buildids() */
1513         rec->samples = 0;
1514
1515         if (!err) {
1516                 if (!rec->timestamp_filename) {
1517                         record__finish_output(rec);
1518                 } else {
1519                         fd = record__switch_output(rec, true);
1520                         if (fd < 0) {
1521                                 status = fd;
1522                                 goto out_delete_session;
1523                         }
1524                 }
1525         }
1526
1527         perf_hooks__invoke_record_end();
1528
1529         if (!err && !quiet) {
1530                 char samples[128];
1531                 const char *postfix = rec->timestamp_filename ?
1532                                         ".<timestamp>" : "";
1533
1534                 if (rec->samples && !rec->opts.full_auxtrace)
1535                         scnprintf(samples, sizeof(samples),
1536                                   " (%" PRIu64 " samples)", rec->samples);
1537                 else
1538                         samples[0] = '\0';
1539
1540                 fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s ]\n",
1541                         perf_data__size(data) / 1024.0 / 1024.0,
1542                         data->path, postfix, samples);
1543         }
1544
1545 out_delete_session:
1546         perf_session__delete(session);
1547
1548         if (!opts->no_bpf_event)
1549                 perf_evlist__stop_sb_thread(sb_evlist);
1550         return status;
1551 }
1552
1553 static void callchain_debug(struct callchain_param *callchain)
1554 {
1555         static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };
1556
1557         pr_debug("callchain: type %s\n", str[callchain->record_mode]);
1558
1559         if (callchain->record_mode == CALLCHAIN_DWARF)
1560                 pr_debug("callchain: stack dump size %d\n",
1561                          callchain->dump_size);
1562 }
1563
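/*
 * Parse the --call-graph argument: pick the record mode (and, for DWARF, the
 * stack dump size); DWARF mode also turns on data address sampling for the
 * unwinder.
 */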
1564 int record_opts__parse_callchain(struct record_opts *record,
1565                                  struct callchain_param *callchain,
1566                                  const char *arg, bool unset)
1567 {
1568         int ret;
1569         callchain->enabled = !unset;
1570
1571         /* --no-call-graph */
1572         if (unset) {
1573                 callchain->record_mode = CALLCHAIN_NONE;
1574                 pr_debug("callchain: disabled\n");
1575                 return 0;
1576         }
1577
1578         ret = parse_callchain_record_opt(arg, callchain);
1579         if (!ret) {
1580                 /* Enable data address sampling for DWARF unwind. */
1581                 if (callchain->record_mode == CALLCHAIN_DWARF)
1582                         record->sample_address = true;
1583                 callchain_debug(callchain);
1584         }
1585
1586         return ret;
1587 }
1588
1589 int record_parse_callchain_opt(const struct option *opt,
1590                                const char *arg,
1591                                int unset)
1592 {
1593         return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
1594 }
1595
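/*
 * Handler for a bare -g: enable call graph recording and fall back to frame
 * pointer mode if no record mode was chosen via --call-graph.
 */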
1596 int record_callchain_opt(const struct option *opt,
1597                          const char *arg __maybe_unused,
1598                          int unset __maybe_unused)
1599 {
1600         struct callchain_param *callchain = opt->value;
1601
1602         callchain->enabled = true;
1603
1604         if (callchain->record_mode == CALLCHAIN_NONE)
1605                 callchain->record_mode = CALLCHAIN_FP;
1606
1607         callchain_debug(callchain);
1608         return 0;
1609 }
1610
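/*
 * perf_config() callback: apply the 'record.*' keys from the config file
 * (record.build-id, record.call-graph and, with AIO support, record.aio).
 */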
1611 static int perf_record_config(const char *var, const char *value, void *cb)
1612 {
1613         struct record *rec = cb;
1614
1615         if (!strcmp(var, "record.build-id")) {
1616                 if (!strcmp(value, "cache"))
1617                         rec->no_buildid_cache = false;
1618                 else if (!strcmp(value, "no-cache"))
1619                         rec->no_buildid_cache = true;
1620                 else if (!strcmp(value, "skip"))
1621                         rec->no_buildid = true;
1622                 else
1623                         return -1;
1624                 return 0;
1625         }
1626         if (!strcmp(var, "record.call-graph")) {
1627                 var = "call-graph.record-mode";
1628                 return perf_default_config(var, value, cb);
1629         }
1630 #ifdef HAVE_AIO_SUPPORT
1631         if (!strcmp(var, "record.aio")) {
1632                 rec->opts.nr_cblocks = strtol(value, NULL, 0);
1633                 if (!rec->opts.nr_cblocks)
1634                         rec->opts.nr_cblocks = nr_cblocks_default;
1635         }
1636 #endif
1637
1638         return 0;
1639 }
1640
1641 struct clockid_map {
1642         const char *name;
1643         int clockid;
1644 };
1645
1646 #define CLOCKID_MAP(n, c)       \
1647         { .name = n, .clockid = (c), }
1648
1649 #define CLOCKID_END     { .name = NULL, }
1650
1651
1652 /*
1653  * Add the missing ones; we need to build on many distros...
1654  */
1655 #ifndef CLOCK_MONOTONIC_RAW
1656 #define CLOCK_MONOTONIC_RAW 4
1657 #endif
1658 #ifndef CLOCK_BOOTTIME
1659 #define CLOCK_BOOTTIME 7
1660 #endif
1661 #ifndef CLOCK_TAI
1662 #define CLOCK_TAI 11
1663 #endif
1664
1665 static const struct clockid_map clockids[] = {
1666         /* available for all events, NMI safe */
1667         CLOCKID_MAP("monotonic", CLOCK_MONOTONIC),
1668         CLOCKID_MAP("monotonic_raw", CLOCK_MONOTONIC_RAW),
1669
1670         /* available for some events */
1671         CLOCKID_MAP("realtime", CLOCK_REALTIME),
1672         CLOCKID_MAP("boottime", CLOCK_BOOTTIME),
1673         CLOCKID_MAP("tai", CLOCK_TAI),
1674
1675         /* available for the lazy */
1676         CLOCKID_MAP("mono", CLOCK_MONOTONIC),
1677         CLOCKID_MAP("raw", CLOCK_MONOTONIC_RAW),
1678         CLOCKID_MAP("real", CLOCK_REALTIME),
1679         CLOCKID_MAP("boot", CLOCK_BOOTTIME),
1680
1681         CLOCKID_END,
1682 };
1683
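/*
 * Store the resolution of the given clock, in nanoseconds, in *res_ns; if
 * clock_getres() fails, warn and leave it at 0.
 */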
1684 static int get_clockid_res(clockid_t clk_id, u64 *res_ns)
1685 {
1686         struct timespec res;
1687
1688         *res_ns = 0;
1689         if (!clock_getres(clk_id, &res))
1690                 *res_ns = res.tv_nsec + res.tv_sec * NSEC_PER_SEC;
1691         else
1692                 pr_warning("WARNING: Failed to determine specified clock resolution.\n");
1693
1694         return 0;
1695 }
1696
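/*
 * -k/--clockid handler: accept either a numeric clockid or one of the names
 * in clockids[] (an optional "CLOCK_" prefix is allowed) and record the
 * clock's resolution.
 */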
1697 static int parse_clockid(const struct option *opt, const char *str, int unset)
1698 {
1699         struct record_opts *opts = (struct record_opts *)opt->value;
1700         const struct clockid_map *cm;
1701         const char *ostr = str;
1702
1703         if (unset) {
1704                 opts->use_clockid = 0;
1705                 return 0;
1706         }
1707
1708         /* no arg passed */
1709         if (!str)
1710                 return 0;
1711
1712         /* no setting it twice */
1713         if (opts->use_clockid)
1714                 return -1;
1715
1716         opts->use_clockid = true;
1717
1718         /* if it's a number, we're done */
1719         if (sscanf(str, "%d", &opts->clockid) == 1)
1720                 return get_clockid_res(opts->clockid, &opts->clockid_res_ns);
1721
1722         /* allow a "CLOCK_" prefix to the name */
1723         if (!strncasecmp(str, "CLOCK_", 6))
1724                 str += 6;
1725
1726         for (cm = clockids; cm->name; cm++) {
1727                 if (!strcasecmp(str, cm->name)) {
1728                         opts->clockid = cm->clockid;
1729                         return get_clockid_res(opts->clockid,
1730                                                &opts->clockid_res_ns);
1731                 }
1732         }
1733
1734         opts->use_clockid = false;
1735         ui__warning("unknown clockid %s, check man page\n", ostr);
1736         return -1;
1737 }
1738
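/*
 * --affinity handler: switch the trace reading thread from the default
 * PERF_AFFINITY_SYS mode to per-node or per-cpu affinity.
 */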
1739 static int record__parse_affinity(const struct option *opt, const char *str, int unset)
1740 {
1741         struct record_opts *opts = (struct record_opts *)opt->value;
1742
1743         if (unset || !str)
1744                 return 0;
1745
1746         if (!strcasecmp(str, "node"))
1747                 opts->affinity = PERF_AFFINITY_NODE;
1748         else if (!strcasecmp(str, "cpu"))
1749                 opts->affinity = PERF_AFFINITY_CPU;
1750
1751         return 0;
1752 }
1753
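/*
 * -m/--mmap-pages handler: parse "pages[,aux_pages]", where the optional
 * second value sizes the AUX area tracing mmap.
 */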
1754 static int record__parse_mmap_pages(const struct option *opt,
1755                                     const char *str,
1756                                     int unset __maybe_unused)
1757 {
1758         struct record_opts *opts = opt->value;
1759         char *s, *p;
1760         unsigned int mmap_pages;
1761         int ret;
1762
1763         if (!str)
1764                 return -EINVAL;
1765
1766         s = strdup(str);
1767         if (!s)
1768                 return -ENOMEM;
1769
1770         p = strchr(s, ',');
1771         if (p)
1772                 *p = '\0';
1773
1774         if (*s) {
1775                 ret = __perf_evlist__parse_mmap_pages(&mmap_pages, s);
1776                 if (ret)
1777                         goto out_free;
1778                 opts->mmap_pages = mmap_pages;
1779         }
1780
1781         if (!p) {
1782                 ret = 0;
1783                 goto out_free;
1784         }
1785
1786         ret = __perf_evlist__parse_mmap_pages(&mmap_pages, p + 1);
1787         if (ret)
1788                 goto out_free;
1789
1790         opts->auxtrace_mmap_pages = mmap_pages;
1791
1792 out_free:
1793         free(s);
1794         return ret;
1795 }
1796
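/*
 * Warn when the --switch-output size threshold is smaller than half of the
 * mmap buffer size: the resulting perf.data files are then likely to end up
 * bigger than requested.
 */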
1797 static void switch_output_size_warn(struct record *rec)
1798 {
1799         u64 wakeup_size = perf_evlist__mmap_size(rec->opts.mmap_pages);
1800         struct switch_output *s = &rec->switch_output;
1801
1802         wakeup_size /= 2;
1803
1804         if (s->size < wakeup_size) {
1805                 char buf[100];
1806
1807                 unit_number__scnprintf(buf, sizeof(buf), wakeup_size);
1808                 pr_warning("WARNING: switch-output data size lower than "
1809                            "wakeup kernel buffer size (%s), "
1810                            "expect bigger perf.data sizes\n", buf);
1811         }
1812 }
1813
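/*
 * --switch-output setup: the argument is either the literal "signal", a size
 * with a B/K/M/G suffix or a time with an s/m/h/d suffix; any of them also
 * turns on timestamped output file names.
 */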
1814 static int switch_output_setup(struct record *rec)
1815 {
1816         struct switch_output *s = &rec->switch_output;
1817         static struct parse_tag tags_size[] = {
1818                 { .tag  = 'B', .mult = 1       },
1819                 { .tag  = 'K', .mult = 1 << 10 },
1820                 { .tag  = 'M', .mult = 1 << 20 },
1821                 { .tag  = 'G', .mult = 1 << 30 },
1822                 { .tag  = 0 },
1823         };
1824         static struct parse_tag tags_time[] = {
1825                 { .tag  = 's', .mult = 1        },
1826                 { .tag  = 'm', .mult = 60       },
1827                 { .tag  = 'h', .mult = 60*60    },
1828                 { .tag  = 'd', .mult = 60*60*24 },
1829                 { .tag  = 0 },
1830         };
1831         unsigned long val;
1832
1833         if (!s->set)
1834                 return 0;
1835
1836         if (!strcmp(s->str, "signal")) {
1837                 s->signal = true;
1838                 pr_debug("switch-output with SIGUSR2 signal\n");
1839                 goto enabled;
1840         }
1841
1842         val = parse_tag_value(s->str, tags_size);
1843         if (val != (unsigned long) -1) {
1844                 s->size = val;
1845                 pr_debug("switch-output with %s size threshold\n", s->str);
1846                 goto enabled;
1847         }
1848
1849         val = parse_tag_value(s->str, tags_time);
1850         if (val != (unsigned long) -1) {
1851                 s->time = val;
1852                 pr_debug("switch-output with %s time threshold (%lu seconds)\n",
1853                          s->str, s->time);
1854                 goto enabled;
1855         }
1856
1857         return -1;
1858
1859 enabled:
1860         rec->timestamp_filename = true;
1861         s->enabled              = true;
1862
1863         if (s->size && !rec->opts.no_buffering)
1864                 switch_output_size_warn(rec);
1865
1866         return 0;
1867 }
1868
1869 static const char * const __record_usage[] = {
1870         "perf record [<options>] [<command>]",
1871         "perf record [<options>] -- <command> [<options>]",
1872         NULL
1873 };
1874 const char * const *record_usage = __record_usage;
1875
1876 /*
1877  * XXX Ideally this would be local to cmd_record() and passed to a record__new(),
1878  * because we need access to it in record__exit(), which is called after
1879  * cmd_record() exits, but since record_options needs to be accessible to
1880  * builtin-script, leave it here.
1881  *
1882  * At least we don't touch it in all the other functions here directly.
1883  *
1884  * Just say no to tons of global variables, sigh.
1885  */
1886 static struct record record = {
1887         .opts = {
1888                 .sample_time         = true,
1889                 .mmap_pages          = UINT_MAX,
1890                 .user_freq           = UINT_MAX,
1891                 .user_interval       = ULLONG_MAX,
1892                 .freq                = 4000,
1893                 .target              = {
1894                         .uses_mmap   = true,
1895                         .default_per_cpu = true,
1896                 },
1897                 .mmap_flush          = MMAP_FLUSH_DEFAULT,
1898         },
1899         .tool = {
1900                 .sample         = process_sample_event,
1901                 .fork           = perf_event__process_fork,
1902                 .exit           = perf_event__process_exit,
1903                 .comm           = perf_event__process_comm,
1904                 .namespaces     = perf_event__process_namespaces,
1905                 .mmap           = perf_event__process_mmap,
1906                 .mmap2          = perf_event__process_mmap2,
1907                 .ordered_events = true,
1908         },
1909 };
1910
1911 const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
1912         "\n\t\t\t\tDefault: fp";
1913
1914 static bool dry_run;
1915
1916 /*
1917  * XXX This will stay a global variable until we fix builtin-script.c to stop
1918  * messing with it and switch to using the library functions in perf_evlist
1919  * that came from builtin-record.c, i.e. use record_opts,
1920  * perf_evlist__prepare_workload, etc. instead of fork+exec'ing 'perf record'
1921  * using pipes, etc.
1922  */
1923 static struct option __record_options[] = {
1924         OPT_CALLBACK('e', "event", &record.evlist, "event",
1925                      "event selector. use 'perf list' to list available events",
1926                      parse_events_option),
1927         OPT_CALLBACK(0, "filter", &record.evlist, "filter",
1928                      "event filter", parse_filter),
1929         OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
1930                            NULL, "don't record events from perf itself",
1931                            exclude_perf),
1932         OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
1933                     "record events on existing process id"),
1934         OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
1935                     "record events on existing thread id"),
1936         OPT_INTEGER('r', "realtime", &record.realtime_prio,
1937                     "collect data with this RT SCHED_FIFO priority"),
1938         OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
1939                     "collect data without buffering"),
1940         OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
1941                     "collect raw sample records from all opened counters"),
1942         OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
1943                             "system-wide collection from all CPUs"),
1944         OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
1945                     "list of cpus to monitor"),
1946         OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
1947         OPT_STRING('o', "output", &record.data.path, "file",
1948                     "output file name"),
1949         OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
1950                         &record.opts.no_inherit_set,
1951                         "child tasks do not inherit counters"),
1952         OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize,
1953                     "synthesize non-sample events at the end of output"),
1954         OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"),
1955         OPT_BOOLEAN(0, "no-bpf-event", &record.opts.no_bpf_event, "do not record bpf events"),
1956         OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq,
1957                     "Fail if the specified frequency can't be used"),
1958         OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'",
1959                      "profile at this frequency",
1960                       record__parse_freq),
1961         OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
1962                      "number of mmap data pages and AUX area tracing mmap pages",
1963                      record__parse_mmap_pages),
1964         OPT_CALLBACK(0, "mmap-flush", &record.opts, "number",
1965                      "Minimum number of bytes to extract from mmap data pages (default: 1)",
1966                      record__mmap_flush_parse),
1967         OPT_BOOLEAN(0, "group", &record.opts.group,
1968                     "put the counters into a counter group"),
1969         OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
1970                            NULL, "enables call-graph recording" ,
1971                            &record_callchain_opt),
1972         OPT_CALLBACK(0, "call-graph", &record.opts,
1973                      "record_mode[,record_size]", record_callchain_help,
1974                      &record_parse_callchain_opt),
1975         OPT_INCR('v', "verbose", &verbose,
1976                     "be more verbose (show counter open errors, etc)"),
1977         OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
1978         OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
1979                     "per thread counts"),
1980         OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
1981         OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr,
1982                     "Record the sample physical addresses"),
1983         OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"),
1984         OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
1985                         &record.opts.sample_time_set,
1986                         "Record the sample timestamps"),
1987         OPT_BOOLEAN_SET('P', "period", &record.opts.period, &record.opts.period_set,
1988                         "Record the sample period"),
1989         OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
1990                     "don't sample"),
1991         OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache,
1992                         &record.no_buildid_cache_set,
1993                         "do not update the buildid cache"),
1994         OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid,
1995                         &record.no_buildid_set,
1996                         "do not collect buildids in perf.data"),
1997         OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
1998                      "monitor event in cgroup name only",
1999                      parse_cgroups),
2000         OPT_UINTEGER('D', "delay", &record.opts.initial_delay,
2001                   "ms to wait before starting measurement after program start"),
2002         OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
2003                    "user to profile"),
2004
2005         OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
2006                      "branch any", "sample any taken branches",
2007                      parse_branch_stack),
2008
2009         OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
2010                      "branch filter mask", "branch stack filter modes",
2011                      parse_branch_stack),
2012         OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
2013                     "sample by weight (on special events only)"),
2014         OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
2015                     "sample transaction flags (special events only)"),
2016         OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
2017                     "use per-thread mmaps"),
2018         OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
2019                     "sample selected machine registers on interrupt,"
2020                     " use -I ? to list register names", parse_regs),
2021         OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register",
2022                     "sample selected machine registers in user space,"
2023                     " use '--user-regs=?' to list register names", parse_regs),
2024         OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
2025                     "Record running/enabled time of read (:S) events"),
2026         OPT_CALLBACK('k', "clockid", &record.opts,
2027         "clockid", "clockid to use for events, see clock_gettime()",
2028         parse_clockid),
2029         OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
2030                           "opts", "AUX area tracing Snapshot Mode", ""),
2031         OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout,
2032                         "per thread proc mmap processing timeout in ms"),
2033         OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces,
2034                     "Record namespaces events"),
2035         OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events,
2036                     "Record context switch events"),
2037         OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
2038                          "Configure all used events to run in kernel space.",
2039                          PARSE_OPT_EXCLUSIVE),
2040         OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user,
2041                          "Configure all used events to run in user space.",
2042                          PARSE_OPT_EXCLUSIVE),
2043         OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path",
2044                    "clang binary to use for compiling BPF scriptlets"),
2045         OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options",
2046                    "options passed to clang when compiling BPF scriptlets"),
2047         OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
2048                    "file", "vmlinux pathname"),
2049         OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
2050                     "Record build-id of all DSOs regardless of hits"),
2051         OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
2052                     "append timestamp to output filename"),
2053         OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary,
2054                     "Record timestamp boundary (time of first/last samples)"),
2055         OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str,
2056                           &record.switch_output.set, "signal or size[BKMG] or time[smhd]",
2057                           "Switch output when receiving SIGUSR2 (signal) or cross a size or time threshold",
2058                           "signal"),
2059         OPT_INTEGER(0, "switch-max-files", &record.switch_output.num_files,
2060                    "Limit number of switch output generated files"),
2061         OPT_BOOLEAN(0, "dry-run", &dry_run,
2062                     "Parse options then exit"),
2063 #ifdef HAVE_AIO_SUPPORT
2064         OPT_CALLBACK_OPTARG(0, "aio", &record.opts,
2065                      &nr_cblocks_default, "n", "Use <n> control blocks in asynchronous trace writing mode (default: 1, max: 4)",
2066                      record__aio_parse),
2067 #endif
2068         OPT_CALLBACK(0, "affinity", &record.opts, "node|cpu",
2069                      "Set affinity mask of trace reading thread to NUMA node cpu mask or cpu of processed mmap buffer",
2070                      record__parse_affinity),
2071         OPT_END()
2072 };
2073
2074 struct option *record_options = __record_options;
2075
2076 int cmd_record(int argc, const char **argv)
2077 {
2078         int err;
2079         struct record *rec = &record;
2080         char errbuf[BUFSIZ];
2081
2082         setlocale(LC_ALL, "");
2083
2084 #ifndef HAVE_LIBBPF_SUPPORT
2085 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c)
2086         set_nobuild('\0', "clang-path", true);
2087         set_nobuild('\0', "clang-opt", true);
2088 # undef set_nobuild
2089 #endif
2090
2091 #ifndef HAVE_BPF_PROLOGUE
2092 # if !defined (HAVE_DWARF_SUPPORT)
2093 #  define REASON  "NO_DWARF=1"
2094 # elif !defined (HAVE_LIBBPF_SUPPORT)
2095 #  define REASON  "NO_LIBBPF=1"
2096 # else
2097 #  define REASON  "this architecture doesn't support BPF prologue"
2098 # endif
2099 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c)
2100         set_nobuild('\0', "vmlinux", true);
2101 # undef set_nobuild
2102 # undef REASON
2103 #endif
2104
2105         CPU_ZERO(&rec->affinity_mask);
2106         rec->opts.affinity = PERF_AFFINITY_SYS;
2107
2108         rec->evlist = perf_evlist__new();
2109         if (rec->evlist == NULL)
2110                 return -ENOMEM;
2111
2112         err = perf_config(perf_record_config, rec);
2113         if (err)
2114                 return err;
2115
2116         argc = parse_options(argc, argv, record_options, record_usage,
2117                             PARSE_OPT_STOP_AT_NON_OPTION);
2118         if (quiet)
2119                 perf_quiet_option();
2120
2121         /* Make system wide (-a) the default target. */
2122         if (!argc && target__none(&rec->opts.target))
2123                 rec->opts.target.system_wide = true;
2124
2125         if (nr_cgroups && !rec->opts.target.system_wide) {
2126                 usage_with_options_msg(record_usage, record_options,
2127                         "cgroup monitoring only available in system-wide mode");
2128
2129         }
2130         if (rec->opts.record_switch_events &&
2131             !perf_can_record_switch_events()) {
2132                 ui__error("kernel does not support recording context switch events\n");
2133                 parse_options_usage(record_usage, record_options, "switch-events", 0);
2134                 return -EINVAL;
2135         }
2136
2137         if (switch_output_setup(rec)) {
2138                 parse_options_usage(record_usage, record_options, "switch-output", 0);
2139                 return -EINVAL;
2140         }
2141
2142         if (rec->switch_output.time) {
2143                 signal(SIGALRM, alarm_sig_handler);
2144                 alarm(rec->switch_output.time);
2145         }
2146
2147         if (rec->switch_output.num_files) {
2148                 rec->switch_output.filenames = calloc(rec->switch_output.num_files,
2149                                                       sizeof(char *));
2150                 if (!rec->switch_output.filenames)
2151                         return -EINVAL;
2152         }
2153
2154         /*
2155          * Allow aliases to facilitate the lookup of symbols for address
2156          * filters. Refer to auxtrace_parse_filters().
2157          */
2158         symbol_conf.allow_aliases = true;
2159
2160         symbol__init(NULL);
2161
2162         err = record__auxtrace_init(rec);
2163         if (err)
2164                 goto out;
2165
2166         if (dry_run)
2167                 goto out;
2168
2169         err = bpf__setup_stdout(rec->evlist);
2170         if (err) {
2171                 bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf));
2172                 pr_err("ERROR: Setup BPF stdout failed: %s\n",
2173                          errbuf);
2174                 goto out;
2175         }
2176
2177         err = -ENOMEM;
2178
2179         if (symbol_conf.kptr_restrict && !perf_evlist__exclude_kernel(rec->evlist))
2180                 pr_warning(
2181 "WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
2182 "check /proc/sys/kernel/kptr_restrict.\n\n"
2183 "Samples in kernel functions may not be resolved if a suitable vmlinux\n"
2184 "file is not found in the buildid cache or in the vmlinux path.\n\n"
2185 "Samples in kernel modules won't be resolved at all.\n\n"
2186 "If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
2187 "even with a suitable vmlinux or kallsyms file.\n\n");
2188
2189         if (rec->no_buildid_cache || rec->no_buildid) {
2190                 disable_buildid_cache();
2191         } else if (rec->switch_output.enabled) {
2192                 /*
2193                  * In 'perf record --switch-output', disable buildid
2194                  * generation by default to reduce data file switching
2195                  * overhead. Still generate buildids if they are required
2196                  * explicitly using
2197                  *
2198                  *  perf record --switch-output --no-no-buildid \
2199                  *              --no-no-buildid-cache
2200                  *
2201                  * The following code is equivalent to:
2202                  *
2203                  * if ((rec->no_buildid || !rec->no_buildid_set) &&
2204                  *     (rec->no_buildid_cache || !rec->no_buildid_cache_set))
2205                  *         disable_buildid_cache();
2206                  */
2207                 bool disable = true;
2208
2209                 if (rec->no_buildid_set && !rec->no_buildid)
2210                         disable = false;
2211                 if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
2212                         disable = false;
2213                 if (disable) {
2214                         rec->no_buildid = true;
2215                         rec->no_buildid_cache = true;
2216                         disable_buildid_cache();
2217                 }
2218         }
2219
2220         if (record.opts.overwrite)
2221                 record.opts.tail_synthesize = true;
2222
2223         if (rec->evlist->nr_entries == 0 &&
2224             __perf_evlist__add_default(rec->evlist, !record.opts.no_samples) < 0) {
2225                 pr_err("Not enough memory for event selector list\n");
2226                 goto out;
2227         }
2228
2229         if (rec->opts.target.tid && !rec->opts.no_inherit_set)
2230                 rec->opts.no_inherit = true;
2231
2232         err = target__validate(&rec->opts.target);
2233         if (err) {
2234                 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
2235                 ui__warning("%s\n", errbuf);
2236         }
2237
2238         err = target__parse_uid(&rec->opts.target);
2239         if (err) {
2240                 int saved_errno = errno;
2241
2242                 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
2243                 ui__error("%s", errbuf);
2244
2245                 err = -saved_errno;
2246                 goto out;
2247         }
2248
2249         /* Enable ignoring missing threads when -u/-p option is defined. */
2250         rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX || rec->opts.target.pid;
2251
2252         err = -ENOMEM;
2253         if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
2254                 usage_with_options(record_usage, record_options);
2255
2256         err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
2257         if (err)
2258                 goto out;
2259
2260         /*
2261          * We take all buildids when the file contains AUX area
2262          * tracing data, because we do not decode the trace - that
2263          * would take too long.
2264          */
2265         if (rec->opts.full_auxtrace)
2266                 rec->buildid_all = true;
2267
2268         if (record_opts__config(&rec->opts)) {
2269                 err = -EINVAL;
2270                 goto out;
2271         }
2272
2273         if (rec->opts.nr_cblocks > nr_cblocks_max)
2274                 rec->opts.nr_cblocks = nr_cblocks_max;
2275         if (verbose > 0)
2276                 pr_info("nr_cblocks: %d\n", rec->opts.nr_cblocks);
2277
2278         pr_debug("affinity: %s\n", affinity_tags[rec->opts.affinity]);
2279         pr_debug("mmap flush: %d\n", rec->opts.mmap_flush);
2280
2281         err = __cmd_record(&record, argc, argv);
2282 out:
2283         perf_evlist__delete(rec->evlist);
2284         symbol__exit();
2285         auxtrace_record__free(rec->itr);
2286         return err;
2287 }
2288
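/*
 * SIGUSR2 handler: request an AUX area tracing snapshot and/or an output file
 * switch, depending on which triggers are currently armed.
 */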
2289 static void snapshot_sig_handler(int sig __maybe_unused)
2290 {
2291         struct record *rec = &record;
2292
2293         if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
2294                 trigger_hit(&auxtrace_snapshot_trigger);
2295                 auxtrace_record__snapshot_started = 1;
2296                 if (auxtrace_record__snapshot_start(record.itr))
2297                         trigger_error(&auxtrace_snapshot_trigger);
2298         }
2299
2300         if (switch_output_signal(rec))
2301                 trigger_hit(&switch_output_trigger);
2302 }
2303
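/*
 * SIGALRM handler armed by --switch-output=<time>: request an output file
 * switch when the time threshold expires.
 */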
2304 static void alarm_sig_handler(int sig __maybe_unused)
2305 {
2306         struct record *rec = &record;
2307
2308         if (switch_output_time(rec))
2309                 trigger_hit(&switch_output_trigger);
2310 }