1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * builtin-record.c
4  *
5  * Builtin record command: Record the profile of a workload
6  * (or a CPU, or a PID) into the perf.data output file - for
7  * later analysis via perf report.
8  */
9 #include "builtin.h"
10
11 #include "util/build-id.h"
12 #include <subcmd/parse-options.h>
13 #include "util/parse-events.h"
14 #include "util/config.h"
15
16 #include "util/callchain.h"
17 #include "util/cgroup.h"
18 #include "util/header.h"
19 #include "util/event.h"
20 #include "util/evlist.h"
21 #include "util/evsel.h"
22 #include "util/debug.h"
23 #include "util/mmap.h"
24 #include "util/target.h"
25 #include "util/session.h"
26 #include "util/tool.h"
27 #include "util/symbol.h"
28 #include "util/record.h"
29 #include "util/cpumap.h"
30 #include "util/thread_map.h"
31 #include "util/data.h"
32 #include "util/perf_regs.h"
33 #include "util/auxtrace.h"
34 #include "util/tsc.h"
35 #include "util/parse-branch-options.h"
36 #include "util/parse-regs-options.h"
37 #include "util/perf_api_probe.h"
38 #include "util/llvm-utils.h"
39 #include "util/bpf-loader.h"
40 #include "util/trigger.h"
41 #include "util/perf-hooks.h"
42 #include "util/cpu-set-sched.h"
43 #include "util/synthetic-events.h"
44 #include "util/time-utils.h"
45 #include "util/units.h"
46 #include "util/bpf-event.h"
47 #include "util/util.h"
48 #include "asm/bug.h"
49 #include "perf.h"
50
51 #include <errno.h>
52 #include <inttypes.h>
53 #include <locale.h>
54 #include <poll.h>
55 #include <pthread.h>
56 #include <unistd.h>
57 #include <sched.h>
58 #include <signal.h>
59 #ifdef HAVE_EVENTFD_SUPPORT
60 #include <sys/eventfd.h>
61 #endif
62 #include <sys/mman.h>
63 #include <sys/wait.h>
64 #include <sys/types.h>
65 #include <sys/stat.h>
66 #include <fcntl.h>
67 #include <linux/err.h>
68 #include <linux/string.h>
69 #include <linux/time64.h>
70 #include <linux/zalloc.h>
71 #include <linux/bitmap.h>
72
73 struct switch_output {
74         bool             enabled;
75         bool             signal;
76         unsigned long    size;
77         unsigned long    time;
78         const char      *str;
79         bool             set;
80         char             **filenames;
81         int              num_files;
82         int              cur_file;
83 };
84
85 struct record {
86         struct perf_tool        tool;
87         struct record_opts      opts;
88         u64                     bytes_written;
89         struct perf_data        data;
90         struct auxtrace_record  *itr;
91         struct evlist   *evlist;
92         struct perf_session     *session;
93         struct evlist           *sb_evlist;
94         pthread_t               thread_id;
95         int                     realtime_prio;
96         bool                    switch_output_event_set;
97         bool                    no_buildid;
98         bool                    no_buildid_set;
99         bool                    no_buildid_cache;
100         bool                    no_buildid_cache_set;
101         bool                    buildid_all;
102         bool                    timestamp_filename;
103         bool                    timestamp_boundary;
104         struct switch_output    switch_output;
105         unsigned long long      samples;
106         struct mmap_cpu_mask    affinity_mask;
107         unsigned long           output_max_size;        /* = 0: unlimited */
108 };
109
110 static volatile int done;
111
112 static volatile int auxtrace_record__snapshot_started;
113 static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
114 static DEFINE_TRIGGER(switch_output_trigger);
115
116 static const char *affinity_tags[PERF_AFFINITY_MAX] = {
117         "SYS", "NODE", "CPU"
118 };
119
120 static bool switch_output_signal(struct record *rec)
121 {
122         return rec->switch_output.signal &&
123                trigger_is_ready(&switch_output_trigger);
124 }
125
126 static bool switch_output_size(struct record *rec)
127 {
128         return rec->switch_output.size &&
129                trigger_is_ready(&switch_output_trigger) &&
130                (rec->bytes_written >= rec->switch_output.size);
131 }
132
133 static bool switch_output_time(struct record *rec)
134 {
135         return rec->switch_output.time &&
136                trigger_is_ready(&switch_output_trigger);
137 }
138
139 static bool record__output_max_size_exceeded(struct record *rec)
140 {
141         return rec->output_max_size &&
142                (rec->bytes_written >= rec->output_max_size);
143 }
144
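/*
 * Write 'size' bytes from 'bf' to the perf.data file and account them in
 * rec->bytes_written.  Stops the session once the configured output size
 * limit (rec->output_max_size) is exceeded and fires the switch-output
 * trigger when the switch-output size threshold is reached.
 */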
145 static int record__write(struct record *rec, struct mmap *map __maybe_unused,
146                          void *bf, size_t size)
147 {
148         struct perf_data_file *file = &rec->session->data->file;
149
150         if (perf_data_file__write(file, bf, size) < 0) {
151                 pr_err("failed to write perf data, error: %m\n");
152                 return -1;
153         }
154
155         rec->bytes_written += size;
156
157         if (record__output_max_size_exceeded(rec) && !done) {
158                 fprintf(stderr, "[ perf record: perf size limit reached (%" PRIu64 " KB),"
159                                 " stopping session ]\n",
160                                 rec->bytes_written >> 10);
161                 done = 1;
162         }
163
164         if (switch_output_size(rec))
165                 trigger_hit(&switch_output_trigger);
166
167         return 0;
168 }
169
170 static int record__aio_enabled(struct record *rec);
171 static int record__comp_enabled(struct record *rec);
172 static size_t zstd_compress(struct perf_session *session, void *dst, size_t dst_size,
173                             void *src, size_t src_size);
174
175 #ifdef HAVE_AIO_SUPPORT
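/*
 * Queue an asynchronous write of 'size' bytes at offset 'off' of the trace
 * file, retrying while aio_write() fails with EAGAIN.  On any other error
 * the control block is invalidated (aio_fildes = -1).
 */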
176 static int record__aio_write(struct aiocb *cblock, int trace_fd,
177                 void *buf, size_t size, off_t off)
178 {
179         int rc;
180
181         cblock->aio_fildes = trace_fd;
182         cblock->aio_buf    = buf;
183         cblock->aio_nbytes = size;
184         cblock->aio_offset = off;
185         cblock->aio_sigevent.sigev_notify = SIGEV_NONE;
186
187         do {
188                 rc = aio_write(cblock);
189                 if (rc == 0) {
190                         break;
191                 } else if (errno != EAGAIN) {
192                         cblock->aio_fildes = -1;
193                         pr_err("failed to queue perf data, error: %m\n");
194                         break;
195                 }
196         } while (1);
197
198         return rc;
199 }
200
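/*
 * Check whether the aio write described by 'cblock' has completed.  Returns
 * 0 while it is still in progress or had to be restarted with the remaining
 * bytes, and 1 once the whole chunk is written and the mmap reference is
 * dropped.
 */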
201 static int record__aio_complete(struct mmap *md, struct aiocb *cblock)
202 {
203         void *rem_buf;
204         off_t rem_off;
205         size_t rem_size;
206         int rc, aio_errno;
207         ssize_t aio_ret, written;
208
209         aio_errno = aio_error(cblock);
210         if (aio_errno == EINPROGRESS)
211                 return 0;
212
213         written = aio_ret = aio_return(cblock);
214         if (aio_ret < 0) {
215                 if (aio_errno != EINTR)
216                         pr_err("failed to write perf data, error: %m\n");
217                 written = 0;
218         }
219
220         rem_size = cblock->aio_nbytes - written;
221
222         if (rem_size == 0) {
223                 cblock->aio_fildes = -1;
224                 /*
225                  * md->refcount is incremented in record__aio_pushfn() for
226                  * every aio write request started in record__aio_push() so
227                  * decrement it because the request is now complete.
228                  */
229                 perf_mmap__put(&md->core);
230                 rc = 1;
231         } else {
232                 /*
 233                  * The aio write request may need to be restarted with the
 234                  * remainder if the kernel didn't write the whole chunk
 235                  * at once.
236                  */
237                 rem_off = cblock->aio_offset + written;
238                 rem_buf = (void *)(cblock->aio_buf + written);
239                 record__aio_write(cblock, cblock->aio_fildes,
240                                 rem_buf, rem_size, rem_off);
241                 rc = 0;
242         }
243
244         return rc;
245 }
246
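/*
 * Wait for in-flight aio writes on 'md'.  With sync_all == false, return the
 * index of the first control block that is free for reuse; with
 * sync_all == true, return -1 only after every outstanding request has
 * completed.
 */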
247 static int record__aio_sync(struct mmap *md, bool sync_all)
248 {
249         struct aiocb **aiocb = md->aio.aiocb;
250         struct aiocb *cblocks = md->aio.cblocks;
251         struct timespec timeout = { 0, 1000 * 1000  * 1 }; /* 1ms */
252         int i, do_suspend;
253
254         do {
255                 do_suspend = 0;
256                 for (i = 0; i < md->aio.nr_cblocks; ++i) {
257                         if (cblocks[i].aio_fildes == -1 || record__aio_complete(md, &cblocks[i])) {
258                                 if (sync_all)
259                                         aiocb[i] = NULL;
260                                 else
261                                         return i;
262                         } else {
263                                 /*
 264                                  * The started aio write is not complete
 265                                  * yet, so it has to be waited on before
 266                                  * the next allocation.
267                                  */
268                                 aiocb[i] = &cblocks[i];
269                                 do_suspend = 1;
270                         }
271                 }
272                 if (!do_suspend)
273                         return -1;
274
275                 while (aio_suspend((const struct aiocb **)aiocb, md->aio.nr_cblocks, &timeout)) {
276                         if (!(errno == EAGAIN || errno == EINTR))
277                                 pr_err("failed to sync perf data, error: %m\n");
278                 }
279         } while (1);
280 }
281
282 struct record_aio {
283         struct record   *rec;
284         void            *data;
285         size_t          size;
286 };
287
288 static int record__aio_pushfn(struct mmap *map, void *to, void *buf, size_t size)
289 {
290         struct record_aio *aio = to;
291
292         /*
 293          * The map->core.base data pointed to by buf is copied into a free map->aio.data[]
 294          * buffer to release space in the kernel buffer as fast as possible, calling
 295          * perf_mmap__consume() from the perf_mmap__push() function.
 296          *
 297          * That lets the kernel proceed with storing more profiling data into
 298          * the kernel buffer earlier than the other per-cpu kernel buffers are handled.
 299          *
 300          * Copying can be done in two steps in case the chunk of profiling data
 301          * crosses the upper bound of the kernel buffer.  In this case we first move
 302          * the part of the data from map->start to the upper bound and then the remainder
 303          * from the beginning of the kernel buffer to the end of the data chunk.
304          */
305
306         if (record__comp_enabled(aio->rec)) {
307                 size = zstd_compress(aio->rec->session, aio->data + aio->size,
308                                      mmap__mmap_len(map) - aio->size,
309                                      buf, size);
310         } else {
311                 memcpy(aio->data + aio->size, buf, size);
312         }
313
314         if (!aio->size) {
315                 /*
 316                  * Increment map->refcount to guard the map->aio.data[] buffer
 317                  * from premature deallocation, because the map object can be
 318                  * released before the aio write request started on the
 319                  * map->aio.data[] buffer has completed.
 320                  *
 321                  * perf_mmap__put() is done at record__aio_complete() once the
 322                  * started aio request completes, or at record__aio_push() if
 323                  * the request failed to start.
324                  */
325                 perf_mmap__get(&map->core);
326         }
327
328         aio->size += size;
329
330         return size;
331 }
332
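/*
 * Drain one ring buffer: copy (and optionally compress) its data into a free
 * aio buffer and queue an asynchronous write at file offset '*off',
 * advancing '*off' and rec->bytes_written on success.
 */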
333 static int record__aio_push(struct record *rec, struct mmap *map, off_t *off)
334 {
335         int ret, idx;
336         int trace_fd = rec->session->data->file.fd;
337         struct record_aio aio = { .rec = rec, .size = 0 };
338
339         /*
 340          * Call record__aio_sync() to wait until a map->aio.data[] buffer
 341          * becomes available after the previous aio write operation.
342          */
343
344         idx = record__aio_sync(map, false);
345         aio.data = map->aio.data[idx];
346         ret = perf_mmap__push(map, &aio, record__aio_pushfn);
347         if (ret != 0) /* ret > 0 - no data, ret < 0 - error */
348                 return ret;
349
350         rec->samples++;
351         ret = record__aio_write(&(map->aio.cblocks[idx]), trace_fd, aio.data, aio.size, *off);
352         if (!ret) {
353                 *off += aio.size;
354                 rec->bytes_written += aio.size;
355                 if (switch_output_size(rec))
356                         trigger_hit(&switch_output_trigger);
357         } else {
358                 /*
 359                  * Decrement the map->refcount incremented in record__aio_pushfn()
 360                  * if the record__aio_write() operation failed to start; otherwise
 361                  * map->refcount is decremented in record__aio_complete() after
 362                  * the aio write operation finishes successfully.
363                  */
364                 perf_mmap__put(&map->core);
365         }
366
367         return ret;
368 }
369
370 static off_t record__aio_get_pos(int trace_fd)
371 {
372         return lseek(trace_fd, 0, SEEK_CUR);
373 }
374
375 static void record__aio_set_pos(int trace_fd, off_t pos)
376 {
377         lseek(trace_fd, pos, SEEK_SET);
378 }
379
380 static void record__aio_mmap_read_sync(struct record *rec)
381 {
382         int i;
383         struct evlist *evlist = rec->evlist;
384         struct mmap *maps = evlist->mmap;
385
386         if (!record__aio_enabled(rec))
387                 return;
388
389         for (i = 0; i < evlist->core.nr_mmaps; i++) {
390                 struct mmap *map = &maps[i];
391
392                 if (map->core.base)
393                         record__aio_sync(map, true);
394         }
395 }
396
397 static int nr_cblocks_default = 1;
398 static int nr_cblocks_max = 4;
399
400 static int record__aio_parse(const struct option *opt,
401                              const char *str,
402                              int unset)
403 {
404         struct record_opts *opts = (struct record_opts *)opt->value;
405
406         if (unset) {
407                 opts->nr_cblocks = 0;
408         } else {
409                 if (str)
410                         opts->nr_cblocks = strtol(str, NULL, 0);
411                 if (!opts->nr_cblocks)
412                         opts->nr_cblocks = nr_cblocks_default;
413         }
414
415         return 0;
416 }
417 #else /* HAVE_AIO_SUPPORT */
418 static int nr_cblocks_max = 0;
419
420 static int record__aio_push(struct record *rec __maybe_unused, struct mmap *map __maybe_unused,
421                             off_t *off __maybe_unused)
422 {
423         return -1;
424 }
425
426 static off_t record__aio_get_pos(int trace_fd __maybe_unused)
427 {
428         return -1;
429 }
430
431 static void record__aio_set_pos(int trace_fd __maybe_unused, off_t pos __maybe_unused)
432 {
433 }
434
435 static void record__aio_mmap_read_sync(struct record *rec __maybe_unused)
436 {
437 }
438 #endif
439
440 static int record__aio_enabled(struct record *rec)
441 {
442         return rec->opts.nr_cblocks > 0;
443 }
444
445 #define MMAP_FLUSH_DEFAULT 1
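/*
 * Parse the mmap flush threshold: accept a plain number or a B/K/M/G
 * suffixed size, fall back to MMAP_FLUSH_DEFAULT, and cap the value at a
 * quarter of the mmap buffer size.
 */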
446 static int record__mmap_flush_parse(const struct option *opt,
447                                     const char *str,
448                                     int unset)
449 {
450         int flush_max;
451         struct record_opts *opts = (struct record_opts *)opt->value;
452         static struct parse_tag tags[] = {
453                         { .tag  = 'B', .mult = 1       },
454                         { .tag  = 'K', .mult = 1 << 10 },
455                         { .tag  = 'M', .mult = 1 << 20 },
456                         { .tag  = 'G', .mult = 1 << 30 },
457                         { .tag  = 0 },
458         };
459
460         if (unset)
461                 return 0;
462
463         if (str) {
464                 opts->mmap_flush = parse_tag_value(str, tags);
465                 if (opts->mmap_flush == (int)-1)
466                         opts->mmap_flush = strtol(str, NULL, 0);
467         }
468
469         if (!opts->mmap_flush)
470                 opts->mmap_flush = MMAP_FLUSH_DEFAULT;
471
472         flush_max = evlist__mmap_size(opts->mmap_pages);
473         flush_max /= 4;
474         if (opts->mmap_flush > flush_max)
475                 opts->mmap_flush = flush_max;
476
477         return 0;
478 }
479
480 #ifdef HAVE_ZSTD_SUPPORT
481 static unsigned int comp_level_default = 1;
482
483 static int record__parse_comp_level(const struct option *opt, const char *str, int unset)
484 {
485         struct record_opts *opts = opt->value;
486
487         if (unset) {
488                 opts->comp_level = 0;
489         } else {
490                 if (str)
491                         opts->comp_level = strtol(str, NULL, 0);
492                 if (!opts->comp_level)
493                         opts->comp_level = comp_level_default;
494         }
495
496         return 0;
497 }
498 #endif
499 static unsigned int comp_level_max = 22;
500
501 static int record__comp_enabled(struct record *rec)
502 {
503         return rec->opts.comp_level > 0;
504 }
505
506 static int process_synthesized_event(struct perf_tool *tool,
507                                      union perf_event *event,
508                                      struct perf_sample *sample __maybe_unused,
509                                      struct machine *machine __maybe_unused)
510 {
511         struct record *rec = container_of(tool, struct record, tool);
512         return record__write(rec, NULL, event, event->header.size);
513 }
514
515 static int process_locked_synthesized_event(struct perf_tool *tool,
516                                      union perf_event *event,
517                                      struct perf_sample *sample __maybe_unused,
518                                      struct machine *machine __maybe_unused)
519 {
520         static pthread_mutex_t synth_lock = PTHREAD_MUTEX_INITIALIZER;
521         int ret;
522
523         pthread_mutex_lock(&synth_lock);
524         ret = process_synthesized_event(tool, event, sample, machine);
525         pthread_mutex_unlock(&synth_lock);
526         return ret;
527 }
528
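/*
 * perf_mmap__push() callback for the synchronous (non-aio) path: optionally
 * compress the chunk into the map's data buffer, then append it to the
 * perf.data file via record__write().
 */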
529 static int record__pushfn(struct mmap *map, void *to, void *bf, size_t size)
530 {
531         struct record *rec = to;
532
533         if (record__comp_enabled(rec)) {
534                 size = zstd_compress(rec->session, map->data, mmap__mmap_len(map), bf, size);
535                 bf   = map->data;
536         }
537
538         rec->samples++;
539         return record__write(rec, map, bf, size);
540 }
541
542 static volatile int signr = -1;
543 static volatile int child_finished;
544 #ifdef HAVE_EVENTFD_SUPPORT
545 static int done_fd = -1;
546 #endif
547
548 static void sig_handler(int sig)
549 {
550         if (sig == SIGCHLD)
551                 child_finished = 1;
552         else
553                 signr = sig;
554
555         done = 1;
556 #ifdef HAVE_EVENTFD_SUPPORT
557 {
558         u64 tmp = 1;
559         /*
560          * It is possible for this signal handler to run after done is checked
561          * in the main loop, but before the perf counter fds are polled. If this
562          * happens, the poll() will continue to wait even though done is set,
563          * and will only break out if either another signal is received, or the
564          * counters are ready for read. To ensure the poll() doesn't sleep when
565          * done is set, use an eventfd (done_fd) to wake up the poll().
566          */
567         if (write(done_fd, &tmp, sizeof(tmp)) < 0)
568                 pr_err("failed to signal wakeup fd, error: %m\n");
569 }
570 #endif // HAVE_EVENTFD_SUPPORT
571 }
572
573 static void sigsegv_handler(int sig)
574 {
575         perf_hooks__recover();
576         sighandler_dump_stack(sig);
577 }
578
579 static void record__sig_exit(void)
580 {
581         if (signr == -1)
582                 return;
583
584         signal(signr, SIG_DFL);
585         raise(signr);
586 }
587
588 #ifdef HAVE_AUXTRACE_SUPPORT
589
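/*
 * Write one AUX area trace chunk (event header plus up to two data
 * fragments) to the output, recording its file offset in the auxtrace index
 * for single-file output and padding the payload to an 8-byte boundary.
 */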
590 static int record__process_auxtrace(struct perf_tool *tool,
591                                     struct mmap *map,
592                                     union perf_event *event, void *data1,
593                                     size_t len1, void *data2, size_t len2)
594 {
595         struct record *rec = container_of(tool, struct record, tool);
596         struct perf_data *data = &rec->data;
597         size_t padding;
598         u8 pad[8] = {0};
599
600         if (!perf_data__is_pipe(data) && perf_data__is_single_file(data)) {
601                 off_t file_offset;
602                 int fd = perf_data__fd(data);
603                 int err;
604
605                 file_offset = lseek(fd, 0, SEEK_CUR);
606                 if (file_offset == -1)
607                         return -1;
608                 err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
609                                                      event, file_offset);
610                 if (err)
611                         return err;
612         }
613
614         /* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
615         padding = (len1 + len2) & 7;
616         if (padding)
617                 padding = 8 - padding;
618
619         record__write(rec, map, event, event->header.size);
620         record__write(rec, map, data1, len1);
621         if (len2)
622                 record__write(rec, map, data2, len2);
623         record__write(rec, map, &pad, padding);
624
625         return 0;
626 }
627
628 static int record__auxtrace_mmap_read(struct record *rec,
629                                       struct mmap *map)
630 {
631         int ret;
632
633         ret = auxtrace_mmap__read(map, rec->itr, &rec->tool,
634                                   record__process_auxtrace);
635         if (ret < 0)
636                 return ret;
637
638         if (ret)
639                 rec->samples++;
640
641         return 0;
642 }
643
644 static int record__auxtrace_mmap_read_snapshot(struct record *rec,
645                                                struct mmap *map)
646 {
647         int ret;
648
649         ret = auxtrace_mmap__read_snapshot(map, rec->itr, &rec->tool,
650                                            record__process_auxtrace,
651                                            rec->opts.auxtrace_snapshot_size);
652         if (ret < 0)
653                 return ret;
654
655         if (ret)
656                 rec->samples++;
657
658         return 0;
659 }
660
661 static int record__auxtrace_read_snapshot_all(struct record *rec)
662 {
663         int i;
664         int rc = 0;
665
666         for (i = 0; i < rec->evlist->core.nr_mmaps; i++) {
667                 struct mmap *map = &rec->evlist->mmap[i];
668
669                 if (!map->auxtrace_mmap.base)
670                         continue;
671
672                 if (record__auxtrace_mmap_read_snapshot(rec, map) != 0) {
673                         rc = -1;
674                         goto out;
675                 }
676         }
677 out:
678         return rc;
679 }
680
681 static void record__read_auxtrace_snapshot(struct record *rec, bool on_exit)
682 {
683         pr_debug("Recording AUX area tracing snapshot\n");
684         if (record__auxtrace_read_snapshot_all(rec) < 0) {
685                 trigger_error(&auxtrace_snapshot_trigger);
686         } else {
687                 if (auxtrace_record__snapshot_finish(rec->itr, on_exit))
688                         trigger_error(&auxtrace_snapshot_trigger);
689                 else
690                         trigger_ready(&auxtrace_snapshot_trigger);
691         }
692 }
693
694 static int record__auxtrace_snapshot_exit(struct record *rec)
695 {
696         if (trigger_is_error(&auxtrace_snapshot_trigger))
697                 return 0;
698
699         if (!auxtrace_record__snapshot_started &&
700             auxtrace_record__snapshot_start(rec->itr))
701                 return -1;
702
703         record__read_auxtrace_snapshot(rec, true);
704         if (trigger_is_error(&auxtrace_snapshot_trigger))
705                 return -1;
706
707         return 0;
708 }
709
710 static int record__auxtrace_init(struct record *rec)
711 {
712         int err;
713
714         if (!rec->itr) {
715                 rec->itr = auxtrace_record__init(rec->evlist, &err);
716                 if (err)
717                         return err;
718         }
719
720         err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
721                                               rec->opts.auxtrace_snapshot_opts);
722         if (err)
723                 return err;
724
725         err = auxtrace_parse_sample_options(rec->itr, rec->evlist, &rec->opts,
726                                             rec->opts.auxtrace_sample_opts);
727         if (err)
728                 return err;
729
730         return auxtrace_parse_filters(rec->evlist);
731 }
732
733 #else
734
735 static inline
736 int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
737                                struct mmap *map __maybe_unused)
738 {
739         return 0;
740 }
741
742 static inline
743 void record__read_auxtrace_snapshot(struct record *rec __maybe_unused,
744                                     bool on_exit __maybe_unused)
745 {
746 }
747
748 static inline
749 int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
750 {
751         return 0;
752 }
753
754 static inline
755 int record__auxtrace_snapshot_exit(struct record *rec __maybe_unused)
756 {
757         return 0;
758 }
759
760 static int record__auxtrace_init(struct record *rec __maybe_unused)
761 {
762         return 0;
763 }
764
765 #endif
766
767 static bool record__kcore_readable(struct machine *machine)
768 {
769         char kcore[PATH_MAX];
770         int fd;
771
772         scnprintf(kcore, sizeof(kcore), "%s/proc/kcore", machine->root_dir);
773
774         fd = open(kcore, O_RDONLY);
775         if (fd < 0)
776                 return false;
777
778         close(fd);
779
780         return true;
781 }
782
783 static int record__kcore_copy(struct machine *machine, struct perf_data *data)
784 {
785         char from_dir[PATH_MAX];
786         char kcore_dir[PATH_MAX];
787         int ret;
788
789         snprintf(from_dir, sizeof(from_dir), "%s/proc", machine->root_dir);
790
791         ret = perf_data__make_kcore_dir(data, kcore_dir, sizeof(kcore_dir));
792         if (ret)
793                 return ret;
794
795         return kcore_copy(from_dir, kcore_dir);
796 }
797
798 static int record__mmap_evlist(struct record *rec,
799                                struct evlist *evlist)
800 {
801         struct record_opts *opts = &rec->opts;
802         bool auxtrace_overwrite = opts->auxtrace_snapshot_mode ||
803                                   opts->auxtrace_sample_mode;
804         char msg[512];
805
806         if (opts->affinity != PERF_AFFINITY_SYS)
807                 cpu__setup_cpunode_map();
808
809         if (evlist__mmap_ex(evlist, opts->mmap_pages,
810                                  opts->auxtrace_mmap_pages,
811                                  auxtrace_overwrite,
812                                  opts->nr_cblocks, opts->affinity,
813                                  opts->mmap_flush, opts->comp_level) < 0) {
814                 if (errno == EPERM) {
815                         pr_err("Permission error mapping pages.\n"
816                                "Consider increasing "
817                                "/proc/sys/kernel/perf_event_mlock_kb,\n"
818                                "or try again with a smaller value of -m/--mmap_pages.\n"
819                                "(current value: %u,%u)\n",
820                                opts->mmap_pages, opts->auxtrace_mmap_pages);
821                         return -errno;
822                 } else {
823                         pr_err("failed to mmap with %d (%s)\n", errno,
824                                 str_error_r(errno, msg, sizeof(msg)));
825                         if (errno)
826                                 return -errno;
827                         else
828                                 return -EINVAL;
829                 }
830         }
831         return 0;
832 }
833
834 static int record__mmap(struct record *rec)
835 {
836         return record__mmap_evlist(rec, rec->evlist);
837 }
838
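/*
 * Open all events in the evlist, adding a dummy tracking event for delayed
 * or system-wide sessions, falling back or resetting weak groups when an
 * event fails to open, then apply filters and mmap the ring buffers.
 */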
839 static int record__open(struct record *rec)
840 {
841         char msg[BUFSIZ];
842         struct evsel *pos;
843         struct evlist *evlist = rec->evlist;
844         struct perf_session *session = rec->session;
845         struct record_opts *opts = &rec->opts;
846         int rc = 0;
847
848         /*
 849          * For initial_delay or system wide, we need to add a dummy event so
 850          * that we can track PERF_RECORD_MMAP while we wait for the initial
 851          * delay to pass or for event synthesis to finish.
852          */
853         if (opts->initial_delay || target__has_cpu(&opts->target)) {
854                 if (perf_evlist__add_dummy(evlist))
855                         return -ENOMEM;
856
857                 /* Disable tracking of mmaps on lead event. */
858                 pos = evlist__first(evlist);
859                 pos->tracking = 0;
860                 /* Set up dummy event. */
861                 pos = evlist__last(evlist);
862                 pos->tracking = 1;
863                 /*
864                  * Enable the dummy event when the process is forked for
865                  * initial_delay, immediately for system wide.
866                  */
867                 if (opts->initial_delay)
868                         pos->core.attr.enable_on_exec = 1;
869                 else
870                         pos->immediate = 1;
871         }
872
873         perf_evlist__config(evlist, opts, &callchain_param);
874
875         evlist__for_each_entry(evlist, pos) {
876 try_again:
877                 if (evsel__open(pos, pos->core.cpus, pos->core.threads) < 0) {
878                         if (evsel__fallback(pos, errno, msg, sizeof(msg))) {
879                                 if (verbose > 0)
880                                         ui__warning("%s\n", msg);
881                                 goto try_again;
882                         }
883                         if ((errno == EINVAL || errno == EBADF) &&
884                             pos->leader != pos &&
885                             pos->weak_group) {
886                                 pos = perf_evlist__reset_weak_group(evlist, pos, true);
887                                 goto try_again;
888                         }
889                         rc = -errno;
890                         evsel__open_strerror(pos, &opts->target, errno, msg, sizeof(msg));
891                         ui__error("%s\n", msg);
892                         goto out;
893                 }
894
895                 pos->supported = true;
896         }
897
898         if (symbol_conf.kptr_restrict && !perf_evlist__exclude_kernel(evlist)) {
899                 pr_warning(
900 "WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
901 "check /proc/sys/kernel/kptr_restrict and /proc/sys/kernel/perf_event_paranoid.\n\n"
902 "Samples in kernel functions may not be resolved if a suitable vmlinux\n"
903 "file is not found in the buildid cache or in the vmlinux path.\n\n"
904 "Samples in kernel modules won't be resolved at all.\n\n"
905 "If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
906 "even with a suitable vmlinux or kallsyms file.\n\n");
907         }
908
909         if (perf_evlist__apply_filters(evlist, &pos)) {
910                 pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
911                         pos->filter, evsel__name(pos), errno,
912                         str_error_r(errno, msg, sizeof(msg)));
913                 rc = -1;
914                 goto out;
915         }
916
917         rc = record__mmap(rec);
918         if (rc)
919                 goto out;
920
921         session->evlist = evlist;
922         perf_session__set_id_hdr_size(session);
923 out:
924         return rc;
925 }
926
927 static int process_sample_event(struct perf_tool *tool,
928                                 union perf_event *event,
929                                 struct perf_sample *sample,
930                                 struct evsel *evsel,
931                                 struct machine *machine)
932 {
933         struct record *rec = container_of(tool, struct record, tool);
934
935         if (rec->evlist->first_sample_time == 0)
936                 rec->evlist->first_sample_time = sample->time;
937
938         rec->evlist->last_sample_time = sample->time;
939
940         if (rec->buildid_all)
941                 return 0;
942
943         rec->samples++;
944         return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
945 }
946
947 static int process_buildids(struct record *rec)
948 {
949         struct perf_session *session = rec->session;
950
951         if (perf_data__size(&rec->data) == 0)
952                 return 0;
953
954         /*
 955          * During this process, it'll load the kernel map and replace
 956          * dso->long_name with a real pathname it found.  In this case
 957          * we prefer a vmlinux path like
 958          *   /lib/modules/3.16.4/build/vmlinux
 959          *
 960          * rather than the build-id path (in the debug directory).
961          *   $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
962          */
963         symbol_conf.ignore_vmlinux_buildid = true;
964
965         /*
 966          * If --buildid-all is given, it marks all DSOs regardless of hits,
 967          * so there is no need to process samples. But if timestamp_boundary is
 968          * enabled, it still needs to walk all samples to get the timestamps of
 969          * the first/last samples.
970          */
971         if (rec->buildid_all && !rec->timestamp_boundary)
972                 rec->tool.sample = NULL;
973
974         return perf_session__process_events(session);
975 }
976
977 static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
978 {
979         int err;
980         struct perf_tool *tool = data;
981         /*
 982          * As for the guest kernel, when processing the record & report
 983          * subcommands we arrange the module mmaps prior to the guest kernel
 984          * mmap and trigger a dso preload, because by default guest module
 985          * symbols are loaded from guest kallsyms instead of
 986          * /lib/modules/XXX/XXX. This avoids missing symbols when the first
 987          * address is in a module instead of in the guest kernel.
988          */
989         err = perf_event__synthesize_modules(tool, process_synthesized_event,
990                                              machine);
991         if (err < 0)
992                 pr_err("Couldn't record guest kernel [%d]'s reference"
993                        " relocation symbol.\n", machine->pid);
994
995         /*
 996          * We use _stext for the guest kernel because the guest kernel's
 997          * /proc/kallsyms sometimes has no _text.
998          */
999         err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
1000                                                  machine);
1001         if (err < 0)
1002                 pr_err("Couldn't record guest kernel [%d]'s reference"
1003                        " relocation symbol.\n", machine->pid);
1004 }
1005
1006 static struct perf_event_header finished_round_event = {
1007         .size = sizeof(struct perf_event_header),
1008         .type = PERF_RECORD_FINISHED_ROUND,
1009 };
1010
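/*
 * In node or cpu affinity mode, migrate the recording thread to the CPU mask
 * of the ring buffer that is about to be read so the buffer is accessed
 * locally.
 */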
1011 static void record__adjust_affinity(struct record *rec, struct mmap *map)
1012 {
1013         if (rec->opts.affinity != PERF_AFFINITY_SYS &&
1014             !bitmap_equal(rec->affinity_mask.bits, map->affinity_mask.bits,
1015                           rec->affinity_mask.nbits)) {
1016                 bitmap_zero(rec->affinity_mask.bits, rec->affinity_mask.nbits);
1017                 bitmap_or(rec->affinity_mask.bits, rec->affinity_mask.bits,
1018                           map->affinity_mask.bits, rec->affinity_mask.nbits);
1019                 sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&rec->affinity_mask),
1020                                   (cpu_set_t *)rec->affinity_mask.bits);
1021                 if (verbose == 2)
1022                         mmap_cpu_mask__scnprintf(&rec->affinity_mask, "thread");
1023         }
1024 }
1025
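/*
 * Layout callback for zstd_compress_stream_to_records(): with increment == 0
 * it initializes a PERF_RECORD_COMPRESSED header and reserves its size,
 * otherwise it grows header.size by 'increment'.
 */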
1026 static size_t process_comp_header(void *record, size_t increment)
1027 {
1028         struct perf_record_compressed *event = record;
1029         size_t size = sizeof(*event);
1030
1031         if (increment) {
1032                 event->header.size += increment;
1033                 return increment;
1034         }
1035
1036         event->header.type = PERF_RECORD_COMPRESSED;
1037         event->header.size = size;
1038
1039         return size;
1040 }
1041
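/*
 * Compress 'src' into one or more PERF_RECORD_COMPRESSED records in 'dst',
 * updating the session's transferred/compressed byte counters.  Returns the
 * number of bytes written to 'dst'.
 */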
1042 static size_t zstd_compress(struct perf_session *session, void *dst, size_t dst_size,
1043                             void *src, size_t src_size)
1044 {
1045         size_t compressed;
1046         size_t max_record_size = PERF_SAMPLE_MAX_SIZE - sizeof(struct perf_record_compressed) - 1;
1047
1048         compressed = zstd_compress_stream_to_records(&session->zstd_data, dst, dst_size, src, src_size,
1049                                                      max_record_size, process_comp_header);
1050
1051         session->bytes_transferred += src_size;
1052         session->bytes_compressed  += compressed;
1053
1054         return compressed;
1055 }
1056
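/*
 * Drain every mapped ring buffer of the evlist (regular or overwrite maps),
 * using the aio path when enabled, read pending AUX area data, and emit a
 * PERF_RECORD_FINISHED_ROUND event if anything was written.  With 'synch'
 * set, the flush threshold is temporarily forced to 1 so residual data is
 * flushed out.
 */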
1057 static int record__mmap_read_evlist(struct record *rec, struct evlist *evlist,
1058                                     bool overwrite, bool synch)
1059 {
1060         u64 bytes_written = rec->bytes_written;
1061         int i;
1062         int rc = 0;
1063         struct mmap *maps;
1064         int trace_fd = rec->data.file.fd;
1065         off_t off = 0;
1066
1067         if (!evlist)
1068                 return 0;
1069
1070         maps = overwrite ? evlist->overwrite_mmap : evlist->mmap;
1071         if (!maps)
1072                 return 0;
1073
1074         if (overwrite && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
1075                 return 0;
1076
1077         if (record__aio_enabled(rec))
1078                 off = record__aio_get_pos(trace_fd);
1079
1080         for (i = 0; i < evlist->core.nr_mmaps; i++) {
1081                 u64 flush = 0;
1082                 struct mmap *map = &maps[i];
1083
1084                 if (map->core.base) {
1085                         record__adjust_affinity(rec, map);
1086                         if (synch) {
1087                                 flush = map->core.flush;
1088                                 map->core.flush = 1;
1089                         }
1090                         if (!record__aio_enabled(rec)) {
1091                                 if (perf_mmap__push(map, rec, record__pushfn) < 0) {
1092                                         if (synch)
1093                                                 map->core.flush = flush;
1094                                         rc = -1;
1095                                         goto out;
1096                                 }
1097                         } else {
1098                                 if (record__aio_push(rec, map, &off) < 0) {
1099                                         record__aio_set_pos(trace_fd, off);
1100                                         if (synch)
1101                                                 map->core.flush = flush;
1102                                         rc = -1;
1103                                         goto out;
1104                                 }
1105                         }
1106                         if (synch)
1107                                 map->core.flush = flush;
1108                 }
1109
1110                 if (map->auxtrace_mmap.base && !rec->opts.auxtrace_snapshot_mode &&
1111                     !rec->opts.auxtrace_sample_mode &&
1112                     record__auxtrace_mmap_read(rec, map) != 0) {
1113                         rc = -1;
1114                         goto out;
1115                 }
1116         }
1117
1118         if (record__aio_enabled(rec))
1119                 record__aio_set_pos(trace_fd, off);
1120
1121         /*
1122          * Mark the round finished in case we wrote
1123          * at least one event.
1124          */
1125         if (bytes_written != rec->bytes_written)
1126                 rc = record__write(rec, NULL, &finished_round_event, sizeof(finished_round_event));
1127
1128         if (overwrite)
1129                 perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
1130 out:
1131         return rc;
1132 }
1133
1134 static int record__mmap_read_all(struct record *rec, bool synch)
1135 {
1136         int err;
1137
1138         err = record__mmap_read_evlist(rec, rec->evlist, false, synch);
1139         if (err)
1140                 return err;
1141
1142         return record__mmap_read_evlist(rec, rec->evlist, true, synch);
1143 }
1144
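/*
 * Start with every header feature enabled, then clear the ones that do not
 * apply to this session (build IDs, tracing data, branch stacks, auxtrace,
 * clockid, directory output, compression, stat).
 */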
1145 static void record__init_features(struct record *rec)
1146 {
1147         struct perf_session *session = rec->session;
1148         int feat;
1149
1150         for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
1151                 perf_header__set_feat(&session->header, feat);
1152
1153         if (rec->no_buildid)
1154                 perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
1155
1156         if (!have_tracepoints(&rec->evlist->core.entries))
1157                 perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
1158
1159         if (!rec->opts.branch_stack)
1160                 perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
1161
1162         if (!rec->opts.full_auxtrace)
1163                 perf_header__clear_feat(&session->header, HEADER_AUXTRACE);
1164
1165         if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns))
1166                 perf_header__clear_feat(&session->header, HEADER_CLOCKID);
1167
1168         perf_header__clear_feat(&session->header, HEADER_DIR_FORMAT);
1169         if (!record__comp_enabled(rec))
1170                 perf_header__clear_feat(&session->header, HEADER_COMPRESSED);
1171
1172         perf_header__clear_feat(&session->header, HEADER_STAT);
1173 }
1174
1175 static void
1176 record__finish_output(struct record *rec)
1177 {
1178         struct perf_data *data = &rec->data;
1179         int fd = perf_data__fd(data);
1180
1181         if (data->is_pipe)
1182                 return;
1183
1184         rec->session->header.data_size += rec->bytes_written;
1185         data->file.size = lseek(perf_data__fd(data), 0, SEEK_CUR);
1186
1187         if (!rec->no_buildid) {
1188                 process_buildids(rec);
1189
1190                 if (rec->buildid_all)
1191                         dsos__hit_all(rec->session);
1192         }
1193         perf_session__write_header(rec->session, rec->evlist, fd, true);
1194
1195         return;
1196 }
1197
1198 static int record__synthesize_workload(struct record *rec, bool tail)
1199 {
1200         int err;
1201         struct perf_thread_map *thread_map;
1202
1203         if (rec->opts.tail_synthesize != tail)
1204                 return 0;
1205
1206         thread_map = thread_map__new_by_tid(rec->evlist->workload.pid);
1207         if (thread_map == NULL)
1208                 return -1;
1209
1210         err = perf_event__synthesize_thread_map(&rec->tool, thread_map,
1211                                                  process_synthesized_event,
1212                                                  &rec->session->machines.host,
1213                                                  rec->opts.sample_address);
1214         perf_thread_map__put(thread_map);
1215         return err;
1216 }
1217
1218 static int record__synthesize(struct record *rec, bool tail);
1219
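/*
 * Rotate the output: finish the current file, have perf_data__switch() move
 * it to a timestamp-suffixed name and continue recording into a fresh file,
 * re-synthesizing tracking events for it.  When only a limited number of
 * files is kept, the oldest rotated file is removed.  Returns the output fd
 * on success or a negative error.
 */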
1220 static int
1221 record__switch_output(struct record *rec, bool at_exit)
1222 {
1223         struct perf_data *data = &rec->data;
1224         int fd, err;
1225         char *new_filename;
1226
1229         /* Same size as:   "2015122520103046" */
1228         char timestamp[] = "InvalidTimestamp";
1229
1230         record__aio_mmap_read_sync(rec);
1231
1232         record__synthesize(rec, true);
1233         if (target__none(&rec->opts.target))
1234                 record__synthesize_workload(rec, true);
1235
1236         rec->samples = 0;
1237         record__finish_output(rec);
1238         err = fetch_current_timestamp(timestamp, sizeof(timestamp));
1239         if (err) {
1240                 pr_err("Failed to get current timestamp\n");
1241                 return -EINVAL;
1242         }
1243
1244         fd = perf_data__switch(data, timestamp,
1245                                     rec->session->header.data_offset,
1246                                     at_exit, &new_filename);
1247         if (fd >= 0 && !at_exit) {
1248                 rec->bytes_written = 0;
1249                 rec->session->header.data_size = 0;
1250         }
1251
1252         if (!quiet)
1253                 fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
1254                         data->path, timestamp);
1255
1256         if (rec->switch_output.num_files) {
1257                 int n = rec->switch_output.cur_file + 1;
1258
1259                 if (n >= rec->switch_output.num_files)
1260                         n = 0;
1261                 rec->switch_output.cur_file = n;
1262                 if (rec->switch_output.filenames[n]) {
1263                         remove(rec->switch_output.filenames[n]);
1264                         zfree(&rec->switch_output.filenames[n]);
1265                 }
1266                 rec->switch_output.filenames[n] = new_filename;
1267         } else {
1268                 free(new_filename);
1269         }
1270
1271         /* Output tracking events */
1272         if (!at_exit) {
1273                 record__synthesize(rec, false);
1274
1275                 /*
1276                  * In 'perf record --switch-output' without -a,
1277                  * record__synthesize() in record__switch_output() won't
1278                  * generate tracking events because there's no thread_map
1279                  * in the evlist, so the newly created perf.data wouldn't
1280                  * contain map and comm information.
1281                  * Create a fake thread_map and directly call
1282                  * perf_event__synthesize_thread_map() for those events.
1283                  */
1284                 if (target__none(&rec->opts.target))
1285                         record__synthesize_workload(rec, false);
1286         }
1287         return fd;
1288 }
1289
1290 static volatile int workload_exec_errno;
1291
1292 /*
1293  * perf_evlist__prepare_workload will send a SIGUSR1
1294  * if the fork fails, since we asked by setting its
1295  * want_signal to true.
1296  */
1297 static void workload_exec_failed_signal(int signo __maybe_unused,
1298                                         siginfo_t *info,
1299                                         void *ucontext __maybe_unused)
1300 {
1301         workload_exec_errno = info->si_value.sival_int;
1302         done = 1;
1303         child_finished = 1;
1304 }
1305
1306 static void snapshot_sig_handler(int sig);
1307 static void alarm_sig_handler(int sig);
1308
1309 static const struct perf_event_mmap_page *
1310 perf_evlist__pick_pc(struct evlist *evlist)
1311 {
1312         if (evlist) {
1313                 if (evlist->mmap && evlist->mmap[0].core.base)
1314                         return evlist->mmap[0].core.base;
1315                 if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].core.base)
1316                         return evlist->overwrite_mmap[0].core.base;
1317         }
1318         return NULL;
1319 }
1320
1321 static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
1322 {
1323         const struct perf_event_mmap_page *pc;
1324
1325         pc = perf_evlist__pick_pc(rec->evlist);
1326         if (pc)
1327                 return pc;
1328         return NULL;
1329 }
1330
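/*
 * Synthesize the non-sample events describing existing system state: attrs,
 * features and tracing data for pipe output, time conversion, auxtrace info,
 * kernel and module maps, guest machines, thread and cpu maps, BPF and
 * cgroup events, and finally the threads of the target (optionally using
 * multiple synthesis threads).
 */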
1331 static int record__synthesize(struct record *rec, bool tail)
1332 {
1333         struct perf_session *session = rec->session;
1334         struct machine *machine = &session->machines.host;
1335         struct perf_data *data = &rec->data;
1336         struct record_opts *opts = &rec->opts;
1337         struct perf_tool *tool = &rec->tool;
1338         int fd = perf_data__fd(data);
1339         int err = 0;
1340         event_op f = process_synthesized_event;
1341
1342         if (rec->opts.tail_synthesize != tail)
1343                 return 0;
1344
1345         if (data->is_pipe) {
1346                 /*
1347                  * We need to synthesize events first, because some
1348                  * features work on top of them (on the report side).
1349                  */
1350                 err = perf_event__synthesize_attrs(tool, rec->evlist,
1351                                                    process_synthesized_event);
1352                 if (err < 0) {
1353                         pr_err("Couldn't synthesize attrs.\n");
1354                         goto out;
1355                 }
1356
1357                 err = perf_event__synthesize_features(tool, session, rec->evlist,
1358                                                       process_synthesized_event);
1359                 if (err < 0) {
1360                         pr_err("Couldn't synthesize features.\n");
1361                         return err;
1362                 }
1363
1364                 if (have_tracepoints(&rec->evlist->core.entries)) {
1365                         /*
1366                          * FIXME: err <= 0 here actually means that
1367                          * there were no tracepoints, so it's not really
1368                          * an error, just that we don't need to
1369                          * synthesize anything.  We really have to
1370                          * return this more properly and also
1371                          * propagate the errors that are now calling die().
1372                          */
1373                         err = perf_event__synthesize_tracing_data(tool, fd, rec->evlist,
1374                                                                   process_synthesized_event);
1375                         if (err <= 0) {
1376                                 pr_err("Couldn't record tracing data.\n");
1377                                 goto out;
1378                         }
1379                         rec->bytes_written += err;
1380                 }
1381         }
1382
1383         err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
1384                                           process_synthesized_event, machine);
1385         if (err)
1386                 goto out;
1387
1388         /* Synthesize id_index before auxtrace_info */
1389         if (rec->opts.auxtrace_sample_mode) {
1390                 err = perf_event__synthesize_id_index(tool,
1391                                                       process_synthesized_event,
1392                                                       session->evlist, machine);
1393                 if (err)
1394                         goto out;
1395         }
1396
1397         if (rec->opts.full_auxtrace) {
1398                 err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
1399                                         session, process_synthesized_event);
1400                 if (err)
1401                         goto out;
1402         }
1403
1404         if (!perf_evlist__exclude_kernel(rec->evlist)) {
1405                 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
1406                                                          machine);
1407                 WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
1408                                    "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
1409                                    "Check /proc/kallsyms permission or run as root.\n");
1410
1411                 err = perf_event__synthesize_modules(tool, process_synthesized_event,
1412                                                      machine);
1413                 WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
1414                                    "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
1415                                    "Check /proc/modules permission or run as root.\n");
1416         }
1417
1418         if (perf_guest) {
1419                 machines__process_guests(&session->machines,
1420                                          perf_event__synthesize_guest_os, tool);
1421         }
1422
1423         err = perf_event__synthesize_extra_attr(&rec->tool,
1424                                                 rec->evlist,
1425                                                 process_synthesized_event,
1426                                                 data->is_pipe);
1427         if (err)
1428                 goto out;
1429
1430         err = perf_event__synthesize_thread_map2(&rec->tool, rec->evlist->core.threads,
1431                                                  process_synthesized_event,
1432                                                 NULL);
1433         if (err < 0) {
1434                 pr_err("Couldn't synthesize thread map.\n");
1435                 return err;
1436         }
1437
1438         err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->core.cpus,
1439                                              process_synthesized_event, NULL);
1440         if (err < 0) {
1441                 pr_err("Couldn't synthesize cpu map.\n");
1442                 return err;
1443         }
1444
1445         err = perf_event__synthesize_bpf_events(session, process_synthesized_event,
1446                                                 machine, opts);
1447         if (err < 0)
1448                 pr_warning("Couldn't synthesize bpf events.\n");
1449
1450         err = perf_event__synthesize_cgroups(tool, process_synthesized_event,
1451                                              machine);
1452         if (err < 0)
1453                 pr_warning("Couldn't synthesize cgroup events.\n");
1454
1455         if (rec->opts.nr_threads_synthesize > 1) {
1456                 perf_set_multithreaded();
1457                 f = process_locked_synthesized_event;
1458         }
1459
1460         err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->core.threads,
1461                                             f, opts->sample_address,
1462                                             rec->opts.nr_threads_synthesize);
1463
1464         if (rec->opts.nr_threads_synthesize > 1)
1465                 perf_set_singlethreaded();
1466
1467 out:
1468         return err;
1469 }
1470
1471 static int record__process_signal_event(union perf_event *event __maybe_unused, void *data)
1472 {
1473         struct record *rec = data;
1474         pthread_kill(rec->thread_id, SIGUSR2);
1475         return 0;
1476 }
1477
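/*
 * Prepare the side-band evlist: attach the --switch-output-event callback if
 * one was set up, add the BPF side-band event unless disabled, and start the
 * side-band thread.
 */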
1478 static int record__setup_sb_evlist(struct record *rec)
1479 {
1480         struct record_opts *opts = &rec->opts;
1481
1482         if (rec->sb_evlist != NULL) {
1483                 /*
1484                  * We get here if --switch-output-event populated the
1485                  * sb_evlist, so associate a callback that will send a SIGUSR2
1486                  * to the main thread.
1487                  */
1488                 evlist__set_cb(rec->sb_evlist, record__process_signal_event, rec);
1489                 rec->thread_id = pthread_self();
1490         }
1491
1492         if (!opts->no_bpf_event) {
1493                 if (rec->sb_evlist == NULL) {
1494                         rec->sb_evlist = evlist__new();
1495
1496                         if (rec->sb_evlist == NULL) {
1497                                 pr_err("Couldn't create side band evlist.\n");
1498                                 return -1;
1499                         }
1500                 }
1501
1502                 if (evlist__add_bpf_sb_event(rec->sb_evlist, &rec->session->header.env)) {
1503                         pr_err("Couldn't ask for PERF_RECORD_BPF_EVENT side band events.\n");
1504                         return -1;
1505                 }
1506         }
1507
1508         if (perf_evlist__start_sb_thread(rec->sb_evlist, &rec->opts.target)) {
1509                 pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n");
1510                 opts->no_bpf_event = true;
1511         }
1512
1513         return 0;
1514 }
1515
1516 static int __cmd_record(struct record *rec, int argc, const char **argv)
1517 {
1518         int err;
1519         int status = 0;
1520         unsigned long waking = 0;
1521         const bool forks = argc > 0;
1522         struct perf_tool *tool = &rec->tool;
1523         struct record_opts *opts = &rec->opts;
1524         struct perf_data *data = &rec->data;
1525         struct perf_session *session;
1526         bool disabled = false, draining = false;
1527         int fd;
1528         float ratio = 0;
1529
1530         atexit(record__sig_exit);
1531         signal(SIGCHLD, sig_handler);
1532         signal(SIGINT, sig_handler);
1533         signal(SIGTERM, sig_handler);
1534         signal(SIGSEGV, sigsegv_handler);
1535
1536         if (rec->opts.record_namespaces)
1537                 tool->namespace_events = true;
1538
1539         if (rec->opts.record_cgroup) {
1540 #ifdef HAVE_FILE_HANDLE
1541                 tool->cgroup_events = true;
1542 #else
1543                 pr_err("cgroup tracking is not supported\n");
1544                 return -1;
1545 #endif
1546         }
1547
1548         if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) {
1549                 signal(SIGUSR2, snapshot_sig_handler);
1550                 if (rec->opts.auxtrace_snapshot_mode)
1551                         trigger_on(&auxtrace_snapshot_trigger);
1552                 if (rec->switch_output.enabled)
1553                         trigger_on(&switch_output_trigger);
1554         } else {
1555                 signal(SIGUSR2, SIG_IGN);
1556         }
1557
1558         session = perf_session__new(data, false, tool);
1559         if (IS_ERR(session)) {
1560                 pr_err("Perf session creation failed.\n");
1561                 return PTR_ERR(session);
1562         }
1563
1564         fd = perf_data__fd(data);
1565         rec->session = session;
1566
1567         if (zstd_init(&session->zstd_data, rec->opts.comp_level) < 0) {
1568                 pr_err("Compression initialization failed.\n");
1569                 return -1;
1570         }
1571 #ifdef HAVE_EVENTFD_SUPPORT
1572         done_fd = eventfd(0, EFD_NONBLOCK);
1573         if (done_fd < 0) {
1574                 pr_err("Failed to create wakeup eventfd, error: %m\n");
1575                 status = -1;
1576                 goto out_delete_session;
1577         }
1578         err = evlist__add_pollfd(rec->evlist, done_fd);
1579         if (err < 0) {
1580                 pr_err("Failed to add wakeup eventfd to poll list\n");
1581                 status = err;
1582                 goto out_delete_session;
1583         }
1584 #endif // HAVE_EVENTFD_SUPPORT
1585
1586         session->header.env.comp_type  = PERF_COMP_ZSTD;
1587         session->header.env.comp_level = rec->opts.comp_level;
1588
1589         if (rec->opts.kcore &&
1590             !record__kcore_readable(&session->machines.host)) {
1591                 pr_err("ERROR: kcore is not readable.\n");
1592                 return -1;
1593         }
1594
1595         record__init_features(rec);
1596
1597         if (rec->opts.use_clockid && rec->opts.clockid_res_ns)
1598                 session->header.env.clockid_res_ns = rec->opts.clockid_res_ns;
1599
1600         if (forks) {
1601                 err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
1602                                                     argv, data->is_pipe,
1603                                                     workload_exec_failed_signal);
1604                 if (err < 0) {
1605                         pr_err("Couldn't run the workload!\n");
1606                         status = err;
1607                         goto out_delete_session;
1608                 }
1609         }
1610
1611         /*
1612          * If we have just a single event and are sending data
1613          * through a pipe, we need to force sample id allocation,
1614          * because we synthesize the event name through the pipe
1615          * and need the id for that.
1616          */
1617         if (data->is_pipe && rec->evlist->core.nr_entries == 1)
1618                 rec->opts.sample_id = true;
1619
1620         if (record__open(rec) != 0) {
1621                 err = -1;
1622                 goto out_child;
1623         }
1624         session->header.env.comp_mmap_len = session->evlist->core.mmap_len;
1625
1626         if (rec->opts.kcore) {
1627                 err = record__kcore_copy(&session->machines.host, data);
1628                 if (err) {
1629                         pr_err("ERROR: Failed to copy kcore\n");
1630                         goto out_child;
1631                 }
1632         }
1633
1634         err = bpf__apply_obj_config();
1635         if (err) {
1636                 char errbuf[BUFSIZ];
1637
1638                 bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
1639                 pr_err("ERROR: Apply config to BPF failed: %s\n",
1640                          errbuf);
1641                 goto out_child;
1642         }
1643
1644         /*
1645          * Normally perf_session__new would do this, but it doesn't have the
1646          * evlist.
1647          */
1648         if (rec->tool.ordered_events && !perf_evlist__sample_id_all(rec->evlist)) {
1649                 pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
1650                 rec->tool.ordered_events = false;
1651         }
1652
1653         if (!rec->evlist->nr_groups)
1654                 perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);
1655
1656         if (data->is_pipe) {
1657                 err = perf_header__write_pipe(fd);
1658                 if (err < 0)
1659                         goto out_child;
1660         } else {
1661                 err = perf_session__write_header(session, rec->evlist, fd, false);
1662                 if (err < 0)
1663                         goto out_child;
1664         }
1665
1666         err = -1;
1667         if (!rec->no_buildid
1668             && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
1669                 pr_err("Couldn't generate buildids. "
1670                        "Use --no-buildid to profile anyway.\n");
1671                 goto out_child;
1672         }
1673
1674         err = record__setup_sb_evlist(rec);
1675         if (err)
1676                 goto out_child;
1677
1678         err = record__synthesize(rec, false);
1679         if (err < 0)
1680                 goto out_child;
1681
1682         if (rec->realtime_prio) {
1683                 struct sched_param param;
1684
1685                 param.sched_priority = rec->realtime_prio;
1686                 if (sched_setscheduler(0, SCHED_FIFO, &param)) {
1687                         pr_err("Could not set realtime priority.\n");
1688                         err = -1;
1689                         goto out_child;
1690                 }
1691         }
1692
1693         /*
1694          * When perf is starting the traced process, all the events
1695          * (apart from group members) have enable_on_exec=1 set,
1696          * so don't spoil it by prematurely enabling them.
1697          */
1698         if (!target__none(&opts->target) && !opts->initial_delay)
1699                 evlist__enable(rec->evlist);
1700
1701         /*
1702          * Let the child rip
1703          */
1704         if (forks) {
1705                 struct machine *machine = &session->machines.host;
1706                 union perf_event *event;
1707                 pid_t tgid;
1708
1709                 event = malloc(sizeof(event->comm) + machine->id_hdr_size);
1710                 if (event == NULL) {
1711                         err = -ENOMEM;
1712                         goto out_child;
1713                 }
1714
1715                 /*
1716                  * Some H/W events are generated before the COMM event,
1717                  * which is emitted during exec(), so perf script
1718                  * cannot see a correct process name for those events.
1719                  * Synthesize a COMM event up front to prevent that.
1720                  */
1721                 tgid = perf_event__synthesize_comm(tool, event,
1722                                                    rec->evlist->workload.pid,
1723                                                    process_synthesized_event,
1724                                                    machine);
1725                 free(event);
1726
1727                 if (tgid == -1)
1728                         goto out_child;
1729
1730                 event = malloc(sizeof(event->namespaces) +
1731                                (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
1732                                machine->id_hdr_size);
1733                 if (event == NULL) {
1734                         err = -ENOMEM;
1735                         goto out_child;
1736                 }
1737
1738                 /*
1739                  * Synthesize NAMESPACES event for the command specified.
1740                  */
1741                 perf_event__synthesize_namespaces(tool, event,
1742                                                   rec->evlist->workload.pid,
1743                                                   tgid, process_synthesized_event,
1744                                                   machine);
1745                 free(event);
1746
1747                 perf_evlist__start_workload(rec->evlist);
1748         }
1749
1750         if (opts->initial_delay) {
1751                 usleep(opts->initial_delay * USEC_PER_MSEC);
1752                 evlist__enable(rec->evlist);
1753         }
1754
1755         trigger_ready(&auxtrace_snapshot_trigger);
1756         trigger_ready(&switch_output_trigger);
1757         perf_hooks__invoke_record_start();
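        /*
         * Main record loop: flush the mmap buffers, handle AUX area
         * snapshots and output file switching, and poll until the
         * workload exits or we are asked to stop.
         */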
1758         for (;;) {
1759                 unsigned long long hits = rec->samples;
1760
1761                 /*
1762                  * rec->evlist->bkw_mmap_state may be BKW_MMAP_EMPTY
1763                  * here: when done == true and hits != rec->samples
1764                  * in the previous round.
1765                  *
1766                  * perf_evlist__toggle_bkw_mmap ensures we never
1767                  * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING.
1768                  */
1769                 if (trigger_is_hit(&switch_output_trigger) || done || draining)
1770                         perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING);
1771
1772                 if (record__mmap_read_all(rec, false) < 0) {
1773                         trigger_error(&auxtrace_snapshot_trigger);
1774                         trigger_error(&switch_output_trigger);
1775                         err = -1;
1776                         goto out_child;
1777                 }
1778
1779                 if (auxtrace_record__snapshot_started) {
1780                         auxtrace_record__snapshot_started = 0;
1781                         if (!trigger_is_error(&auxtrace_snapshot_trigger))
1782                                 record__read_auxtrace_snapshot(rec, false);
1783                         if (trigger_is_error(&auxtrace_snapshot_trigger)) {
1784                                 pr_err("AUX area tracing snapshot failed\n");
1785                                 err = -1;
1786                                 goto out_child;
1787                         }
1788                 }
1789
1790                 if (trigger_is_hit(&switch_output_trigger)) {
1791                         /*
1792                          * If switch_output_trigger is hit, the data in the
1793                          * overwritable ring buffer should have been collected,
1794                          * so bkw_mmap_state should be set to BKW_MMAP_EMPTY.
1795                          *
1796                          * If SIGUSR2 was raised after or during record__mmap_read_all(),
1797                          * record__mmap_read_all() didn't collect data from the
1798                          * overwritable ring buffer. Read again.
1799                          */
1800                         if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING)
1801                                 continue;
1802                         trigger_ready(&switch_output_trigger);
1803
1804                         /*
1805                          * Re-enable events in the overwrite ring buffer after
1806                          * record__mmap_read_all(): we should have collected
1807                          * data from it.
1808                          */
1809                         perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING);
1810
1811                         if (!quiet)
1812                                 fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
1813                                         waking);
1814                         waking = 0;
1815                         fd = record__switch_output(rec, false);
1816                         if (fd < 0) {
1817                                 pr_err("Failed to switch to new file\n");
1818                                 trigger_error(&switch_output_trigger);
1819                                 err = fd;
1820                                 goto out_child;
1821                         }
1822
1823                         /* re-arm the alarm */
1824                         if (rec->switch_output.time)
1825                                 alarm(rec->switch_output.time);
1826                 }
1827
1828                 if (hits == rec->samples) {
1829                         if (done || draining)
1830                                 break;
1831                         err = evlist__poll(rec->evlist, -1);
1832                         /*
1833                          * Propagate the error only if there is one. Ignore a positive
1834                          * number of returned events and interruption errors.
1835                          */
1836                         if (err > 0 || (err < 0 && errno == EINTR))
1837                                 err = 0;
1838                         waking++;
1839
1840                         if (evlist__filter_pollfd(rec->evlist, POLLERR | POLLHUP) == 0)
1841                                 draining = true;
1842                 }
1843
1844                 /*
1845                  * When perf is starting the traced process, the events die
1846                  * with the process at the end and we wait for that. Thus there
1847                  * is no need to disable the events in this case.
1848                  */
1849                 if (done && !disabled && !target__none(&opts->target)) {
1850                         trigger_off(&auxtrace_snapshot_trigger);
1851                         evlist__disable(rec->evlist);
1852                         disabled = true;
1853                 }
1854         }
1855
1856         trigger_off(&auxtrace_snapshot_trigger);
1857         trigger_off(&switch_output_trigger);
1858
1859         if (opts->auxtrace_snapshot_on_exit)
1860                 record__auxtrace_snapshot_exit(rec);
1861
1862         if (forks && workload_exec_errno) {
1863                 char msg[STRERR_BUFSIZE];
1864                 const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
1865                 pr_err("Workload failed: %s\n", emsg);
1866                 err = -1;
1867                 goto out_child;
1868         }
1869
1870         if (!quiet)
1871                 fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);
1872
1873         if (target__none(&rec->opts.target))
1874                 record__synthesize_workload(rec, true);
1875
1876 out_child:
1877         record__mmap_read_all(rec, true);
1878         record__aio_mmap_read_sync(rec);
1879
1880         if (rec->session->bytes_transferred && rec->session->bytes_compressed) {
1881                 ratio = (float)rec->session->bytes_transferred/(float)rec->session->bytes_compressed;
1882                 session->header.env.comp_ratio = ratio + 0.5;
1883         }
1884
1885         if (forks) {
1886                 int exit_status;
1887
1888                 if (!child_finished)
1889                         kill(rec->evlist->workload.pid, SIGTERM);
1890
1891                 wait(&exit_status);
1892
1893                 if (err < 0)
1894                         status = err;
1895                 else if (WIFEXITED(exit_status))
1896                         status = WEXITSTATUS(exit_status);
1897                 else if (WIFSIGNALED(exit_status))
1898                         signr = WTERMSIG(exit_status);
1899         } else
1900                 status = err;
1901
1902         record__synthesize(rec, true);
1903         /* this will be recalculated during process_buildids() */
1904         rec->samples = 0;
1905
1906         if (!err) {
1907                 if (!rec->timestamp_filename) {
1908                         record__finish_output(rec);
1909                 } else {
1910                         fd = record__switch_output(rec, true);
1911                         if (fd < 0) {
1912                                 status = fd;
1913                                 goto out_delete_session;
1914                         }
1915                 }
1916         }
1917
1918         perf_hooks__invoke_record_end();
1919
1920         if (!err && !quiet) {
1921                 char samples[128];
1922                 const char *postfix = rec->timestamp_filename ?
1923                                         ".<timestamp>" : "";
1924
1925                 if (rec->samples && !rec->opts.full_auxtrace)
1926                         scnprintf(samples, sizeof(samples),
1927                                   " (%" PRIu64 " samples)", rec->samples);
1928                 else
1929                         samples[0] = '\0';
1930
1931                 fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s",
1932                         perf_data__size(data) / 1024.0 / 1024.0,
1933                         data->path, postfix, samples);
1934                 if (ratio) {
1935                         fprintf(stderr, ", compressed (original %.3f MB, ratio is %.3f)",
1936                                         rec->session->bytes_transferred / 1024.0 / 1024.0,
1937                                         ratio);
1938                 }
1939                 fprintf(stderr, " ]\n");
1940         }
1941
1942 out_delete_session:
1943 #ifdef HAVE_EVENTFD_SUPPORT
1944         if (done_fd >= 0)
1945                 close(done_fd);
1946 #endif
1947         zstd_fini(&session->zstd_data);
1948         perf_session__delete(session);
1949
1950         if (!opts->no_bpf_event)
1951                 perf_evlist__stop_sb_thread(rec->sb_evlist);
1952         return status;
1953 }
1954
1955 static void callchain_debug(struct callchain_param *callchain)
1956 {
1957         static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };
1958
1959         pr_debug("callchain: type %s\n", str[callchain->record_mode]);
1960
1961         if (callchain->record_mode == CALLCHAIN_DWARF)
1962                 pr_debug("callchain: stack dump size %d\n",
1963                          callchain->dump_size);
1964 }
1965
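/*
 * Parse the --call-graph argument (record_mode[,record_size]); selecting
 * DWARF unwinding also enables data address sampling.
 */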
1966 int record_opts__parse_callchain(struct record_opts *record,
1967                                  struct callchain_param *callchain,
1968                                  const char *arg, bool unset)
1969 {
1970         int ret;
1971         callchain->enabled = !unset;
1972
1973         /* --no-call-graph */
1974         if (unset) {
1975                 callchain->record_mode = CALLCHAIN_NONE;
1976                 pr_debug("callchain: disabled\n");
1977                 return 0;
1978         }
1979
1980         ret = parse_callchain_record_opt(arg, callchain);
1981         if (!ret) {
1982                 /* Enable data address sampling for DWARF unwind. */
1983                 if (callchain->record_mode == CALLCHAIN_DWARF)
1984                         record->sample_address = true;
1985                 callchain_debug(callchain);
1986         }
1987
1988         return ret;
1989 }
1990
1991 int record_parse_callchain_opt(const struct option *opt,
1992                                const char *arg,
1993                                int unset)
1994 {
1995         return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
1996 }
1997
1998 int record_callchain_opt(const struct option *opt,
1999                          const char *arg __maybe_unused,
2000                          int unset __maybe_unused)
2001 {
2002         struct callchain_param *callchain = opt->value;
2003
2004         callchain->enabled = true;
2005
2006         if (callchain->record_mode == CALLCHAIN_NONE)
2007                 callchain->record_mode = CALLCHAIN_FP;
2008
2009         callchain_debug(callchain);
2010         return 0;
2011 }
2012
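/*
 * Handle 'record.*' perf config entries: build-id handling
 * (record.build-id), call-graph mode (record.call-graph) and, when AIO
 * support is compiled in, the number of AIO control blocks (record.aio).
 */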
2013 static int perf_record_config(const char *var, const char *value, void *cb)
2014 {
2015         struct record *rec = cb;
2016
2017         if (!strcmp(var, "record.build-id")) {
2018                 if (!strcmp(value, "cache"))
2019                         rec->no_buildid_cache = false;
2020                 else if (!strcmp(value, "no-cache"))
2021                         rec->no_buildid_cache = true;
2022                 else if (!strcmp(value, "skip"))
2023                         rec->no_buildid = true;
2024                 else
2025                         return -1;
2026                 return 0;
2027         }
2028         if (!strcmp(var, "record.call-graph")) {
2029                 var = "call-graph.record-mode";
2030                 return perf_default_config(var, value, cb);
2031         }
2032 #ifdef HAVE_AIO_SUPPORT
2033         if (!strcmp(var, "record.aio")) {
2034                 rec->opts.nr_cblocks = strtol(value, NULL, 0);
2035                 if (!rec->opts.nr_cblocks)
2036                         rec->opts.nr_cblocks = nr_cblocks_default;
2037         }
2038 #endif
2039
2040         return 0;
2041 }
2042
2043 struct clockid_map {
2044         const char *name;
2045         int clockid;
2046 };
2047
2048 #define CLOCKID_MAP(n, c)       \
2049         { .name = n, .clockid = (c), }
2050
2051 #define CLOCKID_END     { .name = NULL, }
2052
2053
2054 /*
2055  * Add the missing ones; we need to build on many distros...
2056  */
2057 #ifndef CLOCK_MONOTONIC_RAW
2058 #define CLOCK_MONOTONIC_RAW 4
2059 #endif
2060 #ifndef CLOCK_BOOTTIME
2061 #define CLOCK_BOOTTIME 7
2062 #endif
2063 #ifndef CLOCK_TAI
2064 #define CLOCK_TAI 11
2065 #endif
2066
2067 static const struct clockid_map clockids[] = {
2068         /* available for all events, NMI safe */
2069         CLOCKID_MAP("monotonic", CLOCK_MONOTONIC),
2070         CLOCKID_MAP("monotonic_raw", CLOCK_MONOTONIC_RAW),
2071
2072         /* available for some events */
2073         CLOCKID_MAP("realtime", CLOCK_REALTIME),
2074         CLOCKID_MAP("boottime", CLOCK_BOOTTIME),
2075         CLOCKID_MAP("tai", CLOCK_TAI),
2076
2077         /* available for the lazy */
2078         CLOCKID_MAP("mono", CLOCK_MONOTONIC),
2079         CLOCKID_MAP("raw", CLOCK_MONOTONIC_RAW),
2080         CLOCKID_MAP("real", CLOCK_REALTIME),
2081         CLOCKID_MAP("boot", CLOCK_BOOTTIME),
2082
2083         CLOCKID_END,
2084 };
2085
2086 static int get_clockid_res(clockid_t clk_id, u64 *res_ns)
2087 {
2088         struct timespec res;
2089
2090         *res_ns = 0;
2091         if (!clock_getres(clk_id, &res))
2092                 *res_ns = res.tv_nsec + res.tv_sec * NSEC_PER_SEC;
2093         else
2094                 pr_warning("WARNING: Failed to determine specified clock resolution.\n");
2095
2096         return 0;
2097 }
2098
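/*
 * Parse the -k/--clockid argument: either a raw clockid number or one of
 * the names in clockids[], with an optional "CLOCK_" prefix (e.g.
 * "monotonic_raw" or "CLOCK_MONOTONIC"). Also record the clock's
 * resolution for the perf.data header.
 */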
2099 static int parse_clockid(const struct option *opt, const char *str, int unset)
2100 {
2101         struct record_opts *opts = (struct record_opts *)opt->value;
2102         const struct clockid_map *cm;
2103         const char *ostr = str;
2104
2105         if (unset) {
2106                 opts->use_clockid = 0;
2107                 return 0;
2108         }
2109
2110         /* no arg passed */
2111         if (!str)
2112                 return 0;
2113
2114         /* no setting it twice */
2115         if (opts->use_clockid)
2116                 return -1;
2117
2118         opts->use_clockid = true;
2119
2120         /* if it's a number, we're done */
2121         if (sscanf(str, "%d", &opts->clockid) == 1)
2122                 return get_clockid_res(opts->clockid, &opts->clockid_res_ns);
2123
2124         /* allow a "CLOCK_" prefix to the name */
2125         if (!strncasecmp(str, "CLOCK_", 6))
2126                 str += 6;
2127
2128         for (cm = clockids; cm->name; cm++) {
2129                 if (!strcasecmp(str, cm->name)) {
2130                         opts->clockid = cm->clockid;
2131                         return get_clockid_res(opts->clockid,
2132                                                &opts->clockid_res_ns);
2133                 }
2134         }
2135
2136         opts->use_clockid = false;
2137         ui__warning("unknown clockid %s, check man page\n", ostr);
2138         return -1;
2139 }
2140
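/*
 * Parse --affinity: "node" pins the trace reading thread to the NUMA node
 * of the mmap buffer being read, "cpu" pins it to that buffer's cpu;
 * anything else keeps the default system affinity.
 */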
2141 static int record__parse_affinity(const struct option *opt, const char *str, int unset)
2142 {
2143         struct record_opts *opts = (struct record_opts *)opt->value;
2144
2145         if (unset || !str)
2146                 return 0;
2147
2148         if (!strcasecmp(str, "node"))
2149                 opts->affinity = PERF_AFFINITY_NODE;
2150         else if (!strcasecmp(str, "cpu"))
2151                 opts->affinity = PERF_AFFINITY_CPU;
2152
2153         return 0;
2154 }
2155
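/*
 * Parse --max-size: a plain byte count or a value with a B/K/M/G suffix;
 * 0 means the output size is unlimited.
 */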
2156 static int parse_output_max_size(const struct option *opt,
2157                                  const char *str, int unset)
2158 {
2159         unsigned long *s = (unsigned long *)opt->value;
2160         static struct parse_tag tags_size[] = {
2161                 { .tag  = 'B', .mult = 1       },
2162                 { .tag  = 'K', .mult = 1 << 10 },
2163                 { .tag  = 'M', .mult = 1 << 20 },
2164                 { .tag  = 'G', .mult = 1 << 30 },
2165                 { .tag  = 0 },
2166         };
2167         unsigned long val;
2168
2169         if (unset) {
2170                 *s = 0;
2171                 return 0;
2172         }
2173
2174         val = parse_tag_value(str, tags_size);
2175         if (val != (unsigned long) -1) {
2176                 *s = val;
2177                 return 0;
2178         }
2179
2180         return -1;
2181 }
2182
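/*
 * Parse -m/--mmap-pages as "pages[,pages]": the first value sizes the
 * data mmap, the optional second value sizes the AUX area tracing mmap,
 * e.g. "-m 512,128".
 */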
2183 static int record__parse_mmap_pages(const struct option *opt,
2184                                     const char *str,
2185                                     int unset __maybe_unused)
2186 {
2187         struct record_opts *opts = opt->value;
2188         char *s, *p;
2189         unsigned int mmap_pages;
2190         int ret;
2191
2192         if (!str)
2193                 return -EINVAL;
2194
2195         s = strdup(str);
2196         if (!s)
2197                 return -ENOMEM;
2198
2199         p = strchr(s, ',');
2200         if (p)
2201                 *p = '\0';
2202
2203         if (*s) {
2204                 ret = __perf_evlist__parse_mmap_pages(&mmap_pages, s);
2205                 if (ret)
2206                         goto out_free;
2207                 opts->mmap_pages = mmap_pages;
2208         }
2209
2210         if (!p) {
2211                 ret = 0;
2212                 goto out_free;
2213         }
2214
2215         ret = __perf_evlist__parse_mmap_pages(&mmap_pages, p + 1);
2216         if (ret)
2217                 goto out_free;
2218
2219         opts->auxtrace_mmap_pages = mmap_pages;
2220
2221 out_free:
2222         free(s);
2223         return ret;
2224 }
2225
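/*
 * Warn when the --switch-output size threshold is smaller than half of
 * the mmap buffer size; in that case the resulting perf.data files are
 * expected to overshoot the requested threshold.
 */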
2226 static void switch_output_size_warn(struct record *rec)
2227 {
2228         u64 wakeup_size = evlist__mmap_size(rec->opts.mmap_pages);
2229         struct switch_output *s = &rec->switch_output;
2230
2231         wakeup_size /= 2;
2232
2233         if (s->size < wakeup_size) {
2234                 char buf[100];
2235
2236                 unit_number__scnprintf(buf, sizeof(buf), wakeup_size);
2237                 pr_warning("WARNING: switch-output data size is lower than "
2238                            "the wakeup kernel buffer size (%s), "
2239                            "expect bigger perf.data sizes\n", buf);
2240         }
2241 }
2242
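/*
 * Configure --switch-output: "signal" switches on SIGUSR2, a size[BKMG]
 * value (e.g. "100M") switches when the output grows past that size, and
 * a time[smhd] value (e.g. "30s") switches periodically via SIGALRM.
 * --switch-output-event implies the signal mode. Any mode also turns on
 * timestamped output filenames.
 */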
2243 static int switch_output_setup(struct record *rec)
2244 {
2245         struct switch_output *s = &rec->switch_output;
2246         static struct parse_tag tags_size[] = {
2247                 { .tag  = 'B', .mult = 1       },
2248                 { .tag  = 'K', .mult = 1 << 10 },
2249                 { .tag  = 'M', .mult = 1 << 20 },
2250                 { .tag  = 'G', .mult = 1 << 30 },
2251                 { .tag  = 0 },
2252         };
2253         static struct parse_tag tags_time[] = {
2254                 { .tag  = 's', .mult = 1        },
2255                 { .tag  = 'm', .mult = 60       },
2256                 { .tag  = 'h', .mult = 60*60    },
2257                 { .tag  = 'd', .mult = 60*60*24 },
2258                 { .tag  = 0 },
2259         };
2260         unsigned long val;
2261
2262         /*
2263          * If we're using --switch-output-event, then --switch-output=signal
2264          * is implied, as we'll send a SIGUSR2 from the side band thread
2265          * to its parent.
2266          */
2267         if (rec->switch_output_event_set)
2268                 goto do_signal;
2269
2270         if (!s->set)
2271                 return 0;
2272
2273         if (!strcmp(s->str, "signal")) {
2274 do_signal:
2275                 s->signal = true;
2276                 pr_debug("switch-output with SIGUSR2 signal\n");
2277                 goto enabled;
2278         }
2279
2280         val = parse_tag_value(s->str, tags_size);
2281         if (val != (unsigned long) -1) {
2282                 s->size = val;
2283                 pr_debug("switch-output with %s size threshold\n", s->str);
2284                 goto enabled;
2285         }
2286
2287         val = parse_tag_value(s->str, tags_time);
2288         if (val != (unsigned long) -1) {
2289                 s->time = val;
2290                 pr_debug("switch-output with %s time threshold (%lu seconds)\n",
2291                          s->str, s->time);
2292                 goto enabled;
2293         }
2294
2295         return -1;
2296
2297 enabled:
2298         rec->timestamp_filename = true;
2299         s->enabled              = true;
2300
2301         if (s->size && !rec->opts.no_buffering)
2302                 switch_output_size_warn(rec);
2303
2304         return 0;
2305 }
2306
2307 static const char * const __record_usage[] = {
2308         "perf record [<options>] [<command>]",
2309         "perf record [<options>] -- <command> [<options>]",
2310         NULL
2311 };
2312 const char * const *record_usage = __record_usage;
2313
2314 static int build_id__process_mmap(struct perf_tool *tool, union perf_event *event,
2315                                   struct perf_sample *sample, struct machine *machine)
2316 {
2317         /*
2318          * We already have the kernel maps, put in place via perf_session__create_kernel_maps()
2319          * no need to add them twice.
2320          */
2321         if (!(event->header.misc & PERF_RECORD_MISC_USER))
2322                 return 0;
2323         return perf_event__process_mmap(tool, event, sample, machine);
2324 }
2325
2326 static int build_id__process_mmap2(struct perf_tool *tool, union perf_event *event,
2327                                    struct perf_sample *sample, struct machine *machine)
2328 {
2329         /*
2330          * We already have the kernel maps, put in place via perf_session__create_kernel_maps()
2331          * no need to add them twice.
2332          */
2333         if (!(event->header.misc & PERF_RECORD_MISC_USER))
2334                 return 0;
2335
2336         return perf_event__process_mmap2(tool, event, sample, machine);
2337 }
2338
2339 /*
2340  * XXX Ideally this would be local to cmd_record() and passed to a record__new,
2341  * because we need to have access to it in record__exit, which is called
2342  * after cmd_record() exits, but since record_options needs to be accessible to
2343  * builtin-script, leave it here.
2344  *
2345  * At least we don't touch it in all the other functions here directly.
2346  *
2347  * Just say no to tons of global variables, sigh.
2348  */
2349 static struct record record = {
2350         .opts = {
2351                 .sample_time         = true,
2352                 .mmap_pages          = UINT_MAX,
2353                 .user_freq           = UINT_MAX,
2354                 .user_interval       = ULLONG_MAX,
2355                 .freq                = 4000,
2356                 .target              = {
2357                         .uses_mmap   = true,
2358                         .default_per_cpu = true,
2359                 },
2360                 .mmap_flush          = MMAP_FLUSH_DEFAULT,
2361                 .nr_threads_synthesize = 1,
2362         },
2363         .tool = {
2364                 .sample         = process_sample_event,
2365                 .fork           = perf_event__process_fork,
2366                 .exit           = perf_event__process_exit,
2367                 .comm           = perf_event__process_comm,
2368                 .namespaces     = perf_event__process_namespaces,
2369                 .mmap           = build_id__process_mmap,
2370                 .mmap2          = build_id__process_mmap2,
2371                 .ordered_events = true,
2372         },
2373 };
2374
2375 const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
2376         "\n\t\t\t\tDefault: fp";
2377
2378 static bool dry_run;
2379
2380 /*
2381  * XXX Will stay a global variable till we fix builtin-script.c to stop messing
2382  * with it and switch to using the library functions in perf_evlist that came
2383  * from builtin-record.c, i.e. use record_opts,
2384  * perf_evlist__prepare_workload, etc. instead of fork+exec'ing 'perf record',
2385  * using pipes, etc.
2386  */
2387 static struct option __record_options[] = {
2388         OPT_CALLBACK('e', "event", &record.evlist, "event",
2389                      "event selector. use 'perf list' to list available events",
2390                      parse_events_option),
2391         OPT_CALLBACK(0, "filter", &record.evlist, "filter",
2392                      "event filter", parse_filter),
2393         OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
2394                            NULL, "don't record events from perf itself",
2395                            exclude_perf),
2396         OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
2397                     "record events on existing process id"),
2398         OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
2399                     "record events on existing thread id"),
2400         OPT_INTEGER('r', "realtime", &record.realtime_prio,
2401                     "collect data with this RT SCHED_FIFO priority"),
2402         OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
2403                     "collect data without buffering"),
2404         OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
2405                     "collect raw sample records from all opened counters"),
2406         OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
2407                             "system-wide collection from all CPUs"),
2408         OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
2409                     "list of cpus to monitor"),
2410         OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
2411         OPT_STRING('o', "output", &record.data.path, "file",
2412                     "output file name"),
2413         OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
2414                         &record.opts.no_inherit_set,
2415                         "child tasks do not inherit counters"),
2416         OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize,
2417                     "synthesize non-sample events at the end of output"),
2418         OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"),
2419         OPT_BOOLEAN(0, "no-bpf-event", &record.opts.no_bpf_event, "do not record bpf events"),
2420         OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq,
2421                     "Fail if the specified frequency can't be used"),
2422         OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'",
2423                      "profile at this frequency",
2424                       record__parse_freq),
2425         OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
2426                      "number of mmap data pages and AUX area tracing mmap pages",
2427                      record__parse_mmap_pages),
2428         OPT_CALLBACK(0, "mmap-flush", &record.opts, "number",
2429                      "Minimal number of bytes that is extracted from mmap data pages (default: 1)",
2430                      record__mmap_flush_parse),
2431         OPT_BOOLEAN(0, "group", &record.opts.group,
2432                     "put the counters into a counter group"),
2433         OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
2434                            NULL, "enables call-graph recording" ,
2435                            &record_callchain_opt),
2436         OPT_CALLBACK(0, "call-graph", &record.opts,
2437                      "record_mode[,record_size]", record_callchain_help,
2438                      &record_parse_callchain_opt),
2439         OPT_INCR('v', "verbose", &verbose,
2440                     "be more verbose (show counter open errors, etc)"),
2441         OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
2442         OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
2443                     "per thread counts"),
2444         OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
2445         OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr,
2446                     "Record the sample physical addresses"),
2447         OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"),
2448         OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
2449                         &record.opts.sample_time_set,
2450                         "Record the sample timestamps"),
2451         OPT_BOOLEAN_SET('P', "period", &record.opts.period, &record.opts.period_set,
2452                         "Record the sample period"),
2453         OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
2454                     "don't sample"),
2455         OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache,
2456                         &record.no_buildid_cache_set,
2457                         "do not update the buildid cache"),
2458         OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid,
2459                         &record.no_buildid_set,
2460                         "do not collect buildids in perf.data"),
2461         OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
2462                      "monitor event in cgroup name only",
2463                      parse_cgroups),
2464         OPT_UINTEGER('D', "delay", &record.opts.initial_delay,
2465                   "ms to wait before starting measurement after program start"),
2466         OPT_BOOLEAN(0, "kcore", &record.opts.kcore, "copy /proc/kcore"),
2467         OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
2468                    "user to profile"),
2469
2470         OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
2471                      "branch any", "sample any taken branches",
2472                      parse_branch_stack),
2473
2474         OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
2475                      "branch filter mask", "branch stack filter modes",
2476                      parse_branch_stack),
2477         OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
2478                     "sample by weight (on special events only)"),
2479         OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
2480                     "sample transaction flags (special events only)"),
2481         OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
2482                     "use per-thread mmaps"),
2483         OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
2484                     "sample selected machine registers on interrupt,"
2485                     " use '-I?' to list register names", parse_intr_regs),
2486         OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register",
2487                     "sample selected machine registers in user space,"
2488                     " use '--user-regs=?' to list register names", parse_user_regs),
2489         OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
2490                     "Record running/enabled time of read (:S) events"),
2491         OPT_CALLBACK('k', "clockid", &record.opts,
2492         "clockid", "clockid to use for events, see clock_gettime()",
2493         parse_clockid),
2494         OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
2495                           "opts", "AUX area tracing Snapshot Mode", ""),
2496         OPT_STRING_OPTARG(0, "aux-sample", &record.opts.auxtrace_sample_opts,
2497                           "opts", "sample AUX area", ""),
2498         OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout,
2499                         "per thread proc mmap processing timeout in ms"),
2500         OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces,
2501                     "Record namespaces events"),
2502         OPT_BOOLEAN(0, "all-cgroups", &record.opts.record_cgroup,
2503                     "Record cgroup events"),
2504         OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events,
2505                     "Record context switch events"),
2506         OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
2507                          "Configure all used events to run in kernel space.",
2508                          PARSE_OPT_EXCLUSIVE),
2509         OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user,
2510                          "Configure all used events to run in user space.",
2511                          PARSE_OPT_EXCLUSIVE),
2512         OPT_BOOLEAN(0, "kernel-callchains", &record.opts.kernel_callchains,
2513                     "collect kernel callchains"),
2514         OPT_BOOLEAN(0, "user-callchains", &record.opts.user_callchains,
2515                     "collect user callchains"),
2516         OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path",
2517                    "clang binary to use for compiling BPF scriptlets"),
2518         OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options",
2519                    "options passed to clang when compiling BPF scriptlets"),
2520         OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
2521                    "file", "vmlinux pathname"),
2522         OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
2523                     "Record build-id of all DSOs regardless of hits"),
2524         OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
2525                     "append timestamp to output filename"),
2526         OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary,
2527                     "Record timestamp boundary (time of first/last samples)"),
2528         OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str,
2529                           &record.switch_output.set, "signal or size[BKMG] or time[smhd]",
2530                           "Switch output when receiving SIGUSR2 (signal) or crossing a size or time threshold",
2531                           "signal"),
2532         OPT_CALLBACK_SET(0, "switch-output-event", &record.sb_evlist, &record.switch_output_event_set, "switch output event",
2533                          "switch output event selector. use 'perf list' to list available events",
2534                          parse_events_option_new_evlist),
2535         OPT_INTEGER(0, "switch-max-files", &record.switch_output.num_files,
2536                    "Limit number of switch output generated files"),
2537         OPT_BOOLEAN(0, "dry-run", &dry_run,
2538                     "Parse options then exit"),
2539 #ifdef HAVE_AIO_SUPPORT
2540         OPT_CALLBACK_OPTARG(0, "aio", &record.opts,
2541                      &nr_cblocks_default, "n", "Use <n> control blocks in asynchronous trace writing mode (default: 1, max: 4)",
2542                      record__aio_parse),
2543 #endif
2544         OPT_CALLBACK(0, "affinity", &record.opts, "node|cpu",
2545                      "Set affinity mask of trace reading thread to NUMA node cpu mask or cpu of processed mmap buffer",
2546                      record__parse_affinity),
2547 #ifdef HAVE_ZSTD_SUPPORT
2548         OPT_CALLBACK_OPTARG('z', "compression-level", &record.opts, &comp_level_default,
2549                             "n", "Compress records using the specified level (default: 1 - fastest compression, 22 - greatest compression)",
2550                             record__parse_comp_level),
2551 #endif
2552         OPT_CALLBACK(0, "max-size", &record.output_max_size,
2553                      "size", "Limit the maximum size of the output file", parse_output_max_size),
2554         OPT_UINTEGER(0, "num-thread-synthesize",
2555                      &record.opts.nr_threads_synthesize,
2556                      "number of threads to run for event synthesis"),
2557         OPT_END()
2558 };
2559
2560 struct option *record_options = __record_options;
2561
2562 int cmd_record(int argc, const char **argv)
2563 {
2564         int err;
2565         struct record *rec = &record;
2566         char errbuf[BUFSIZ];
2567
2568         setlocale(LC_ALL, "");
2569
2570 #ifndef HAVE_LIBBPF_SUPPORT
2571 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c)
2572         set_nobuild('\0', "clang-path", true);
2573         set_nobuild('\0', "clang-opt", true);
2574 # undef set_nobuild
2575 #endif
2576
2577 #ifndef HAVE_BPF_PROLOGUE
2578 # if !defined (HAVE_DWARF_SUPPORT)
2579 #  define REASON  "NO_DWARF=1"
2580 # elif !defined (HAVE_LIBBPF_SUPPORT)
2581 #  define REASON  "NO_LIBBPF=1"
2582 # else
2583 #  define REASON  "this architecture doesn't support BPF prologue"
2584 # endif
2585 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c)
2586         set_nobuild('\0', "vmlinux", true);
2587 # undef set_nobuild
2588 # undef REASON
2589 #endif
2590
2591         rec->opts.affinity = PERF_AFFINITY_SYS;
2592
2593         rec->evlist = evlist__new();
2594         if (rec->evlist == NULL)
2595                 return -ENOMEM;
2596
2597         err = perf_config(perf_record_config, rec);
2598         if (err)
2599                 return err;
2600
2601         argc = parse_options(argc, argv, record_options, record_usage,
2602                             PARSE_OPT_STOP_AT_NON_OPTION);
2603         if (quiet)
2604                 perf_quiet_option();
2605
2606         /* Make system wide (-a) the default target. */
2607         if (!argc && target__none(&rec->opts.target))
2608                 rec->opts.target.system_wide = true;
2609
2610         if (nr_cgroups && !rec->opts.target.system_wide) {
2611                 usage_with_options_msg(record_usage, record_options,
2612                         "cgroup monitoring only available in system-wide mode");
2613
2614         }
2615
2616         if (rec->opts.kcore)
2617                 rec->data.is_dir = true;
2618
2619         if (rec->opts.comp_level != 0) {
2620                 pr_debug("Compression enabled, disabling build id collection at the end of the session.\n");
2621                 rec->no_buildid = true;
2622         }
2623
2624         if (rec->opts.record_switch_events &&
2625             !perf_can_record_switch_events()) {
2626                 ui__error("kernel does not support recording context switch events\n");
2627                 parse_options_usage(record_usage, record_options, "switch-events", 0);
2628                 return -EINVAL;
2629         }
2630
2631         if (switch_output_setup(rec)) {
2632                 parse_options_usage(record_usage, record_options, "switch-output", 0);
2633                 return -EINVAL;
2634         }
2635
2636         if (rec->switch_output.time) {
2637                 signal(SIGALRM, alarm_sig_handler);
2638                 alarm(rec->switch_output.time);
2639         }
2640
2641         if (rec->switch_output.num_files) {
2642                 rec->switch_output.filenames = calloc(rec->switch_output.num_files,
2643                                                       sizeof(char *));
2644                 if (!rec->switch_output.filenames)
2645                         return -EINVAL;
2646         }
2647
2648         /*
2649          * Allow aliases to facilitate the lookup of symbols for address
2650          * filters. Refer to auxtrace_parse_filters().
2651          */
2652         symbol_conf.allow_aliases = true;
2653
2654         symbol__init(NULL);
2655
2656         if (rec->opts.affinity != PERF_AFFINITY_SYS) {
2657                 rec->affinity_mask.nbits = cpu__max_cpu();
2658                 rec->affinity_mask.bits = bitmap_alloc(rec->affinity_mask.nbits);
2659                 if (!rec->affinity_mask.bits) {
2660                         pr_err("Failed to allocate thread mask for %zd cpus\n", rec->affinity_mask.nbits);
2661                         return -ENOMEM;
2662                 }
2663                 pr_debug2("thread mask[%zd]: empty\n", rec->affinity_mask.nbits);
2664         }
2665
2666         err = record__auxtrace_init(rec);
2667         if (err)
2668                 goto out;
2669
2670         if (dry_run)
2671                 goto out;
2672
2673         err = bpf__setup_stdout(rec->evlist);
2674         if (err) {
2675                 bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf));
2676                 pr_err("ERROR: Setup BPF stdout failed: %s\n",
2677                          errbuf);
2678                 goto out;
2679         }
2680
2681         err = -ENOMEM;
2682
2683         if (rec->no_buildid_cache || rec->no_buildid) {
2684                 disable_buildid_cache();
2685         } else if (rec->switch_output.enabled) {
2686                 /*
2687                  * In 'perf record --switch-output', disable buildid
2688                  * generation by default to reduce data file switching
2689                  * overhead. Still generate buildids if they are explicitly
2690                  * required using
2691                  *
2692                  *  perf record --switch-output --no-no-buildid \
2693                  *              --no-no-buildid-cache
2694                  *
2695                  * The following code is equivalent to:
2696                  *
2697                  * if ((rec->no_buildid || !rec->no_buildid_set) &&
2698                  *     (rec->no_buildid_cache || !rec->no_buildid_cache_set))
2699                  *         disable_buildid_cache();
2700                  */
2701                 bool disable = true;
2702
2703                 if (rec->no_buildid_set && !rec->no_buildid)
2704                         disable = false;
2705                 if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
2706                         disable = false;
2707                 if (disable) {
2708                         rec->no_buildid = true;
2709                         rec->no_buildid_cache = true;
2710                         disable_buildid_cache();
2711                 }
2712         }
2713
2714         if (record.opts.overwrite)
2715                 record.opts.tail_synthesize = true;
2716
2717         if (rec->evlist->core.nr_entries == 0 &&
2718             __perf_evlist__add_default(rec->evlist, !record.opts.no_samples) < 0) {
2719                 pr_err("Not enough memory for event selector list\n");
2720                 goto out;
2721         }
2722
2723         if (rec->opts.target.tid && !rec->opts.no_inherit_set)
2724                 rec->opts.no_inherit = true;
2725
2726         err = target__validate(&rec->opts.target);
2727         if (err) {
2728                 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
2729                 ui__warning("%s\n", errbuf);
2730         }
2731
2732         err = target__parse_uid(&rec->opts.target);
2733         if (err) {
2734                 int saved_errno = errno;
2735
2736                 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
2737                 ui__error("%s", errbuf);
2738
2739                 err = -saved_errno;
2740                 goto out;
2741         }
2742
2743         /* Enable ignoring missing threads when -u/-p option is defined. */
2744         rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX || rec->opts.target.pid;
2745
2746         err = -ENOMEM;
2747         if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
2748                 usage_with_options(record_usage, record_options);
2749
2750         err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
2751         if (err)
2752                 goto out;
2753
2754         /*
2755          * We take all buildids when the file contains AUX area
2756          * tracing data, because we do not decode the trace, as
2757          * that would take too long.
2758          */
2759         if (rec->opts.full_auxtrace)
2760                 rec->buildid_all = true;
2761
2762         if (record_opts__config(&rec->opts)) {
2763                 err = -EINVAL;
2764                 goto out;
2765         }
2766
2767         if (rec->opts.nr_cblocks > nr_cblocks_max)
2768                 rec->opts.nr_cblocks = nr_cblocks_max;
2769         pr_debug("nr_cblocks: %d\n", rec->opts.nr_cblocks);
2770
2771         pr_debug("affinity: %s\n", affinity_tags[rec->opts.affinity]);
2772         pr_debug("mmap flush: %d\n", rec->opts.mmap_flush);
2773
2774         if (rec->opts.comp_level > comp_level_max)
2775                 rec->opts.comp_level = comp_level_max;
2776         pr_debug("comp level: %d\n", rec->opts.comp_level);
2777
2778         err = __cmd_record(&record, argc, argv);
2779 out:
2780         bitmap_free(rec->affinity_mask.bits);
2781         evlist__delete(rec->evlist);
2782         symbol__exit();
2783         auxtrace_record__free(rec->itr);
2784         return err;
2785 }
2786
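/*
 * SIGUSR2 handler: start an AUX area tracing snapshot and/or flag an
 * output file switch, depending on which of the two features is enabled.
 */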
2787 static void snapshot_sig_handler(int sig __maybe_unused)
2788 {
2789         struct record *rec = &record;
2790
2791         if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
2792                 trigger_hit(&auxtrace_snapshot_trigger);
2793                 auxtrace_record__snapshot_started = 1;
2794                 if (auxtrace_record__snapshot_start(record.itr))
2795                         trigger_error(&auxtrace_snapshot_trigger);
2796         }
2797
2798         if (switch_output_signal(rec))
2799                 trigger_hit(&switch_output_trigger);
2800 }
2801
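/*
 * SIGALRM handler for --switch-output=time: flag an output file switch
 * when the configured period expires; the alarm is re-armed in the main
 * record loop.
 */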
2802 static void alarm_sig_handler(int sig __maybe_unused)
2803 {
2804         struct record *rec = &record;
2805
2806         if (switch_output_time(rec))
2807                 trigger_hit(&switch_output_trigger);
2808 }