1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * builtin-record.c
4  *
5  * Builtin record command: Record the profile of a workload
6  * (or a CPU, or a PID) into the perf.data output file - for
7  * later analysis via perf report.
8  */
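/*
 * Illustrative invocations (a non-exhaustive sketch; see
 * tools/perf/Documentation/perf-record.txt for the full option list):
 *
 *   perf record -e cycles -- ./workload   # profile a workload by cycles
 *   perf record -a -- sleep 5             # profile all CPUs for 5 seconds
 *   perf record -p <pid>                  # profile an existing process
 *   perf record -g -- ./workload          # also capture call graphs
 */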
9 #include "builtin.h"
10
11 #include "util/build-id.h"
12 #include <subcmd/parse-options.h>
13 #include "util/parse-events.h"
14 #include "util/config.h"
15
16 #include "util/callchain.h"
17 #include "util/cgroup.h"
18 #include "util/header.h"
19 #include "util/event.h"
20 #include "util/evlist.h"
21 #include "util/evsel.h"
22 #include "util/debug.h"
23 #include "util/mmap.h"
24 #include "util/target.h"
25 #include "util/session.h"
26 #include "util/tool.h"
27 #include "util/symbol.h"
28 #include "util/record.h"
29 #include "util/cpumap.h"
30 #include "util/thread_map.h"
31 #include "util/data.h"
32 #include "util/perf_regs.h"
33 #include "util/auxtrace.h"
34 #include "util/tsc.h"
35 #include "util/parse-branch-options.h"
36 #include "util/parse-regs-options.h"
37 #include "util/perf_api_probe.h"
38 #include "util/llvm-utils.h"
39 #include "util/bpf-loader.h"
40 #include "util/trigger.h"
41 #include "util/perf-hooks.h"
42 #include "util/cpu-set-sched.h"
43 #include "util/synthetic-events.h"
44 #include "util/time-utils.h"
45 #include "util/units.h"
46 #include "util/bpf-event.h"
47 #include "util/util.h"
48 #include "util/pfm.h"
49 #include "util/clockid.h"
50 #include "util/pmu-hybrid.h"
51 #include "util/evlist-hybrid.h"
52 #include "asm/bug.h"
53 #include "perf.h"
54 #include "cputopo.h"
55
56 #include <errno.h>
57 #include <inttypes.h>
58 #include <locale.h>
59 #include <poll.h>
60 #include <pthread.h>
61 #include <unistd.h>
62 #ifndef HAVE_GETTID
63 #include <syscall.h>
64 #endif
65 #include <sched.h>
66 #include <signal.h>
67 #ifdef HAVE_EVENTFD_SUPPORT
68 #include <sys/eventfd.h>
69 #endif
70 #include <sys/mman.h>
71 #include <sys/wait.h>
72 #include <sys/types.h>
73 #include <sys/stat.h>
74 #include <fcntl.h>
75 #include <linux/err.h>
76 #include <linux/string.h>
77 #include <linux/time64.h>
78 #include <linux/zalloc.h>
79 #include <linux/bitmap.h>
80 #include <sys/time.h>
81
82 struct switch_output {
83         bool             enabled;
84         bool             signal;
85         unsigned long    size;
86         unsigned long    time;
87         const char      *str;
88         bool             set;
89         char             **filenames;
90         int              num_files;
91         int              cur_file;
92 };
93
94 struct thread_mask {
95         struct mmap_cpu_mask    maps;
96         struct mmap_cpu_mask    affinity;
97 };
98
99 struct record_thread {
100         pid_t                   tid;
101         struct thread_mask      *mask;
102         struct {
103                 int             msg[2];
104                 int             ack[2];
105         } pipes;
106         struct fdarray          pollfd;
107         int                     ctlfd_pos;
108         int                     nr_mmaps;
109         struct mmap             **maps;
110         struct mmap             **overwrite_maps;
111         struct record           *rec;
112         unsigned long long      samples;
113         unsigned long           waking;
114         u64                     bytes_written;
115         u64                     bytes_transferred;
116         u64                     bytes_compressed;
117 };
118
119 static __thread struct record_thread *thread;
120
121 enum thread_msg {
122         THREAD_MSG__UNDEFINED = 0,
123         THREAD_MSG__READY,
124         THREAD_MSG__MAX,
125 };
126
127 static const char *thread_msg_tags[THREAD_MSG__MAX] = {
128         "UNDEFINED", "READY"
129 };
130
131 enum thread_spec {
132         THREAD_SPEC__UNDEFINED = 0,
133         THREAD_SPEC__CPU,
134         THREAD_SPEC__CORE,
135         THREAD_SPEC__PACKAGE,
136         THREAD_SPEC__NUMA,
137         THREAD_SPEC__USER,
138         THREAD_SPEC__MAX,
139 };
140
141 static const char *thread_spec_tags[THREAD_SPEC__MAX] = {
142         "undefined", "cpu", "core", "package", "numa", "user"
143 };
144
145 struct record {
146         struct perf_tool        tool;
147         struct record_opts      opts;
148         u64                     bytes_written;
149         struct perf_data        data;
150         struct auxtrace_record  *itr;
151         struct evlist   *evlist;
152         struct perf_session     *session;
153         struct evlist           *sb_evlist;
154         pthread_t               thread_id;
155         int                     realtime_prio;
156         bool                    switch_output_event_set;
157         bool                    no_buildid;
158         bool                    no_buildid_set;
159         bool                    no_buildid_cache;
160         bool                    no_buildid_cache_set;
161         bool                    buildid_all;
162         bool                    buildid_mmap;
163         bool                    timestamp_filename;
164         bool                    timestamp_boundary;
165         struct switch_output    switch_output;
166         unsigned long long      samples;
167         unsigned long           output_max_size;        /* = 0: unlimited */
168         struct perf_debuginfod  debuginfod;
169         int                     nr_threads;
170         struct thread_mask      *thread_masks;
171         struct record_thread    *thread_data;
172 };
173
174 static volatile int done;
175
176 static volatile int auxtrace_record__snapshot_started;
177 static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
178 static DEFINE_TRIGGER(switch_output_trigger);
179
180 static const char *affinity_tags[PERF_AFFINITY_MAX] = {
181         "SYS", "NODE", "CPU"
182 };
183
184 #ifndef HAVE_GETTID
185 static inline pid_t gettid(void)
186 {
187         return (pid_t)syscall(__NR_gettid);
188 }
189 #endif
190
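/*
 * Nonzero when parallel trace streaming (--threads) was requested;
 * opts.threads_spec stays 0 in the default single-threaded mode.
 */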
191 static int record__threads_enabled(struct record *rec)
192 {
193         return rec->opts.threads_spec;
194 }
195
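/*
 * --switch-output may be armed by signal (SIGUSR2), by output size or by
 * time; these helpers report whether the corresponding condition is both
 * configured and ready to fire.
 */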
196 static bool switch_output_signal(struct record *rec)
197 {
198         return rec->switch_output.signal &&
199                trigger_is_ready(&switch_output_trigger);
200 }
201
202 static bool switch_output_size(struct record *rec)
203 {
204         return rec->switch_output.size &&
205                trigger_is_ready(&switch_output_trigger) &&
206                (rec->bytes_written >= rec->switch_output.size);
207 }
208
209 static bool switch_output_time(struct record *rec)
210 {
211         return rec->switch_output.time &&
212                trigger_is_ready(&switch_output_trigger);
213 }
214
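/* Total bytes written so far: the main thread counter plus all per-thread writers. */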
215 static u64 record__bytes_written(struct record *rec)
216 {
217         int t;
218         u64 bytes_written = rec->bytes_written;
219         struct record_thread *thread_data = rec->thread_data;
220
221         for (t = 0; t < rec->nr_threads; t++)
222                 bytes_written += thread_data[t].bytes_written;
223
224         return bytes_written;
225 }
226
227 static bool record__output_max_size_exceeded(struct record *rec)
228 {
229         return rec->output_max_size &&
230                (record__bytes_written(rec) >= rec->output_max_size);
231 }
232
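/*
 * Write a block of trace data either to the per-CPU/per-thread file backing
 * the mmap (threaded mode) or to the main perf.data file, updating the byte
 * counters that drive the --max-size and --switch-output size checks.
 */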
233 static int record__write(struct record *rec, struct mmap *map __maybe_unused,
234                          void *bf, size_t size)
235 {
236         struct perf_data_file *file = &rec->session->data->file;
237
238         if (map && map->file)
239                 file = map->file;
240
241         if (perf_data_file__write(file, bf, size) < 0) {
242                 pr_err("failed to write perf data, error: %m\n");
243                 return -1;
244         }
245
246         if (map && map->file)
247                 thread->bytes_written += size;
248         else
249                 rec->bytes_written += size;
250
251         if (record__output_max_size_exceeded(rec) && !done) {
252                 fprintf(stderr, "[ perf record: perf size limit reached (%" PRIu64 " KB),"
253                                 " stopping session ]\n",
254                                 record__bytes_written(rec) >> 10);
255                 done = 1;
256         }
257
258         if (switch_output_size(rec))
259                 trigger_hit(&switch_output_trigger);
260
261         return 0;
262 }
263
264 static int record__aio_enabled(struct record *rec);
265 static int record__comp_enabled(struct record *rec);
266 static size_t zstd_compress(struct perf_session *session, struct mmap *map,
267                             void *dst, size_t dst_size, void *src, size_t src_size);
268
269 #ifdef HAVE_AIO_SUPPORT
270 static int record__aio_write(struct aiocb *cblock, int trace_fd,
271                 void *buf, size_t size, off_t off)
272 {
273         int rc;
274
275         cblock->aio_fildes = trace_fd;
276         cblock->aio_buf    = buf;
277         cblock->aio_nbytes = size;
278         cblock->aio_offset = off;
279         cblock->aio_sigevent.sigev_notify = SIGEV_NONE;
280
281         do {
282                 rc = aio_write(cblock);
283                 if (rc == 0) {
284                         break;
285                 } else if (errno != EAGAIN) {
286                         cblock->aio_fildes = -1;
287                         pr_err("failed to queue perf data, error: %m\n");
288                         break;
289                 }
290         } while (1);
291
292         return rc;
293 }
294
295 static int record__aio_complete(struct mmap *md, struct aiocb *cblock)
296 {
297         void *rem_buf;
298         off_t rem_off;
299         size_t rem_size;
300         int rc, aio_errno;
301         ssize_t aio_ret, written;
302
303         aio_errno = aio_error(cblock);
304         if (aio_errno == EINPROGRESS)
305                 return 0;
306
307         written = aio_ret = aio_return(cblock);
308         if (aio_ret < 0) {
309                 if (aio_errno != EINTR)
310                         pr_err("failed to write perf data, error: %m\n");
311                 written = 0;
312         }
313
314         rem_size = cblock->aio_nbytes - written;
315
316         if (rem_size == 0) {
317                 cblock->aio_fildes = -1;
318                 /*
319                  * md->refcount is incremented in record__aio_pushfn() for
320                  * every aio write request started in record__aio_push() so
321                  * decrement it because the request is now complete.
322                  */
323                 perf_mmap__put(&md->core);
324                 rc = 1;
325         } else {
326                 /*
327                  * aio write request may require a restart with the
328                  * remainder if the kernel didn't write the whole
329                  * chunk at once.
330                  */
331                 rem_off = cblock->aio_offset + written;
332                 rem_buf = (void *)(cblock->aio_buf + written);
333                 record__aio_write(cblock, cblock->aio_fildes,
334                                 rem_buf, rem_size, rem_off);
335                 rc = 0;
336         }
337
338         return rc;
339 }
340
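/*
 * Reap completed aio writes on this map. With sync_all == false, return the
 * index of the first free control block, waiting if none is available yet;
 * with sync_all == true, wait until all outstanding requests have completed
 * and return -1.
 */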
341 static int record__aio_sync(struct mmap *md, bool sync_all)
342 {
343         struct aiocb **aiocb = md->aio.aiocb;
344         struct aiocb *cblocks = md->aio.cblocks;
345         struct timespec timeout = { 0, 1000 * 1000  * 1 }; /* 1ms */
346         int i, do_suspend;
347
348         do {
349                 do_suspend = 0;
350                 for (i = 0; i < md->aio.nr_cblocks; ++i) {
351                         if (cblocks[i].aio_fildes == -1 || record__aio_complete(md, &cblocks[i])) {
352                                 if (sync_all)
353                                         aiocb[i] = NULL;
354                                 else
355                                         return i;
356                         } else {
357                                 /*
358                                  * The started aio write is not complete yet,
359                                  * so it has to be waited for before the
360                                  * next allocation.
361                                  */
362                                 aiocb[i] = &cblocks[i];
363                                 do_suspend = 1;
364                         }
365                 }
366                 if (!do_suspend)
367                         return -1;
368
369                 while (aio_suspend((const struct aiocb **)aiocb, md->aio.nr_cblocks, &timeout)) {
370                         if (!(errno == EAGAIN || errno == EINTR))
371                                 pr_err("failed to sync perf data, error: %m\n");
372                 }
373         } while (1);
374 }
375
376 struct record_aio {
377         struct record   *rec;
378         void            *data;
379         size_t          size;
380 };
381
382 static int record__aio_pushfn(struct mmap *map, void *to, void *buf, size_t size)
383 {
384         struct record_aio *aio = to;
385
386         /*
387          * The map->core.base data pointed to by buf is copied into a free map->aio.data[]
388          * buffer to release space in the kernel buffer as fast as possible, by calling
389          * perf_mmap__consume() from the perf_mmap__push() function.
390          *
391          * That lets the kernel proceed with storing more profiling data into
392          * the kernel buffer earlier than other per-CPU kernel buffers are handled.
393          *
394          * Copying can be done in two steps in case the chunk of profiling data
395          * crosses the upper bound of the kernel buffer. In this case we first move
396          * the part of the data from map->start to the upper bound and then the remainder
397          * from the beginning of the kernel buffer to the end of the data chunk.
398          */
399
400         if (record__comp_enabled(aio->rec)) {
401                 size = zstd_compress(aio->rec->session, NULL, aio->data + aio->size,
402                                      mmap__mmap_len(map) - aio->size,
403                                      buf, size);
404         } else {
405                 memcpy(aio->data + aio->size, buf, size);
406         }
407
408         if (!aio->size) {
409                 /*
410                  * Increment map->refcount to guard the map->aio.data[] buffer
411                  * from premature deallocation, because the map object can be
412                  * released earlier than the aio write request started on the
413                  * map->aio.data[] buffer completes.
414                  *
415                  * perf_mmap__put() is done at record__aio_complete()
416                  * after the started aio request completes, or at record__aio_push()
417                  * if the request failed to start.
418                  */
419                 perf_mmap__get(&map->core);
420         }
421
422         aio->size += size;
423
424         return size;
425 }
426
427 static int record__aio_push(struct record *rec, struct mmap *map, off_t *off)
428 {
429         int ret, idx;
430         int trace_fd = rec->session->data->file.fd;
431         struct record_aio aio = { .rec = rec, .size = 0 };
432
433         /*
434          * Call record__aio_sync() to wait until a map->aio.data[] buffer
435          * becomes available after the previous aio write operation.
436          */
437
438         idx = record__aio_sync(map, false);
439         aio.data = map->aio.data[idx];
440         ret = perf_mmap__push(map, &aio, record__aio_pushfn);
441         if (ret != 0) /* ret > 0 - no data, ret < 0 - error */
442                 return ret;
443
444         rec->samples++;
445         ret = record__aio_write(&(map->aio.cblocks[idx]), trace_fd, aio.data, aio.size, *off);
446         if (!ret) {
447                 *off += aio.size;
448                 rec->bytes_written += aio.size;
449                 if (switch_output_size(rec))
450                         trigger_hit(&switch_output_trigger);
451         } else {
452                 /*
453                  * Decrement the map->refcount that was incremented in record__aio_pushfn()
454                  * if the record__aio_write() operation failed to start; otherwise
455                  * map->refcount is decremented in record__aio_complete() after the
456                  * aio write operation finishes successfully.
457                  */
458                 perf_mmap__put(&map->core);
459         }
460
461         return ret;
462 }
463
464 static off_t record__aio_get_pos(int trace_fd)
465 {
466         return lseek(trace_fd, 0, SEEK_CUR);
467 }
468
469 static void record__aio_set_pos(int trace_fd, off_t pos)
470 {
471         lseek(trace_fd, pos, SEEK_SET);
472 }
473
474 static void record__aio_mmap_read_sync(struct record *rec)
475 {
476         int i;
477         struct evlist *evlist = rec->evlist;
478         struct mmap *maps = evlist->mmap;
479
480         if (!record__aio_enabled(rec))
481                 return;
482
483         for (i = 0; i < evlist->core.nr_mmaps; i++) {
484                 struct mmap *map = &maps[i];
485
486                 if (map->core.base)
487                         record__aio_sync(map, true);
488         }
489 }
490
491 static int nr_cblocks_default = 1;
492 static int nr_cblocks_max = 4;
493
494 static int record__aio_parse(const struct option *opt,
495                              const char *str,
496                              int unset)
497 {
498         struct record_opts *opts = (struct record_opts *)opt->value;
499
500         if (unset) {
501                 opts->nr_cblocks = 0;
502         } else {
503                 if (str)
504                         opts->nr_cblocks = strtol(str, NULL, 0);
505                 if (!opts->nr_cblocks)
506                         opts->nr_cblocks = nr_cblocks_default;
507         }
508
509         return 0;
510 }
511 #else /* HAVE_AIO_SUPPORT */
512 static int nr_cblocks_max = 0;
513
514 static int record__aio_push(struct record *rec __maybe_unused, struct mmap *map __maybe_unused,
515                             off_t *off __maybe_unused)
516 {
517         return -1;
518 }
519
520 static off_t record__aio_get_pos(int trace_fd __maybe_unused)
521 {
522         return -1;
523 }
524
525 static void record__aio_set_pos(int trace_fd __maybe_unused, off_t pos __maybe_unused)
526 {
527 }
528
529 static void record__aio_mmap_read_sync(struct record *rec __maybe_unused)
530 {
531 }
532 #endif
533
534 static int record__aio_enabled(struct record *rec)
535 {
536         return rec->opts.nr_cblocks > 0;
537 }
538
539 #define MMAP_FLUSH_DEFAULT 1
540 static int record__mmap_flush_parse(const struct option *opt,
541                                     const char *str,
542                                     int unset)
543 {
544         int flush_max;
545         struct record_opts *opts = (struct record_opts *)opt->value;
546         static struct parse_tag tags[] = {
547                         { .tag  = 'B', .mult = 1       },
548                         { .tag  = 'K', .mult = 1 << 10 },
549                         { .tag  = 'M', .mult = 1 << 20 },
550                         { .tag  = 'G', .mult = 1 << 30 },
551                         { .tag  = 0 },
552         };
553
554         if (unset)
555                 return 0;
556
557         if (str) {
558                 opts->mmap_flush = parse_tag_value(str, tags);
559                 if (opts->mmap_flush == (int)-1)
560                         opts->mmap_flush = strtol(str, NULL, 0);
561         }
562
563         if (!opts->mmap_flush)
564                 opts->mmap_flush = MMAP_FLUSH_DEFAULT;
565
566         flush_max = evlist__mmap_size(opts->mmap_pages);
567         flush_max /= 4;
568         if (opts->mmap_flush > flush_max)
569                 opts->mmap_flush = flush_max;
570
571         return 0;
572 }
573
574 #ifdef HAVE_ZSTD_SUPPORT
575 static unsigned int comp_level_default = 1;
576
577 static int record__parse_comp_level(const struct option *opt, const char *str, int unset)
578 {
579         struct record_opts *opts = opt->value;
580
581         if (unset) {
582                 opts->comp_level = 0;
583         } else {
584                 if (str)
585                         opts->comp_level = strtol(str, NULL, 0);
586                 if (!opts->comp_level)
587                         opts->comp_level = comp_level_default;
588         }
589
590         return 0;
591 }
592 #endif
593 static unsigned int comp_level_max = 22;
594
595 static int record__comp_enabled(struct record *rec)
596 {
597         return rec->opts.comp_level > 0;
598 }
599
600 static int process_synthesized_event(struct perf_tool *tool,
601                                      union perf_event *event,
602                                      struct perf_sample *sample __maybe_unused,
603                                      struct machine *machine __maybe_unused)
604 {
605         struct record *rec = container_of(tool, struct record, tool);
606         return record__write(rec, NULL, event, event->header.size);
607 }
608
609 static int process_locked_synthesized_event(struct perf_tool *tool,
610                                      union perf_event *event,
611                                      struct perf_sample *sample __maybe_unused,
612                                      struct machine *machine __maybe_unused)
613 {
614         static pthread_mutex_t synth_lock = PTHREAD_MUTEX_INITIALIZER;
615         int ret;
616
617         pthread_mutex_lock(&synth_lock);
618         ret = process_synthesized_event(tool, event, sample, machine);
619         pthread_mutex_unlock(&synth_lock);
620         return ret;
621 }
622
623 static int record__pushfn(struct mmap *map, void *to, void *bf, size_t size)
624 {
625         struct record *rec = to;
626
627         if (record__comp_enabled(rec)) {
628                 size = zstd_compress(rec->session, map, map->data, mmap__mmap_len(map), bf, size);
629                 bf   = map->data;
630         }
631
632         thread->samples++;
633         return record__write(rec, map, bf, size);
634 }
635
636 static volatile int signr = -1;
637 static volatile int child_finished;
638 #ifdef HAVE_EVENTFD_SUPPORT
639 static int done_fd = -1;
640 #endif
641
642 static void sig_handler(int sig)
643 {
644         if (sig == SIGCHLD)
645                 child_finished = 1;
646         else
647                 signr = sig;
648
649         done = 1;
650 #ifdef HAVE_EVENTFD_SUPPORT
651 {
652         u64 tmp = 1;
653         /*
654          * It is possible for this signal handler to run after done is checked
655          * in the main loop, but before the perf counter fds are polled. If this
656          * happens, the poll() will continue to wait even though done is set,
657          * and will only break out if either another signal is received, or the
658          * counters are ready for read. To ensure the poll() doesn't sleep when
659          * done is set, use an eventfd (done_fd) to wake up the poll().
660          */
661         if (write(done_fd, &tmp, sizeof(tmp)) < 0)
662                 pr_err("failed to signal wakeup fd, error: %m\n");
663 }
664 #endif // HAVE_EVENTFD_SUPPORT
665 }
666
667 static void sigsegv_handler(int sig)
668 {
669         perf_hooks__recover();
670         sighandler_dump_stack(sig);
671 }
672
673 static void record__sig_exit(void)
674 {
675         if (signr == -1)
676                 return;
677
678         signal(signr, SIG_DFL);
679         raise(signr);
680 }
681
682 #ifdef HAVE_AUXTRACE_SUPPORT
683
684 static int record__process_auxtrace(struct perf_tool *tool,
685                                     struct mmap *map,
686                                     union perf_event *event, void *data1,
687                                     size_t len1, void *data2, size_t len2)
688 {
689         struct record *rec = container_of(tool, struct record, tool);
690         struct perf_data *data = &rec->data;
691         size_t padding;
692         u8 pad[8] = {0};
693
694         if (!perf_data__is_pipe(data) && perf_data__is_single_file(data)) {
695                 off_t file_offset;
696                 int fd = perf_data__fd(data);
697                 int err;
698
699                 file_offset = lseek(fd, 0, SEEK_CUR);
700                 if (file_offset == -1)
701                         return -1;
702                 err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
703                                                      event, file_offset);
704                 if (err)
705                         return err;
706         }
707
708         /* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
709         padding = (len1 + len2) & 7;
710         if (padding)
711                 padding = 8 - padding;
712
713         record__write(rec, map, event, event->header.size);
714         record__write(rec, map, data1, len1);
715         if (len2)
716                 record__write(rec, map, data2, len2);
717         record__write(rec, map, &pad, padding);
718
719         return 0;
720 }
721
722 static int record__auxtrace_mmap_read(struct record *rec,
723                                       struct mmap *map)
724 {
725         int ret;
726
727         ret = auxtrace_mmap__read(map, rec->itr, &rec->tool,
728                                   record__process_auxtrace);
729         if (ret < 0)
730                 return ret;
731
732         if (ret)
733                 rec->samples++;
734
735         return 0;
736 }
737
738 static int record__auxtrace_mmap_read_snapshot(struct record *rec,
739                                                struct mmap *map)
740 {
741         int ret;
742
743         ret = auxtrace_mmap__read_snapshot(map, rec->itr, &rec->tool,
744                                            record__process_auxtrace,
745                                            rec->opts.auxtrace_snapshot_size);
746         if (ret < 0)
747                 return ret;
748
749         if (ret)
750                 rec->samples++;
751
752         return 0;
753 }
754
755 static int record__auxtrace_read_snapshot_all(struct record *rec)
756 {
757         int i;
758         int rc = 0;
759
760         for (i = 0; i < rec->evlist->core.nr_mmaps; i++) {
761                 struct mmap *map = &rec->evlist->mmap[i];
762
763                 if (!map->auxtrace_mmap.base)
764                         continue;
765
766                 if (record__auxtrace_mmap_read_snapshot(rec, map) != 0) {
767                         rc = -1;
768                         goto out;
769                 }
770         }
771 out:
772         return rc;
773 }
774
775 static void record__read_auxtrace_snapshot(struct record *rec, bool on_exit)
776 {
777         pr_debug("Recording AUX area tracing snapshot\n");
778         if (record__auxtrace_read_snapshot_all(rec) < 0) {
779                 trigger_error(&auxtrace_snapshot_trigger);
780         } else {
781                 if (auxtrace_record__snapshot_finish(rec->itr, on_exit))
782                         trigger_error(&auxtrace_snapshot_trigger);
783                 else
784                         trigger_ready(&auxtrace_snapshot_trigger);
785         }
786 }
787
788 static int record__auxtrace_snapshot_exit(struct record *rec)
789 {
790         if (trigger_is_error(&auxtrace_snapshot_trigger))
791                 return 0;
792
793         if (!auxtrace_record__snapshot_started &&
794             auxtrace_record__snapshot_start(rec->itr))
795                 return -1;
796
797         record__read_auxtrace_snapshot(rec, true);
798         if (trigger_is_error(&auxtrace_snapshot_trigger))
799                 return -1;
800
801         return 0;
802 }
803
804 static int record__auxtrace_init(struct record *rec)
805 {
806         int err;
807
808         if ((rec->opts.auxtrace_snapshot_opts || rec->opts.auxtrace_sample_opts)
809             && record__threads_enabled(rec)) {
810                 pr_err("AUX area tracing options are not available in parallel streaming mode.\n");
811                 return -EINVAL;
812         }
813
814         if (!rec->itr) {
815                 rec->itr = auxtrace_record__init(rec->evlist, &err);
816                 if (err)
817                         return err;
818         }
819
820         err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
821                                               rec->opts.auxtrace_snapshot_opts);
822         if (err)
823                 return err;
824
825         err = auxtrace_parse_sample_options(rec->itr, rec->evlist, &rec->opts,
826                                             rec->opts.auxtrace_sample_opts);
827         if (err)
828                 return err;
829
830         auxtrace_regroup_aux_output(rec->evlist);
831
832         return auxtrace_parse_filters(rec->evlist);
833 }
834
835 #else
836
837 static inline
838 int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
839                                struct mmap *map __maybe_unused)
840 {
841         return 0;
842 }
843
844 static inline
845 void record__read_auxtrace_snapshot(struct record *rec __maybe_unused,
846                                     bool on_exit __maybe_unused)
847 {
848 }
849
850 static inline
851 int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
852 {
853         return 0;
854 }
855
856 static inline
857 int record__auxtrace_snapshot_exit(struct record *rec __maybe_unused)
858 {
859         return 0;
860 }
861
862 static int record__auxtrace_init(struct record *rec __maybe_unused)
863 {
864         return 0;
865 }
866
867 #endif
868
869 static int record__config_text_poke(struct evlist *evlist)
870 {
871         struct evsel *evsel;
872         int err;
873
874         /* Nothing to do if text poke is already configured */
875         evlist__for_each_entry(evlist, evsel) {
876                 if (evsel->core.attr.text_poke)
877                         return 0;
878         }
879
880         err = parse_events(evlist, "dummy:u", NULL);
881         if (err)
882                 return err;
883
884         evsel = evlist__last(evlist);
885
886         evsel->core.attr.freq = 0;
887         evsel->core.attr.sample_period = 1;
888         evsel->core.attr.text_poke = 1;
889         evsel->core.attr.ksymbol = 1;
890
891         evsel->core.system_wide = true;
892         evsel->no_aux_samples = true;
893         evsel->immediate = true;
894
895         /* Text poke must be collected on all CPUs */
896         perf_cpu_map__put(evsel->core.own_cpus);
897         evsel->core.own_cpus = perf_cpu_map__new(NULL);
898         perf_cpu_map__put(evsel->core.cpus);
899         evsel->core.cpus = perf_cpu_map__get(evsel->core.own_cpus);
900
901         evsel__set_sample_bit(evsel, TIME);
902
903         return 0;
904 }
905
906 static bool record__kcore_readable(struct machine *machine)
907 {
908         char kcore[PATH_MAX];
909         int fd;
910
911         scnprintf(kcore, sizeof(kcore), "%s/proc/kcore", machine->root_dir);
912
913         fd = open(kcore, O_RDONLY);
914         if (fd < 0)
915                 return false;
916
917         close(fd);
918
919         return true;
920 }
921
922 static int record__kcore_copy(struct machine *machine, struct perf_data *data)
923 {
924         char from_dir[PATH_MAX];
925         char kcore_dir[PATH_MAX];
926         int ret;
927
928         snprintf(from_dir, sizeof(from_dir), "%s/proc", machine->root_dir);
929
930         ret = perf_data__make_kcore_dir(data, kcore_dir, sizeof(kcore_dir));
931         if (ret)
932                 return ret;
933
934         return kcore_copy(from_dir, kcore_dir);
935 }
936
937 static void record__thread_data_init_pipes(struct record_thread *thread_data)
938 {
939         thread_data->pipes.msg[0] = -1;
940         thread_data->pipes.msg[1] = -1;
941         thread_data->pipes.ack[0] = -1;
942         thread_data->pipes.ack[1] = -1;
943 }
944
945 static int record__thread_data_open_pipes(struct record_thread *thread_data)
946 {
947         if (pipe(thread_data->pipes.msg))
948                 return -EINVAL;
949
950         if (pipe(thread_data->pipes.ack)) {
951                 close(thread_data->pipes.msg[0]);
952                 thread_data->pipes.msg[0] = -1;
953                 close(thread_data->pipes.msg[1]);
954                 thread_data->pipes.msg[1] = -1;
955                 return -EINVAL;
956         }
957
958         pr_debug2("thread_data[%p]: msg=[%d,%d], ack=[%d,%d]\n", thread_data,
959                  thread_data->pipes.msg[0], thread_data->pipes.msg[1],
960                  thread_data->pipes.ack[0], thread_data->pipes.ack[1]);
961
962         return 0;
963 }
964
965 static void record__thread_data_close_pipes(struct record_thread *thread_data)
966 {
967         if (thread_data->pipes.msg[0] != -1) {
968                 close(thread_data->pipes.msg[0]);
969                 thread_data->pipes.msg[0] = -1;
970         }
971         if (thread_data->pipes.msg[1] != -1) {
972                 close(thread_data->pipes.msg[1]);
973                 thread_data->pipes.msg[1] = -1;
974         }
975         if (thread_data->pipes.ack[0] != -1) {
976                 close(thread_data->pipes.ack[0]);
977                 thread_data->pipes.ack[0] = -1;
978         }
979         if (thread_data->pipes.ack[1] != -1) {
980                 close(thread_data->pipes.ack[1]);
981                 thread_data->pipes.ack[1] = -1;
982         }
983 }
984
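/*
 * Assign this thread the subset of the evlist's per-CPU mmaps (and overwrite
 * mmaps) whose CPUs fall into the thread's maps mask.
 */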
985 static int record__thread_data_init_maps(struct record_thread *thread_data, struct evlist *evlist)
986 {
987         int m, tm, nr_mmaps = evlist->core.nr_mmaps;
988         struct mmap *mmap = evlist->mmap;
989         struct mmap *overwrite_mmap = evlist->overwrite_mmap;
990         struct perf_cpu_map *cpus = evlist->core.user_requested_cpus;
991
992         thread_data->nr_mmaps = bitmap_weight(thread_data->mask->maps.bits,
993                                               thread_data->mask->maps.nbits);
994         if (mmap) {
995                 thread_data->maps = zalloc(thread_data->nr_mmaps * sizeof(struct mmap *));
996                 if (!thread_data->maps)
997                         return -ENOMEM;
998         }
999         if (overwrite_mmap) {
1000                 thread_data->overwrite_maps = zalloc(thread_data->nr_mmaps * sizeof(struct mmap *));
1001                 if (!thread_data->overwrite_maps) {
1002                         zfree(&thread_data->maps);
1003                         return -ENOMEM;
1004                 }
1005         }
1006         pr_debug2("thread_data[%p]: nr_mmaps=%d, maps=%p, ow_maps=%p\n", thread_data,
1007                  thread_data->nr_mmaps, thread_data->maps, thread_data->overwrite_maps);
1008
1009         for (m = 0, tm = 0; m < nr_mmaps && tm < thread_data->nr_mmaps; m++) {
1010                 if (test_bit(cpus->map[m].cpu, thread_data->mask->maps.bits)) {
1011                         if (thread_data->maps) {
1012                                 thread_data->maps[tm] = &mmap[m];
1013                                 pr_debug2("thread_data[%p]: cpu%d: maps[%d] -> mmap[%d]\n",
1014                                           thread_data, cpus->map[m].cpu, tm, m);
1015                         }
1016                         if (thread_data->overwrite_maps) {
1017                                 thread_data->overwrite_maps[tm] = &overwrite_mmap[m];
1018                                 pr_debug2("thread_data[%p]: cpu%d: ow_maps[%d] -> ow_mmap[%d]\n",
1019                                           thread_data, cpus->map[m].cpu, tm, m);
1020                         }
1021                         tm++;
1022                 }
1023         }
1024
1025         return 0;
1026 }
1027
1028 static int record__thread_data_init_pollfd(struct record_thread *thread_data, struct evlist *evlist)
1029 {
1030         int f, tm, pos;
1031         struct mmap *map, *overwrite_map;
1032
1033         fdarray__init(&thread_data->pollfd, 64);
1034
1035         for (tm = 0; tm < thread_data->nr_mmaps; tm++) {
1036                 map = thread_data->maps ? thread_data->maps[tm] : NULL;
1037                 overwrite_map = thread_data->overwrite_maps ?
1038                                 thread_data->overwrite_maps[tm] : NULL;
1039
1040                 for (f = 0; f < evlist->core.pollfd.nr; f++) {
1041                         void *ptr = evlist->core.pollfd.priv[f].ptr;
1042
1043                         if ((map && ptr == map) || (overwrite_map && ptr == overwrite_map)) {
1044                                 pos = fdarray__dup_entry_from(&thread_data->pollfd, f,
1045                                                               &evlist->core.pollfd);
1046                                 if (pos < 0)
1047                                         return pos;
1048                                 pr_debug2("thread_data[%p]: pollfd[%d] <- event_fd=%d\n",
1049                                          thread_data, pos, evlist->core.pollfd.entries[f].fd);
1050                         }
1051                 }
1052         }
1053
1054         return 0;
1055 }
1056
1057 static void record__free_thread_data(struct record *rec)
1058 {
1059         int t;
1060         struct record_thread *thread_data = rec->thread_data;
1061
1062         if (thread_data == NULL)
1063                 return;
1064
1065         for (t = 0; t < rec->nr_threads; t++) {
1066                 record__thread_data_close_pipes(&thread_data[t]);
1067                 zfree(&thread_data[t].maps);
1068                 zfree(&thread_data[t].overwrite_maps);
1069                 fdarray__exit(&thread_data[t].pollfd);
1070         }
1071
1072         zfree(&rec->thread_data);
1073 }
1074
1075 static int record__alloc_thread_data(struct record *rec, struct evlist *evlist)
1076 {
1077         int t, ret;
1078         struct record_thread *thread_data;
1079
1080         rec->thread_data = zalloc(rec->nr_threads * sizeof(*(rec->thread_data)));
1081         if (!rec->thread_data) {
1082                 pr_err("Failed to allocate thread data\n");
1083                 return -ENOMEM;
1084         }
1085         thread_data = rec->thread_data;
1086
1087         for (t = 0; t < rec->nr_threads; t++)
1088                 record__thread_data_init_pipes(&thread_data[t]);
1089
1090         for (t = 0; t < rec->nr_threads; t++) {
1091                 thread_data[t].rec = rec;
1092                 thread_data[t].mask = &rec->thread_masks[t];
1093                 ret = record__thread_data_init_maps(&thread_data[t], evlist);
1094                 if (ret) {
1095                         pr_err("Failed to initialize thread[%d] maps\n", t);
1096                         goto out_free;
1097                 }
1098                 ret = record__thread_data_init_pollfd(&thread_data[t], evlist);
1099                 if (ret) {
1100                         pr_err("Failed to initialize thread[%d] pollfd\n", t);
1101                         goto out_free;
1102                 }
1103                 if (t) {
1104                         thread_data[t].tid = -1;
1105                         ret = record__thread_data_open_pipes(&thread_data[t]);
1106                         if (ret) {
1107                                 pr_err("Failed to open thread[%d] communication pipes\n", t);
1108                                 goto out_free;
1109                         }
1110                         ret = fdarray__add(&thread_data[t].pollfd, thread_data[t].pipes.msg[0],
1111                                            POLLIN | POLLERR | POLLHUP, fdarray_flag__nonfilterable);
1112                         if (ret < 0) {
1113                                 pr_err("Failed to add descriptor to thread[%d] pollfd\n", t);
1114                                 goto out_free;
1115                         }
1116                         thread_data[t].ctlfd_pos = ret;
1117                         pr_debug2("thread_data[%p]: pollfd[%d] <- ctl_fd=%d\n",
1118                                  thread_data, thread_data[t].ctlfd_pos,
1119                                  thread_data[t].pipes.msg[0]);
1120                 } else {
1121                         thread_data[t].tid = gettid();
1122                         if (evlist->ctl_fd.pos == -1)
1123                                 continue;
1124                         ret = fdarray__dup_entry_from(&thread_data[t].pollfd, evlist->ctl_fd.pos,
1125                                                       &evlist->core.pollfd);
1126                         if (ret < 0) {
1127                                 pr_err("Failed to duplicate descriptor in main thread pollfd\n");
1128                                 goto out_free;
1129                         }
1130                         thread_data[t].ctlfd_pos = ret;
1131                         pr_debug2("thread_data[%p]: pollfd[%d] <- ctl_fd=%d\n",
1132                                  thread_data, thread_data[t].ctlfd_pos,
1133                                  evlist->core.pollfd.entries[evlist->ctl_fd.pos].fd);
1134                 }
1135         }
1136
1137         return 0;
1138
1139 out_free:
1140         record__free_thread_data(rec);
1141
1142         return ret;
1143 }
1144
1145 static int record__mmap_evlist(struct record *rec,
1146                                struct evlist *evlist)
1147 {
1148         int i, ret;
1149         struct record_opts *opts = &rec->opts;
1150         bool auxtrace_overwrite = opts->auxtrace_snapshot_mode ||
1151                                   opts->auxtrace_sample_mode;
1152         char msg[512];
1153
1154         if (opts->affinity != PERF_AFFINITY_SYS)
1155                 cpu__setup_cpunode_map();
1156
1157         if (evlist__mmap_ex(evlist, opts->mmap_pages,
1158                                  opts->auxtrace_mmap_pages,
1159                                  auxtrace_overwrite,
1160                                  opts->nr_cblocks, opts->affinity,
1161                                  opts->mmap_flush, opts->comp_level) < 0) {
1162                 if (errno == EPERM) {
1163                         pr_err("Permission error mapping pages.\n"
1164                                "Consider increasing "
1165                                "/proc/sys/kernel/perf_event_mlock_kb,\n"
1166                                "or try again with a smaller value of -m/--mmap_pages.\n"
1167                                "(current value: %u,%u)\n",
1168                                opts->mmap_pages, opts->auxtrace_mmap_pages);
1169                         return -errno;
1170                 } else {
1171                         pr_err("failed to mmap with %d (%s)\n", errno,
1172                                 str_error_r(errno, msg, sizeof(msg)));
1173                         if (errno)
1174                                 return -errno;
1175                         else
1176                                 return -EINVAL;
1177                 }
1178         }
1179
1180         if (evlist__initialize_ctlfd(evlist, opts->ctl_fd, opts->ctl_fd_ack))
1181                 return -1;
1182
1183         ret = record__alloc_thread_data(rec, evlist);
1184         if (ret)
1185                 return ret;
1186
1187         if (record__threads_enabled(rec)) {
1188                 ret = perf_data__create_dir(&rec->data, evlist->core.nr_mmaps);
1189                 if (ret) {
1190                         pr_err("Failed to create data directory: %s\n", strerror(-ret));
1191                         return ret;
1192                 }
1193                 for (i = 0; i < evlist->core.nr_mmaps; i++) {
1194                         if (evlist->mmap)
1195                                 evlist->mmap[i].file = &rec->data.dir.files[i];
1196                         if (evlist->overwrite_mmap)
1197                                 evlist->overwrite_mmap[i].file = &rec->data.dir.files[i];
1198                 }
1199         }
1200
1201         return 0;
1202 }
1203
1204 static int record__mmap(struct record *rec)
1205 {
1206         return record__mmap_evlist(rec, rec->evlist);
1207 }
1208
1209 static int record__open(struct record *rec)
1210 {
1211         char msg[BUFSIZ];
1212         struct evsel *pos;
1213         struct evlist *evlist = rec->evlist;
1214         struct perf_session *session = rec->session;
1215         struct record_opts *opts = &rec->opts;
1216         int rc = 0;
1217
1218         /*
1219          * For initial_delay, system wide or a hybrid system, we need to add a
1220          * dummy event so that we can track PERF_RECORD_MMAP to cover the delay
1221          * of waiting or event synthesis.
1222          */
1223         if (opts->initial_delay || target__has_cpu(&opts->target) ||
1224             perf_pmu__has_hybrid()) {
1225                 pos = evlist__get_tracking_event(evlist);
1226                 if (!evsel__is_dummy_event(pos)) {
1227                         /* Set up dummy event. */
1228                         if (evlist__add_dummy(evlist))
1229                                 return -ENOMEM;
1230                         pos = evlist__last(evlist);
1231                         evlist__set_tracking_event(evlist, pos);
1232                 }
1233
1234                 /*
1235                  * Enable the dummy event when the process is forked for
1236                  * initial_delay, immediately for system wide.
1237                  */
1238                 if (opts->initial_delay && !pos->immediate &&
1239                     !target__has_cpu(&opts->target))
1240                         pos->core.attr.enable_on_exec = 1;
1241                 else
1242                         pos->immediate = 1;
1243         }
1244
1245         evlist__config(evlist, opts, &callchain_param);
1246
1247         evlist__for_each_entry(evlist, pos) {
1248 try_again:
1249                 if (evsel__open(pos, pos->core.cpus, pos->core.threads) < 0) {
1250                         if (evsel__fallback(pos, errno, msg, sizeof(msg))) {
1251                                 if (verbose > 0)
1252                                         ui__warning("%s\n", msg);
1253                                 goto try_again;
1254                         }
1255                         if ((errno == EINVAL || errno == EBADF) &&
1256                             pos->core.leader != &pos->core &&
1257                             pos->weak_group) {
1258                                 pos = evlist__reset_weak_group(evlist, pos, true);
1259                                 goto try_again;
1260                         }
1261                         rc = -errno;
1262                         evsel__open_strerror(pos, &opts->target, errno, msg, sizeof(msg));
1263                         ui__error("%s\n", msg);
1264                         goto out;
1265                 }
1266
1267                 pos->supported = true;
1268         }
1269
1270         if (symbol_conf.kptr_restrict && !evlist__exclude_kernel(evlist)) {
1271                 pr_warning(
1272 "WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
1273 "check /proc/sys/kernel/kptr_restrict and /proc/sys/kernel/perf_event_paranoid.\n\n"
1274 "Samples in kernel functions may not be resolved if a suitable vmlinux\n"
1275 "file is not found in the buildid cache or in the vmlinux path.\n\n"
1276 "Samples in kernel modules won't be resolved at all.\n\n"
1277 "If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
1278 "even with a suitable vmlinux or kallsyms file.\n\n");
1279         }
1280
1281         if (evlist__apply_filters(evlist, &pos)) {
1282                 pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
1283                         pos->filter, evsel__name(pos), errno,
1284                         str_error_r(errno, msg, sizeof(msg)));
1285                 rc = -1;
1286                 goto out;
1287         }
1288
1289         rc = record__mmap(rec);
1290         if (rc)
1291                 goto out;
1292
1293         session->evlist = evlist;
1294         perf_session__set_id_hdr_size(session);
1295 out:
1296         return rc;
1297 }
1298
1299 static void set_timestamp_boundary(struct record *rec, u64 sample_time)
1300 {
1301         if (rec->evlist->first_sample_time == 0)
1302                 rec->evlist->first_sample_time = sample_time;
1303
1304         if (sample_time)
1305                 rec->evlist->last_sample_time = sample_time;
1306 }
1307
1308 static int process_sample_event(struct perf_tool *tool,
1309                                 union perf_event *event,
1310                                 struct perf_sample *sample,
1311                                 struct evsel *evsel,
1312                                 struct machine *machine)
1313 {
1314         struct record *rec = container_of(tool, struct record, tool);
1315
1316         set_timestamp_boundary(rec, sample->time);
1317
1318         if (rec->buildid_all)
1319                 return 0;
1320
1321         rec->samples++;
1322         return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
1323 }
1324
1325 static int process_buildids(struct record *rec)
1326 {
1327         struct perf_session *session = rec->session;
1328
1329         if (perf_data__size(&rec->data) == 0)
1330                 return 0;
1331
1332         /*
1333          * During this process, it'll load the kernel map and replace the
1334          * dso->long_name with a real pathname it found.  In this case
1335          * we prefer the vmlinux path like
1336          *   /lib/modules/3.16.4/build/vmlinux
1337          *
1338          * rather than the build-id path (in the debug directory).
1339          *   $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
1340          */
1341         symbol_conf.ignore_vmlinux_buildid = true;
1342
1343         /*
1344          * If --buildid-all is given, it marks all DSOs regardless of hits,
1345          * so there is no need to process samples. But if timestamp_boundary is
1346          * enabled, it still needs to walk all samples to get the timestamps of
1347          * the first/last samples.
1348          */
1349         if (rec->buildid_all && !rec->timestamp_boundary)
1350                 rec->tool.sample = NULL;
1351
1352         return perf_session__process_events(session);
1353 }
1354
1355 static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
1356 {
1357         int err;
1358         struct perf_tool *tool = data;
1359         /*
1360          * As for the guest kernel, when processing the record & report subcommands,
1361          * we arrange the module mmaps prior to the guest kernel mmap and trigger
1362          * a dso preload, because by default guest module symbols are loaded
1363          * from guest kallsyms instead of /lib/modules/XXX/XXX. This
1364          * method is used to avoid missing symbols when the first address is
1365          * in a module instead of in the guest kernel.
1366          */
1367         err = perf_event__synthesize_modules(tool, process_synthesized_event,
1368                                              machine);
1369         if (err < 0)
1370                 pr_err("Couldn't record guest kernel [%d]'s reference"
1371                        " relocation symbol.\n", machine->pid);
1372
1373         /*
1374          * We use _stext for the guest kernel because the guest kernel's
1375          * /proc/kallsyms sometimes has no _text.
1376          */
1377         err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
1378                                                  machine);
1379         if (err < 0)
1380                 pr_err("Couldn't record guest kernel [%d]'s reference"
1381                        " relocation symbol.\n", machine->pid);
1382 }
1383
1384 static struct perf_event_header finished_round_event = {
1385         .size = sizeof(struct perf_event_header),
1386         .type = PERF_RECORD_FINISHED_ROUND,
1387 };
1388
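/*
 * With --affinity=node|cpu, migrate the current thread onto the CPUs backing
 * the mmap being read so that buffer accesses stay local to that node/CPU.
 */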
1389 static void record__adjust_affinity(struct record *rec, struct mmap *map)
1390 {
1391         if (rec->opts.affinity != PERF_AFFINITY_SYS &&
1392             !bitmap_equal(thread->mask->affinity.bits, map->affinity_mask.bits,
1393                           thread->mask->affinity.nbits)) {
1394                 bitmap_zero(thread->mask->affinity.bits, thread->mask->affinity.nbits);
1395                 bitmap_or(thread->mask->affinity.bits, thread->mask->affinity.bits,
1396                           map->affinity_mask.bits, thread->mask->affinity.nbits);
1397                 sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&thread->mask->affinity),
1398                                         (cpu_set_t *)thread->mask->affinity.bits);
1399                 if (verbose == 2) {
1400                         pr_debug("threads[%d]: running on cpu%d: ", thread->tid, sched_getcpu());
1401                         mmap_cpu_mask__scnprintf(&thread->mask->affinity, "affinity");
1402                 }
1403         }
1404 }
1405
1406 static size_t process_comp_header(void *record, size_t increment)
1407 {
1408         struct perf_record_compressed *event = record;
1409         size_t size = sizeof(*event);
1410
1411         if (increment) {
1412                 event->header.size += increment;
1413                 return increment;
1414         }
1415
1416         event->header.type = PERF_RECORD_COMPRESSED;
1417         event->header.size = size;
1418
1419         return size;
1420 }
1421
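/*
 * Compress src into dst as PERF_RECORD_COMPRESSED records, using the per-mmap
 * zstd context in threaded (directory) mode and the session-wide context
 * otherwise, while accounting transferred vs. compressed byte totals.
 */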
1422 static size_t zstd_compress(struct perf_session *session, struct mmap *map,
1423                             void *dst, size_t dst_size, void *src, size_t src_size)
1424 {
1425         size_t compressed;
1426         size_t max_record_size = PERF_SAMPLE_MAX_SIZE - sizeof(struct perf_record_compressed) - 1;
1427         struct zstd_data *zstd_data = &session->zstd_data;
1428
1429         if (map && map->file)
1430                 zstd_data = &map->zstd_data;
1431
1432         compressed = zstd_compress_stream_to_records(zstd_data, dst, dst_size, src, src_size,
1433                                                      max_record_size, process_comp_header);
1434
1435         if (map && map->file) {
1436                 thread->bytes_transferred += src_size;
1437                 thread->bytes_compressed  += compressed;
1438         } else {
1439                 session->bytes_transferred += src_size;
1440                 session->bytes_compressed  += compressed;
1441         }
1442
1443         return compressed;
1444 }
1445
1446 static int record__mmap_read_evlist(struct record *rec, struct evlist *evlist,
1447                                     bool overwrite, bool synch)
1448 {
1449         u64 bytes_written = rec->bytes_written;
1450         int i;
1451         int rc = 0;
1452         int nr_mmaps;
1453         struct mmap **maps;
1454         int trace_fd = rec->data.file.fd;
1455         off_t off = 0;
1456
1457         if (!evlist)
1458                 return 0;
1459
1460         nr_mmaps = thread->nr_mmaps;
1461         maps = overwrite ? thread->overwrite_maps : thread->maps;
1462
1463         if (!maps)
1464                 return 0;
1465
1466         if (overwrite && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
1467                 return 0;
1468
1469         if (record__aio_enabled(rec))
1470                 off = record__aio_get_pos(trace_fd);
1471
1472         for (i = 0; i < nr_mmaps; i++) {
1473                 u64 flush = 0;
1474                 struct mmap *map = maps[i];
1475
1476                 if (map->core.base) {
1477                         record__adjust_affinity(rec, map);
1478                         if (synch) {
1479                                 flush = map->core.flush;
1480                                 map->core.flush = 1;
1481                         }
1482                         if (!record__aio_enabled(rec)) {
1483                                 if (perf_mmap__push(map, rec, record__pushfn) < 0) {
1484                                         if (synch)
1485                                                 map->core.flush = flush;
1486                                         rc = -1;
1487                                         goto out;
1488                                 }
1489                         } else {
1490                                 if (record__aio_push(rec, map, &off) < 0) {
1491                                         record__aio_set_pos(trace_fd, off);
1492                                         if (synch)
1493                                                 map->core.flush = flush;
1494                                         rc = -1;
1495                                         goto out;
1496                                 }
1497                         }
1498                         if (synch)
1499                                 map->core.flush = flush;
1500                 }
1501
1502                 if (map->auxtrace_mmap.base && !rec->opts.auxtrace_snapshot_mode &&
1503                     !rec->opts.auxtrace_sample_mode &&
1504                     record__auxtrace_mmap_read(rec, map) != 0) {
1505                         rc = -1;
1506                         goto out;
1507                 }
1508         }
1509
1510         if (record__aio_enabled(rec))
1511                 record__aio_set_pos(trace_fd, off);
1512
1513         /*
1514          * Mark the round finished if we wrote at least
1515          * one event.
1516          *
1517          * No need for round events in directory mode,
1518          * because the per-cpu maps and files already have
1519          * their data sorted by the kernel.
1520          */
1521         if (!record__threads_enabled(rec) && bytes_written != rec->bytes_written)
1522                 rc = record__write(rec, NULL, &finished_round_event, sizeof(finished_round_event));
1523
1524         if (overwrite)
1525                 evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
1526 out:
1527         return rc;
1528 }
1529
1530 static int record__mmap_read_all(struct record *rec, bool synch)
1531 {
1532         int err;
1533
1534         err = record__mmap_read_evlist(rec, rec->evlist, false, synch);
1535         if (err)
1536                 return err;
1537
1538         return record__mmap_read_evlist(rec, rec->evlist, true, synch);
1539 }
1540
1541 static void record__thread_munmap_filtered(struct fdarray *fda, int fd,
1542                                            void *arg __maybe_unused)
1543 {
1544         struct perf_mmap *map = fda->priv[fd].ptr;
1545
1546         if (map)
1547                 perf_mmap__put(map);
1548 }
1549
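/*
 * Body of a parallel reader thread: notify the main thread on start,
 * then keep draining the thread's mmaps, polling when there is no new
 * data, until the main thread closes the message pipe (POLLHUP on the
 * control fd entry). Finish with a synchronous flush and a termination
 * ack.
 */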
1550 static void *record__thread(void *arg)
1551 {
1552         enum thread_msg msg = THREAD_MSG__READY;
1553         bool terminate = false;
1554         struct fdarray *pollfd;
1555         int err, ctlfd_pos;
1556
1557         thread = arg;
1558         thread->tid = gettid();
1559
1560         err = write(thread->pipes.ack[1], &msg, sizeof(msg));
1561         if (err == -1)
1562                 pr_warning("threads[%d]: failed to notify on start: %s\n",
1563                            thread->tid, strerror(errno));
1564
1565         pr_debug("threads[%d]: started on cpu%d\n", thread->tid, sched_getcpu());
1566
1567         pollfd = &thread->pollfd;
1568         ctlfd_pos = thread->ctlfd_pos;
1569
1570         for (;;) {
1571                 unsigned long long hits = thread->samples;
1572
1573                 if (record__mmap_read_all(thread->rec, false) < 0 || terminate)
1574                         break;
1575
1576                 if (hits == thread->samples) {
1577
1578                         err = fdarray__poll(pollfd, -1);
1579                         /*
1580                          * Propagate the error only if there is one. Ignore a positive
1581                          * number of returned events and interrupt errors (EINTR).
1582                          */
1583                         if (err > 0 || (err < 0 && errno == EINTR))
1584                                 err = 0;
1585                         thread->waking++;
1586
1587                         if (fdarray__filter(pollfd, POLLERR | POLLHUP,
1588                                             record__thread_munmap_filtered, NULL) == 0)
1589                                 break;
1590                 }
1591
1592                 if (pollfd->entries[ctlfd_pos].revents & POLLHUP) {
1593                         terminate = true;
1594                         close(thread->pipes.msg[0]);
1595                         thread->pipes.msg[0] = -1;
1596                         pollfd->entries[ctlfd_pos].fd = -1;
1597                         pollfd->entries[ctlfd_pos].events = 0;
1598                 }
1599
1600                 pollfd->entries[ctlfd_pos].revents = 0;
1601         }
1602         record__mmap_read_all(thread->rec, true);
1603
1604         err = write(thread->pipes.ack[1], &msg, sizeof(msg));
1605         if (err == -1)
1606                 pr_warning("threads[%d]: failed to notify on termination: %s\n",
1607                            thread->tid, strerror(errno));
1608
1609         return NULL;
1610 }
1611
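/*
 * Start with all header features enabled, then clear the ones that do
 * not apply to this session (build ids, tracing data, branch stack,
 * AUX traces, clock data, directory format, compression, ...).
 */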
1612 static void record__init_features(struct record *rec)
1613 {
1614         struct perf_session *session = rec->session;
1615         int feat;
1616
1617         for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
1618                 perf_header__set_feat(&session->header, feat);
1619
1620         if (rec->no_buildid)
1621                 perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
1622
1623         if (!have_tracepoints(&rec->evlist->core.entries))
1624                 perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
1625
1626         if (!rec->opts.branch_stack)
1627                 perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
1628
1629         if (!rec->opts.full_auxtrace)
1630                 perf_header__clear_feat(&session->header, HEADER_AUXTRACE);
1631
1632         if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns))
1633                 perf_header__clear_feat(&session->header, HEADER_CLOCKID);
1634
1635         if (!rec->opts.use_clockid)
1636                 perf_header__clear_feat(&session->header, HEADER_CLOCK_DATA);
1637
1638         if (!record__threads_enabled(rec))
1639                 perf_header__clear_feat(&session->header, HEADER_DIR_FORMAT);
1640
1641         if (!record__comp_enabled(rec))
1642                 perf_header__clear_feat(&session->header, HEADER_COMPRESSED);
1643
1644         perf_header__clear_feat(&session->header, HEADER_STAT);
1645 }
1646
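/*
 * Finalize the on-disk perf.data: update the data size (and the sizes
 * of the per-CPU files in directory mode), process build-ids unless
 * disabled, and rewrite the file header. Nothing to do in pipe mode.
 */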
1647 static void
1648 record__finish_output(struct record *rec)
1649 {
1650         int i;
1651         struct perf_data *data = &rec->data;
1652         int fd = perf_data__fd(data);
1653
1654         if (data->is_pipe)
1655                 return;
1656
1657         rec->session->header.data_size += rec->bytes_written;
1658         data->file.size = lseek(perf_data__fd(data), 0, SEEK_CUR);
1659         if (record__threads_enabled(rec)) {
1660                 for (i = 0; i < data->dir.nr; i++)
1661                         data->dir.files[i].size = lseek(data->dir.files[i].fd, 0, SEEK_CUR);
1662         }
1663
1664         if (!rec->no_buildid) {
1665                 process_buildids(rec);
1666
1667                 if (rec->buildid_all)
1668                         dsos__hit_all(rec->session);
1669         }
1670         perf_session__write_header(rec->session, rec->evlist, fd, true);
1671
1672         return;
1673 }
1674
1675 static int record__synthesize_workload(struct record *rec, bool tail)
1676 {
1677         int err;
1678         struct perf_thread_map *thread_map;
1679         bool needs_mmap = rec->opts.synth & PERF_SYNTH_MMAP;
1680
1681         if (rec->opts.tail_synthesize != tail)
1682                 return 0;
1683
1684         thread_map = thread_map__new_by_tid(rec->evlist->workload.pid);
1685         if (thread_map == NULL)
1686                 return -1;
1687
1688         err = perf_event__synthesize_thread_map(&rec->tool, thread_map,
1689                                                  process_synthesized_event,
1690                                                  &rec->session->machines.host,
1691                                                  needs_mmap,
1692                                                  rec->opts.sample_address);
1693         perf_thread_map__put(thread_map);
1694         return err;
1695 }
1696
1697 static int record__synthesize(struct record *rec, bool tail);
1698
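/*
 * Rotate the output for --switch-output: synthesize tail events, close
 * the current file, switch perf_data to a new timestamped file, rotate
 * the ring of kept file names when a maximum number of files is
 * configured, and re-synthesize the tracking events into the new file.
 */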
1699 static int
1700 record__switch_output(struct record *rec, bool at_exit)
1701 {
1702         struct perf_data *data = &rec->data;
1703         int fd, err;
1704         char *new_filename;
1705
1706         /* Same size as a real timestamp, e.g. "2015122520103046" */
1707         char timestamp[] = "InvalidTimestamp";
1708
1709         record__aio_mmap_read_sync(rec);
1710
1711         record__synthesize(rec, true);
1712         if (target__none(&rec->opts.target))
1713                 record__synthesize_workload(rec, true);
1714
1715         rec->samples = 0;
1716         record__finish_output(rec);
1717         err = fetch_current_timestamp(timestamp, sizeof(timestamp));
1718         if (err) {
1719                 pr_err("Failed to get current timestamp\n");
1720                 return -EINVAL;
1721         }
1722
1723         fd = perf_data__switch(data, timestamp,
1724                                     rec->session->header.data_offset,
1725                                     at_exit, &new_filename);
1726         if (fd >= 0 && !at_exit) {
1727                 rec->bytes_written = 0;
1728                 rec->session->header.data_size = 0;
1729         }
1730
1731         if (!quiet)
1732                 fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
1733                         data->path, timestamp);
1734
1735         if (rec->switch_output.num_files) {
1736                 int n = rec->switch_output.cur_file + 1;
1737
1738                 if (n >= rec->switch_output.num_files)
1739                         n = 0;
1740                 rec->switch_output.cur_file = n;
1741                 if (rec->switch_output.filenames[n]) {
1742                         remove(rec->switch_output.filenames[n]);
1743                         zfree(&rec->switch_output.filenames[n]);
1744                 }
1745                 rec->switch_output.filenames[n] = new_filename;
1746         } else {
1747                 free(new_filename);
1748         }
1749
1750         /* Output tracking events */
1751         if (!at_exit) {
1752                 record__synthesize(rec, false);
1753
1754                 /*
1755                  * In 'perf record --switch-output' without -a,
1756                  * record__synthesize() in record__switch_output() won't
1757                  * generate tracking events because there's no thread_map
1758                  * in the evlist, which would leave the newly created
1759                  * perf.data without map and comm information.
1760                  * Create a fake thread_map and directly call
1761                  * perf_event__synthesize_thread_map() for those events.
1762                  */
1763                 if (target__none(&rec->opts.target))
1764                         record__synthesize_workload(rec, false);
1765         }
1766         return fd;
1767 }
1768
1769 static volatile int workload_exec_errno;
1770
1771 /*
1772  * evlist__prepare_workload will send a SIGUSR1
1773  * if the fork fails, since we asked for it by setting
1774  * want_signal to true.
1775  */
1776 static void workload_exec_failed_signal(int signo __maybe_unused,
1777                                         siginfo_t *info,
1778                                         void *ucontext __maybe_unused)
1779 {
1780         workload_exec_errno = info->si_value.sival_int;
1781         done = 1;
1782         child_finished = 1;
1783 }
1784
1785 static void snapshot_sig_handler(int sig);
1786 static void alarm_sig_handler(int sig);
1787
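/*
 * Pick a mapped perf_event_mmap_page (the first available ring buffer
 * user page) to read the time conversion parameters from when
 * synthesizing the PERF_RECORD_TIME_CONV event.
 */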
1788 static const struct perf_event_mmap_page *evlist__pick_pc(struct evlist *evlist)
1789 {
1790         if (evlist) {
1791                 if (evlist->mmap && evlist->mmap[0].core.base)
1792                         return evlist->mmap[0].core.base;
1793                 if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].core.base)
1794                         return evlist->overwrite_mmap[0].core.base;
1795         }
1796         return NULL;
1797 }
1798
1799 static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
1800 {
1801         const struct perf_event_mmap_page *pc = evlist__pick_pc(rec->evlist);
1802         if (pc)
1803                 return pc;
1804         return NULL;
1805 }
1806
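/*
 * Synthesize the meta events that perf report/script need: time
 * conversion, id index and AUX trace info (if any), kernel and module
 * mmaps, extra attributes, thread and cpu maps, BPF and cgroup events,
 * and finally the existing threads of the target.
 */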
1807 static int record__synthesize(struct record *rec, bool tail)
1808 {
1809         struct perf_session *session = rec->session;
1810         struct machine *machine = &session->machines.host;
1811         struct perf_data *data = &rec->data;
1812         struct record_opts *opts = &rec->opts;
1813         struct perf_tool *tool = &rec->tool;
1814         int err = 0;
1815         event_op f = process_synthesized_event;
1816
1817         if (rec->opts.tail_synthesize != tail)
1818                 return 0;
1819
1820         if (data->is_pipe) {
1821                 err = perf_event__synthesize_for_pipe(tool, session, data,
1822                                                       process_synthesized_event);
1823                 if (err < 0)
1824                         goto out;
1825
1826                 rec->bytes_written += err;
1827         }
1828
1829         err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
1830                                           process_synthesized_event, machine);
1831         if (err)
1832                 goto out;
1833
1834         /* Synthesize id_index before auxtrace_info */
1835         if (rec->opts.auxtrace_sample_mode || rec->opts.full_auxtrace) {
1836                 err = perf_event__synthesize_id_index(tool,
1837                                                       process_synthesized_event,
1838                                                       session->evlist, machine);
1839                 if (err)
1840                         goto out;
1841         }
1842
1843         if (rec->opts.full_auxtrace) {
1844                 err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
1845                                         session, process_synthesized_event);
1846                 if (err)
1847                         goto out;
1848         }
1849
1850         if (!evlist__exclude_kernel(rec->evlist)) {
1851                 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
1852                                                          machine);
1853                 WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
1854                                    "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
1855                                    "Check /proc/kallsyms permission or run as root.\n");
1856
1857                 err = perf_event__synthesize_modules(tool, process_synthesized_event,
1858                                                      machine);
1859                 WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
1860                                    "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
1861                                    "Check /proc/modules permission or run as root.\n");
1862         }
1863
1864         if (perf_guest) {
1865                 machines__process_guests(&session->machines,
1866                                          perf_event__synthesize_guest_os, tool);
1867         }
1868
1869         err = perf_event__synthesize_extra_attr(&rec->tool,
1870                                                 rec->evlist,
1871                                                 process_synthesized_event,
1872                                                 data->is_pipe);
1873         if (err)
1874                 goto out;
1875
1876         err = perf_event__synthesize_thread_map2(&rec->tool, rec->evlist->core.threads,
1877                                                  process_synthesized_event,
1878                                                  NULL);
1879         if (err < 0) {
1880                 pr_err("Couldn't synthesize thread map.\n");
1881                 return err;
1882         }
1883
1884         err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->core.user_requested_cpus,
1885                                              process_synthesized_event, NULL);
1886         if (err < 0) {
1887                 pr_err("Couldn't synthesize cpu map.\n");
1888                 return err;
1889         }
1890
1891         err = perf_event__synthesize_bpf_events(session, process_synthesized_event,
1892                                                 machine, opts);
1893         if (err < 0)
1894                 pr_warning("Couldn't synthesize bpf events.\n");
1895
1896         if (rec->opts.synth & PERF_SYNTH_CGROUP) {
1897                 err = perf_event__synthesize_cgroups(tool, process_synthesized_event,
1898                                                      machine);
1899                 if (err < 0)
1900                         pr_warning("Couldn't synthesize cgroup events.\n");
1901         }
1902
1903         if (rec->opts.nr_threads_synthesize > 1) {
1904                 perf_set_multithreaded();
1905                 f = process_locked_synthesized_event;
1906         }
1907
1908         if (rec->opts.synth & PERF_SYNTH_TASK) {
1909                 bool needs_mmap = rec->opts.synth & PERF_SYNTH_MMAP;
1910
1911                 err = __machine__synthesize_threads(machine, tool, &opts->target,
1912                                                     rec->evlist->core.threads,
1913                                                     f, needs_mmap, opts->sample_address,
1914                                                     rec->opts.nr_threads_synthesize);
1915         }
1916
1917         if (rec->opts.nr_threads_synthesize > 1)
1918                 perf_set_singlethreaded();
1919
1920 out:
1921         return err;
1922 }
1923
1924 static int record__process_signal_event(union perf_event *event __maybe_unused, void *data)
1925 {
1926         struct record *rec = data;
1927         pthread_kill(rec->thread_id, SIGUSR2);
1928         return 0;
1929 }
1930
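/*
 * Set up the side band evlist: events added via --switch-output-event
 * get a callback that signals the main thread with SIGUSR2, and, with
 * libbpf support, a PERF_RECORD_BPF_EVENT side band event is added so
 * that running BPF programs can be annotated later.
 */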
1931 static int record__setup_sb_evlist(struct record *rec)
1932 {
1933         struct record_opts *opts = &rec->opts;
1934
1935         if (rec->sb_evlist != NULL) {
1936                 /*
1937                  * We get here if --switch-output-event populated the
1938                  * sb_evlist, so associate a callback that will send a SIGUSR2
1939                  * to the main thread.
1940                  */
1941                 evlist__set_cb(rec->sb_evlist, record__process_signal_event, rec);
1942                 rec->thread_id = pthread_self();
1943         }
1944 #ifdef HAVE_LIBBPF_SUPPORT
1945         if (!opts->no_bpf_event) {
1946                 if (rec->sb_evlist == NULL) {
1947                         rec->sb_evlist = evlist__new();
1948
1949                         if (rec->sb_evlist == NULL) {
1950                                 pr_err("Couldn't create side band evlist.\n");
1951                                 return -1;
1952                         }
1953                 }
1954
1955                 if (evlist__add_bpf_sb_event(rec->sb_evlist, &rec->session->header.env)) {
1956                         pr_err("Couldn't ask for PERF_RECORD_BPF_EVENT side band events.\n");
1957                         return -1;
1958                 }
1959         }
1960 #endif
1961         if (evlist__start_sb_thread(rec->sb_evlist, &rec->opts.target)) {
1962                 pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n");
1963                 opts->no_bpf_event = true;
1964         }
1965
1966         return 0;
1967 }
1968
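/*
 * When a clockid is requested (-k/--clockid), store the clockid, its
 * resolution and a pair of reference timestamps (gettimeofday() vs.
 * the selected clock) in the header so that reports can convert
 * between the two.
 */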
1969 static int record__init_clock(struct record *rec)
1970 {
1971         struct perf_session *session = rec->session;
1972         struct timespec ref_clockid;
1973         struct timeval ref_tod;
1974         u64 ref;
1975
1976         if (!rec->opts.use_clockid)
1977                 return 0;
1978
1979         if (rec->opts.use_clockid && rec->opts.clockid_res_ns)
1980                 session->header.env.clock.clockid_res_ns = rec->opts.clockid_res_ns;
1981
1982         session->header.env.clock.clockid = rec->opts.clockid;
1983
1984         if (gettimeofday(&ref_tod, NULL) != 0) {
1985                 pr_err("gettimeofday failed, cannot set reference time.\n");
1986                 return -1;
1987         }
1988
1989         if (clock_gettime(rec->opts.clockid, &ref_clockid)) {
1990                 pr_err("clock_gettime failed, cannot set reference time.\n");
1991                 return -1;
1992         }
1993
1994         ref = (u64) ref_tod.tv_sec * NSEC_PER_SEC +
1995               (u64) ref_tod.tv_usec * NSEC_PER_USEC;
1996
1997         session->header.env.clock.tod_ns = ref;
1998
1999         ref = (u64) ref_clockid.tv_sec * NSEC_PER_SEC +
2000               (u64) ref_clockid.tv_nsec;
2001
2002         session->header.env.clock.clockid_ns = ref;
2003         return 0;
2004 }
2005
2006 static void hit_auxtrace_snapshot_trigger(struct record *rec)
2007 {
2008         if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
2009                 trigger_hit(&auxtrace_snapshot_trigger);
2010                 auxtrace_record__snapshot_started = 1;
2011                 if (auxtrace_record__snapshot_start(rec->itr))
2012                         trigger_error(&auxtrace_snapshot_trigger);
2013         }
2014 }
2015
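/*
 * On hybrid systems, rewrite hybrid event names as "pmu/event/" so
 * that the same event on different core PMUs remains distinguishable
 * in the recorded data.
 */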
2016 static void record__uniquify_name(struct record *rec)
2017 {
2018         struct evsel *pos;
2019         struct evlist *evlist = rec->evlist;
2020         char *new_name;
2021         int ret;
2022
2023         if (!perf_pmu__has_hybrid())
2024                 return;
2025
2026         evlist__for_each_entry(evlist, pos) {
2027                 if (!evsel__is_hybrid(pos))
2028                         continue;
2029
2030                 if (strchr(pos->name, '/'))
2031                         continue;
2032
2033                 ret = asprintf(&new_name, "%s/%s/",
2034                                pos->pmu_name, pos->name);
2035                 if (ret > 0) { /* asprintf() returns -1 on failure */
2036                         free(pos->name);
2037                         pos->name = new_name;
2038                 }
2039         }
2040 }
2041
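/*
 * Ask a reader thread to stop by closing the write end of its message
 * pipe and wait for the thread's termination acknowledgement.
 */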
2042 static int record__terminate_thread(struct record_thread *thread_data)
2043 {
2044         int err;
2045         enum thread_msg ack = THREAD_MSG__UNDEFINED;
2046         pid_t tid = thread_data->tid;
2047
2048         close(thread_data->pipes.msg[1]);
2049         thread_data->pipes.msg[1] = -1;
2050         err = read(thread_data->pipes.ack[0], &ack, sizeof(ack));
2051         if (err > 0)
2052                 pr_debug2("threads[%d]: sent %s\n", tid, thread_msg_tags[ack]);
2053         else
2054                 pr_warning("threads[%d]: failed to receive termination notification from %d\n",
2055                            thread->tid, tid);
2056
2057         return 0;
2058 }
2059
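/*
 * Start the parallel reader threads (thread_data[0] is handled by the
 * main thread itself). Threads are created detached, with their
 * affinity mask applied and all signals blocked, and each one is
 * expected to report back through its ack pipe before recording
 * starts.
 */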
2060 static int record__start_threads(struct record *rec)
2061 {
2062         int t, tt, err, ret = 0, nr_threads = rec->nr_threads;
2063         struct record_thread *thread_data = rec->thread_data;
2064         sigset_t full, mask;
2065         pthread_t handle;
2066         pthread_attr_t attrs;
2067
2068         thread = &thread_data[0];
2069
2070         if (!record__threads_enabled(rec))
2071                 return 0;
2072
2073         sigfillset(&full);
2074         if (sigprocmask(SIG_SETMASK, &full, &mask)) {
2075                 pr_err("Failed to block signals on threads start: %s\n", strerror(errno));
2076                 return -1;
2077         }
2078
2079         pthread_attr_init(&attrs);
2080         pthread_attr_setdetachstate(&attrs, PTHREAD_CREATE_DETACHED);
2081
2082         for (t = 1; t < nr_threads; t++) {
2083                 enum thread_msg msg = THREAD_MSG__UNDEFINED;
2084
2085 #ifdef HAVE_PTHREAD_ATTR_SETAFFINITY_NP
2086                 pthread_attr_setaffinity_np(&attrs,
2087                                             MMAP_CPU_MASK_BYTES(&(thread_data[t].mask->affinity)),
2088                                             (cpu_set_t *)(thread_data[t].mask->affinity.bits));
2089 #endif
2090                 if (pthread_create(&handle, &attrs, record__thread, &thread_data[t])) {
2091                         for (tt = 1; tt < t; tt++)
2092                                 record__terminate_thread(&thread_data[tt]);
2093                         pr_err("Failed to start threads: %s\n", strerror(errno));
2094                         ret = -1;
2095                         goto out_err;
2096                 }
2097
2098                 err = read(thread_data[t].pipes.ack[0], &msg, sizeof(msg));
2099                 if (err > 0)
2100                         pr_debug2("threads[%d]: sent %s\n", rec->thread_data[t].tid,
2101                                   thread_msg_tags[msg]);
2102                 else
2103                         pr_warning("threads[%d]: failed to receive start notification from %d\n",
2104                                    thread->tid, rec->thread_data[t].tid);
2105         }
2106
2107         sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&thread->mask->affinity),
2108                         (cpu_set_t *)thread->mask->affinity.bits);
2109
2110         pr_debug("threads[%d]: started on cpu%d\n", thread->tid, sched_getcpu());
2111
2112 out_err:
2113         pthread_attr_destroy(&attrs);
2114
2115         if (sigprocmask(SIG_SETMASK, &mask, NULL)) {
2116                 pr_err("Failed to unblock signals on threads start: %s\n", strerror(errno));
2117                 ret = -1;
2118         }
2119
2120         return ret;
2121 }
2122
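/*
 * Ask every reader thread to terminate and fold the per-thread sample,
 * wakeup and byte counters back into the record/session totals.
 */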
2123 static int record__stop_threads(struct record *rec)
2124 {
2125         int t;
2126         struct record_thread *thread_data = rec->thread_data;
2127
2128         for (t = 1; t < rec->nr_threads; t++)
2129                 record__terminate_thread(&thread_data[t]);
2130
2131         for (t = 0; t < rec->nr_threads; t++) {
2132                 rec->samples += thread_data[t].samples;
2133                 if (!record__threads_enabled(rec))
2134                         continue;
2135                 rec->session->bytes_transferred += thread_data[t].bytes_transferred;
2136                 rec->session->bytes_compressed += thread_data[t].bytes_compressed;
2137                 pr_debug("threads[%d]: samples=%lld, wakes=%ld, ", thread_data[t].tid,
2138                          thread_data[t].samples, thread_data[t].waking);
2139                 if (thread_data[t].bytes_transferred && thread_data[t].bytes_compressed)
2140                         pr_debug("transferred=%" PRIu64 ", compressed=%" PRIu64 "\n",
2141                                  thread_data[t].bytes_transferred, thread_data[t].bytes_compressed);
2142                 else
2143                         pr_debug("written=%" PRIu64 "\n", thread_data[t].bytes_written);
2144         }
2145
2146         return 0;
2147 }
2148
2149 static unsigned long record__waking(struct record *rec)
2150 {
2151         int t;
2152         unsigned long waking = 0;
2153         struct record_thread *thread_data = rec->thread_data;
2154
2155         for (t = 0; t < rec->nr_threads; t++)
2156                 waking += thread_data[t].waking;
2157
2158         return waking;
2159 }
2160
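/*
 * The main body of 'perf record': create the session, prepare the
 * workload and the events, synthesize the initial meta events, start
 * the reader threads and then loop draining the ring buffers (handling
 * snapshot, switch-output and control fd commands) until done, finally
 * finishing the output file and printing a summary.
 */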
2161 static int __cmd_record(struct record *rec, int argc, const char **argv)
2162 {
2163         int err;
2164         int status = 0;
2165         const bool forks = argc > 0;
2166         struct perf_tool *tool = &rec->tool;
2167         struct record_opts *opts = &rec->opts;
2168         struct perf_data *data = &rec->data;
2169         struct perf_session *session;
2170         bool disabled = false, draining = false;
2171         int fd;
2172         float ratio = 0;
2173         enum evlist_ctl_cmd cmd = EVLIST_CTL_CMD_UNSUPPORTED;
2174
2175         atexit(record__sig_exit);
2176         signal(SIGCHLD, sig_handler);
2177         signal(SIGINT, sig_handler);
2178         signal(SIGTERM, sig_handler);
2179         signal(SIGSEGV, sigsegv_handler);
2180
2181         if (rec->opts.record_namespaces)
2182                 tool->namespace_events = true;
2183
2184         if (rec->opts.record_cgroup) {
2185 #ifdef HAVE_FILE_HANDLE
2186                 tool->cgroup_events = true;
2187 #else
2188                 pr_err("cgroup tracking is not supported\n");
2189                 return -1;
2190 #endif
2191         }
2192
2193         if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) {
2194                 signal(SIGUSR2, snapshot_sig_handler);
2195                 if (rec->opts.auxtrace_snapshot_mode)
2196                         trigger_on(&auxtrace_snapshot_trigger);
2197                 if (rec->switch_output.enabled)
2198                         trigger_on(&switch_output_trigger);
2199         } else {
2200                 signal(SIGUSR2, SIG_IGN);
2201         }
2202
2203         session = perf_session__new(data, tool);
2204         if (IS_ERR(session)) {
2205                 pr_err("Perf session creation failed.\n");
2206                 return PTR_ERR(session);
2207         }
2208
2209         if (record__threads_enabled(rec)) {
2210                 if (perf_data__is_pipe(&rec->data)) {
2211                         pr_err("Parallel trace streaming is not available in pipe mode.\n");
2212                         return -1;
2213                 }
2214                 if (rec->opts.full_auxtrace) {
2215                         pr_err("Parallel trace streaming is not available in AUX area tracing mode.\n");
2216                         return -1;
2217                 }
2218         }
2219
2220         fd = perf_data__fd(data);
2221         rec->session = session;
2222
2223         if (zstd_init(&session->zstd_data, rec->opts.comp_level) < 0) {
2224                 pr_err("Compression initialization failed.\n");
2225                 return -1;
2226         }
2227 #ifdef HAVE_EVENTFD_SUPPORT
2228         done_fd = eventfd(0, EFD_NONBLOCK);
2229         if (done_fd < 0) {
2230                 pr_err("Failed to create wakeup eventfd, error: %m\n");
2231                 status = -1;
2232                 goto out_delete_session;
2233         }
2234         err = evlist__add_wakeup_eventfd(rec->evlist, done_fd);
2235         if (err < 0) {
2236                 pr_err("Failed to add wakeup eventfd to poll list\n");
2237                 status = err;
2238                 goto out_delete_session;
2239         }
2240 #endif // HAVE_EVENTFD_SUPPORT
2241
2242         session->header.env.comp_type  = PERF_COMP_ZSTD;
2243         session->header.env.comp_level = rec->opts.comp_level;
2244
2245         if (rec->opts.kcore &&
2246             !record__kcore_readable(&session->machines.host)) {
2247                 pr_err("ERROR: kcore is not readable.\n");
2248                 return -1;
2249         }
2250
2251         if (record__init_clock(rec))
2252                 return -1;
2253
2254         record__init_features(rec);
2255
2256         if (forks) {
2257                 err = evlist__prepare_workload(rec->evlist, &opts->target, argv, data->is_pipe,
2258                                                workload_exec_failed_signal);
2259                 if (err < 0) {
2260                         pr_err("Couldn't run the workload!\n");
2261                         status = err;
2262                         goto out_delete_session;
2263                 }
2264         }
2265
2266         /*
2267          * If we have just a single event and are sending data
2268          * through a pipe, we need to force ID allocation,
2269          * because we synthesize the event name through the pipe
2270          * and need the ID for that.
2271          */
2272         if (data->is_pipe && rec->evlist->core.nr_entries == 1)
2273                 rec->opts.sample_id = true;
2274
2275         record__uniquify_name(rec);
2276
2277         if (record__open(rec) != 0) {
2278                 err = -1;
2279                 goto out_free_threads;
2280         }
2281         session->header.env.comp_mmap_len = session->evlist->core.mmap_len;
2282
2283         if (rec->opts.kcore) {
2284                 err = record__kcore_copy(&session->machines.host, data);
2285                 if (err) {
2286                         pr_err("ERROR: Failed to copy kcore\n");
2287                         goto out_free_threads;
2288                 }
2289         }
2290
2291         err = bpf__apply_obj_config();
2292         if (err) {
2293                 char errbuf[BUFSIZ];
2294
2295                 bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
2296                 pr_err("ERROR: Apply config to BPF failed: %s\n",
2297                          errbuf);
2298                 goto out_free_threads;
2299         }
2300
2301         /*
2302          * Normally perf_session__new would do this, but it doesn't have the
2303          * evlist.
2304          */
2305         if (rec->tool.ordered_events && !evlist__sample_id_all(rec->evlist)) {
2306                 pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
2307                 rec->tool.ordered_events = false;
2308         }
2309
2310         if (!rec->evlist->core.nr_groups)
2311                 perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);
2312
2313         if (data->is_pipe) {
2314                 err = perf_header__write_pipe(fd);
2315                 if (err < 0)
2316                         goto out_free_threads;
2317         } else {
2318                 err = perf_session__write_header(session, rec->evlist, fd, false);
2319                 if (err < 0)
2320                         goto out_free_threads;
2321         }
2322
2323         err = -1;
2324         if (!rec->no_buildid
2325             && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
2326                 pr_err("Couldn't generate buildids. "
2327                        "Use --no-buildid to profile anyway.\n");
2328                 goto out_free_threads;
2329         }
2330
2331         err = record__setup_sb_evlist(rec);
2332         if (err)
2333                 goto out_free_threads;
2334
2335         err = record__synthesize(rec, false);
2336         if (err < 0)
2337                 goto out_free_threads;
2338
2339         if (rec->realtime_prio) {
2340                 struct sched_param param;
2341
2342                 param.sched_priority = rec->realtime_prio;
2343                 if (sched_setscheduler(0, SCHED_FIFO, &param)) {
2344                         pr_err("Could not set realtime priority.\n");
2345                         err = -1;
2346                         goto out_free_threads;
2347                 }
2348         }
2349
2350         if (record__start_threads(rec))
2351                 goto out_free_threads;
2352
2353         /*
2354          * When perf is starting the traced process, all the events
2355          * (apart from group members) have enable_on_exec=1 set,
2356          * so don't spoil it by prematurely enabling them.
2357          */
2358         if (!target__none(&opts->target) && !opts->initial_delay)
2359                 evlist__enable(rec->evlist);
2360
2361         /*
2362          * Let the child rip
2363          */
2364         if (forks) {
2365                 struct machine *machine = &session->machines.host;
2366                 union perf_event *event;
2367                 pid_t tgid;
2368
2369                 event = malloc(sizeof(event->comm) + machine->id_hdr_size);
2370                 if (event == NULL) {
2371                         err = -ENOMEM;
2372                         goto out_child;
2373                 }
2374
2375                 /*
2376                  * Some H/W events are generated before the COMM event,
2377                  * which is emitted during exec(), so perf script
2378                  * cannot see a correct process name for those events.
2379                  * Synthesize a COMM event to prevent that.
2380                  */
2381                 tgid = perf_event__synthesize_comm(tool, event,
2382                                                    rec->evlist->workload.pid,
2383                                                    process_synthesized_event,
2384                                                    machine);
2385                 free(event);
2386
2387                 if (tgid == -1)
2388                         goto out_child;
2389
2390                 event = malloc(sizeof(event->namespaces) +
2391                                (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
2392                                machine->id_hdr_size);
2393                 if (event == NULL) {
2394                         err = -ENOMEM;
2395                         goto out_child;
2396                 }
2397
2398                 /*
2399                  * Synthesize NAMESPACES event for the command specified.
2400                  */
2401                 perf_event__synthesize_namespaces(tool, event,
2402                                                   rec->evlist->workload.pid,
2403                                                   tgid, process_synthesized_event,
2404                                                   machine);
2405                 free(event);
2406
2407                 evlist__start_workload(rec->evlist);
2408         }
2409
2410         if (opts->initial_delay) {
2411                 pr_info(EVLIST_DISABLED_MSG);
2412                 if (opts->initial_delay > 0) {
2413                         usleep(opts->initial_delay * USEC_PER_MSEC);
2414                         evlist__enable(rec->evlist);
2415                         pr_info(EVLIST_ENABLED_MSG);
2416                 }
2417         }
2418
2419         trigger_ready(&auxtrace_snapshot_trigger);
2420         trigger_ready(&switch_output_trigger);
2421         perf_hooks__invoke_record_start();
2422         for (;;) {
2423                 unsigned long long hits = thread->samples;
2424
2425                 /*
2426                  * rec->evlist->bkw_mmap_state may be BKW_MMAP_EMPTY
2427                  * here: when done == true and hits != rec->samples
2428                  * in the previous round.
2429                  *
2430                  * evlist__toggle_bkw_mmap() ensures we never convert
2431                  * BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING.
2432                  */
2433                 if (trigger_is_hit(&switch_output_trigger) || done || draining)
2434                         evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING);
2435
2436                 if (record__mmap_read_all(rec, false) < 0) {
2437                         trigger_error(&auxtrace_snapshot_trigger);
2438                         trigger_error(&switch_output_trigger);
2439                         err = -1;
2440                         goto out_child;
2441                 }
2442
2443                 if (auxtrace_record__snapshot_started) {
2444                         auxtrace_record__snapshot_started = 0;
2445                         if (!trigger_is_error(&auxtrace_snapshot_trigger))
2446                                 record__read_auxtrace_snapshot(rec, false);
2447                         if (trigger_is_error(&auxtrace_snapshot_trigger)) {
2448                                 pr_err("AUX area tracing snapshot failed\n");
2449                                 err = -1;
2450                                 goto out_child;
2451                         }
2452                 }
2453
2454                 if (trigger_is_hit(&switch_output_trigger)) {
2455                         /*
2456                          * If switch_output_trigger is hit, the data in the
2457                          * overwritable ring buffer should have been collected,
2458                          * so bkw_mmap_state should be set to BKW_MMAP_EMPTY.
2459                          *
2460                          * If SIGUSR2 was raised after or during record__mmap_read_all(),
2461                          * record__mmap_read_all() didn't collect data from the
2462                          * overwritable ring buffer. Read again.
2463                          */
2464                         if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING)
2465                                 continue;
2466                         trigger_ready(&switch_output_trigger);
2467
2468                         /*
2469                          * Re-enable events in the overwrite ring buffer after
2470                          * record__mmap_read_all(): we should have collected
2471                          * data from it by now.
2472                          */
2473                         evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING);
2474
2475                         if (!quiet)
2476                                 fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
2477                                         record__waking(rec));
2478                         thread->waking = 0;
2479                         fd = record__switch_output(rec, false);
2480                         if (fd < 0) {
2481                                 pr_err("Failed to switch to new file\n");
2482                                 trigger_error(&switch_output_trigger);
2483                                 err = fd;
2484                                 goto out_child;
2485                         }
2486
2487                         /* re-arm the alarm */
2488                         if (rec->switch_output.time)
2489                                 alarm(rec->switch_output.time);
2490                 }
2491
2492                 if (hits == thread->samples) {
2493                         if (done || draining)
2494                                 break;
2495                         err = fdarray__poll(&thread->pollfd, -1);
2496                         /*
2497                          * Propagate the error only if there is one. Ignore a positive
2498                          * number of returned events and interrupt errors (EINTR).
2499                          */
2500                         if (err > 0 || (err < 0 && errno == EINTR))
2501                                 err = 0;
2502                         thread->waking++;
2503
2504                         if (fdarray__filter(&thread->pollfd, POLLERR | POLLHUP,
2505                                             record__thread_munmap_filtered, NULL) == 0)
2506                                 draining = true;
2507
2508                         evlist__ctlfd_update(rec->evlist,
2509                                 &thread->pollfd.entries[thread->ctlfd_pos]);
2510                 }
2511
2512                 if (evlist__ctlfd_process(rec->evlist, &cmd) > 0) {
2513                         switch (cmd) {
2514                         case EVLIST_CTL_CMD_SNAPSHOT:
2515                                 hit_auxtrace_snapshot_trigger(rec);
2516                                 evlist__ctlfd_ack(rec->evlist);
2517                                 break;
2518                         case EVLIST_CTL_CMD_STOP:
2519                                 done = 1;
2520                                 break;
2521                         case EVLIST_CTL_CMD_ACK:
2522                         case EVLIST_CTL_CMD_UNSUPPORTED:
2523                         case EVLIST_CTL_CMD_ENABLE:
2524                         case EVLIST_CTL_CMD_DISABLE:
2525                         case EVLIST_CTL_CMD_EVLIST:
2526                         case EVLIST_CTL_CMD_PING:
2527                         default:
2528                                 break;
2529                         }
2530                 }
2531
2532                 /*
2533                  * When perf is starting the traced process, the events die
2534                  * with the process at the end and we wait for that. Thus there
2535                  * is no need to disable the events in this case.
2536                  */
2537                 if (done && !disabled && !target__none(&opts->target)) {
2538                         trigger_off(&auxtrace_snapshot_trigger);
2539                         evlist__disable(rec->evlist);
2540                         disabled = true;
2541                 }
2542         }
2543
2544         trigger_off(&auxtrace_snapshot_trigger);
2545         trigger_off(&switch_output_trigger);
2546
2547         if (opts->auxtrace_snapshot_on_exit)
2548                 record__auxtrace_snapshot_exit(rec);
2549
2550         if (forks && workload_exec_errno) {
2551                 char msg[STRERR_BUFSIZE], strevsels[2048];
2552                 const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
2553
2554                 evlist__scnprintf_evsels(rec->evlist, sizeof(strevsels), strevsels);
2555
2556                 pr_err("Failed to collect '%s' for the '%s' workload: %s\n",
2557                         strevsels, argv[0], emsg);
2558                 err = -1;
2559                 goto out_child;
2560         }
2561
2562         if (!quiet)
2563                 fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n",
2564                         record__waking(rec));
2565
2566         if (target__none(&rec->opts.target))
2567                 record__synthesize_workload(rec, true);
2568
2569 out_child:
2570         record__stop_threads(rec);
2571         record__mmap_read_all(rec, true);
2572 out_free_threads:
2573         record__free_thread_data(rec);
2574         evlist__finalize_ctlfd(rec->evlist);
2575         record__aio_mmap_read_sync(rec);
2576
2577         if (rec->session->bytes_transferred && rec->session->bytes_compressed) {
2578                 ratio = (float)rec->session->bytes_transferred/(float)rec->session->bytes_compressed;
2579                 session->header.env.comp_ratio = ratio + 0.5;
2580         }
2581
2582         if (forks) {
2583                 int exit_status;
2584
2585                 if (!child_finished)
2586                         kill(rec->evlist->workload.pid, SIGTERM);
2587
2588                 wait(&exit_status);
2589
2590                 if (err < 0)
2591                         status = err;
2592                 else if (WIFEXITED(exit_status))
2593                         status = WEXITSTATUS(exit_status);
2594                 else if (WIFSIGNALED(exit_status))
2595                         signr = WTERMSIG(exit_status);
2596         } else
2597                 status = err;
2598
2599         record__synthesize(rec, true);
2600         /* this will be recalculated during process_buildids() */
2601         rec->samples = 0;
2602
2603         if (!err) {
2604                 if (!rec->timestamp_filename) {
2605                         record__finish_output(rec);
2606                 } else {
2607                         fd = record__switch_output(rec, true);
2608                         if (fd < 0) {
2609                                 status = fd;
2610                                 goto out_delete_session;
2611                         }
2612                 }
2613         }
2614
2615         perf_hooks__invoke_record_end();
2616
2617         if (!err && !quiet) {
2618                 char samples[128];
2619                 const char *postfix = rec->timestamp_filename ?
2620                                         ".<timestamp>" : "";
2621
2622                 if (rec->samples && !rec->opts.full_auxtrace)
2623                         scnprintf(samples, sizeof(samples),
2624                                   " (%" PRIu64 " samples)", rec->samples);
2625                 else
2626                         samples[0] = '\0';
2627
2628                 fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s",
2629                         perf_data__size(data) / 1024.0 / 1024.0,
2630                         data->path, postfix, samples);
2631                 if (ratio) {
2632                         fprintf(stderr, ", compressed (original %.3f MB, ratio is %.3f)",
2633                                         rec->session->bytes_transferred / 1024.0 / 1024.0,
2634                                         ratio);
2635                 }
2636                 fprintf(stderr, " ]\n");
2637         }
2638
2639 out_delete_session:
2640 #ifdef HAVE_EVENTFD_SUPPORT
2641         if (done_fd >= 0)
2642                 close(done_fd);
2643 #endif
2644         zstd_fini(&session->zstd_data);
2645         perf_session__delete(session);
2646
2647         if (!opts->no_bpf_event)
2648                 evlist__stop_sb_thread(rec->sb_evlist);
2649         return status;
2650 }
2651
2652 static void callchain_debug(struct callchain_param *callchain)
2653 {
2654         static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };
2655
2656         pr_debug("callchain: type %s\n", str[callchain->record_mode]);
2657
2658         if (callchain->record_mode == CALLCHAIN_DWARF)
2659                 pr_debug("callchain: stack dump size %d\n",
2660                          callchain->dump_size);
2661 }
2662
2663 int record_opts__parse_callchain(struct record_opts *record,
2664                                  struct callchain_param *callchain,
2665                                  const char *arg, bool unset)
2666 {
2667         int ret;
2668         callchain->enabled = !unset;
2669
2670         /* --no-call-graph */
2671         if (unset) {
2672                 callchain->record_mode = CALLCHAIN_NONE;
2673                 pr_debug("callchain: disabled\n");
2674                 return 0;
2675         }
2676
2677         ret = parse_callchain_record_opt(arg, callchain);
2678         if (!ret) {
2679                 /* Enable data address sampling for DWARF unwind. */
2680                 if (callchain->record_mode == CALLCHAIN_DWARF)
2681                         record->sample_address = true;
2682                 callchain_debug(callchain);
2683         }
2684
2685         return ret;
2686 }
2687
2688 int record_parse_callchain_opt(const struct option *opt,
2689                                const char *arg,
2690                                int unset)
2691 {
2692         return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
2693 }
2694
2695 int record_callchain_opt(const struct option *opt,
2696                          const char *arg __maybe_unused,
2697                          int unset __maybe_unused)
2698 {
2699         struct callchain_param *callchain = opt->value;
2700
2701         callchain->enabled = true;
2702
2703         if (callchain->record_mode == CALLCHAIN_NONE)
2704                 callchain->record_mode = CALLCHAIN_FP;
2705
2706         callchain_debug(callchain);
2707         return 0;
2708 }
2709
2710 static int perf_record_config(const char *var, const char *value, void *cb)
2711 {
2712         struct record *rec = cb;
2713
2714         if (!strcmp(var, "record.build-id")) {
2715                 if (!strcmp(value, "cache"))
2716                         rec->no_buildid_cache = false;
2717                 else if (!strcmp(value, "no-cache"))
2718                         rec->no_buildid_cache = true;
2719                 else if (!strcmp(value, "skip"))
2720                         rec->no_buildid = true;
2721                 else if (!strcmp(value, "mmap"))
2722                         rec->buildid_mmap = true;
2723                 else
2724                         return -1;
2725                 return 0;
2726         }
2727         if (!strcmp(var, "record.call-graph")) {
2728                 var = "call-graph.record-mode";
2729                 return perf_default_config(var, value, cb);
2730         }
2731 #ifdef HAVE_AIO_SUPPORT
2732         if (!strcmp(var, "record.aio")) {
2733                 rec->opts.nr_cblocks = strtol(value, NULL, 0);
2734                 if (!rec->opts.nr_cblocks)
2735                         rec->opts.nr_cblocks = nr_cblocks_default;
2736         }
2737 #endif
2738         if (!strcmp(var, "record.debuginfod")) {
2739                 rec->debuginfod.urls = strdup(value);
2740                 if (!rec->debuginfod.urls)
2741                         return -ENOMEM;
2742                 rec->debuginfod.set = true;
2743         }
2744
2745         return 0;
2746 }
2747
2748
2749 static int record__parse_affinity(const struct option *opt, const char *str, int unset)
2750 {
2751         struct record_opts *opts = (struct record_opts *)opt->value;
2752
2753         if (unset || !str)
2754                 return 0;
2755
2756         if (!strcasecmp(str, "node"))
2757                 opts->affinity = PERF_AFFINITY_NODE;
2758         else if (!strcasecmp(str, "cpu"))
2759                 opts->affinity = PERF_AFFINITY_CPU;
2760
2761         return 0;
2762 }
2763
2764 static int record__mmap_cpu_mask_alloc(struct mmap_cpu_mask *mask, int nr_bits)
2765 {
2766         mask->nbits = nr_bits;
2767         mask->bits = bitmap_zalloc(mask->nbits);
2768         if (!mask->bits)
2769                 return -ENOMEM;
2770
2771         return 0;
2772 }
2773
2774 static void record__mmap_cpu_mask_free(struct mmap_cpu_mask *mask)
2775 {
2776         bitmap_free(mask->bits);
2777         mask->nbits = 0;
2778 }
2779
2780 static int record__thread_mask_alloc(struct thread_mask *mask, int nr_bits)
2781 {
2782         int ret;
2783
2784         ret = record__mmap_cpu_mask_alloc(&mask->maps, nr_bits);
2785         if (ret) {
2786                 mask->affinity.bits = NULL;
2787                 return ret;
2788         }
2789
2790         ret = record__mmap_cpu_mask_alloc(&mask->affinity, nr_bits);
2791         if (ret) {
2792                 record__mmap_cpu_mask_free(&mask->maps);
2793                 mask->maps.bits = NULL;
2794         }
2795
2796         return ret;
2797 }
2798
2799 static void record__thread_mask_free(struct thread_mask *mask)
2800 {
2801         record__mmap_cpu_mask_free(&mask->maps);
2802         record__mmap_cpu_mask_free(&mask->affinity);
2803 }
2804
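/*
 * Parse the --threads option: with an empty argument the default
 * per-CPU spec is used, otherwise the argument is matched against the
 * predefined spec tags, falling back to a user provided mask spec.
 */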
2805 static int record__parse_threads(const struct option *opt, const char *str, int unset)
2806 {
2807         int s;
2808         struct record_opts *opts = opt->value;
2809
2810         if (unset || !str || !strlen(str)) {
2811                 opts->threads_spec = THREAD_SPEC__CPU;
2812         } else {
2813                 for (s = 1; s < THREAD_SPEC__MAX; s++) {
2814                         if (s == THREAD_SPEC__USER) {
2815                                 opts->threads_user_spec = strdup(str);
2816                                 if (!opts->threads_user_spec)
2817                                         return -ENOMEM;
2818                                 opts->threads_spec = THREAD_SPEC__USER;
2819                                 break;
2820                         }
2821                         if (!strncasecmp(str, thread_spec_tags[s], strlen(thread_spec_tags[s]))) {
2822                                 opts->threads_spec = s;
2823                                 break;
2824                         }
2825                 }
2826         }
2827
2828         if (opts->threads_spec == THREAD_SPEC__USER)
2829                 pr_debug("threads_spec: %s\n", opts->threads_user_spec);
2830         else
2831                 pr_debug("threads_spec: %s\n", thread_spec_tags[opts->threads_spec]);
2832
2833         return 0;
2834 }
2835
2836 static int parse_output_max_size(const struct option *opt,
2837                                  const char *str, int unset)
2838 {
2839         unsigned long *s = (unsigned long *)opt->value;
2840         static struct parse_tag tags_size[] = {
2841                 { .tag  = 'B', .mult = 1       },
2842                 { .tag  = 'K', .mult = 1 << 10 },
2843                 { .tag  = 'M', .mult = 1 << 20 },
2844                 { .tag  = 'G', .mult = 1 << 30 },
2845                 { .tag  = 0 },
2846         };
2847         unsigned long val;
2848
2849         if (unset) {
2850                 *s = 0;
2851                 return 0;
2852         }
2853
2854         val = parse_tag_value(str, tags_size);
2855         if (val != (unsigned long) -1) {
2856                 *s = val;
2857                 return 0;
2858         }
2859
2860         return -1;
2861 }
2862
2863 static int record__parse_mmap_pages(const struct option *opt,
2864                                     const char *str,
2865                                     int unset __maybe_unused)
2866 {
2867         struct record_opts *opts = opt->value;
2868         char *s, *p;
2869         unsigned int mmap_pages;
2870         int ret;
2871
2872         if (!str)
2873                 return -EINVAL;
2874
2875         s = strdup(str);
2876         if (!s)
2877                 return -ENOMEM;
2878
2879         p = strchr(s, ',');
2880         if (p)
2881                 *p = '\0';
2882
2883         if (*s) {
2884                 ret = __evlist__parse_mmap_pages(&mmap_pages, s);
2885                 if (ret)
2886                         goto out_free;
2887                 opts->mmap_pages = mmap_pages;
2888         }
2889
2890         if (!p) {
2891                 ret = 0;
2892                 goto out_free;
2893         }
2894
2895         ret = __evlist__parse_mmap_pages(&mmap_pages, p + 1);
2896         if (ret)
2897                 goto out_free;
2898
2899         opts->auxtrace_mmap_pages = mmap_pages;
2900
2901 out_free:
2902         free(s);
2903         return ret;
2904 }
2905
2906 void __weak arch__add_leaf_frame_record_opts(struct record_opts *opts __maybe_unused)
2907 {
2908 }
2909
2910 static int parse_control_option(const struct option *opt,
2911                                 const char *str,
2912                                 int unset __maybe_unused)
2913 {
2914         struct record_opts *opts = opt->value;
2915
2916         return evlist__parse_control(str, &opts->ctl_fd, &opts->ctl_fd_ack, &opts->ctl_fd_close);
2917 }
2918
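     /*
      * Warn when the --switch-output size threshold is smaller than half of
      * the mmap wakeup buffer size, as the resulting perf.data files are then
      * likely to come out bigger than requested.
      */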
2919 static void switch_output_size_warn(struct record *rec)
2920 {
2921         u64 wakeup_size = evlist__mmap_size(rec->opts.mmap_pages);
2922         struct switch_output *s = &rec->switch_output;
2923
2924         wakeup_size /= 2;
2925
2926         if (s->size < wakeup_size) {
2927                 char buf[100];
2928
2929                 unit_number__scnprintf(buf, sizeof(buf), wakeup_size);
2930                 pr_warning("WARNING: switch-output data size is lower than "
2931                            "the wakeup kernel buffer size (%s); "
2932                            "expect bigger perf.data sizes\n", buf);
2933         }
2934 }
2935
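     /*
      * Configure output switching from --switch-output/--switch-output-event:
      * "signal" switches on SIGUSR2, a size[BKMG] value switches once that
      * much data has been written, and a time[smhd] value switches
      * periodically. All variants imply timestamped output file names and are
      * not available in parallel streaming (--threads) mode.
      */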
2936 static int switch_output_setup(struct record *rec)
2937 {
2938         struct switch_output *s = &rec->switch_output;
2939         static struct parse_tag tags_size[] = {
2940                 { .tag  = 'B', .mult = 1       },
2941                 { .tag  = 'K', .mult = 1 << 10 },
2942                 { .tag  = 'M', .mult = 1 << 20 },
2943                 { .tag  = 'G', .mult = 1 << 30 },
2944                 { .tag  = 0 },
2945         };
2946         static struct parse_tag tags_time[] = {
2947                 { .tag  = 's', .mult = 1        },
2948                 { .tag  = 'm', .mult = 60       },
2949                 { .tag  = 'h', .mult = 60*60    },
2950                 { .tag  = 'd', .mult = 60*60*24 },
2951                 { .tag  = 0 },
2952         };
2953         unsigned long val;
2954
2955         /*
2956          * If we're using --switch-output-event, then we imply
2957          * --switch-output=signal, as we'll send a SIGUSR2 from the side band
2958          * thread to its parent.
2959          */
2960         if (rec->switch_output_event_set) {
2961                 if (record__threads_enabled(rec)) {
2962                         pr_warning("WARNING: --switch-output-event option is not available in parallel streaming mode.\n");
2963                         return 0;
2964                 }
2965                 goto do_signal;
2966         }
2967
2968         if (!s->set)
2969                 return 0;
2970
2971         if (record__threads_enabled(rec)) {
2972                 pr_warning("WARNING: --switch-output option is not available in parallel streaming mode.\n");
2973                 return 0;
2974         }
2975
2976         if (!strcmp(s->str, "signal")) {
2977 do_signal:
2978                 s->signal = true;
2979                 pr_debug("switch-output with SIGUSR2 signal\n");
2980                 goto enabled;
2981         }
2982
2983         val = parse_tag_value(s->str, tags_size);
2984         if (val != (unsigned long) -1) {
2985                 s->size = val;
2986                 pr_debug("switch-output with %s size threshold\n", s->str);
2987                 goto enabled;
2988         }
2989
2990         val = parse_tag_value(s->str, tags_time);
2991         if (val != (unsigned long) -1) {
2992                 s->time = val;
2993                 pr_debug("switch-output with %s time threshold (%lu seconds)\n",
2994                          s->str, s->time);
2995                 goto enabled;
2996         }
2997
2998         return -1;
2999
3000 enabled:
3001         rec->timestamp_filename = true;
3002         s->enabled              = true;
3003
3004         if (s->size && !rec->opts.no_buffering)
3005                 switch_output_size_warn(rec);
3006
3007         return 0;
3008 }
3009
3010 static const char * const __record_usage[] = {
3011         "perf record [<options>] [<command>]",
3012         "perf record [<options>] -- <command> [<options>]",
3013         NULL
3014 };
3015 const char * const *record_usage = __record_usage;
3016
3017 static int build_id__process_mmap(struct perf_tool *tool, union perf_event *event,
3018                                   struct perf_sample *sample, struct machine *machine)
3019 {
3020         /*
3021          * We already have the kernel maps, put in place via perf_session__create_kernel_maps(),
3022          * so there is no need to add them twice.
3023          */
3024         if (!(event->header.misc & PERF_RECORD_MISC_USER))
3025                 return 0;
3026         return perf_event__process_mmap(tool, event, sample, machine);
3027 }
3028
3029 static int build_id__process_mmap2(struct perf_tool *tool, union perf_event *event,
3030                                    struct perf_sample *sample, struct machine *machine)
3031 {
3032         /*
3033          * We already have the kernel maps, put in place via perf_session__create_kernel_maps(),
3034          * so there is no need to add them twice.
3035          */
3036         if (!(event->header.misc & PERF_RECORD_MISC_USER))
3037                 return 0;
3038
3039         return perf_event__process_mmap2(tool, event, sample, machine);
3040 }
3041
3042 static int process_timestamp_boundary(struct perf_tool *tool,
3043                                       union perf_event *event __maybe_unused,
3044                                       struct perf_sample *sample,
3045                                       struct machine *machine __maybe_unused)
3046 {
3047         struct record *rec = container_of(tool, struct record, tool);
3048
3049         set_timestamp_boundary(rec, sample->time);
3050         return 0;
3051 }
3052
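     /*
      * Parse --synth=<no|all|task|mmap|cgroup>, fine-tuning which kinds of
      * events are synthesized (see the option description below; default is
      * all).
      */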
3053 static int parse_record_synth_option(const struct option *opt,
3054                                      const char *str,
3055                                      int unset __maybe_unused)
3056 {
3057         struct record_opts *opts = opt->value;
3058         char *p = strdup(str);
3059
3060         if (p == NULL)
3061                 return -1;
3062
3063         opts->synth = parse_synth_opt(p);
3064         free(p);
3065
3066         if (opts->synth < 0) {
3067                 pr_err("Invalid synth option: %s\n", str);
3068                 return -1;
3069         }
3070         return 0;
3071 }
3072
3073 /*
3074  * XXX Ideally this would be local to cmd_record() and passed to a record__new()
3075  * because we need access to it in record__exit(), which is called
3076  * after cmd_record() exits, but since record_options needs to be accessible to
3077  * builtin-script, leave it here.
3078  *
3079  * At least we don't touch it in all the other functions here directly.
3080  *
3081  * Just say no to tons of global variables, sigh.
3082  */
3083 static struct record record = {
3084         .opts = {
3085                 .sample_time         = true,
3086                 .mmap_pages          = UINT_MAX,
3087                 .user_freq           = UINT_MAX,
3088                 .user_interval       = ULLONG_MAX,
3089                 .freq                = 4000,
3090                 .target              = {
3091                         .uses_mmap   = true,
3092                         .default_per_cpu = true,
3093                 },
3094                 .mmap_flush          = MMAP_FLUSH_DEFAULT,
3095                 .nr_threads_synthesize = 1,
3096                 .ctl_fd              = -1,
3097                 .ctl_fd_ack          = -1,
3098                 .synth               = PERF_SYNTH_ALL,
3099         },
3100         .tool = {
3101                 .sample         = process_sample_event,
3102                 .fork           = perf_event__process_fork,
3103                 .exit           = perf_event__process_exit,
3104                 .comm           = perf_event__process_comm,
3105                 .namespaces     = perf_event__process_namespaces,
3106                 .mmap           = build_id__process_mmap,
3107                 .mmap2          = build_id__process_mmap2,
3108                 .itrace_start   = process_timestamp_boundary,
3109                 .aux            = process_timestamp_boundary,
3110                 .ordered_events = true,
3111         },
3112 };
3113
3114 const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
3115         "\n\t\t\t\tDefault: fp";
3116
3117 static bool dry_run;
3118
3119 /*
3120  * XXX Will stay a global variable till we fix builtin-script.c to stop messing
3121  * with it and switch to using the library functions in perf_evlist that came
3122  * from builtin-record.c, i.e. use record_opts,
3123  * evlist__prepare_workload, etc. instead of fork+exec'ing 'perf record',
3124  * using pipes, etc.
3125  */
3126 static struct option __record_options[] = {
3127         OPT_CALLBACK('e', "event", &record.evlist, "event",
3128                      "event selector. use 'perf list' to list available events",
3129                      parse_events_option),
3130         OPT_CALLBACK(0, "filter", &record.evlist, "filter",
3131                      "event filter", parse_filter),
3132         OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
3133                            NULL, "don't record events from perf itself",
3134                            exclude_perf),
3135         OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
3136                     "record events on existing process id"),
3137         OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
3138                     "record events on existing thread id"),
3139         OPT_INTEGER('r', "realtime", &record.realtime_prio,
3140                     "collect data with this RT SCHED_FIFO priority"),
3141         OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
3142                     "collect data without buffering"),
3143         OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
3144                     "collect raw sample records from all opened counters"),
3145         OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
3146                             "system-wide collection from all CPUs"),
3147         OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
3148                     "list of cpus to monitor"),
3149         OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
3150         OPT_STRING('o', "output", &record.data.path, "file",
3151                     "output file name"),
3152         OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
3153                         &record.opts.no_inherit_set,
3154                         "child tasks do not inherit counters"),
3155         OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize,
3156                     "synthesize non-sample events at the end of output"),
3157         OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"),
3158         OPT_BOOLEAN(0, "no-bpf-event", &record.opts.no_bpf_event, "do not record bpf events"),
3159         OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq,
3160                     "Fail if the specified frequency can't be used"),
3161         OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'",
3162                      "profile at this frequency",
3163                       record__parse_freq),
3164         OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
3165                      "number of mmap data pages and AUX area tracing mmap pages",
3166                      record__parse_mmap_pages),
3167         OPT_CALLBACK(0, "mmap-flush", &record.opts, "number",
3168                      "Minimum number of bytes extracted from mmap data pages (default: 1)",
3169                      record__mmap_flush_parse),
3170         OPT_BOOLEAN(0, "group", &record.opts.group,
3171                     "put the counters into a counter group"),
3172         OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
3173                            NULL, "enables call-graph recording" ,
3174                            &record_callchain_opt),
3175         OPT_CALLBACK(0, "call-graph", &record.opts,
3176                      "record_mode[,record_size]", record_callchain_help,
3177                      &record_parse_callchain_opt),
3178         OPT_INCR('v', "verbose", &verbose,
3179                     "be more verbose (show counter open errors, etc)"),
3180         OPT_BOOLEAN('q', "quiet", &quiet, "don't print any messages"),
3181         OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
3182                     "per thread counts"),
3183         OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
3184         OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr,
3185                     "Record the sample physical addresses"),
3186         OPT_BOOLEAN(0, "data-page-size", &record.opts.sample_data_page_size,
3187                     "Record the sampled data address data page size"),
3188         OPT_BOOLEAN(0, "code-page-size", &record.opts.sample_code_page_size,
3189                     "Record the sampled code address (ip) page size"),
3190         OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"),
3191         OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
3192                         &record.opts.sample_time_set,
3193                         "Record the sample timestamps"),
3194         OPT_BOOLEAN_SET('P', "period", &record.opts.period, &record.opts.period_set,
3195                         "Record the sample period"),
3196         OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
3197                     "don't sample"),
3198         OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache,
3199                         &record.no_buildid_cache_set,
3200                         "do not update the buildid cache"),
3201         OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid,
3202                         &record.no_buildid_set,
3203                         "do not collect buildids in perf.data"),
3204         OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
3205                      "monitor event in cgroup name only",
3206                      parse_cgroups),
3207         OPT_INTEGER('D', "delay", &record.opts.initial_delay,
3208                   "ms to wait before starting measurement after program start (-1: start with events disabled)"),
3209         OPT_BOOLEAN(0, "kcore", &record.opts.kcore, "copy /proc/kcore"),
3210         OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
3211                    "user to profile"),
3212
3213         OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
3214                      "branch any", "sample any taken branches",
3215                      parse_branch_stack),
3216
3217         OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
3218                      "branch filter mask", "branch stack filter modes",
3219                      parse_branch_stack),
3220         OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
3221                     "sample by weight (on special events only)"),
3222         OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
3223                     "sample transaction flags (special events only)"),
3224         OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
3225                     "use per-thread mmaps"),
3226         OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
3227                     "sample selected machine registers on interrupt,"
3228                     " use '-I?' to list register names", parse_intr_regs),
3229         OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register",
3230                     "sample selected machine registers on interrupt,"
3231                     " use '--user-regs=?' to list register names", parse_user_regs),
3232         OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
3233                     "Record running/enabled time of read (:S) events"),
3234         OPT_CALLBACK('k', "clockid", &record.opts,
3235         "clockid", "clockid to use for events, see clock_gettime()",
3236         parse_clockid),
3237         OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
3238                           "opts", "AUX area tracing Snapshot Mode", ""),
3239         OPT_STRING_OPTARG(0, "aux-sample", &record.opts.auxtrace_sample_opts,
3240                           "opts", "sample AUX area", ""),
3241         OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout,
3242                         "per thread proc mmap processing timeout in ms"),
3243         OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces,
3244                     "Record namespaces events"),
3245         OPT_BOOLEAN(0, "all-cgroups", &record.opts.record_cgroup,
3246                     "Record cgroup events"),
3247         OPT_BOOLEAN_SET(0, "switch-events", &record.opts.record_switch_events,
3248                         &record.opts.record_switch_events_set,
3249                         "Record context switch events"),
3250         OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
3251                          "Configure all used events to run in kernel space.",
3252                          PARSE_OPT_EXCLUSIVE),
3253         OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user,
3254                          "Configure all used events to run in user space.",
3255                          PARSE_OPT_EXCLUSIVE),
3256         OPT_BOOLEAN(0, "kernel-callchains", &record.opts.kernel_callchains,
3257                     "collect kernel callchains"),
3258         OPT_BOOLEAN(0, "user-callchains", &record.opts.user_callchains,
3259                     "collect user callchains"),
3260         OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path",
3261                    "clang binary to use for compiling BPF scriptlets"),
3262         OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options",
3263                    "options passed to clang when compiling BPF scriptlets"),
3264         OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
3265                    "file", "vmlinux pathname"),
3266         OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
3267                     "Record build-id of all DSOs regardless of hits"),
3268         OPT_BOOLEAN(0, "buildid-mmap", &record.buildid_mmap,
3269                     "Record build-id in map events"),
3270         OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
3271                     "append timestamp to output filename"),
3272         OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary,
3273                     "Record timestamp boundary (time of first/last samples)"),
3274         OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str,
3275                           &record.switch_output.set, "signal or size[BKMG] or time[smhd]",
3276                           "Switch output when receiving SIGUSR2 (signal) or cross a size or time threshold",
3277                           "signal"),
3278         OPT_CALLBACK_SET(0, "switch-output-event", &record.sb_evlist, &record.switch_output_event_set, "switch output event",
3279                          "switch output event selector. use 'perf list' to list available events",
3280                          parse_events_option_new_evlist),
3281         OPT_INTEGER(0, "switch-max-files", &record.switch_output.num_files,
3282                    "Limit the number of generated switch-output files"),
3283         OPT_BOOLEAN(0, "dry-run", &dry_run,
3284                     "Parse options then exit"),
3285 #ifdef HAVE_AIO_SUPPORT
3286         OPT_CALLBACK_OPTARG(0, "aio", &record.opts,
3287                      &nr_cblocks_default, "n", "Use <n> control blocks in asynchronous trace writing mode (default: 1, max: 4)",
3288                      record__aio_parse),
3289 #endif
3290         OPT_CALLBACK(0, "affinity", &record.opts, "node|cpu",
3291                      "Set affinity mask of trace reading thread to NUMA node cpu mask or cpu of processed mmap buffer",
3292                      record__parse_affinity),
3293 #ifdef HAVE_ZSTD_SUPPORT
3294         OPT_CALLBACK_OPTARG('z', "compression-level", &record.opts, &comp_level_default, "n",
3295                             "Compress records using specified level (default: 1 - fastest compression, 22 - greatest compression)",
3296                             record__parse_comp_level),
3297 #endif
3298         OPT_CALLBACK(0, "max-size", &record.output_max_size,
3299                      "size", "Limit the maximum size of the output file", parse_output_max_size),
3300         OPT_UINTEGER(0, "num-thread-synthesize",
3301                      &record.opts.nr_threads_synthesize,
3302                      "number of threads to run for event synthesis"),
3303 #ifdef HAVE_LIBPFM
3304         OPT_CALLBACK(0, "pfm-events", &record.evlist, "event",
3305                 "libpfm4 event selector. use 'perf list' to list available events",
3306                 parse_libpfm_events_option),
3307 #endif
3308         OPT_CALLBACK(0, "control", &record.opts, "fd:ctl-fd[,ack-fd] or fifo:ctl-fifo[,ack-fifo]",
3309                      "Listen on ctl-fd descriptor for command to control measurement ('enable': enable events, 'disable': disable events,\n"
3310                      "\t\t\t  'snapshot': AUX area tracing snapshot).\n"
3311                      "\t\t\t  Optionally send control command completion ('ack\\n') to ack-fd descriptor.\n"
3312                      "\t\t\t  Alternatively, ctl-fifo / ack-fifo will be opened and used as ctl-fd / ack-fd.",
3313                       parse_control_option),
3314         OPT_CALLBACK(0, "synth", &record.opts, "no|all|task|mmap|cgroup",
3315                      "Fine-tune event synthesis: default=all", parse_record_synth_option),
3316         OPT_STRING_OPTARG_SET(0, "debuginfod", &record.debuginfod.urls,
3317                           &record.debuginfod.set, "debuginfod urls",
3318                           "Enable debuginfod data retrieval from DEBUGINFOD_URLS or specified urls",
3319                           "system"),
3320         OPT_CALLBACK_OPTARG(0, "threads", &record.opts, NULL, "spec",
3321                             "write collected trace data into several data files using parallel threads",
3322                             record__parse_threads),
3323         OPT_END()
3324 };
3325
3326 struct option *record_options = __record_options;
3327
3328 static void record__mmap_cpu_mask_init(struct mmap_cpu_mask *mask, struct perf_cpu_map *cpus)
3329 {
3330         int c;
3331
3332         for (c = 0; c < cpus->nr; c++)
3333                 set_bit(cpus->map[c].cpu, mask->bits);
3334 }
3335
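     /*
      * Initialize a CPU mask from a CPU list string (e.g. "0-3,7"), as parsed
      * by perf_cpu_map__new().
      */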
3336 static int record__mmap_cpu_mask_init_spec(struct mmap_cpu_mask *mask, const char *mask_spec)
3337 {
3338         struct perf_cpu_map *cpus;
3339
3340         cpus = perf_cpu_map__new(mask_spec);
3341         if (!cpus)
3342                 return -ENOMEM;
3343
3344         bitmap_zero(mask->bits, mask->nbits);
3345         record__mmap_cpu_mask_init(mask, cpus);
3346         perf_cpu_map__put(cpus);
3347
3348         return 0;
3349 }
3350
3351 static void record__free_thread_masks(struct record *rec, int nr_threads)
3352 {
3353         int t;
3354
3355         if (rec->thread_masks)
3356                 for (t = 0; t < nr_threads; t++)
3357                         record__thread_mask_free(&rec->thread_masks[t]);
3358
3359         zfree(&rec->thread_masks);
3360 }
3361
3362 static int record__alloc_thread_masks(struct record *rec, int nr_threads, int nr_bits)
3363 {
3364         int t, ret;
3365
3366         rec->thread_masks = zalloc(nr_threads * sizeof(*(rec->thread_masks)));
3367         if (!rec->thread_masks) {
3368                 pr_err("Failed to allocate thread masks\n");
3369                 return -ENOMEM;
3370         }
3371
3372         for (t = 0; t < nr_threads; t++) {
3373                 ret = record__thread_mask_alloc(&rec->thread_masks[t], nr_bits);
3374                 if (ret) {
3375                         pr_err("Failed to allocate thread masks[%d]\n", t);
3376                         goto out_free;
3377                 }
3378         }
3379
3380         return 0;
3381
3382 out_free:
3383         record__free_thread_masks(rec, nr_threads);
3384
3385         return ret;
3386 }
3387
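     /*
      * Default --threads layout: one recording thread per monitored CPU, with
      * both the maps and the affinity mask of each thread covering just that
      * CPU.
      */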
3388 static int record__init_thread_cpu_masks(struct record *rec, struct perf_cpu_map *cpus)
3389 {
3390         int t, ret, nr_cpus = perf_cpu_map__nr(cpus);
3391
3392         ret = record__alloc_thread_masks(rec, nr_cpus, cpu__max_cpu().cpu);
3393         if (ret)
3394                 return ret;
3395
3396         rec->nr_threads = nr_cpus;
3397         pr_debug("nr_threads: %d\n", rec->nr_threads);
3398
3399         for (t = 0; t < rec->nr_threads; t++) {
3400                 set_bit(cpus->map[t].cpu, rec->thread_masks[t].maps.bits);
3401                 set_bit(cpus->map[t].cpu, rec->thread_masks[t].affinity.bits);
3402                 if (verbose) {
3403                         pr_debug("thread_masks[%d]: ", t);
3404                         mmap_cpu_mask__scnprintf(&rec->thread_masks[t].maps, "maps");
3405                         pr_debug("thread_masks[%d]: ", t);
3406                         mmap_cpu_mask__scnprintf(&rec->thread_masks[t].affinity, "affinity");
3407                 }
3408         }
3409
3410         return 0;
3411 }
3412
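     /*
      * Build per-thread maps/affinity masks from parallel arrays of CPU list
      * specs. Each spec must resolve to a non-empty subset of the monitored
      * CPUs and must not overlap any previously accepted mask (tracked in
      * full_mask); accepted masks are appended to rec->thread_masks.
      */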
3413 static int record__init_thread_masks_spec(struct record *rec, struct perf_cpu_map *cpus,
3414                                           const char **maps_spec, const char **affinity_spec,
3415                                           u32 nr_spec)
3416 {
3417         u32 s;
3418         int ret = 0, t = 0;
3419         struct mmap_cpu_mask cpus_mask;
3420         struct thread_mask thread_mask, full_mask, *thread_masks;
3421
3422         ret = record__mmap_cpu_mask_alloc(&cpus_mask, cpu__max_cpu().cpu);
3423         if (ret) {
3424                 pr_err("Failed to allocate CPUs mask\n");
3425                 return ret;
3426         }
3427         record__mmap_cpu_mask_init(&cpus_mask, cpus);
3428
3429         ret = record__thread_mask_alloc(&full_mask, cpu__max_cpu().cpu);
3430         if (ret) {
3431                 pr_err("Failed to allocate full mask\n");
3432                 goto out_free_cpu_mask;
3433         }
3434
3435         ret = record__thread_mask_alloc(&thread_mask, cpu__max_cpu().cpu);
3436         if (ret) {
3437                 pr_err("Failed to allocate thread mask\n");
3438                 goto out_free_full_and_cpu_masks;
3439         }
3440
3441         for (s = 0; s < nr_spec; s++) {
3442                 ret = record__mmap_cpu_mask_init_spec(&thread_mask.maps, maps_spec[s]);
3443                 if (ret) {
3444                         pr_err("Failed to initialize maps thread mask\n");
3445                         goto out_free;
3446                 }
3447                 ret = record__mmap_cpu_mask_init_spec(&thread_mask.affinity, affinity_spec[s]);
3448                 if (ret) {
3449                         pr_err("Failed to initialize affinity thread mask\n");
3450                         goto out_free;
3451                 }
3452
3453                 /* ignore invalid CPUs but do not allow empty masks */
3454                 if (!bitmap_and(thread_mask.maps.bits, thread_mask.maps.bits,
3455                                 cpus_mask.bits, thread_mask.maps.nbits)) {
3456                         pr_err("Empty maps mask: %s\n", maps_spec[s]);
3457                         ret = -EINVAL;
3458                         goto out_free;
3459                 }
3460                 if (!bitmap_and(thread_mask.affinity.bits, thread_mask.affinity.bits,
3461                                 cpus_mask.bits, thread_mask.affinity.nbits)) {
3462                         pr_err("Empty affinity mask: %s\n", affinity_spec[s]);
3463                         ret = -EINVAL;
3464                         goto out_free;
3465                 }
3466
3467                 /* do not allow intersection with other masks (full_mask) */
3468                 if (bitmap_intersects(thread_mask.maps.bits, full_mask.maps.bits,
3469                                       thread_mask.maps.nbits)) {
3470                         pr_err("Intersecting maps mask: %s\n", maps_spec[s]);
3471                         ret = -EINVAL;
3472                         goto out_free;
3473                 }
3474                 if (bitmap_intersects(thread_mask.affinity.bits, full_mask.affinity.bits,
3475                                       thread_mask.affinity.nbits)) {
3476                         pr_err("Intersecting affinity mask: %s\n", affinity_spec[s]);
3477                         ret = -EINVAL;
3478                         goto out_free;
3479                 }
3480
3481                 bitmap_or(full_mask.maps.bits, full_mask.maps.bits,
3482                           thread_mask.maps.bits, full_mask.maps.nbits);
3483                 bitmap_or(full_mask.affinity.bits, full_mask.affinity.bits,
3484                           thread_mask.affinity.bits, full_mask.affinity.nbits);
3485
3486                 thread_masks = realloc(rec->thread_masks, (t + 1) * sizeof(struct thread_mask));
3487                 if (!thread_masks) {
3488                         pr_err("Failed to reallocate thread masks\n");
3489                         ret = -ENOMEM;
3490                         goto out_free;
3491                 }
3492                 rec->thread_masks = thread_masks;
3493                 rec->thread_masks[t] = thread_mask;
3494                 if (verbose) {
3495                         pr_debug("thread_masks[%d]: ", t);
3496                         mmap_cpu_mask__scnprintf(&rec->thread_masks[t].maps, "maps");
3497                         pr_debug("thread_masks[%d]: ", t);
3498                         mmap_cpu_mask__scnprintf(&rec->thread_masks[t].affinity, "affinity");
3499                 }
3500                 t++;
3501                 ret = record__thread_mask_alloc(&thread_mask, cpu__max_cpu().cpu);
3502                 if (ret) {
3503                         pr_err("Failed to allocate thread mask\n");
3504                         goto out_free_full_and_cpu_masks;
3505                 }
3506         }
3507         rec->nr_threads = t;
3508         pr_debug("nr_threads: %d\n", rec->nr_threads);
3509         if (!rec->nr_threads)
3510                 ret = -EINVAL;
3511
3512 out_free:
3513         record__thread_mask_free(&thread_mask);
3514 out_free_full_and_cpu_masks:
3515         record__thread_mask_free(&full_mask);
3516 out_free_cpu_mask:
3517         record__mmap_cpu_mask_free(&cpus_mask);
3518
3519         return ret;
3520 }
3521
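     /*
      * Group recording threads by core: feed the per-core CPU lists from the
      * CPU topology to record__init_thread_masks_spec(). The package variant
      * below does the same with the per-package CPU lists.
      */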
3522 static int record__init_thread_core_masks(struct record *rec, struct perf_cpu_map *cpus)
3523 {
3524         int ret;
3525         struct cpu_topology *topo;
3526
3527         topo = cpu_topology__new();
3528         if (!topo) {
3529                 pr_err("Failed to allocate CPU topology\n");
3530                 return -ENOMEM;
3531         }
3532
3533         ret = record__init_thread_masks_spec(rec, cpus, topo->core_cpus_list,
3534                                              topo->core_cpus_list, topo->core_cpus_lists);
3535         cpu_topology__delete(topo);
3536
3537         return ret;
3538 }
3539
3540 static int record__init_thread_package_masks(struct record *rec, struct perf_cpu_map *cpus)
3541 {
3542         int ret;
3543         struct cpu_topology *topo;
3544
3545         topo = cpu_topology__new();
3546         if (!topo) {
3547                 pr_err("Failed to allocate CPU topology\n");
3548                 return -ENOMEM;
3549         }
3550
3551         ret = record__init_thread_masks_spec(rec, cpus, topo->package_cpus_list,
3552                                              topo->package_cpus_list, topo->package_cpus_lists);
3553         cpu_topology__delete(topo);
3554
3555         return ret;
3556 }
3557
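     /*
      * Group recording threads by NUMA node, using each node's CPU list as
      * both the maps and the affinity spec.
      */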
3558 static int record__init_thread_numa_masks(struct record *rec, struct perf_cpu_map *cpus)
3559 {
3560         u32 s;
3561         int ret;
3562         const char **spec;
3563         struct numa_topology *topo;
3564
3565         topo = numa_topology__new();
3566         if (!topo) {
3567                 pr_err("Failed to allocate NUMA topology\n");
3568                 return -ENOMEM;
3569         }
3570
3571         spec = zalloc(topo->nr * sizeof(char *));
3572         if (!spec) {
3573                 pr_err("Failed to allocate NUMA spec\n");
3574                 ret = -ENOMEM;
3575                 goto out_delete_topo;
3576         }
3577         for (s = 0; s < topo->nr; s++)
3578                 spec[s] = topo->nodes[s].cpus;
3579
3580         ret = record__init_thread_masks_spec(rec, cpus, spec, spec, topo->nr);
3581
3582         zfree(&spec);
3583
3584 out_delete_topo:
3585         numa_topology__delete(topo);
3586
3587         return ret;
3588 }
3589
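     /*
      * Parse the user-provided --threads spec: colon-separated entries of the
      * form <maps_cpus>/<affinity_cpus>, where each side is a CPU list. For
      * instance (illustrative only), "0-3/0-3:4-7/4-7" would create two
      * recording threads, each mmap'ing and bound to its own four CPUs.
      */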
3590 static int record__init_thread_user_masks(struct record *rec, struct perf_cpu_map *cpus)
3591 {
3592         int t, ret;
3593         u32 s, nr_spec = 0;
3594         char **maps_spec = NULL, **affinity_spec = NULL, **tmp_spec;
3595         char *user_spec, *spec, *spec_ptr, *mask, *mask_ptr, *dup_mask = NULL;
3596
3597         for (t = 0, user_spec = (char *)rec->opts.threads_user_spec; ; t++, user_spec = NULL) {
3598                 spec = strtok_r(user_spec, ":", &spec_ptr);
3599                 if (spec == NULL)
3600                         break;
3601                 pr_debug2("threads_spec[%d]: %s\n", t, spec);
3602                 mask = strtok_r(spec, "/", &mask_ptr);
3603                 if (mask == NULL)
3604                         break;
3605                 pr_debug2("  maps mask: %s\n", mask);
3606                 tmp_spec = realloc(maps_spec, (nr_spec + 1) * sizeof(char *));
3607                 if (!tmp_spec) {
3608                         pr_err("Failed to reallocate maps spec\n");
3609                         ret = -ENOMEM;
3610                         goto out_free;
3611                 }
3612                 maps_spec = tmp_spec;
3613                 maps_spec[nr_spec] = dup_mask = strdup(mask);
3614                 if (!maps_spec[nr_spec]) {
3615                         pr_err("Failed to allocate maps spec[%d]\n", nr_spec);
3616                         ret = -ENOMEM;
3617                         goto out_free;
3618                 }
3619                 mask = strtok_r(NULL, "/", &mask_ptr);
3620                 if (mask == NULL) {
3621                         pr_err("Invalid thread maps or affinity specs\n");
3622                         ret = -EINVAL;
3623                         goto out_free;
3624                 }
3625                 pr_debug2("  affinity mask: %s\n", mask);
3626                 tmp_spec = realloc(affinity_spec, (nr_spec + 1) * sizeof(char *));
3627                 if (!tmp_spec) {
3628                         pr_err("Failed to reallocate affinity spec\n");
3629                         ret = -ENOMEM;
3630                         goto out_free;
3631                 }
3632                 affinity_spec = tmp_spec;
3633                 affinity_spec[nr_spec] = strdup(mask);
3634                 if (!affinity_spec[nr_spec]) {
3635                         pr_err("Failed to allocate affinity spec[%d]\n", nr_spec);
3636                         ret = -ENOMEM;
3637                         goto out_free;
3638                 }
3639                 dup_mask = NULL;
3640                 nr_spec++;
3641         }
3642
3643         ret = record__init_thread_masks_spec(rec, cpus, (const char **)maps_spec,
3644                                              (const char **)affinity_spec, nr_spec);
3645
3646 out_free:
3647         free(dup_mask);
3648         for (s = 0; s < nr_spec; s++) {
3649                 if (maps_spec)
3650                         free(maps_spec[s]);
3651                 if (affinity_spec)
3652                         free(affinity_spec[s]);
3653         }
3654         free(affinity_spec);
3655         free(maps_spec);
3656
3657         return ret;
3658 }
3659
3660 static int record__init_thread_default_masks(struct record *rec, struct perf_cpu_map *cpus)
3661 {
3662         int ret;
3663
3664         ret = record__alloc_thread_masks(rec, 1, cpu__max_cpu().cpu);
3665         if (ret)
3666                 return ret;
3667
3668         record__mmap_cpu_mask_init(&rec->thread_masks->maps, cpus);
3669
3670         rec->nr_threads = 1;
3671
3672         return 0;
3673 }
3674
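     /*
      * Set up the thread masks according to --threads: without parallel
      * threads a single mask covering all monitored CPUs is used, otherwise
      * dispatch on the requested grouping (cpu, core, package, numa or a
      * user-provided spec).
      */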
3675 static int record__init_thread_masks(struct record *rec)
3676 {
3677         int ret = 0;
3678         struct perf_cpu_map *cpus = rec->evlist->core.user_requested_cpus;
3679
3680         if (!record__threads_enabled(rec))
3681                 return record__init_thread_default_masks(rec, cpus);
3682
3683         switch (rec->opts.threads_spec) {
3684         case THREAD_SPEC__CPU:
3685                 ret = record__init_thread_cpu_masks(rec, cpus);
3686                 break;
3687         case THREAD_SPEC__CORE:
3688                 ret = record__init_thread_core_masks(rec, cpus);
3689                 break;
3690         case THREAD_SPEC__PACKAGE:
3691                 ret = record__init_thread_package_masks(rec, cpus);
3692                 break;
3693         case THREAD_SPEC__NUMA:
3694                 ret = record__init_thread_numa_masks(rec, cpus);
3695                 break;
3696         case THREAD_SPEC__USER:
3697                 ret = record__init_thread_user_masks(rec, cpus);
3698                 break;
3699         default:
3700                 break;
3701         }
3702
3703         return ret;
3704 }
3705
3706 int cmd_record(int argc, const char **argv)
3707 {
3708         int err;
3709         struct record *rec = &record;
3710         char errbuf[BUFSIZ];
3711
3712         setlocale(LC_ALL, "");
3713
3714 #ifndef HAVE_LIBBPF_SUPPORT
3715 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c)
3716         set_nobuild('\0', "clang-path", true);
3717         set_nobuild('\0', "clang-opt", true);
3718 # undef set_nobuild
3719 #endif
3720
3721 #ifndef HAVE_BPF_PROLOGUE
3722 # if !defined (HAVE_DWARF_SUPPORT)
3723 #  define REASON  "NO_DWARF=1"
3724 # elif !defined (HAVE_LIBBPF_SUPPORT)
3725 #  define REASON  "NO_LIBBPF=1"
3726 # else
3727 #  define REASON  "this architecture doesn't support BPF prologue"
3728 # endif
3729 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c)
3730         set_nobuild('\0', "vmlinux", true);
3731 # undef set_nobuild
3732 # undef REASON
3733 #endif
3734
3735         rec->opts.affinity = PERF_AFFINITY_SYS;
3736
3737         rec->evlist = evlist__new();
3738         if (rec->evlist == NULL)
3739                 return -ENOMEM;
3740
3741         err = perf_config(perf_record_config, rec);
3742         if (err)
3743                 return err;
3744
3745         argc = parse_options(argc, argv, record_options, record_usage,
3746                             PARSE_OPT_STOP_AT_NON_OPTION);
3747         if (quiet)
3748                 perf_quiet_option();
3749
3750         err = symbol__validate_sym_arguments();
3751         if (err)
3752                 return err;
3753
3754         perf_debuginfod_setup(&record.debuginfod);
3755
3756         /* Make system wide (-a) the default target. */
3757         if (!argc && target__none(&rec->opts.target))
3758                 rec->opts.target.system_wide = true;
3759
3760         if (nr_cgroups && !rec->opts.target.system_wide) {
3761                 usage_with_options_msg(record_usage, record_options,
3762                         "cgroup monitoring only available in system-wide mode");
3763
3764         }
3765
3766         if (rec->buildid_mmap) {
3767                 if (!perf_can_record_build_id()) {
3768                         pr_err("Failed: no support for recording build id in mmap events, update your kernel.\n");
3769                         err = -EINVAL;
3770                         goto out_opts;
3771                 }
3772                 pr_debug("Enabling build id in mmap2 events.\n");
3773                 /* Enable mmap build id synthesizing. */
3774                 symbol_conf.buildid_mmap2 = true;
3775                 /* Enable perf_event_attr::build_id bit. */
3776                 rec->opts.build_id = true;
3777                 /* Disable build id cache. */
3778                 rec->no_buildid = true;
3779         }
3780
3781         if (rec->opts.record_cgroup && !perf_can_record_cgroup()) {
3782                 pr_err("Kernel has no cgroup sampling support.\n");
3783                 err = -EINVAL;
3784                 goto out_opts;
3785         }
3786
3787         if (rec->opts.kcore || record__threads_enabled(rec))
3788                 rec->data.is_dir = true;
3789
3790         if (record__threads_enabled(rec)) {
3791                 if (rec->opts.affinity != PERF_AFFINITY_SYS) {
3792                         pr_err("--affinity option is mutually exclusive with parallel streaming mode.\n");
                             err = -EINVAL;
3793                         goto out_opts;
3794                 }
3795                 if (record__aio_enabled(rec)) {
3796                         pr_err("Asynchronous streaming mode (--aio) is mutually exclusive with parallel streaming mode.\n");
                             err = -EINVAL;
3797                         goto out_opts;
3798                 }
3799         }
3800
3801         if (rec->opts.comp_level != 0) {
3802                 pr_debug("Compression enabled, disabling build id collection at the end of the session.\n");
3803                 rec->no_buildid = true;
3804         }
3805
3806         if (rec->opts.record_switch_events &&
3807             !perf_can_record_switch_events()) {
3808                 ui__error("kernel does not support recording context switch events\n");
3809                 parse_options_usage(record_usage, record_options, "switch-events", 0);
3810                 err = -EINVAL;
3811                 goto out_opts;
3812         }
3813
3814         if (switch_output_setup(rec)) {
3815                 parse_options_usage(record_usage, record_options, "switch-output", 0);
3816                 err = -EINVAL;
3817                 goto out_opts;
3818         }
3819
3820         if (rec->switch_output.time) {
3821                 signal(SIGALRM, alarm_sig_handler);
3822                 alarm(rec->switch_output.time);
3823         }
3824
3825         if (rec->switch_output.num_files) {
3826                 rec->switch_output.filenames = calloc(rec->switch_output.num_files,
3827                                                       sizeof(char *));
3828                 if (!rec->switch_output.filenames) {
3829                         err = -ENOMEM;
3830                         goto out_opts;
3831                 }
3832         }
3833
3834         if (rec->timestamp_filename && record__threads_enabled(rec)) {
3835                 rec->timestamp_filename = false;
3836                 pr_warning("WARNING: --timestamp-filename option is not available in parallel streaming mode.\n");
3837         }
3838
3839         /*
3840          * Allow aliases to facilitate the lookup of symbols for address
3841          * filters. Refer to auxtrace_parse_filters().
3842          */
3843         symbol_conf.allow_aliases = true;
3844
3845         symbol__init(NULL);
3846
3847         err = record__auxtrace_init(rec);
3848         if (err)
3849                 goto out;
3850
3851         if (dry_run)
3852                 goto out;
3853
3854         err = bpf__setup_stdout(rec->evlist);
3855         if (err) {
3856                 bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf));
3857                 pr_err("ERROR: Setup BPF stdout failed: %s\n",
3858                          errbuf);
3859                 goto out;
3860         }
3861
3862         err = -ENOMEM;
3863
3864         if (rec->no_buildid_cache || rec->no_buildid) {
3865                 disable_buildid_cache();
3866         } else if (rec->switch_output.enabled) {
3867                 /*
3868                  * In 'perf record --switch-output', disable buildid
3869                  * generation by default to reduce data file switching
3870                  * overhead. Still generate buildids if they are required
3871                  * explicitly using
3872                  *
3873                  *  perf record --switch-output --no-no-buildid \
3874                  *              --no-no-buildid-cache
3875                  *
3876                  * The following code is equivalent to:
3877                  *
3878                  * if ((rec->no_buildid || !rec->no_buildid_set) &&
3879                  *     (rec->no_buildid_cache || !rec->no_buildid_cache_set))
3880                  *         disable_buildid_cache();
3881                  */
3882                 bool disable = true;
3883
3884                 if (rec->no_buildid_set && !rec->no_buildid)
3885                         disable = false;
3886                 if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
3887                         disable = false;
3888                 if (disable) {
3889                         rec->no_buildid = true;
3890                         rec->no_buildid_cache = true;
3891                         disable_buildid_cache();
3892                 }
3893         }
3894
3895         if (record.opts.overwrite)
3896                 record.opts.tail_synthesize = true;
3897
3898         if (rec->evlist->core.nr_entries == 0) {
3899                 if (perf_pmu__has_hybrid()) {
3900                         err = evlist__add_default_hybrid(rec->evlist,
3901                                                          !record.opts.no_samples);
3902                 } else {
3903                         err = __evlist__add_default(rec->evlist,
3904                                                     !record.opts.no_samples);
3905                 }
3906
3907                 if (err < 0) {
3908                         pr_err("Not enough memory for event selector list\n");
3909                         goto out;
3910                 }
3911         }
3912
3913         if (rec->opts.target.tid && !rec->opts.no_inherit_set)
3914                 rec->opts.no_inherit = true;
3915
3916         err = target__validate(&rec->opts.target);
3917         if (err) {
3918                 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
3919                 ui__warning("%s\n", errbuf);
3920         }
3921
3922         err = target__parse_uid(&rec->opts.target);
3923         if (err) {
3924                 int saved_errno = errno;
3925
3926                 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
3927                 ui__error("%s", errbuf);
3928
3929                 err = -saved_errno;
3930                 goto out;
3931         }
3932
3933         /* Enable ignoring missing threads when -u/-p option is defined. */
3934         rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX || rec->opts.target.pid;
3935
3936         if (evlist__fix_hybrid_cpus(rec->evlist, rec->opts.target.cpu_list)) {
3937                 pr_err("failed to use cpu list %s\n",
3938                        rec->opts.target.cpu_list);
                     err = -EINVAL;
3939                 goto out;
3940         }
3941
3942         rec->opts.target.hybrid = perf_pmu__has_hybrid();
3943
3944         if (callchain_param.enabled && callchain_param.record_mode == CALLCHAIN_FP)
3945                 arch__add_leaf_frame_record_opts(&rec->opts);
3946
3947         err = -ENOMEM;
3948         if (evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
3949                 usage_with_options(record_usage, record_options);
3950
3951         err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
3952         if (err)
3953                 goto out;
3954
3955         /*
3956          * We take all buildids when the file contains
3957          * AUX area tracing data, because we do not decode the
3958          * trace, as that would take too long.
3959          */
3960         if (rec->opts.full_auxtrace)
3961                 rec->buildid_all = true;
3962
3963         if (rec->opts.text_poke) {
3964                 err = record__config_text_poke(rec->evlist);
3965                 if (err) {
3966                         pr_err("record__config_text_poke failed, error %d\n", err);
3967                         goto out;
3968                 }
3969         }
3970
3971         if (record_opts__config(&rec->opts)) {
3972                 err = -EINVAL;
3973                 goto out;
3974         }
3975
3976         err = record__init_thread_masks(rec);
3977         if (err) {
3978                 pr_err("Failed to initialize parallel data streaming masks\n");
3979                 goto out;
3980         }
3981
3982         if (rec->opts.nr_cblocks > nr_cblocks_max)
3983                 rec->opts.nr_cblocks = nr_cblocks_max;
3984         pr_debug("nr_cblocks: %d\n", rec->opts.nr_cblocks);
3985
3986         pr_debug("affinity: %s\n", affinity_tags[rec->opts.affinity]);
3987         pr_debug("mmap flush: %d\n", rec->opts.mmap_flush);
3988
3989         if (rec->opts.comp_level > comp_level_max)
3990                 rec->opts.comp_level = comp_level_max;
3991         pr_debug("comp level: %d\n", rec->opts.comp_level);
3992
3993         err = __cmd_record(&record, argc, argv);
3994 out:
3995         evlist__delete(rec->evlist);
3996         symbol__exit();
3997         auxtrace_record__free(rec->itr);
3998 out_opts:
3999         record__free_thread_masks(rec, rec->nr_threads);
4000         rec->nr_threads = 0;
4001         evlist__close_control(rec->opts.ctl_fd, rec->opts.ctl_fd_ack, &rec->opts.ctl_fd_close);
4002         return err;
4003 }
4004
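     /*
      * SIGUSR2 handler: take an AUX area tracing snapshot and, when
      * --switch-output=signal is in effect, request a perf.data file switch.
      */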
4005 static void snapshot_sig_handler(int sig __maybe_unused)
4006 {
4007         struct record *rec = &record;
4008
4009         hit_auxtrace_snapshot_trigger(rec);
4010
4011         if (switch_output_signal(rec))
4012                 trigger_hit(&switch_output_trigger);
4013 }
4014
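     /*
      * SIGALRM handler armed by --switch-output=<time>: request a perf.data
      * file switch when the time threshold expires.
      */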
4015 static void alarm_sig_handler(int sig __maybe_unused)
4016 {
4017         struct record *rec = &record;
4018
4019         if (switch_output_time(rec))
4020                 trigger_hit(&switch_output_trigger);
4021 }