perf record: Move side band evlist setup to separate routine
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * builtin-record.c
4  *
5  * Builtin record command: Record the profile of a workload
6  * (or a CPU, or a PID) into the perf.data output file - for
7  * later analysis via perf report.
8  */
9 #include "builtin.h"
10
11 #include "util/build-id.h"
12 #include <subcmd/parse-options.h>
13 #include "util/parse-events.h"
14 #include "util/config.h"
15
16 #include "util/callchain.h"
17 #include "util/cgroup.h"
18 #include "util/header.h"
19 #include "util/event.h"
20 #include "util/evlist.h"
21 #include "util/evsel.h"
22 #include "util/debug.h"
23 #include "util/mmap.h"
24 #include "util/target.h"
25 #include "util/session.h"
26 #include "util/tool.h"
27 #include "util/symbol.h"
28 #include "util/record.h"
29 #include "util/cpumap.h"
30 #include "util/thread_map.h"
31 #include "util/data.h"
32 #include "util/perf_regs.h"
33 #include "util/auxtrace.h"
34 #include "util/tsc.h"
35 #include "util/parse-branch-options.h"
36 #include "util/parse-regs-options.h"
37 #include "util/perf_api_probe.h"
38 #include "util/llvm-utils.h"
39 #include "util/bpf-loader.h"
40 #include "util/trigger.h"
41 #include "util/perf-hooks.h"
42 #include "util/cpu-set-sched.h"
43 #include "util/synthetic-events.h"
44 #include "util/time-utils.h"
45 #include "util/units.h"
46 #include "util/bpf-event.h"
47 #include "util/util.h"
48 #include "asm/bug.h"
49 #include "perf.h"
50
51 #include <errno.h>
52 #include <inttypes.h>
53 #include <locale.h>
54 #include <poll.h>
55 #include <pthread.h>
56 #include <unistd.h>
57 #include <sched.h>
58 #include <signal.h>
59 #include <sys/mman.h>
60 #include <sys/wait.h>
61 #include <sys/types.h>
62 #include <sys/stat.h>
63 #include <fcntl.h>
64 #include <linux/err.h>
65 #include <linux/string.h>
66 #include <linux/time64.h>
67 #include <linux/zalloc.h>
68 #include <linux/bitmap.h>
69
70 struct switch_output {
71         bool             enabled;
72         bool             signal;
73         unsigned long    size;
74         unsigned long    time;
75         const char      *str;
76         bool             set;
77         char             **filenames;
78         int              num_files;
79         int              cur_file;
80 };
81
82 struct record {
83         struct perf_tool        tool;
84         struct record_opts      opts;
85         u64                     bytes_written;
86         struct perf_data        data;
87         struct auxtrace_record  *itr;
88         struct evlist   *evlist;
89         struct perf_session     *session;
90         struct evlist           *sb_evlist;
91         pthread_t               thread_id;
92         int                     realtime_prio;
93         bool                    switch_output_event_set;
94         bool                    no_buildid;
95         bool                    no_buildid_set;
96         bool                    no_buildid_cache;
97         bool                    no_buildid_cache_set;
98         bool                    buildid_all;
99         bool                    timestamp_filename;
100         bool                    timestamp_boundary;
101         struct switch_output    switch_output;
102         unsigned long long      samples;
103         struct mmap_cpu_mask    affinity_mask;
104         unsigned long           output_max_size;        /* = 0: unlimited */
105 };
106
107 static volatile int done;
108
109 static volatile int auxtrace_record__snapshot_started;
110 static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
111 static DEFINE_TRIGGER(switch_output_trigger);
112
113 static const char *affinity_tags[PERF_AFFINITY_MAX] = {
114         "SYS", "NODE", "CPU"
115 };
116
117 static bool switch_output_signal(struct record *rec)
118 {
119         return rec->switch_output.signal &&
120                trigger_is_ready(&switch_output_trigger);
121 }
122
123 static bool switch_output_size(struct record *rec)
124 {
125         return rec->switch_output.size &&
126                trigger_is_ready(&switch_output_trigger) &&
127                (rec->bytes_written >= rec->switch_output.size);
128 }
129
130 static bool switch_output_time(struct record *rec)
131 {
132         return rec->switch_output.time &&
133                trigger_is_ready(&switch_output_trigger);
134 }
135
136 static bool record__output_max_size_exceeded(struct record *rec)
137 {
138         return rec->output_max_size &&
139                (rec->bytes_written >= rec->output_max_size);
140 }
141
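/*
 * Write a chunk of bytes to the perf.data file and account it in
 * rec->bytes_written. Stops the session once the --max-size limit is
 * exceeded and arms the switch-output trigger when the size threshold
 * is reached.
 */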
142 static int record__write(struct record *rec, struct mmap *map __maybe_unused,
143                          void *bf, size_t size)
144 {
145         struct perf_data_file *file = &rec->session->data->file;
146
147         if (perf_data_file__write(file, bf, size) < 0) {
148                 pr_err("failed to write perf data, error: %m\n");
149                 return -1;
150         }
151
152         rec->bytes_written += size;
153
154         if (record__output_max_size_exceeded(rec) && !done) {
155                 fprintf(stderr, "[ perf record: perf size limit reached (%" PRIu64 " KB),"
156                                 " stopping session ]\n",
157                                 rec->bytes_written >> 10);
158                 done = 1;
159         }
160
161         if (switch_output_size(rec))
162                 trigger_hit(&switch_output_trigger);
163
164         return 0;
165 }
166
167 static int record__aio_enabled(struct record *rec);
168 static int record__comp_enabled(struct record *rec);
169 static size_t zstd_compress(struct perf_session *session, void *dst, size_t dst_size,
170                             void *src, size_t src_size);
171
172 #ifdef HAVE_AIO_SUPPORT
173 static int record__aio_write(struct aiocb *cblock, int trace_fd,
174                 void *buf, size_t size, off_t off)
175 {
176         int rc;
177
178         cblock->aio_fildes = trace_fd;
179         cblock->aio_buf    = buf;
180         cblock->aio_nbytes = size;
181         cblock->aio_offset = off;
182         cblock->aio_sigevent.sigev_notify = SIGEV_NONE;
183
184         do {
185                 rc = aio_write(cblock);
186                 if (rc == 0) {
187                         break;
188                 } else if (errno != EAGAIN) {
189                         cblock->aio_fildes = -1;
190                         pr_err("failed to queue perf data, error: %m\n");
191                         break;
192                 }
193         } while (1);
194
195         return rc;
196 }
197
198 static int record__aio_complete(struct mmap *md, struct aiocb *cblock)
199 {
200         void *rem_buf;
201         off_t rem_off;
202         size_t rem_size;
203         int rc, aio_errno;
204         ssize_t aio_ret, written;
205
206         aio_errno = aio_error(cblock);
207         if (aio_errno == EINPROGRESS)
208                 return 0;
209
210         written = aio_ret = aio_return(cblock);
211         if (aio_ret < 0) {
212                 if (aio_errno != EINTR)
213                         pr_err("failed to write perf data, error: %m\n");
214                 written = 0;
215         }
216
217         rem_size = cblock->aio_nbytes - written;
218
219         if (rem_size == 0) {
220                 cblock->aio_fildes = -1;
221                 /*
222                  * md->refcount is incremented in record__aio_pushfn() for
223                  * every aio write request started in record__aio_push() so
224                  * decrement it because the request is now complete.
225                  */
226                 perf_mmap__put(&md->core);
227                 rc = 1;
228         } else {
229                 /*
230                  * aio write request may require restart with the
231                  * aio write request may require a restart with the
232                  * remainder if the kernel didn't write the whole
233                  * chunk at once.
234                 rem_off = cblock->aio_offset + written;
235                 rem_buf = (void *)(cblock->aio_buf + written);
236                 record__aio_write(cblock, cblock->aio_fildes,
237                                 rem_buf, rem_size, rem_off);
238                 rc = 0;
239         }
240
241         return rc;
242 }
243
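/*
 * Wait for in-flight aio writes on this mmap: with sync_all == false,
 * return the index of the first free/completed control block; with
 * sync_all == true, wait until all outstanding requests complete.
 */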
244 static int record__aio_sync(struct mmap *md, bool sync_all)
245 {
246         struct aiocb **aiocb = md->aio.aiocb;
247         struct aiocb *cblocks = md->aio.cblocks;
248         struct timespec timeout = { 0, 1000 * 1000  * 1 }; /* 1ms */
249         int i, do_suspend;
250
251         do {
252                 do_suspend = 0;
253                 for (i = 0; i < md->aio.nr_cblocks; ++i) {
254                         if (cblocks[i].aio_fildes == -1 || record__aio_complete(md, &cblocks[i])) {
255                                 if (sync_all)
256                                         aiocb[i] = NULL;
257                                 else
258                                         return i;
259                         } else {
260                                 /*
261                  * The started aio write is not complete yet,
262                  * so it has to be waited for before the
263                  * next allocation.
264                                  */
265                                 aiocb[i] = &cblocks[i];
266                                 do_suspend = 1;
267                         }
268                 }
269                 if (!do_suspend)
270                         return -1;
271
272                 while (aio_suspend((const struct aiocb **)aiocb, md->aio.nr_cblocks, &timeout)) {
273                         if (!(errno == EAGAIN || errno == EINTR))
274                                 pr_err("failed to sync perf data, error: %m\n");
275                 }
276         } while (1);
277 }
278
279 struct record_aio {
280         struct record   *rec;
281         void            *data;
282         size_t          size;
283 };
284
285 static int record__aio_pushfn(struct mmap *map, void *to, void *buf, size_t size)
286 {
287         struct record_aio *aio = to;
288
289         /*
290          * map->core.base data pointed to by buf is copied into a free map->aio.data[]
291          * buffer to release space in the kernel buffer as fast as possible, calling
292          * perf_mmap__consume() from the perf_mmap__push() function.
293          *
294          * That lets the kernel proceed with storing more profiling data into
295          * the kernel buffer earlier than other per-cpu kernel buffers are handled.
296          *
297          * Copying can be done in two steps in case the chunk of profiling data
298          * crosses the upper bound of the kernel buffer. In this case we first move
299          * part of the data from map->start till the upper bound and then the remainder
300          * from the beginning of the kernel buffer till the end of the data chunk.
301          */
302
303         if (record__comp_enabled(aio->rec)) {
304                 size = zstd_compress(aio->rec->session, aio->data + aio->size,
305                                      mmap__mmap_len(map) - aio->size,
306                                      buf, size);
307         } else {
308                 memcpy(aio->data + aio->size, buf, size);
309         }
310
311         if (!aio->size) {
312                 /*
313                  * Increment map->refcount to guard the map->aio.data[] buffer
314                  * from premature deallocation, because the map object can be
315                  * released before the aio write request started on the
316                  * map->aio.data[] buffer completes.
317                  *
318                  * perf_mmap__put() is done at record__aio_complete()
319                  * after the started aio request completes, or at record__aio_push()
320                  * if the request failed to start.
321                  */
322                 perf_mmap__get(&map->core);
323         }
324
325         aio->size += size;
326
327         return size;
328 }
329
330 static int record__aio_push(struct record *rec, struct mmap *map, off_t *off)
331 {
332         int ret, idx;
333         int trace_fd = rec->session->data->file.fd;
334         struct record_aio aio = { .rec = rec, .size = 0 };
335
336         /*
337          * Call record__aio_sync() to wait until a map->aio.data[] buffer
338          * becomes available after the previous aio write operation.
339          */
340
341         idx = record__aio_sync(map, false);
342         aio.data = map->aio.data[idx];
343         ret = perf_mmap__push(map, &aio, record__aio_pushfn);
344         if (ret != 0) /* ret > 0 - no data, ret < 0 - error */
345                 return ret;
346
347         rec->samples++;
348         ret = record__aio_write(&(map->aio.cblocks[idx]), trace_fd, aio.data, aio.size, *off);
349         if (!ret) {
350                 *off += aio.size;
351                 rec->bytes_written += aio.size;
352                 if (switch_output_size(rec))
353                         trigger_hit(&switch_output_trigger);
354         } else {
355                 /*
356                  * Decrement the map->refcount incremented in record__aio_pushfn()
357                  * if the record__aio_write() operation failed to start; otherwise
358                  * map->refcount is decremented in record__aio_complete() after the
359                  * aio write operation finishes successfully.
360                  */
361                 perf_mmap__put(&map->core);
362         }
363
364         return ret;
365 }
366
367 static off_t record__aio_get_pos(int trace_fd)
368 {
369         return lseek(trace_fd, 0, SEEK_CUR);
370 }
371
372 static void record__aio_set_pos(int trace_fd, off_t pos)
373 {
374         lseek(trace_fd, pos, SEEK_SET);
375 }
376
377 static void record__aio_mmap_read_sync(struct record *rec)
378 {
379         int i;
380         struct evlist *evlist = rec->evlist;
381         struct mmap *maps = evlist->mmap;
382
383         if (!record__aio_enabled(rec))
384                 return;
385
386         for (i = 0; i < evlist->core.nr_mmaps; i++) {
387                 struct mmap *map = &maps[i];
388
389                 if (map->core.base)
390                         record__aio_sync(map, true);
391         }
392 }
393
394 static int nr_cblocks_default = 1;
395 static int nr_cblocks_max = 4;
396
397 static int record__aio_parse(const struct option *opt,
398                              const char *str,
399                              int unset)
400 {
401         struct record_opts *opts = (struct record_opts *)opt->value;
402
403         if (unset) {
404                 opts->nr_cblocks = 0;
405         } else {
406                 if (str)
407                         opts->nr_cblocks = strtol(str, NULL, 0);
408                 if (!opts->nr_cblocks)
409                         opts->nr_cblocks = nr_cblocks_default;
410         }
411
412         return 0;
413 }
414 #else /* HAVE_AIO_SUPPORT */
415 static int nr_cblocks_max = 0;
416
417 static int record__aio_push(struct record *rec __maybe_unused, struct mmap *map __maybe_unused,
418                             off_t *off __maybe_unused)
419 {
420         return -1;
421 }
422
423 static off_t record__aio_get_pos(int trace_fd __maybe_unused)
424 {
425         return -1;
426 }
427
428 static void record__aio_set_pos(int trace_fd __maybe_unused, off_t pos __maybe_unused)
429 {
430 }
431
432 static void record__aio_mmap_read_sync(struct record *rec __maybe_unused)
433 {
434 }
435 #endif
436
437 static int record__aio_enabled(struct record *rec)
438 {
439         return rec->opts.nr_cblocks > 0;
440 }
441
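/*
 * Parse the --mmap-flush option: accepts a plain number or a B/K/M/G
 * suffixed size, falls back to MMAP_FLUSH_DEFAULT, and caps the value
 * at a quarter of the mmap buffer size.
 */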
442 #define MMAP_FLUSH_DEFAULT 1
443 static int record__mmap_flush_parse(const struct option *opt,
444                                     const char *str,
445                                     int unset)
446 {
447         int flush_max;
448         struct record_opts *opts = (struct record_opts *)opt->value;
449         static struct parse_tag tags[] = {
450                         { .tag  = 'B', .mult = 1       },
451                         { .tag  = 'K', .mult = 1 << 10 },
452                         { .tag  = 'M', .mult = 1 << 20 },
453                         { .tag  = 'G', .mult = 1 << 30 },
454                         { .tag  = 0 },
455         };
456
457         if (unset)
458                 return 0;
459
460         if (str) {
461                 opts->mmap_flush = parse_tag_value(str, tags);
462                 if (opts->mmap_flush == (int)-1)
463                         opts->mmap_flush = strtol(str, NULL, 0);
464         }
465
466         if (!opts->mmap_flush)
467                 opts->mmap_flush = MMAP_FLUSH_DEFAULT;
468
469         flush_max = evlist__mmap_size(opts->mmap_pages);
470         flush_max /= 4;
471         if (opts->mmap_flush > flush_max)
472                 opts->mmap_flush = flush_max;
473
474         return 0;
475 }
476
477 #ifdef HAVE_ZSTD_SUPPORT
478 static unsigned int comp_level_default = 1;
479
480 static int record__parse_comp_level(const struct option *opt, const char *str, int unset)
481 {
482         struct record_opts *opts = opt->value;
483
484         if (unset) {
485                 opts->comp_level = 0;
486         } else {
487                 if (str)
488                         opts->comp_level = strtol(str, NULL, 0);
489                 if (!opts->comp_level)
490                         opts->comp_level = comp_level_default;
491         }
492
493         return 0;
494 }
495 #endif
496 static unsigned int comp_level_max = 22;
497
498 static int record__comp_enabled(struct record *rec)
499 {
500         return rec->opts.comp_level > 0;
501 }
502
503 static int process_synthesized_event(struct perf_tool *tool,
504                                      union perf_event *event,
505                                      struct perf_sample *sample __maybe_unused,
506                                      struct machine *machine __maybe_unused)
507 {
508         struct record *rec = container_of(tool, struct record, tool);
509         return record__write(rec, NULL, event, event->header.size);
510 }
511
512 static int process_locked_synthesized_event(struct perf_tool *tool,
513                                      union perf_event *event,
514                                      struct perf_sample *sample __maybe_unused,
515                                      struct machine *machine __maybe_unused)
516 {
517         static pthread_mutex_t synth_lock = PTHREAD_MUTEX_INITIALIZER;
518         int ret;
519
520         pthread_mutex_lock(&synth_lock);
521         ret = process_synthesized_event(tool, event, sample, machine);
522         pthread_mutex_unlock(&synth_lock);
523         return ret;
524 }
525
526 static int record__pushfn(struct mmap *map, void *to, void *bf, size_t size)
527 {
528         struct record *rec = to;
529
530         if (record__comp_enabled(rec)) {
531                 size = zstd_compress(rec->session, map->data, mmap__mmap_len(map), bf, size);
532                 bf   = map->data;
533         }
534
535         rec->samples++;
536         return record__write(rec, map, bf, size);
537 }
538
539 static volatile int signr = -1;
540 static volatile int child_finished;
541
542 static void sig_handler(int sig)
543 {
544         if (sig == SIGCHLD)
545                 child_finished = 1;
546         else
547                 signr = sig;
548
549         done = 1;
550 }
551
552 static void sigsegv_handler(int sig)
553 {
554         perf_hooks__recover();
555         sighandler_dump_stack(sig);
556 }
557
558 static void record__sig_exit(void)
559 {
560         if (signr == -1)
561                 return;
562
563         signal(signr, SIG_DFL);
564         raise(signr);
565 }
566
567 #ifdef HAVE_AUXTRACE_SUPPORT
568
569 static int record__process_auxtrace(struct perf_tool *tool,
570                                     struct mmap *map,
571                                     union perf_event *event, void *data1,
572                                     size_t len1, void *data2, size_t len2)
573 {
574         struct record *rec = container_of(tool, struct record, tool);
575         struct perf_data *data = &rec->data;
576         size_t padding;
577         u8 pad[8] = {0};
578
579         if (!perf_data__is_pipe(data) && perf_data__is_single_file(data)) {
580                 off_t file_offset;
581                 int fd = perf_data__fd(data);
582                 int err;
583
584                 file_offset = lseek(fd, 0, SEEK_CUR);
585                 if (file_offset == -1)
586                         return -1;
587                 err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
588                                                      event, file_offset);
589                 if (err)
590                         return err;
591         }
592
593         /* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
594         padding = (len1 + len2) & 7;
595         if (padding)
596                 padding = 8 - padding;
597
598         record__write(rec, map, event, event->header.size);
599         record__write(rec, map, data1, len1);
600         if (len2)
601                 record__write(rec, map, data2, len2);
602         record__write(rec, map, &pad, padding);
603
604         return 0;
605 }
606
607 static int record__auxtrace_mmap_read(struct record *rec,
608                                       struct mmap *map)
609 {
610         int ret;
611
612         ret = auxtrace_mmap__read(map, rec->itr, &rec->tool,
613                                   record__process_auxtrace);
614         if (ret < 0)
615                 return ret;
616
617         if (ret)
618                 rec->samples++;
619
620         return 0;
621 }
622
623 static int record__auxtrace_mmap_read_snapshot(struct record *rec,
624                                                struct mmap *map)
625 {
626         int ret;
627
628         ret = auxtrace_mmap__read_snapshot(map, rec->itr, &rec->tool,
629                                            record__process_auxtrace,
630                                            rec->opts.auxtrace_snapshot_size);
631         if (ret < 0)
632                 return ret;
633
634         if (ret)
635                 rec->samples++;
636
637         return 0;
638 }
639
640 static int record__auxtrace_read_snapshot_all(struct record *rec)
641 {
642         int i;
643         int rc = 0;
644
645         for (i = 0; i < rec->evlist->core.nr_mmaps; i++) {
646                 struct mmap *map = &rec->evlist->mmap[i];
647
648                 if (!map->auxtrace_mmap.base)
649                         continue;
650
651                 if (record__auxtrace_mmap_read_snapshot(rec, map) != 0) {
652                         rc = -1;
653                         goto out;
654                 }
655         }
656 out:
657         return rc;
658 }
659
660 static void record__read_auxtrace_snapshot(struct record *rec, bool on_exit)
661 {
662         pr_debug("Recording AUX area tracing snapshot\n");
663         if (record__auxtrace_read_snapshot_all(rec) < 0) {
664                 trigger_error(&auxtrace_snapshot_trigger);
665         } else {
666                 if (auxtrace_record__snapshot_finish(rec->itr, on_exit))
667                         trigger_error(&auxtrace_snapshot_trigger);
668                 else
669                         trigger_ready(&auxtrace_snapshot_trigger);
670         }
671 }
672
673 static int record__auxtrace_snapshot_exit(struct record *rec)
674 {
675         if (trigger_is_error(&auxtrace_snapshot_trigger))
676                 return 0;
677
678         if (!auxtrace_record__snapshot_started &&
679             auxtrace_record__snapshot_start(rec->itr))
680                 return -1;
681
682         record__read_auxtrace_snapshot(rec, true);
683         if (trigger_is_error(&auxtrace_snapshot_trigger))
684                 return -1;
685
686         return 0;
687 }
688
689 static int record__auxtrace_init(struct record *rec)
690 {
691         int err;
692
693         if (!rec->itr) {
694                 rec->itr = auxtrace_record__init(rec->evlist, &err);
695                 if (err)
696                         return err;
697         }
698
699         err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
700                                               rec->opts.auxtrace_snapshot_opts);
701         if (err)
702                 return err;
703
704         err = auxtrace_parse_sample_options(rec->itr, rec->evlist, &rec->opts,
705                                             rec->opts.auxtrace_sample_opts);
706         if (err)
707                 return err;
708
709         return auxtrace_parse_filters(rec->evlist);
710 }
711
712 #else
713
714 static inline
715 int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
716                                struct mmap *map __maybe_unused)
717 {
718         return 0;
719 }
720
721 static inline
722 void record__read_auxtrace_snapshot(struct record *rec __maybe_unused,
723                                     bool on_exit __maybe_unused)
724 {
725 }
726
727 static inline
728 int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
729 {
730         return 0;
731 }
732
733 static inline
734 int record__auxtrace_snapshot_exit(struct record *rec __maybe_unused)
735 {
736         return 0;
737 }
738
739 static int record__auxtrace_init(struct record *rec __maybe_unused)
740 {
741         return 0;
742 }
743
744 #endif
745
746 static bool record__kcore_readable(struct machine *machine)
747 {
748         char kcore[PATH_MAX];
749         int fd;
750
751         scnprintf(kcore, sizeof(kcore), "%s/proc/kcore", machine->root_dir);
752
753         fd = open(kcore, O_RDONLY);
754         if (fd < 0)
755                 return false;
756
757         close(fd);
758
759         return true;
760 }
761
762 static int record__kcore_copy(struct machine *machine, struct perf_data *data)
763 {
764         char from_dir[PATH_MAX];
765         char kcore_dir[PATH_MAX];
766         int ret;
767
768         snprintf(from_dir, sizeof(from_dir), "%s/proc", machine->root_dir);
769
770         ret = perf_data__make_kcore_dir(data, kcore_dir, sizeof(kcore_dir));
771         if (ret)
772                 return ret;
773
774         return kcore_copy(from_dir, kcore_dir);
775 }
776
777 static int record__mmap_evlist(struct record *rec,
778                                struct evlist *evlist)
779 {
780         struct record_opts *opts = &rec->opts;
781         bool auxtrace_overwrite = opts->auxtrace_snapshot_mode ||
782                                   opts->auxtrace_sample_mode;
783         char msg[512];
784
785         if (opts->affinity != PERF_AFFINITY_SYS)
786                 cpu__setup_cpunode_map();
787
788         if (evlist__mmap_ex(evlist, opts->mmap_pages,
789                                  opts->auxtrace_mmap_pages,
790                                  auxtrace_overwrite,
791                                  opts->nr_cblocks, opts->affinity,
792                                  opts->mmap_flush, opts->comp_level) < 0) {
793                 if (errno == EPERM) {
794                         pr_err("Permission error mapping pages.\n"
795                                "Consider increasing "
796                                "/proc/sys/kernel/perf_event_mlock_kb,\n"
797                                "or try again with a smaller value of -m/--mmap_pages.\n"
798                                "(current value: %u,%u)\n",
799                                opts->mmap_pages, opts->auxtrace_mmap_pages);
800                         return -errno;
801                 } else {
802                         pr_err("failed to mmap with %d (%s)\n", errno,
803                                 str_error_r(errno, msg, sizeof(msg)));
804                         if (errno)
805                                 return -errno;
806                         else
807                                 return -EINVAL;
808                 }
809         }
810         return 0;
811 }
812
813 static int record__mmap(struct record *rec)
814 {
815         return record__mmap_evlist(rec, rec->evlist);
816 }
817
818 static int record__open(struct record *rec)
819 {
820         char msg[BUFSIZ];
821         struct evsel *pos;
822         struct evlist *evlist = rec->evlist;
823         struct perf_session *session = rec->session;
824         struct record_opts *opts = &rec->opts;
825         int rc = 0;
826
827         /*
828          * For initial_delay we need to add a dummy event so that we can track
829          * PERF_RECORD_MMAP while we wait for the initial delay to enable the
830          * real events, the ones asked for by the user.
831          */
832         if (opts->initial_delay) {
833                 if (perf_evlist__add_dummy(evlist))
834                         return -ENOMEM;
835
836                 pos = evlist__first(evlist);
837                 pos->tracking = 0;
838                 pos = evlist__last(evlist);
839                 pos->tracking = 1;
840                 pos->core.attr.enable_on_exec = 1;
841         }
842
843         perf_evlist__config(evlist, opts, &callchain_param);
844
845         evlist__for_each_entry(evlist, pos) {
846 try_again:
847                 if (evsel__open(pos, pos->core.cpus, pos->core.threads) < 0) {
848                         if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
849                                 if (verbose > 0)
850                                         ui__warning("%s\n", msg);
851                                 goto try_again;
852                         }
853                         if ((errno == EINVAL || errno == EBADF) &&
854                             pos->leader != pos &&
855                             pos->weak_group) {
856                                 pos = perf_evlist__reset_weak_group(evlist, pos, true);
857                                 goto try_again;
858                         }
859                         rc = -errno;
860                         perf_evsel__open_strerror(pos, &opts->target,
861                                                   errno, msg, sizeof(msg));
862                         ui__error("%s\n", msg);
863                         goto out;
864                 }
865
866                 pos->supported = true;
867         }
868
869         if (symbol_conf.kptr_restrict && !perf_evlist__exclude_kernel(evlist)) {
870                 pr_warning(
871 "WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
872 "check /proc/sys/kernel/kptr_restrict and /proc/sys/kernel/perf_event_paranoid.\n\n"
873 "Samples in kernel functions may not be resolved if a suitable vmlinux\n"
874 "file is not found in the buildid cache or in the vmlinux path.\n\n"
875 "Samples in kernel modules won't be resolved at all.\n\n"
876 "If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
877 "even with a suitable vmlinux or kallsyms file.\n\n");
878         }
879
880         if (perf_evlist__apply_filters(evlist, &pos)) {
881                 pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
882                         pos->filter, perf_evsel__name(pos), errno,
883                         str_error_r(errno, msg, sizeof(msg)));
884                 rc = -1;
885                 goto out;
886         }
887
888         rc = record__mmap(rec);
889         if (rc)
890                 goto out;
891
892         session->evlist = evlist;
893         perf_session__set_id_hdr_size(session);
894 out:
895         return rc;
896 }
897
898 static int process_sample_event(struct perf_tool *tool,
899                                 union perf_event *event,
900                                 struct perf_sample *sample,
901                                 struct evsel *evsel,
902                                 struct machine *machine)
903 {
904         struct record *rec = container_of(tool, struct record, tool);
905
906         if (rec->evlist->first_sample_time == 0)
907                 rec->evlist->first_sample_time = sample->time;
908
909         rec->evlist->last_sample_time = sample->time;
910
911         if (rec->buildid_all)
912                 return 0;
913
914         rec->samples++;
915         return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
916 }
917
918 static int process_buildids(struct record *rec)
919 {
920         struct perf_session *session = rec->session;
921
922         if (perf_data__size(&rec->data) == 0)
923                 return 0;
924
925         /*
926          * During this process, it'll load the kernel map and replace the
927          * dso->long_name with the real pathname it found.  In this case
928          * we prefer the vmlinux path like
929          *   /lib/modules/3.16.4/build/vmlinux
930          *
931          * rather than the build-id path (in the debug directory):
932          *   $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
933          */
934         symbol_conf.ignore_vmlinux_buildid = true;
935
936         /*
937          * If --buildid-all is given, it marks all DSOs regardless of hits,
938          * so there is no need to process samples. But if timestamp_boundary is
939          * enabled, it still needs to walk all samples to get the timestamps of
940          * the first/last samples.
941          */
942         if (rec->buildid_all && !rec->timestamp_boundary)
943                 rec->tool.sample = NULL;
944
945         return perf_session__process_events(session);
946 }
947
948 static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
949 {
950         int err;
951         struct perf_tool *tool = data;
952         /*
953          * For the guest kernel, when processing the record & report subcommands,
954          * we arrange the module mmaps prior to the guest kernel mmap and trigger
955          * a DSO preload, because by default guest module symbols are loaded
956          * from guest kallsyms instead of /lib/modules/XXX/XXX. This avoids
957          * missing symbols when the first address is in a module instead of
958          * in the guest kernel.
959          */
960         err = perf_event__synthesize_modules(tool, process_synthesized_event,
961                                              machine);
962         if (err < 0)
963                 pr_err("Couldn't record guest kernel [%d]'s reference"
964                        " relocation symbol.\n", machine->pid);
965
966         /*
967          * We use _stext for the guest kernel because the guest kernel's
968          * /proc/kallsyms sometimes has no _text.
969          */
970         err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
971                                                  machine);
972         if (err < 0)
973                 pr_err("Couldn't record guest kernel [%d]'s reference"
974                        " relocation symbol.\n", machine->pid);
975 }
976
977 static struct perf_event_header finished_round_event = {
978         .size = sizeof(struct perf_event_header),
979         .type = PERF_RECORD_FINISHED_ROUND,
980 };
981
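/*
 * When an affinity mode other than SYS is used, migrate the recording
 * thread onto the CPU mask of the mmap being read before touching its data.
 */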
982 static void record__adjust_affinity(struct record *rec, struct mmap *map)
983 {
984         if (rec->opts.affinity != PERF_AFFINITY_SYS &&
985             !bitmap_equal(rec->affinity_mask.bits, map->affinity_mask.bits,
986                           rec->affinity_mask.nbits)) {
987                 bitmap_zero(rec->affinity_mask.bits, rec->affinity_mask.nbits);
988                 bitmap_or(rec->affinity_mask.bits, rec->affinity_mask.bits,
989                           map->affinity_mask.bits, rec->affinity_mask.nbits);
990                 sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&rec->affinity_mask),
991                                   (cpu_set_t *)rec->affinity_mask.bits);
992                 if (verbose == 2)
993                         mmap_cpu_mask__scnprintf(&rec->affinity_mask, "thread");
994         }
995 }
996
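/*
 * Callback for zstd_compress_stream_to_records(): initializes a
 * PERF_RECORD_COMPRESSED header when called with a zero increment and
 * grows header.size as compressed payload is appended.
 */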
997 static size_t process_comp_header(void *record, size_t increment)
998 {
999         struct perf_record_compressed *event = record;
1000         size_t size = sizeof(*event);
1001
1002         if (increment) {
1003                 event->header.size += increment;
1004                 return increment;
1005         }
1006
1007         event->header.type = PERF_RECORD_COMPRESSED;
1008         event->header.size = size;
1009
1010         return size;
1011 }
1012
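/*
 * Compress src into dst as one or more PERF_RECORD_COMPRESSED records and
 * account the transferred/compressed byte counts in the session.
 */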
1013 static size_t zstd_compress(struct perf_session *session, void *dst, size_t dst_size,
1014                             void *src, size_t src_size)
1015 {
1016         size_t compressed;
1017         size_t max_record_size = PERF_SAMPLE_MAX_SIZE - sizeof(struct perf_record_compressed) - 1;
1018
1019         compressed = zstd_compress_stream_to_records(&session->zstd_data, dst, dst_size, src, src_size,
1020                                                      max_record_size, process_comp_header);
1021
1022         session->bytes_transferred += src_size;
1023         session->bytes_compressed  += compressed;
1024
1025         return compressed;
1026 }
1027
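/*
 * Drain all mmaps of the given evlist, pushing the (optionally compressed)
 * data out through regular or aio writes, reading AUX area data when not
 * in snapshot/sample mode, and emitting a PERF_RECORD_FINISHED_ROUND event
 * if anything was written.
 */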
1028 static int record__mmap_read_evlist(struct record *rec, struct evlist *evlist,
1029                                     bool overwrite, bool synch)
1030 {
1031         u64 bytes_written = rec->bytes_written;
1032         int i;
1033         int rc = 0;
1034         struct mmap *maps;
1035         int trace_fd = rec->data.file.fd;
1036         off_t off = 0;
1037
1038         if (!evlist)
1039                 return 0;
1040
1041         maps = overwrite ? evlist->overwrite_mmap : evlist->mmap;
1042         if (!maps)
1043                 return 0;
1044
1045         if (overwrite && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
1046                 return 0;
1047
1048         if (record__aio_enabled(rec))
1049                 off = record__aio_get_pos(trace_fd);
1050
1051         for (i = 0; i < evlist->core.nr_mmaps; i++) {
1052                 u64 flush = 0;
1053                 struct mmap *map = &maps[i];
1054
1055                 if (map->core.base) {
1056                         record__adjust_affinity(rec, map);
1057                         if (synch) {
1058                                 flush = map->core.flush;
1059                                 map->core.flush = 1;
1060                         }
1061                         if (!record__aio_enabled(rec)) {
1062                                 if (perf_mmap__push(map, rec, record__pushfn) < 0) {
1063                                         if (synch)
1064                                                 map->core.flush = flush;
1065                                         rc = -1;
1066                                         goto out;
1067                                 }
1068                         } else {
1069                                 if (record__aio_push(rec, map, &off) < 0) {
1070                                         record__aio_set_pos(trace_fd, off);
1071                                         if (synch)
1072                                                 map->core.flush = flush;
1073                                         rc = -1;
1074                                         goto out;
1075                                 }
1076                         }
1077                         if (synch)
1078                                 map->core.flush = flush;
1079                 }
1080
1081                 if (map->auxtrace_mmap.base && !rec->opts.auxtrace_snapshot_mode &&
1082                     !rec->opts.auxtrace_sample_mode &&
1083                     record__auxtrace_mmap_read(rec, map) != 0) {
1084                         rc = -1;
1085                         goto out;
1086                 }
1087         }
1088
1089         if (record__aio_enabled(rec))
1090                 record__aio_set_pos(trace_fd, off);
1091
1092         /*
1093          * Mark the round finished if we wrote
1094          * at least one event.
1095          */
1096         if (bytes_written != rec->bytes_written)
1097                 rc = record__write(rec, NULL, &finished_round_event, sizeof(finished_round_event));
1098
1099         if (overwrite)
1100                 perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
1101 out:
1102         return rc;
1103 }
1104
1105 static int record__mmap_read_all(struct record *rec, bool synch)
1106 {
1107         int err;
1108
1109         err = record__mmap_read_evlist(rec, rec->evlist, false, synch);
1110         if (err)
1111                 return err;
1112
1113         return record__mmap_read_evlist(rec, rec->evlist, true, synch);
1114 }
1115
1116 static void record__init_features(struct record *rec)
1117 {
1118         struct perf_session *session = rec->session;
1119         int feat;
1120
1121         for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
1122                 perf_header__set_feat(&session->header, feat);
1123
1124         if (rec->no_buildid)
1125                 perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
1126
1127         if (!have_tracepoints(&rec->evlist->core.entries))
1128                 perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
1129
1130         if (!rec->opts.branch_stack)
1131                 perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
1132
1133         if (!rec->opts.full_auxtrace)
1134                 perf_header__clear_feat(&session->header, HEADER_AUXTRACE);
1135
1136         if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns))
1137                 perf_header__clear_feat(&session->header, HEADER_CLOCKID);
1138
1139         perf_header__clear_feat(&session->header, HEADER_DIR_FORMAT);
1140         if (!record__comp_enabled(rec))
1141                 perf_header__clear_feat(&session->header, HEADER_COMPRESSED);
1142
1143         perf_header__clear_feat(&session->header, HEADER_STAT);
1144 }
1145
1146 static void
1147 record__finish_output(struct record *rec)
1148 {
1149         struct perf_data *data = &rec->data;
1150         int fd = perf_data__fd(data);
1151
1152         if (data->is_pipe)
1153                 return;
1154
1155         rec->session->header.data_size += rec->bytes_written;
1156         data->file.size = lseek(perf_data__fd(data), 0, SEEK_CUR);
1157
1158         if (!rec->no_buildid) {
1159                 process_buildids(rec);
1160
1161                 if (rec->buildid_all)
1162                         dsos__hit_all(rec->session);
1163         }
1164         perf_session__write_header(rec->session, rec->evlist, fd, true);
1165
1166         return;
1167 }
1168
1169 static int record__synthesize_workload(struct record *rec, bool tail)
1170 {
1171         int err;
1172         struct perf_thread_map *thread_map;
1173
1174         if (rec->opts.tail_synthesize != tail)
1175                 return 0;
1176
1177         thread_map = thread_map__new_by_tid(rec->evlist->workload.pid);
1178         if (thread_map == NULL)
1179                 return -1;
1180
1181         err = perf_event__synthesize_thread_map(&rec->tool, thread_map,
1182                                                  process_synthesized_event,
1183                                                  &rec->session->machines.host,
1184                                                  rec->opts.sample_address);
1185         perf_thread_map__put(thread_map);
1186         return err;
1187 }
1188
1189 static int record__synthesize(struct record *rec, bool tail);
1190
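/*
 * Finish the current output file and switch perf.data to a new
 * timestamped file, recycling old files when --switch-output is used
 * with a maximum number of files.
 */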
1191 static int
1192 record__switch_output(struct record *rec, bool at_exit)
1193 {
1194         struct perf_data *data = &rec->data;
1195         int fd, err;
1196         char *new_filename;
1197
1198         /* Same size as "2015122520103046" */
1199         char timestamp[] = "InvalidTimestamp";
1200
1201         record__aio_mmap_read_sync(rec);
1202
1203         record__synthesize(rec, true);
1204         if (target__none(&rec->opts.target))
1205                 record__synthesize_workload(rec, true);
1206
1207         rec->samples = 0;
1208         record__finish_output(rec);
1209         err = fetch_current_timestamp(timestamp, sizeof(timestamp));
1210         if (err) {
1211                 pr_err("Failed to get current timestamp\n");
1212                 return -EINVAL;
1213         }
1214
1215         fd = perf_data__switch(data, timestamp,
1216                                     rec->session->header.data_offset,
1217                                     at_exit, &new_filename);
1218         if (fd >= 0 && !at_exit) {
1219                 rec->bytes_written = 0;
1220                 rec->session->header.data_size = 0;
1221         }
1222
1223         if (!quiet)
1224                 fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
1225                         data->path, timestamp);
1226
1227         if (rec->switch_output.num_files) {
1228                 int n = rec->switch_output.cur_file + 1;
1229
1230                 if (n >= rec->switch_output.num_files)
1231                         n = 0;
1232                 rec->switch_output.cur_file = n;
1233                 if (rec->switch_output.filenames[n]) {
1234                         remove(rec->switch_output.filenames[n]);
1235                         zfree(&rec->switch_output.filenames[n]);
1236                 }
1237                 rec->switch_output.filenames[n] = new_filename;
1238         } else {
1239                 free(new_filename);
1240         }
1241
1242         /* Output tracking events */
1243         if (!at_exit) {
1244                 record__synthesize(rec, false);
1245
1246                 /*
1247                  * In 'perf record --switch-output' without -a,
1248                  * record__synthesize() in record__switch_output() won't
1249                  * generate tracking events because there's no thread_map
1250                  * in the evlist, which causes the newly created perf.data to not
1251                  * contain map and comm information.
1252                  * Create a fake thread_map and directly call
1253                  * perf_event__synthesize_thread_map() for those events.
1254                  */
1255                 if (target__none(&rec->opts.target))
1256                         record__synthesize_workload(rec, false);
1257         }
1258         return fd;
1259 }
1260
1261 static volatile int workload_exec_errno;
1262
1263 /*
1264  * perf_evlist__prepare_workload will send a SIGUSR1
1265  * if the fork fails, since we asked for it by setting its
1266  * want_signal to true.
1267  */
1268 static void workload_exec_failed_signal(int signo __maybe_unused,
1269                                         siginfo_t *info,
1270                                         void *ucontext __maybe_unused)
1271 {
1272         workload_exec_errno = info->si_value.sival_int;
1273         done = 1;
1274         child_finished = 1;
1275 }
1276
1277 static void snapshot_sig_handler(int sig);
1278 static void alarm_sig_handler(int sig);
1279
1280 static const struct perf_event_mmap_page *
1281 perf_evlist__pick_pc(struct evlist *evlist)
1282 {
1283         if (evlist) {
1284                 if (evlist->mmap && evlist->mmap[0].core.base)
1285                         return evlist->mmap[0].core.base;
1286                 if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].core.base)
1287                         return evlist->overwrite_mmap[0].core.base;
1288         }
1289         return NULL;
1290 }
1291
1292 static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
1293 {
1294         const struct perf_event_mmap_page *pc;
1295
1296         pc = perf_evlist__pick_pc(rec->evlist);
1297         if (pc)
1298                 return pc;
1299         return NULL;
1300 }
1301
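/*
 * Synthesize the non-sample events (attrs, features and tracing data for
 * pipe mode, time conversion, kernel and module mmaps, thread and cpu maps,
 * bpf and cgroup events) that 'perf report' needs to make sense of the
 * samples.
 */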
1302 static int record__synthesize(struct record *rec, bool tail)
1303 {
1304         struct perf_session *session = rec->session;
1305         struct machine *machine = &session->machines.host;
1306         struct perf_data *data = &rec->data;
1307         struct record_opts *opts = &rec->opts;
1308         struct perf_tool *tool = &rec->tool;
1309         int fd = perf_data__fd(data);
1310         int err = 0;
1311         event_op f = process_synthesized_event;
1312
1313         if (rec->opts.tail_synthesize != tail)
1314                 return 0;
1315
1316         if (data->is_pipe) {
1317                 /*
1318                  * We need to synthesize events first, because some
1319                  * features work on top of them (on the report side).
1320                  */
1321                 err = perf_event__synthesize_attrs(tool, rec->evlist,
1322                                                    process_synthesized_event);
1323                 if (err < 0) {
1324                         pr_err("Couldn't synthesize attrs.\n");
1325                         goto out;
1326                 }
1327
1328                 err = perf_event__synthesize_features(tool, session, rec->evlist,
1329                                                       process_synthesized_event);
1330                 if (err < 0) {
1331                         pr_err("Couldn't synthesize features.\n");
1332                         return err;
1333                 }
1334
1335                 if (have_tracepoints(&rec->evlist->core.entries)) {
1336                         /*
1337                          * FIXME err <= 0 here actually means that
1338                          * there were no tracepoints, so it's not really
1339                          * an error, just that we don't need to
1340                          * synthesize anything.  We really have to
1341                          * return this more properly and also
1342                          * propagate errors that are now calling die()
1343                          */
1344                         err = perf_event__synthesize_tracing_data(tool, fd, rec->evlist,
1345                                                                   process_synthesized_event);
1346                         if (err <= 0) {
1347                                 pr_err("Couldn't record tracing data.\n");
1348                                 goto out;
1349                         }
1350                         rec->bytes_written += err;
1351                 }
1352         }
1353
1354         err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
1355                                           process_synthesized_event, machine);
1356         if (err)
1357                 goto out;
1358
1359         /* Synthesize id_index before auxtrace_info */
1360         if (rec->opts.auxtrace_sample_mode) {
1361                 err = perf_event__synthesize_id_index(tool,
1362                                                       process_synthesized_event,
1363                                                       session->evlist, machine);
1364                 if (err)
1365                         goto out;
1366         }
1367
1368         if (rec->opts.full_auxtrace) {
1369                 err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
1370                                         session, process_synthesized_event);
1371                 if (err)
1372                         goto out;
1373         }
1374
1375         if (!perf_evlist__exclude_kernel(rec->evlist)) {
1376                 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
1377                                                          machine);
1378                 WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
1379                                    "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
1380                                    "Check /proc/kallsyms permission or run as root.\n");
1381
1382                 err = perf_event__synthesize_modules(tool, process_synthesized_event,
1383                                                      machine);
1384                 WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
1385                                    "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
1386                                    "Check /proc/modules permission or run as root.\n");
1387         }
1388
1389         if (perf_guest) {
1390                 machines__process_guests(&session->machines,
1391                                          perf_event__synthesize_guest_os, tool);
1392         }
1393
1394         err = perf_event__synthesize_extra_attr(&rec->tool,
1395                                                 rec->evlist,
1396                                                 process_synthesized_event,
1397                                                 data->is_pipe);
1398         if (err)
1399                 goto out;
1400
1401         err = perf_event__synthesize_thread_map2(&rec->tool, rec->evlist->core.threads,
1402                                                  process_synthesized_event,
1403                                                 NULL);
1404         if (err < 0) {
1405                 pr_err("Couldn't synthesize thread map.\n");
1406                 return err;
1407         }
1408
1409         err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->core.cpus,
1410                                              process_synthesized_event, NULL);
1411         if (err < 0) {
1412                 pr_err("Couldn't synthesize cpu map.\n");
1413                 return err;
1414         }
1415
1416         err = perf_event__synthesize_bpf_events(session, process_synthesized_event,
1417                                                 machine, opts);
1418         if (err < 0)
1419                 pr_warning("Couldn't synthesize bpf events.\n");
1420
1421         err = perf_event__synthesize_cgroups(tool, process_synthesized_event,
1422                                              machine);
1423         if (err < 0)
1424                 pr_warning("Couldn't synthesize cgroup events.\n");
1425
1426         if (rec->opts.nr_threads_synthesize > 1) {
1427                 perf_set_multithreaded();
1428                 f = process_locked_synthesized_event;
1429         }
1430
1431         err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->core.threads,
1432                                             f, opts->sample_address,
1433                                             rec->opts.nr_threads_synthesize);
1434
1435         if (rec->opts.nr_threads_synthesize > 1)
1436                 perf_set_singlethreaded();
1437
1438 out:
1439         return err;
1440 }
1441
1442 static int record__process_signal_event(union perf_event *event __maybe_unused, void *data)
1443 {
1444         struct record *rec = data;
1445         pthread_kill(rec->thread_id, SIGUSR2);
1446         return 0;
1447 }
1448
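/*
 * Set up the side band evlist: hook up the --switch-output-event callback
 * (which signals the main thread with SIGUSR2), add the bpf side band
 * event unless disabled, and start the side band thread.
 */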
1449 static int record__setup_sb_evlist(struct record *rec)
1450 {
1451         struct record_opts *opts = &rec->opts;
1452
1453         if (rec->sb_evlist != NULL) {
1454                 /*
1455                  * We get here if --switch-output-event populated the
1456                  * sb_evlist, so associate a callback that will send a SIGUSR2
1457                  * to the main thread.
1458                  */
1459                 evlist__set_cb(rec->sb_evlist, record__process_signal_event, rec);
1460                 rec->thread_id = pthread_self();
1461         }
1462
1463         if (!opts->no_bpf_event) {
1464                 if (rec->sb_evlist == NULL) {
1465                         rec->sb_evlist = evlist__new();
1466
1467                         if (rec->sb_evlist == NULL) {
1468                                 pr_err("Couldn't create side band evlist.\n");
1469                                 return -1;
1470                         }
1471                 }
1472
1473                 if (evlist__add_bpf_sb_event(rec->sb_evlist, &rec->session->header.env)) {
1474                         pr_err("Couldn't ask for PERF_RECORD_BPF_EVENT side band events.\n");
1475                         return -1;
1476                 }
1477         }
1478
1479         if (perf_evlist__start_sb_thread(rec->sb_evlist, &rec->opts.target)) {
1480                 pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n");
1481                 opts->no_bpf_event = true;
1482         }
1483
1484         return 0;
1485 }
1486
1487 static int __cmd_record(struct record *rec, int argc, const char **argv)
1488 {
1489         int err;
1490         int status = 0;
1491         unsigned long waking = 0;
1492         const bool forks = argc > 0;
1493         struct perf_tool *tool = &rec->tool;
1494         struct record_opts *opts = &rec->opts;
1495         struct perf_data *data = &rec->data;
1496         struct perf_session *session;
1497         bool disabled = false, draining = false;
1498         int fd;
1499         float ratio = 0;
1500
1501         atexit(record__sig_exit);
1502         signal(SIGCHLD, sig_handler);
1503         signal(SIGINT, sig_handler);
1504         signal(SIGTERM, sig_handler);
1505         signal(SIGSEGV, sigsegv_handler);
1506
1507         if (rec->opts.record_namespaces)
1508                 tool->namespace_events = true;
1509
1510         if (rec->opts.record_cgroup) {
1511 #ifdef HAVE_FILE_HANDLE
1512                 tool->cgroup_events = true;
1513 #else
1514                 pr_err("cgroup tracking is not supported\n");
1515                 return -1;
1516 #endif
1517         }
1518
1519         if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) {
1520                 signal(SIGUSR2, snapshot_sig_handler);
1521                 if (rec->opts.auxtrace_snapshot_mode)
1522                         trigger_on(&auxtrace_snapshot_trigger);
1523                 if (rec->switch_output.enabled)
1524                         trigger_on(&switch_output_trigger);
1525         } else {
1526                 signal(SIGUSR2, SIG_IGN);
1527         }
1528
1529         session = perf_session__new(data, false, tool);
1530         if (IS_ERR(session)) {
1531                 pr_err("Perf session creation failed.\n");
1532                 return PTR_ERR(session);
1533         }
1534
1535         fd = perf_data__fd(data);
1536         rec->session = session;
1537
1538         if (zstd_init(&session->zstd_data, rec->opts.comp_level) < 0) {
1539                 pr_err("Compression initialization failed.\n");
1540                 return -1;
1541         }
1542
1543         session->header.env.comp_type  = PERF_COMP_ZSTD;
1544         session->header.env.comp_level = rec->opts.comp_level;
1545
1546         if (rec->opts.kcore &&
1547             !record__kcore_readable(&session->machines.host)) {
1548                 pr_err("ERROR: kcore is not readable.\n");
1549                 return -1;
1550         }
1551
1552         record__init_features(rec);
1553
1554         if (rec->opts.use_clockid && rec->opts.clockid_res_ns)
1555                 session->header.env.clockid_res_ns = rec->opts.clockid_res_ns;
1556
1557         if (forks) {
1558                 err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
1559                                                     argv, data->is_pipe,
1560                                                     workload_exec_failed_signal);
1561                 if (err < 0) {
1562                         pr_err("Couldn't run the workload!\n");
1563                         status = err;
1564                         goto out_delete_session;
1565                 }
1566         }
1567
1568         /*
1569          * If we have just a single event and are sending data
1570          * through a pipe, we need to force sample id allocation,
1571          * because we synthesize the event name through the pipe
1572          * and need the id for that.
1573          */
1574         if (data->is_pipe && rec->evlist->core.nr_entries == 1)
1575                 rec->opts.sample_id = true;
1576
1577         if (record__open(rec) != 0) {
1578                 err = -1;
1579                 goto out_child;
1580         }
1581         session->header.env.comp_mmap_len = session->evlist->core.mmap_len;
1582
1583         if (rec->opts.kcore) {
1584                 err = record__kcore_copy(&session->machines.host, data);
1585                 if (err) {
1586                         pr_err("ERROR: Failed to copy kcore\n");
1587                         goto out_child;
1588                 }
1589         }
1590
1591         err = bpf__apply_obj_config();
1592         if (err) {
1593                 char errbuf[BUFSIZ];
1594
1595                 bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
1596                 pr_err("ERROR: Apply config to BPF failed: %s\n",
1597                          errbuf);
1598                 goto out_child;
1599         }
1600
1601         /*
1602          * Normally perf_session__new would do this, but it doesn't have the
1603          * evlist.
1604          */
1605         if (rec->tool.ordered_events && !perf_evlist__sample_id_all(rec->evlist)) {
1606                 pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
1607                 rec->tool.ordered_events = false;
1608         }
1609
1610         if (!rec->evlist->nr_groups)
1611                 perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);
1612
1613         if (data->is_pipe) {
1614                 err = perf_header__write_pipe(fd);
1615                 if (err < 0)
1616                         goto out_child;
1617         } else {
1618                 err = perf_session__write_header(session, rec->evlist, fd, false);
1619                 if (err < 0)
1620                         goto out_child;
1621         }
1622
1623         err = -1;
1624         if (!rec->no_buildid
1625             && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
1626                 pr_err("Couldn't generate buildids. "
1627                        "Use --no-buildid to profile anyway.\n");
1628                 goto out_child;
1629         }
1630
1631         err = record__setup_sb_evlist(rec);
1632         if (err)
1633                 goto out_child;
1634
1635         err = record__synthesize(rec, false);
1636         if (err < 0)
1637                 goto out_child;
1638
1639         if (rec->realtime_prio) {
1640                 struct sched_param param;
1641
1642                 param.sched_priority = rec->realtime_prio;
1643                 if (sched_setscheduler(0, SCHED_FIFO, &param)) {
1644                         pr_err("Could not set realtime priority.\n");
1645                         err = -1;
1646                         goto out_child;
1647                 }
1648         }
1649
1650         /*
1651          * When perf is starting the traced process, all the events
1652          * (apart from group members) have enable_on_exec=1 set,
1653          * so don't spoil it by prematurely enabling them.
1654          */
1655         if (!target__none(&opts->target) && !opts->initial_delay)
1656                 evlist__enable(rec->evlist);
1657
1658         /*
1659          * Let the child rip
1660          */
1661         if (forks) {
1662                 struct machine *machine = &session->machines.host;
1663                 union perf_event *event;
1664                 pid_t tgid;
1665
1666                 event = malloc(sizeof(event->comm) + machine->id_hdr_size);
1667                 if (event == NULL) {
1668                         err = -ENOMEM;
1669                         goto out_child;
1670                 }
1671
1672                 /*
1673                  * Some H/W events are generated before the COMM event,
1674                  * which is emitted during exec(), so perf script
1675                  * cannot see a correct process name for those events.
1676                  * Synthesize a COMM event to prevent that.
1677                  */
1678                 tgid = perf_event__synthesize_comm(tool, event,
1679                                                    rec->evlist->workload.pid,
1680                                                    process_synthesized_event,
1681                                                    machine);
1682                 free(event);
1683
1684                 if (tgid == -1)
1685                         goto out_child;
1686
1687                 event = malloc(sizeof(event->namespaces) +
1688                                (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
1689                                machine->id_hdr_size);
1690                 if (event == NULL) {
1691                         err = -ENOMEM;
1692                         goto out_child;
1693                 }
1694
1695                 /*
1696                  * Synthesize NAMESPACES event for the command specified.
1697                  */
1698                 perf_event__synthesize_namespaces(tool, event,
1699                                                   rec->evlist->workload.pid,
1700                                                   tgid, process_synthesized_event,
1701                                                   machine);
1702                 free(event);
1703
1704                 perf_evlist__start_workload(rec->evlist);
1705         }
1706
1707         if (opts->initial_delay) {
1708                 usleep(opts->initial_delay * USEC_PER_MSEC);
1709                 evlist__enable(rec->evlist);
1710         }
1711
1712         trigger_ready(&auxtrace_snapshot_trigger);
1713         trigger_ready(&switch_output_trigger);
1714         perf_hooks__invoke_record_start();
1715         for (;;) {
1716                 unsigned long long hits = rec->samples;
1717
1718                 /*
1719                  * rec->evlist->bkw_mmap_state may be BKW_MMAP_EMPTY
1720                  * here: when done == true and hits != rec->samples
1721                  * in the previous round.
1722                  *
1723                  * perf_evlist__toggle_bkw_mmap ensures we never
1724                  * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING.
1725                  */
1726                 if (trigger_is_hit(&switch_output_trigger) || done || draining)
1727                         perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING);
1728
1729                 if (record__mmap_read_all(rec, false) < 0) {
1730                         trigger_error(&auxtrace_snapshot_trigger);
1731                         trigger_error(&switch_output_trigger);
1732                         err = -1;
1733                         goto out_child;
1734                 }
1735
1736                 if (auxtrace_record__snapshot_started) {
1737                         auxtrace_record__snapshot_started = 0;
1738                         if (!trigger_is_error(&auxtrace_snapshot_trigger))
1739                                 record__read_auxtrace_snapshot(rec, false);
1740                         if (trigger_is_error(&auxtrace_snapshot_trigger)) {
1741                                 pr_err("AUX area tracing snapshot failed\n");
1742                                 err = -1;
1743                                 goto out_child;
1744                         }
1745                 }
1746
1747                 if (trigger_is_hit(&switch_output_trigger)) {
1748                         /*
1749                          * If switch_output_trigger is hit, the data in the
1750                          * overwritable ring buffer should have been collected,
1751                          * so bkw_mmap_state should be set to BKW_MMAP_EMPTY.
1752                          *
1753                          * If SIGUSR2 is raised after or during record__mmap_read_all(),
1754                          * record__mmap_read_all() didn't collect data from the
1755                          * overwritable ring buffer. Read again.
1756                          */
1757                         if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING)
1758                                 continue;
1759                         trigger_ready(&switch_output_trigger);
1760
1761                         /*
1762                          * Reenable events in overwrite ring buffer after
1763                          * record__mmap_read_all(): we should have collected
1764                          * data from it.
1765                          */
1766                         perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING);
1767
1768                         if (!quiet)
1769                                 fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
1770                                         waking);
1771                         waking = 0;
1772                         fd = record__switch_output(rec, false);
1773                         if (fd < 0) {
1774                                 pr_err("Failed to switch to new file\n");
1775                                 trigger_error(&switch_output_trigger);
1776                                 err = fd;
1777                                 goto out_child;
1778                         }
1779
1780                         /* re-arm the alarm */
1781                         if (rec->switch_output.time)
1782                                 alarm(rec->switch_output.time);
1783                 }
1784
1785                 if (hits == rec->samples) {
1786                         if (done || draining)
1787                                 break;
1788                         err = evlist__poll(rec->evlist, -1);
1789                         /*
1790                          * Propagate the error only if there is one. Ignore a positive
1791                          * number of returned events and the interrupt (EINTR) error.
1792                          */
1793                         if (err > 0 || (err < 0 && errno == EINTR))
1794                                 err = 0;
1795                         waking++;
1796
1797                         if (evlist__filter_pollfd(rec->evlist, POLLERR | POLLHUP) == 0)
1798                                 draining = true;
1799                 }
1800
1801                 /*
1802                  * When perf is starting the traced process, at the end events
1803                  * die with the process and we wait for that. Thus no need to
1804                  * disable events in this case.
1805                  */
1806                 if (done && !disabled && !target__none(&opts->target)) {
1807                         trigger_off(&auxtrace_snapshot_trigger);
1808                         evlist__disable(rec->evlist);
1809                         disabled = true;
1810                 }
1811         }
1812
1813         trigger_off(&auxtrace_snapshot_trigger);
1814         trigger_off(&switch_output_trigger);
1815
1816         if (opts->auxtrace_snapshot_on_exit)
1817                 record__auxtrace_snapshot_exit(rec);
1818
1819         if (forks && workload_exec_errno) {
1820                 char msg[STRERR_BUFSIZE];
1821                 const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
1822                 pr_err("Workload failed: %s\n", emsg);
1823                 err = -1;
1824                 goto out_child;
1825         }
1826
1827         if (!quiet)
1828                 fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);
1829
1830         if (target__none(&rec->opts.target))
1831                 record__synthesize_workload(rec, true);
1832
1833 out_child:
1834         record__mmap_read_all(rec, true);
1835         record__aio_mmap_read_sync(rec);
1836
1837         if (rec->session->bytes_transferred && rec->session->bytes_compressed) {
1838                 ratio = (float)rec->session->bytes_transferred/(float)rec->session->bytes_compressed;
1839                 session->header.env.comp_ratio = ratio + 0.5;
1840         }
1841
1842         if (forks) {
1843                 int exit_status;
1844
1845                 if (!child_finished)
1846                         kill(rec->evlist->workload.pid, SIGTERM);
1847
1848                 wait(&exit_status);
1849
1850                 if (err < 0)
1851                         status = err;
1852                 else if (WIFEXITED(exit_status))
1853                         status = WEXITSTATUS(exit_status);
1854                 else if (WIFSIGNALED(exit_status))
1855                         signr = WTERMSIG(exit_status);
1856         } else
1857                 status = err;
1858
1859         record__synthesize(rec, true);
1860         /* this will be recalculated during process_buildids() */
1861         rec->samples = 0;
1862
1863         if (!err) {
1864                 if (!rec->timestamp_filename) {
1865                         record__finish_output(rec);
1866                 } else {
1867                         fd = record__switch_output(rec, true);
1868                         if (fd < 0) {
1869                                 status = fd;
1870                                 goto out_delete_session;
1871                         }
1872                 }
1873         }
1874
1875         perf_hooks__invoke_record_end();
1876
1877         if (!err && !quiet) {
1878                 char samples[128];
1879                 const char *postfix = rec->timestamp_filename ?
1880                                         ".<timestamp>" : "";
1881
1882                 if (rec->samples && !rec->opts.full_auxtrace)
1883                         scnprintf(samples, sizeof(samples),
1884                                   " (%" PRIu64 " samples)", rec->samples);
1885                 else
1886                         samples[0] = '\0';
1887
1888                 fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s",
1889                         perf_data__size(data) / 1024.0 / 1024.0,
1890                         data->path, postfix, samples);
1891                 if (ratio) {
1892                         fprintf(stderr, ", compressed (original %.3f MB, ratio is %.3f)",
1893                                         rec->session->bytes_transferred / 1024.0 / 1024.0,
1894                                         ratio);
1895                 }
1896                 fprintf(stderr, " ]\n");
1897         }
1898
1899 out_delete_session:
1900         zstd_fini(&session->zstd_data);
1901         perf_session__delete(session);
1902
1903         if (!opts->no_bpf_event)
1904                 perf_evlist__stop_sb_thread(rec->sb_evlist);
1905         return status;
1906 }
1907
1908 static void callchain_debug(struct callchain_param *callchain)
1909 {
1910         static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };
1911
1912         pr_debug("callchain: type %s\n", str[callchain->record_mode]);
1913
1914         if (callchain->record_mode == CALLCHAIN_DWARF)
1915                 pr_debug("callchain: stack dump size %d\n",
1916                          callchain->dump_size);
1917 }
1918
1919 int record_opts__parse_callchain(struct record_opts *record,
1920                                  struct callchain_param *callchain,
1921                                  const char *arg, bool unset)
1922 {
1923         int ret;
1924         callchain->enabled = !unset;
1925
1926         /* --no-call-graph */
1927         if (unset) {
1928                 callchain->record_mode = CALLCHAIN_NONE;
1929                 pr_debug("callchain: disabled\n");
1930                 return 0;
1931         }
1932
1933         ret = parse_callchain_record_opt(arg, callchain);
1934         if (!ret) {
1935                 /* Enable data address sampling for DWARF unwind. */
1936                 if (callchain->record_mode == CALLCHAIN_DWARF)
1937                         record->sample_address = true;
1938                 callchain_debug(callchain);
1939         }
1940
1941         return ret;
1942 }
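/*
 * Illustrative examples, not from the original source: the option value has
 * the form "record_mode[,record_size]" and is parsed by
 * parse_callchain_record_opt(), e.g.
 *
 *   perf record --call-graph fp ...          # frame pointer unwinding
 *   perf record --call-graph dwarf,8192 ...  # DWARF unwinding, 8kB stack dump
 *
 * As the code above notes, selecting DWARF also enables data address sampling
 * (record->sample_address).
 */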
1943
1944 int record_parse_callchain_opt(const struct option *opt,
1945                                const char *arg,
1946                                int unset)
1947 {
1948         return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
1949 }
1950
1951 int record_callchain_opt(const struct option *opt,
1952                          const char *arg __maybe_unused,
1953                          int unset __maybe_unused)
1954 {
1955         struct callchain_param *callchain = opt->value;
1956
1957         callchain->enabled = true;
1958
1959         if (callchain->record_mode == CALLCHAIN_NONE)
1960                 callchain->record_mode = CALLCHAIN_FP;
1961
1962         callchain_debug(callchain);
1963         return 0;
1964 }
1965
1966 static int perf_record_config(const char *var, const char *value, void *cb)
1967 {
1968         struct record *rec = cb;
1969
1970         if (!strcmp(var, "record.build-id")) {
1971                 if (!strcmp(value, "cache"))
1972                         rec->no_buildid_cache = false;
1973                 else if (!strcmp(value, "no-cache"))
1974                         rec->no_buildid_cache = true;
1975                 else if (!strcmp(value, "skip"))
1976                         rec->no_buildid = true;
1977                 else
1978                         return -1;
1979                 return 0;
1980         }
1981         if (!strcmp(var, "record.call-graph")) {
1982                 var = "call-graph.record-mode";
1983                 return perf_default_config(var, value, cb);
1984         }
1985 #ifdef HAVE_AIO_SUPPORT
1986         if (!strcmp(var, "record.aio")) {
1987                 rec->opts.nr_cblocks = strtol(value, NULL, 0);
1988                 if (!rec->opts.nr_cblocks)
1989                         rec->opts.nr_cblocks = nr_cblocks_default;
1990         }
1991 #endif
1992
1993         return 0;
1994 }
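/*
 * Illustrative ~/.perfconfig snippet, an assumed example matching the keys
 * handled above rather than text from the original source:
 *
 *   [record]
 *           build-id = no-cache    # or "cache" / "skip"
 *           call-graph = dwarf     # forwarded as call-graph.record-mode
 *           aio = 2                # only honoured with HAVE_AIO_SUPPORT
 */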
1995
1996 struct clockid_map {
1997         const char *name;
1998         int clockid;
1999 };
2000
2001 #define CLOCKID_MAP(n, c)       \
2002         { .name = n, .clockid = (c), }
2003
2004 #define CLOCKID_END     { .name = NULL, }
2005
2006
2007 /*
2008  * Add the missing ones, we need to build on many distros...
2009  */
2010 #ifndef CLOCK_MONOTONIC_RAW
2011 #define CLOCK_MONOTONIC_RAW 4
2012 #endif
2013 #ifndef CLOCK_BOOTTIME
2014 #define CLOCK_BOOTTIME 7
2015 #endif
2016 #ifndef CLOCK_TAI
2017 #define CLOCK_TAI 11
2018 #endif
2019
2020 static const struct clockid_map clockids[] = {
2021         /* available for all events, NMI safe */
2022         CLOCKID_MAP("monotonic", CLOCK_MONOTONIC),
2023         CLOCKID_MAP("monotonic_raw", CLOCK_MONOTONIC_RAW),
2024
2025         /* available for some events */
2026         CLOCKID_MAP("realtime", CLOCK_REALTIME),
2027         CLOCKID_MAP("boottime", CLOCK_BOOTTIME),
2028         CLOCKID_MAP("tai", CLOCK_TAI),
2029
2030         /* available for the lazy */
2031         CLOCKID_MAP("mono", CLOCK_MONOTONIC),
2032         CLOCKID_MAP("raw", CLOCK_MONOTONIC_RAW),
2033         CLOCKID_MAP("real", CLOCK_REALTIME),
2034         CLOCKID_MAP("boot", CLOCK_BOOTTIME),
2035
2036         CLOCKID_END,
2037 };
2038
2039 static int get_clockid_res(clockid_t clk_id, u64 *res_ns)
2040 {
2041         struct timespec res;
2042
2043         *res_ns = 0;
2044         if (!clock_getres(clk_id, &res))
2045                 *res_ns = res.tv_nsec + res.tv_sec * NSEC_PER_SEC;
2046         else
2047                 pr_warning("WARNING: Failed to determine specified clock resolution.\n");
2048
2049         return 0;
2050 }
2051
2052 static int parse_clockid(const struct option *opt, const char *str, int unset)
2053 {
2054         struct record_opts *opts = (struct record_opts *)opt->value;
2055         const struct clockid_map *cm;
2056         const char *ostr = str;
2057
2058         if (unset) {
2059                 opts->use_clockid = 0;
2060                 return 0;
2061         }
2062
2063         /* no arg passed */
2064         if (!str)
2065                 return 0;
2066
2067         /* no setting it twice */
2068         if (opts->use_clockid)
2069                 return -1;
2070
2071         opts->use_clockid = true;
2072
2073         /* if it's a number, we're done */
2074         if (sscanf(str, "%d", &opts->clockid) == 1)
2075                 return get_clockid_res(opts->clockid, &opts->clockid_res_ns);
2076
2077         /* allow a "CLOCK_" prefix to the name */
2078         if (!strncasecmp(str, "CLOCK_", 6))
2079                 str += 6;
2080
2081         for (cm = clockids; cm->name; cm++) {
2082                 if (!strcasecmp(str, cm->name)) {
2083                         opts->clockid = cm->clockid;
2084                         return get_clockid_res(opts->clockid,
2085                                                &opts->clockid_res_ns);
2086                 }
2087         }
2088
2089         opts->use_clockid = false;
2090         ui__warning("unknown clockid %s, check man page\n", ostr);
2091         return -1;
2092 }
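/*
 * Illustrative usage, not from the original source: the argument may be a raw
 * clockid number or one of the names in clockids[] above, matched
 * case-insensitively and with an optional "CLOCK_" prefix, e.g.
 *
 *   perf record -k monotonic_raw ...
 *   perf record -k CLOCK_BOOTTIME ...
 *   perf record -k 1 ...             # CLOCK_MONOTONIC by number
 *
 * get_clockid_res() then stores the clock's resolution in clockid_res_ns.
 */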
2093
2094 static int record__parse_affinity(const struct option *opt, const char *str, int unset)
2095 {
2096         struct record_opts *opts = (struct record_opts *)opt->value;
2097
2098         if (unset || !str)
2099                 return 0;
2100
2101         if (!strcasecmp(str, "node"))
2102                 opts->affinity = PERF_AFFINITY_NODE;
2103         else if (!strcasecmp(str, "cpu"))
2104                 opts->affinity = PERF_AFFINITY_CPU;
2105
2106         return 0;
2107 }
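/*
 * Illustrative usage, not from the original source:
 *
 *   perf record --affinity=node ...   # bind the reading thread to the NUMA
 *                                     # node of the mmap buffer being read
 *   perf record --affinity=cpu ...    # bind it to that buffer's cpu
 *
 * Anything else leaves the default PERF_AFFINITY_SYS in place.
 */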
2108
2109 static int parse_output_max_size(const struct option *opt,
2110                                  const char *str, int unset)
2111 {
2112         unsigned long *s = (unsigned long *)opt->value;
2113         static struct parse_tag tags_size[] = {
2114                 { .tag  = 'B', .mult = 1       },
2115                 { .tag  = 'K', .mult = 1 << 10 },
2116                 { .tag  = 'M', .mult = 1 << 20 },
2117                 { .tag  = 'G', .mult = 1 << 30 },
2118                 { .tag  = 0 },
2119         };
2120         unsigned long val;
2121
2122         if (unset) {
2123                 *s = 0;
2124                 return 0;
2125         }
2126
2127         val = parse_tag_value(str, tags_size);
2128         if (val != (unsigned long) -1) {
2129                 *s = val;
2130                 return 0;
2131         }
2132
2133         return -1;
2134 }
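/*
 * Illustrative usage, not from the original source: the size accepts the
 * B/K/M/G suffixes from tags_size[] above, e.g.
 *
 *   perf record --max-size=200M ...   # cap the output at ~200 * 2^20 bytes
 *   perf record --max-size=1G ...
 */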
2135
2136 static int record__parse_mmap_pages(const struct option *opt,
2137                                     const char *str,
2138                                     int unset __maybe_unused)
2139 {
2140         struct record_opts *opts = opt->value;
2141         char *s, *p;
2142         unsigned int mmap_pages;
2143         int ret;
2144
2145         if (!str)
2146                 return -EINVAL;
2147
2148         s = strdup(str);
2149         if (!s)
2150                 return -ENOMEM;
2151
2152         p = strchr(s, ',');
2153         if (p)
2154                 *p = '\0';
2155
2156         if (*s) {
2157                 ret = __perf_evlist__parse_mmap_pages(&mmap_pages, s);
2158                 if (ret)
2159                         goto out_free;
2160                 opts->mmap_pages = mmap_pages;
2161         }
2162
2163         if (!p) {
2164                 ret = 0;
2165                 goto out_free;
2166         }
2167
2168         ret = __perf_evlist__parse_mmap_pages(&mmap_pages, p + 1);
2169         if (ret)
2170                 goto out_free;
2171
2172         opts->auxtrace_mmap_pages = mmap_pages;
2173
2174 out_free:
2175         free(s);
2176         return ret;
2177 }
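/*
 * Illustrative usage, not from the original source: -m takes one or two
 * comma-separated values, the first for the data mmaps and the second for the
 * AUX area tracing mmaps, e.g.
 *
 *   perf record -m 512 ...       # 512 data pages per mmap
 *   perf record -m 512,128 ...   # plus 128 AUX area tracing pages
 *   perf record -m ,64 ...       # set only the AUX area tracing pages
 */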
2178
2179 static void switch_output_size_warn(struct record *rec)
2180 {
2181         u64 wakeup_size = evlist__mmap_size(rec->opts.mmap_pages);
2182         struct switch_output *s = &rec->switch_output;
2183
2184         wakeup_size /= 2;
2185
2186         if (s->size < wakeup_size) {
2187                 char buf[100];
2188
2189                 unit_number__scnprintf(buf, sizeof(buf), wakeup_size);
2190                 pr_warning("WARNING: switch-output data size is lower than the "
2191                            "wakeup kernel buffer size (%s), "
2192                            "expect bigger perf.data sizes\n", buf);
2193         }
2194 }
2195
2196 static int switch_output_setup(struct record *rec)
2197 {
2198         struct switch_output *s = &rec->switch_output;
2199         static struct parse_tag tags_size[] = {
2200                 { .tag  = 'B', .mult = 1       },
2201                 { .tag  = 'K', .mult = 1 << 10 },
2202                 { .tag  = 'M', .mult = 1 << 20 },
2203                 { .tag  = 'G', .mult = 1 << 30 },
2204                 { .tag  = 0 },
2205         };
2206         static struct parse_tag tags_time[] = {
2207                 { .tag  = 's', .mult = 1        },
2208                 { .tag  = 'm', .mult = 60       },
2209                 { .tag  = 'h', .mult = 60*60    },
2210                 { .tag  = 'd', .mult = 60*60*24 },
2211                 { .tag  = 0 },
2212         };
2213         unsigned long val;
2214
2215         /*
2216          * If we're using --switch-output-event, then we imply
2217          * --switch-output=signal, as we'll send a SIGUSR2 from the side band
2218          * thread to its parent.
2219          */
2220         if (rec->switch_output_event_set)
2221                 goto do_signal;
2222
2223         if (!s->set)
2224                 return 0;
2225
2226         if (!strcmp(s->str, "signal")) {
2227 do_signal:
2228                 s->signal = true;
2229                 pr_debug("switch-output with SIGUSR2 signal\n");
2230                 goto enabled;
2231         }
2232
2233         val = parse_tag_value(s->str, tags_size);
2234         if (val != (unsigned long) -1) {
2235                 s->size = val;
2236                 pr_debug("switch-output with %s size threshold\n", s->str);
2237                 goto enabled;
2238         }
2239
2240         val = parse_tag_value(s->str, tags_time);
2241         if (val != (unsigned long) -1) {
2242                 s->time = val;
2243                 pr_debug("switch-output with %s time threshold (%lu seconds)\n",
2244                          s->str, s->time);
2245                 goto enabled;
2246         }
2247
2248         return -1;
2249
2250 enabled:
2251         rec->timestamp_filename = true;
2252         s->enabled              = true;
2253
2254         if (s->size && !rec->opts.no_buffering)
2255                 switch_output_size_warn(rec);
2256
2257         return 0;
2258 }
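/*
 * Illustrative usage, not from the original source: per the tag tables above,
 * --switch-output accepts a signal, size or time threshold, e.g.
 *
 *   perf record --switch-output ...        # same as --switch-output=signal
 *   perf record --switch-output=100M ...   # rotate the output every ~100MB
 *   perf record --switch-output=30s ...    # rotate the output every 30 seconds
 *
 * All of these imply --timestamp-filename, so every generated file gets a
 * timestamp suffix.
 */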
2259
2260 static const char * const __record_usage[] = {
2261         "perf record [<options>] [<command>]",
2262         "perf record [<options>] -- <command> [<options>]",
2263         NULL
2264 };
2265 const char * const *record_usage = __record_usage;
2266
2267 static int build_id__process_mmap(struct perf_tool *tool, union perf_event *event,
2268                                   struct perf_sample *sample, struct machine *machine)
2269 {
2270         /*
2271          * We already have the kernel maps, put in place via perf_session__create_kernel_maps()
2272          * no need to add them twice.
2273          */
2274         if (!(event->header.misc & PERF_RECORD_MISC_USER))
2275                 return 0;
2276         return perf_event__process_mmap(tool, event, sample, machine);
2277 }
2278
2279 static int build_id__process_mmap2(struct perf_tool *tool, union perf_event *event,
2280                                    struct perf_sample *sample, struct machine *machine)
2281 {
2282         /*
2283          * We already have the kernel maps, put in place via perf_session__create_kernel_maps()
2284          * no need to add them twice.
2285          */
2286         if (!(event->header.misc & PERF_RECORD_MISC_USER))
2287                 return 0;
2288
2289         return perf_event__process_mmap2(tool, event, sample, machine);
2290 }
2291
2292 /*
2293  * XXX Ideally would be local to cmd_record() and passed to a record__new
2294  * because we need to have access to it in record__exit, that is called
2295  * after cmd_record() exits, but since record_options need to be accessible to
2296  * builtin-script, leave it here.
2297  *
2298  * At least we don't touch it in all the other functions here directly.
2299  *
2300  * Just say no to tons of global variables, sigh.
2301  */
2302 static struct record record = {
2303         .opts = {
2304                 .sample_time         = true,
2305                 .mmap_pages          = UINT_MAX,
2306                 .user_freq           = UINT_MAX,
2307                 .user_interval       = ULLONG_MAX,
2308                 .freq                = 4000,
2309                 .target              = {
2310                         .uses_mmap   = true,
2311                         .default_per_cpu = true,
2312                 },
2313                 .mmap_flush          = MMAP_FLUSH_DEFAULT,
2314                 .nr_threads_synthesize = 1,
2315         },
2316         .tool = {
2317                 .sample         = process_sample_event,
2318                 .fork           = perf_event__process_fork,
2319                 .exit           = perf_event__process_exit,
2320                 .comm           = perf_event__process_comm,
2321                 .namespaces     = perf_event__process_namespaces,
2322                 .mmap           = build_id__process_mmap,
2323                 .mmap2          = build_id__process_mmap2,
2324                 .ordered_events = true,
2325         },
2326 };
2327
2328 const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
2329         "\n\t\t\t\tDefault: fp";
2330
2331 static bool dry_run;
2332
2333 /*
2334  * XXX Will stay a global variable till we fix builtin-script.c to stop messing
2335  * with it and switch to use the library functions in perf_evlist that came
2336  * from builtin-record.c, i.e. use record_opts,
2337  * perf_evlist__prepare_workload, etc. instead of fork+exec'ing 'perf record',
2338  * using pipes, etc.
2339  */
2340 static struct option __record_options[] = {
2341         OPT_CALLBACK('e', "event", &record.evlist, "event",
2342                      "event selector. use 'perf list' to list available events",
2343                      parse_events_option),
2344         OPT_CALLBACK(0, "filter", &record.evlist, "filter",
2345                      "event filter", parse_filter),
2346         OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
2347                            NULL, "don't record events from perf itself",
2348                            exclude_perf),
2349         OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
2350                     "record events on existing process id"),
2351         OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
2352                     "record events on existing thread id"),
2353         OPT_INTEGER('r', "realtime", &record.realtime_prio,
2354                     "collect data with this RT SCHED_FIFO priority"),
2355         OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
2356                     "collect data without buffering"),
2357         OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
2358                     "collect raw sample records from all opened counters"),
2359         OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
2360                             "system-wide collection from all CPUs"),
2361         OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
2362                     "list of cpus to monitor"),
2363         OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
2364         OPT_STRING('o', "output", &record.data.path, "file",
2365                     "output file name"),
2366         OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
2367                         &record.opts.no_inherit_set,
2368                         "child tasks do not inherit counters"),
2369         OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize,
2370                     "synthesize non-sample events at the end of output"),
2371         OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"),
2372         OPT_BOOLEAN(0, "no-bpf-event", &record.opts.no_bpf_event, "record bpf events"),
2373         OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq,
2374                     "Fail if the specified frequency can't be used"),
2375         OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'",
2376                      "profile at this frequency",
2377                       record__parse_freq),
2378         OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
2379                      "number of mmap data pages and AUX area tracing mmap pages",
2380                      record__parse_mmap_pages),
2381         OPT_CALLBACK(0, "mmap-flush", &record.opts, "number",
2382                      "Minimal number of bytes that is extracted from mmap data pages (default: 1)",
2383                      record__mmap_flush_parse),
2384         OPT_BOOLEAN(0, "group", &record.opts.group,
2385                     "put the counters into a counter group"),
2386         OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
2387                            NULL, "enables call-graph recording" ,
2388                            &record_callchain_opt),
2389         OPT_CALLBACK(0, "call-graph", &record.opts,
2390                      "record_mode[,record_size]", record_callchain_help,
2391                      &record_parse_callchain_opt),
2392         OPT_INCR('v', "verbose", &verbose,
2393                     "be more verbose (show counter open errors, etc)"),
2394         OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
2395         OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
2396                     "per thread counts"),
2397         OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
2398         OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr,
2399                     "Record the sample physical addresses"),
2400         OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"),
2401         OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
2402                         &record.opts.sample_time_set,
2403                         "Record the sample timestamps"),
2404         OPT_BOOLEAN_SET('P', "period", &record.opts.period, &record.opts.period_set,
2405                         "Record the sample period"),
2406         OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
2407                     "don't sample"),
2408         OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache,
2409                         &record.no_buildid_cache_set,
2410                         "do not update the buildid cache"),
2411         OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid,
2412                         &record.no_buildid_set,
2413                         "do not collect buildids in perf.data"),
2414         OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
2415                      "monitor event in cgroup name only",
2416                      parse_cgroups),
2417         OPT_UINTEGER('D', "delay", &record.opts.initial_delay,
2418                   "ms to wait before starting measurement after program start"),
2419         OPT_BOOLEAN(0, "kcore", &record.opts.kcore, "copy /proc/kcore"),
2420         OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
2421                    "user to profile"),
2422
2423         OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
2424                      "branch any", "sample any taken branches",
2425                      parse_branch_stack),
2426
2427         OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
2428                      "branch filter mask", "branch stack filter modes",
2429                      parse_branch_stack),
2430         OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
2431                     "sample by weight (on special events only)"),
2432         OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
2433                     "sample transaction flags (special events only)"),
2434         OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
2435                     "use per-thread mmaps"),
2436         OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
2437                     "sample selected machine registers on interrupt,"
2438                     " use '-I?' to list register names", parse_intr_regs),
2439         OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register",
2440                     "sample selected machine registers on interrupt,"
2441                     " use '--user-regs=?' to list register names", parse_user_regs),
2442         OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
2443                     "Record running/enabled time of read (:S) events"),
2444         OPT_CALLBACK('k', "clockid", &record.opts,
2445                      "clockid", "clockid to use for events, see clock_gettime()",
2446                      parse_clockid),
2447         OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
2448                           "opts", "AUX area tracing Snapshot Mode", ""),
2449         OPT_STRING_OPTARG(0, "aux-sample", &record.opts.auxtrace_sample_opts,
2450                           "opts", "sample AUX area", ""),
2451         OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout,
2452                         "per thread proc mmap processing timeout in ms"),
2453         OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces,
2454                     "Record namespaces events"),
2455         OPT_BOOLEAN(0, "all-cgroups", &record.opts.record_cgroup,
2456                     "Record cgroup events"),
2457         OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events,
2458                     "Record context switch events"),
2459         OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
2460                          "Configure all used events to run in kernel space.",
2461                          PARSE_OPT_EXCLUSIVE),
2462         OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user,
2463                          "Configure all used events to run in user space.",
2464                          PARSE_OPT_EXCLUSIVE),
2465         OPT_BOOLEAN(0, "kernel-callchains", &record.opts.kernel_callchains,
2466                     "collect kernel callchains"),
2467         OPT_BOOLEAN(0, "user-callchains", &record.opts.user_callchains,
2468                     "collect user callchains"),
2469         OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path",
2470                    "clang binary to use for compiling BPF scriptlets"),
2471         OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options",
2472                    "options passed to clang when compiling BPF scriptlets"),
2473         OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
2474                    "file", "vmlinux pathname"),
2475         OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
2476                     "Record build-id of all DSOs regardless of hits"),
2477         OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
2478                     "append timestamp to output filename"),
2479         OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary,
2480                     "Record timestamp boundary (time of first/last samples)"),
2481         OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str,
2482                           &record.switch_output.set, "signal or size[BKMG] or time[smhd]",
2483                           "Switch output when receiving SIGUSR2 (signal) or crossing a size or time threshold",
2484                           "signal"),
2485         OPT_CALLBACK_SET(0, "switch-output-event", &record.sb_evlist, &record.switch_output_event_set, "switch output event",
2486                          "switch output event selector. use 'perf list' to list available events",
2487                          parse_events_option_new_evlist),
2488         OPT_INTEGER(0, "switch-max-files", &record.switch_output.num_files,
2489                    "Limit number of switch output generated files"),
2490         OPT_BOOLEAN(0, "dry-run", &dry_run,
2491                     "Parse options then exit"),
2492 #ifdef HAVE_AIO_SUPPORT
2493         OPT_CALLBACK_OPTARG(0, "aio", &record.opts,
2494                      &nr_cblocks_default, "n", "Use <n> control blocks in asynchronous trace writing mode (default: 1, max: 4)",
2495                      record__aio_parse),
2496 #endif
2497         OPT_CALLBACK(0, "affinity", &record.opts, "node|cpu",
2498                      "Set affinity mask of trace reading thread to NUMA node cpu mask or cpu of processed mmap buffer",
2499                      record__parse_affinity),
2500 #ifdef HAVE_ZSTD_SUPPORT
2501         OPT_CALLBACK_OPTARG('z', "compression-level", &record.opts, &comp_level_default,
2502                             "n", "Compress records using specified level (default: 1 - fastest compression, 22 - greatest compression)",
2503                             record__parse_comp_level),
2504 #endif
2505         OPT_CALLBACK(0, "max-size", &record.output_max_size,
2506                      "size", "Limit the maximum size of the output file", parse_output_max_size),
2507         OPT_UINTEGER(0, "num-thread-synthesize",
2508                      &record.opts.nr_threads_synthesize,
2509                      "number of threads to run for event synthesis"),
2510         OPT_END()
2511 };
2512
2513 struct option *record_options = __record_options;
2514
2515 int cmd_record(int argc, const char **argv)
2516 {
2517         int err;
2518         struct record *rec = &record;
2519         char errbuf[BUFSIZ];
2520
2521         setlocale(LC_ALL, "");
2522
2523 #ifndef HAVE_LIBBPF_SUPPORT
2524 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c)
2525         set_nobuild('\0', "clang-path", true);
2526         set_nobuild('\0', "clang-opt", true);
2527 # undef set_nobuild
2528 #endif
2529
2530 #ifndef HAVE_BPF_PROLOGUE
2531 # if !defined (HAVE_DWARF_SUPPORT)
2532 #  define REASON  "NO_DWARF=1"
2533 # elif !defined (HAVE_LIBBPF_SUPPORT)
2534 #  define REASON  "NO_LIBBPF=1"
2535 # else
2536 #  define REASON  "this architecture doesn't support BPF prologue"
2537 # endif
2538 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c)
2539         set_nobuild('\0', "vmlinux", true);
2540 # undef set_nobuild
2541 # undef REASON
2542 #endif
2543
2544         rec->opts.affinity = PERF_AFFINITY_SYS;
2545
2546         rec->evlist = evlist__new();
2547         if (rec->evlist == NULL)
2548                 return -ENOMEM;
2549
2550         err = perf_config(perf_record_config, rec);
2551         if (err)
2552                 return err;
2553
2554         argc = parse_options(argc, argv, record_options, record_usage,
2555                             PARSE_OPT_STOP_AT_NON_OPTION);
2556         if (quiet)
2557                 perf_quiet_option();
2558
2559         /* Make system wide (-a) the default target. */
2560         if (!argc && target__none(&rec->opts.target))
2561                 rec->opts.target.system_wide = true;
2562
2563         if (nr_cgroups && !rec->opts.target.system_wide) {
2564                 usage_with_options_msg(record_usage, record_options,
2565                         "cgroup monitoring only available in system-wide mode");
2566
2567         }
2568
2569         if (rec->opts.kcore)
2570                 rec->data.is_dir = true;
2571
2572         if (rec->opts.comp_level != 0) {
2573                 pr_debug("Compression enabled, disabling build id collection at the end of the session.\n");
2574                 rec->no_buildid = true;
2575         }
2576
2577         if (rec->opts.record_switch_events &&
2578             !perf_can_record_switch_events()) {
2579                 ui__error("kernel does not support recording context switch events\n");
2580                 parse_options_usage(record_usage, record_options, "switch-events", 0);
2581                 return -EINVAL;
2582         }
2583
2584         if (switch_output_setup(rec)) {
2585                 parse_options_usage(record_usage, record_options, "switch-output", 0);
2586                 return -EINVAL;
2587         }
2588
2589         if (rec->switch_output.time) {
2590                 signal(SIGALRM, alarm_sig_handler);
2591                 alarm(rec->switch_output.time);
2592         }
2593
2594         if (rec->switch_output.num_files) {
2595                 rec->switch_output.filenames = calloc(rec->switch_output.num_files,
2596                                                       sizeof(char *));
2597                 if (!rec->switch_output.filenames)
2598                         return -EINVAL;
2599         }
2600
2601         /*
2602          * Allow aliases to facilitate the lookup of symbols for address
2603          * filters. Refer to auxtrace_parse_filters().
2604          */
2605         symbol_conf.allow_aliases = true;
2606
2607         symbol__init(NULL);
2608
2609         if (rec->opts.affinity != PERF_AFFINITY_SYS) {
2610                 rec->affinity_mask.nbits = cpu__max_cpu();
2611                 rec->affinity_mask.bits = bitmap_alloc(rec->affinity_mask.nbits);
2612                 if (!rec->affinity_mask.bits) {
2613                         pr_err("Failed to allocate thread mask for %zd cpus\n", rec->affinity_mask.nbits);
2614                         return -ENOMEM;
2615                 }
2616                 pr_debug2("thread mask[%zd]: empty\n", rec->affinity_mask.nbits);
2617         }
2618
2619         err = record__auxtrace_init(rec);
2620         if (err)
2621                 goto out;
2622
2623         if (dry_run)
2624                 goto out;
2625
2626         err = bpf__setup_stdout(rec->evlist);
2627         if (err) {
2628                 bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf));
2629                 pr_err("ERROR: Setup BPF stdout failed: %s\n",
2630                          errbuf);
2631                 goto out;
2632         }
2633
2634         err = -ENOMEM;
2635
2636         if (rec->no_buildid_cache || rec->no_buildid) {
2637                 disable_buildid_cache();
2638         } else if (rec->switch_output.enabled) {
2639                 /*
2640                  * In 'perf record --switch-output', disable buildid
2641                  * generation by default to reduce data file switching
2642                  * overhead. Still generate buildid if they are required
2643                  * explicitly using
2644                  *
2645                  *  perf record --switch-output --no-no-buildid \
2646                  *              --no-no-buildid-cache
2647                  *
2648                  * Following code equals to:
2649                  *
2650                  * if ((rec->no_buildid || !rec->no_buildid_set) &&
2651                  *     (rec->no_buildid_cache || !rec->no_buildid_cache_set))
2652                  *         disable_buildid_cache();
2653                  */
2654                 bool disable = true;
2655
2656                 if (rec->no_buildid_set && !rec->no_buildid)
2657                         disable = false;
2658                 if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
2659                         disable = false;
2660                 if (disable) {
2661                         rec->no_buildid = true;
2662                         rec->no_buildid_cache = true;
2663                         disable_buildid_cache();
2664                 }
2665         }
2666
2667         if (record.opts.overwrite)
2668                 record.opts.tail_synthesize = true;
2669
2670         if (rec->evlist->core.nr_entries == 0 &&
2671             __perf_evlist__add_default(rec->evlist, !record.opts.no_samples) < 0) {
2672                 pr_err("Not enough memory for event selector list\n");
2673                 goto out;
2674         }
2675
2676         if (rec->opts.target.tid && !rec->opts.no_inherit_set)
2677                 rec->opts.no_inherit = true;
2678
2679         err = target__validate(&rec->opts.target);
2680         if (err) {
2681                 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
2682                 ui__warning("%s\n", errbuf);
2683         }
2684
2685         err = target__parse_uid(&rec->opts.target);
2686         if (err) {
2687                 int saved_errno = errno;
2688
2689                 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
2690                 ui__error("%s", errbuf);
2691
2692                 err = -saved_errno;
2693                 goto out;
2694         }
2695
2696         /* Enable ignoring missing threads when -u/-p option is defined. */
2697         rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX || rec->opts.target.pid;
2698
2699         err = -ENOMEM;
2700         if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
2701                 usage_with_options(record_usage, record_options);
2702
2703         err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
2704         if (err)
2705                 goto out;
2706
2707         /*
2708          * We take all buildids when the file contains
2709          * AUX area tracing data because we do not decode the
2710          * trace because it would take too long.
2711          */
2712         if (rec->opts.full_auxtrace)
2713                 rec->buildid_all = true;
2714
2715         if (record_opts__config(&rec->opts)) {
2716                 err = -EINVAL;
2717                 goto out;
2718         }
2719
2720         if (rec->opts.nr_cblocks > nr_cblocks_max)
2721                 rec->opts.nr_cblocks = nr_cblocks_max;
2722         pr_debug("nr_cblocks: %d\n", rec->opts.nr_cblocks);
2723
2724         pr_debug("affinity: %s\n", affinity_tags[rec->opts.affinity]);
2725         pr_debug("mmap flush: %d\n", rec->opts.mmap_flush);
2726
2727         if (rec->opts.comp_level > comp_level_max)
2728                 rec->opts.comp_level = comp_level_max;
2729         pr_debug("comp level: %d\n", rec->opts.comp_level);
2730
2731         err = __cmd_record(&record, argc, argv);
2732 out:
2733         bitmap_free(rec->affinity_mask.bits);
2734         evlist__delete(rec->evlist);
2735         symbol__exit();
2736         auxtrace_record__free(rec->itr);
2737         return err;
2738 }
2739
2740 static void snapshot_sig_handler(int sig __maybe_unused)
2741 {
2742         struct record *rec = &record;
2743
2744         if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
2745                 trigger_hit(&auxtrace_snapshot_trigger);
2746                 auxtrace_record__snapshot_started = 1;
2747                 if (auxtrace_record__snapshot_start(record.itr))
2748                         trigger_error(&auxtrace_snapshot_trigger);
2749         }
2750
2751         if (switch_output_signal(rec))
2752                 trigger_hit(&switch_output_trigger);
2753 }
2754
2755 static void alarm_sig_handler(int sig __maybe_unused)
2756 {
2757         struct record *rec = &record;
2758
2759         if (switch_output_time(rec))
2760                 trigger_hit(&switch_output_trigger);
2761 }