1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * builtin-record.c
4  *
5  * Builtin record command: Record the profile of a workload
6  * (or a CPU, or a PID) into the perf.data output file - for
7  * later analysis via perf report.
8  */
9 #include "builtin.h"
10
11 #include "util/build-id.h"
12 #include <subcmd/parse-options.h>
13 #include "util/parse-events.h"
14 #include "util/config.h"
15
16 #include "util/callchain.h"
17 #include "util/cgroup.h"
18 #include "util/header.h"
19 #include "util/event.h"
20 #include "util/evlist.h"
21 #include "util/evsel.h"
22 #include "util/debug.h"
23 #include "util/mmap.h"
24 #include "util/target.h"
25 #include "util/session.h"
26 #include "util/tool.h"
27 #include "util/symbol.h"
28 #include "util/record.h"
29 #include "util/cpumap.h"
30 #include "util/thread_map.h"
31 #include "util/data.h"
32 #include "util/perf_regs.h"
33 #include "util/auxtrace.h"
34 #include "util/tsc.h"
35 #include "util/parse-branch-options.h"
36 #include "util/parse-regs-options.h"
37 #include "util/perf_api_probe.h"
38 #include "util/llvm-utils.h"
39 #include "util/bpf-loader.h"
40 #include "util/trigger.h"
41 #include "util/perf-hooks.h"
42 #include "util/cpu-set-sched.h"
43 #include "util/synthetic-events.h"
44 #include "util/time-utils.h"
45 #include "util/units.h"
46 #include "util/bpf-event.h"
47 #include "util/util.h"
48 #include "asm/bug.h"
49 #include "perf.h"
50
51 #include <errno.h>
52 #include <inttypes.h>
53 #include <locale.h>
54 #include <poll.h>
55 #include <pthread.h>
56 #include <unistd.h>
57 #include <sched.h>
58 #include <signal.h>
59 #include <sys/mman.h>
60 #include <sys/wait.h>
61 #include <sys/types.h>
62 #include <sys/stat.h>
63 #include <fcntl.h>
64 #include <linux/err.h>
65 #include <linux/string.h>
66 #include <linux/time64.h>
67 #include <linux/zalloc.h>
68 #include <linux/bitmap.h>
69
70 struct switch_output {
71         bool             enabled;
72         bool             signal;
73         unsigned long    size;
74         unsigned long    time;
75         const char      *str;
76         bool             set;
77         char             **filenames;
78         int              num_files;
79         int              cur_file;
80 };
81
82 struct record {
83         struct perf_tool        tool;
84         struct record_opts      opts;
85         u64                     bytes_written;
86         struct perf_data        data;
87         struct auxtrace_record  *itr;
88         struct evlist   *evlist;
89         struct perf_session     *session;
90         struct evlist           *sb_evlist;
91         pthread_t               thread_id;
92         int                     realtime_prio;
93         bool                    switch_output_event_set;
94         bool                    no_buildid;
95         bool                    no_buildid_set;
96         bool                    no_buildid_cache;
97         bool                    no_buildid_cache_set;
98         bool                    buildid_all;
99         bool                    timestamp_filename;
100         bool                    timestamp_boundary;
101         struct switch_output    switch_output;
102         unsigned long long      samples;
103         struct mmap_cpu_mask    affinity_mask;
104         unsigned long           output_max_size;        /* = 0: unlimited */
105 };
106
107 static volatile int done;
108
109 static volatile int auxtrace_record__snapshot_started;
110 static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
111 static DEFINE_TRIGGER(switch_output_trigger);
112
113 static const char *affinity_tags[PERF_AFFINITY_MAX] = {
114         "SYS", "NODE", "CPU"
115 };
116
117 static bool switch_output_signal(struct record *rec)
118 {
119         return rec->switch_output.signal &&
120                trigger_is_ready(&switch_output_trigger);
121 }
122
123 static bool switch_output_size(struct record *rec)
124 {
125         return rec->switch_output.size &&
126                trigger_is_ready(&switch_output_trigger) &&
127                (rec->bytes_written >= rec->switch_output.size);
128 }
129
130 static bool switch_output_time(struct record *rec)
131 {
132         return rec->switch_output.time &&
133                trigger_is_ready(&switch_output_trigger);
134 }
135
136 static bool record__output_max_size_exceeded(struct record *rec)
137 {
138         return rec->output_max_size &&
139                (rec->bytes_written >= rec->output_max_size);
140 }
141
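/*
 * Append @size bytes at @bf to the perf.data output file, account them in
 * rec->bytes_written, stop the session once the output size limit is
 * exceeded and fire the switch-output trigger when its size threshold is
 * reached.
 */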
142 static int record__write(struct record *rec, struct mmap *map __maybe_unused,
143                          void *bf, size_t size)
144 {
145         struct perf_data_file *file = &rec->session->data->file;
146
147         if (perf_data_file__write(file, bf, size) < 0) {
148                 pr_err("failed to write perf data, error: %m\n");
149                 return -1;
150         }
151
152         rec->bytes_written += size;
153
154         if (record__output_max_size_exceeded(rec) && !done) {
155                 fprintf(stderr, "[ perf record: perf size limit reached (%" PRIu64 " KB),"
156                                 " stopping session ]\n",
157                                 rec->bytes_written >> 10);
158                 done = 1;
159         }
160
161         if (switch_output_size(rec))
162                 trigger_hit(&switch_output_trigger);
163
164         return 0;
165 }
166
167 static int record__aio_enabled(struct record *rec);
168 static int record__comp_enabled(struct record *rec);
169 static size_t zstd_compress(struct perf_session *session, void *dst, size_t dst_size,
170                             void *src, size_t src_size);
171
172 #ifdef HAVE_AIO_SUPPORT
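/*
 * Queue an asynchronous write of @size bytes from @buf to @trace_fd at file
 * offset @off.  Submission is retried while aio_write() fails with EAGAIN;
 * completion is not signalled (SIGEV_NONE) but polled later through
 * record__aio_complete().
 */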
173 static int record__aio_write(struct aiocb *cblock, int trace_fd,
174                 void *buf, size_t size, off_t off)
175 {
176         int rc;
177
178         cblock->aio_fildes = trace_fd;
179         cblock->aio_buf    = buf;
180         cblock->aio_nbytes = size;
181         cblock->aio_offset = off;
182         cblock->aio_sigevent.sigev_notify = SIGEV_NONE;
183
184         do {
185                 rc = aio_write(cblock);
186                 if (rc == 0) {
187                         break;
188                 } else if (errno != EAGAIN) {
189                         cblock->aio_fildes = -1;
190                         pr_err("failed to queue perf data, error: %m\n");
191                         break;
192                 }
193         } while (1);
194
195         return rc;
196 }
197
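/*
 * Check the state of the aio write described by @cblock: return 0 if it is
 * still in progress or had to be restarted with the remaining bytes after a
 * partial write, return 1 once it has fully completed and the mmap reference
 * taken in record__aio_pushfn() has been dropped.
 */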
198 static int record__aio_complete(struct mmap *md, struct aiocb *cblock)
199 {
200         void *rem_buf;
201         off_t rem_off;
202         size_t rem_size;
203         int rc, aio_errno;
204         ssize_t aio_ret, written;
205
206         aio_errno = aio_error(cblock);
207         if (aio_errno == EINPROGRESS)
208                 return 0;
209
210         written = aio_ret = aio_return(cblock);
211         if (aio_ret < 0) {
212                 if (aio_errno != EINTR)
213                         pr_err("failed to write perf data, error: %m\n");
214                 written = 0;
215         }
216
217         rem_size = cblock->aio_nbytes - written;
218
219         if (rem_size == 0) {
220                 cblock->aio_fildes = -1;
221                 /*
222                  * md->refcount is incremented in record__aio_pushfn() for
223                  * every aio write request started in record__aio_push() so
224                  * decrement it because the request is now complete.
225                  */
226                 perf_mmap__put(&md->core);
227                 rc = 1;
228         } else {
229                 /*
230                  * The aio write request may need to be restarted with the
231                  * remainder if the kernel didn't write the whole
232                  * chunk at once.
233                  */
234                 rem_off = cblock->aio_offset + written;
235                 rem_buf = (void *)(cblock->aio_buf + written);
236                 record__aio_write(cblock, cblock->aio_fildes,
237                                 rem_buf, rem_size, rem_off);
238                 rc = 0;
239         }
240
241         return rc;
242 }
243
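/*
 * Wait for outstanding aio writes on @md: return the index of the first free
 * control block so it can be reused, or, when @sync_all is set, keep
 * suspending until every request has completed and return -1.
 */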
244 static int record__aio_sync(struct mmap *md, bool sync_all)
245 {
246         struct aiocb **aiocb = md->aio.aiocb;
247         struct aiocb *cblocks = md->aio.cblocks;
248         struct timespec timeout = { 0, 1000 * 1000  * 1 }; /* 1ms */
249         int i, do_suspend;
250
251         do {
252                 do_suspend = 0;
253                 for (i = 0; i < md->aio.nr_cblocks; ++i) {
254                         if (cblocks[i].aio_fildes == -1 || record__aio_complete(md, &cblocks[i])) {
255                                 if (sync_all)
256                                         aiocb[i] = NULL;
257                                 else
258                                         return i;
259                         } else {
260                                 /*
261                                  * The started aio write is not complete yet,
262                                  * so it has to be waited for before the
263                                  * next allocation.
264                                  */
265                                 aiocb[i] = &cblocks[i];
266                                 do_suspend = 1;
267                         }
268                 }
269                 if (!do_suspend)
270                         return -1;
271
272                 while (aio_suspend((const struct aiocb **)aiocb, md->aio.nr_cblocks, &timeout)) {
273                         if (!(errno == EAGAIN || errno == EINTR))
274                                 pr_err("failed to sync perf data, error: %m\n");
275                 }
276         } while (1);
277 }
278
279 struct record_aio {
280         struct record   *rec;
281         void            *data;
282         size_t          size;
283 };
284
285 static int record__aio_pushfn(struct mmap *map, void *to, void *buf, size_t size)
286 {
287         struct record_aio *aio = to;
288
289         /*
290          * The map->core.base data pointed to by buf is copied into a free map->aio.data[]
291          * buffer to release space in the kernel buffer as fast as possible, by calling
292          * perf_mmap__consume() from the perf_mmap__push() function.
293          *
294          * That lets the kernel proceed with storing more profiling data into
295          * the kernel buffer earlier than other per-cpu kernel buffers are handled.
296          *
297          * Copying can be done in two steps in case the chunk of profiling data
298          * crosses the upper bound of the kernel buffer. In this case we first move
299          * the part of the data from map->start up to the upper bound and then the
300          * remainder from the beginning of the kernel buffer to the end of the data chunk.
301          */
302
303         if (record__comp_enabled(aio->rec)) {
304                 size = zstd_compress(aio->rec->session, aio->data + aio->size,
305                                      mmap__mmap_len(map) - aio->size,
306                                      buf, size);
307         } else {
308                 memcpy(aio->data + aio->size, buf, size);
309         }
310
311         if (!aio->size) {
312                 /*
313                  * Increment map->refcount to guard the map->aio.data[] buffer
314                  * from premature deallocation, because the map object can be
315                  * released before the aio write request started on the
316                  * map->aio.data[] buffer completes.
317                  *
318                  * perf_mmap__put() is done at record__aio_complete()
319                  * after the started aio request completes, or at record__aio_push()
320                  * if the request failed to start.
321                  */
322                 perf_mmap__get(&map->core);
323         }
324
325         aio->size += size;
326
327         return size;
328 }
329
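/*
 * Copy the ready data of @map into a free aio buffer and queue an
 * asynchronous write of it at file offset *@off.  On success *@off is
 * advanced past the queued data; on failure the mmap reference taken in
 * record__aio_pushfn() is dropped again.
 */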
330 static int record__aio_push(struct record *rec, struct mmap *map, off_t *off)
331 {
332         int ret, idx;
333         int trace_fd = rec->session->data->file.fd;
334         struct record_aio aio = { .rec = rec, .size = 0 };
335
336         /*
337          * Call record__aio_sync() to wait until a map->aio.data[] buffer
338          * becomes available after the previous aio write operation.
339          */
340
341         idx = record__aio_sync(map, false);
342         aio.data = map->aio.data[idx];
343         ret = perf_mmap__push(map, &aio, record__aio_pushfn);
344         if (ret != 0) /* ret > 0 - no data, ret < 0 - error */
345                 return ret;
346
347         rec->samples++;
348         ret = record__aio_write(&(map->aio.cblocks[idx]), trace_fd, aio.data, aio.size, *off);
349         if (!ret) {
350                 *off += aio.size;
351                 rec->bytes_written += aio.size;
352                 if (switch_output_size(rec))
353                         trigger_hit(&switch_output_trigger);
354         } else {
355                 /*
356                  * Drop the map->refcount taken in record__aio_pushfn() if the
357                  * record__aio_write() operation failed to start; otherwise
358                  * map->refcount is dropped in record__aio_complete() after the
359                  * aio write operation finishes successfully.
360                  */
361                 perf_mmap__put(&map->core);
362         }
363
364         return ret;
365 }
366
367 static off_t record__aio_get_pos(int trace_fd)
368 {
369         return lseek(trace_fd, 0, SEEK_CUR);
370 }
371
372 static void record__aio_set_pos(int trace_fd, off_t pos)
373 {
374         lseek(trace_fd, pos, SEEK_SET);
375 }
376
377 static void record__aio_mmap_read_sync(struct record *rec)
378 {
379         int i;
380         struct evlist *evlist = rec->evlist;
381         struct mmap *maps = evlist->mmap;
382
383         if (!record__aio_enabled(rec))
384                 return;
385
386         for (i = 0; i < evlist->core.nr_mmaps; i++) {
387                 struct mmap *map = &maps[i];
388
389                 if (map->core.base)
390                         record__aio_sync(map, true);
391         }
392 }
393
394 static int nr_cblocks_default = 1;
395 static int nr_cblocks_max = 4;
396
397 static int record__aio_parse(const struct option *opt,
398                              const char *str,
399                              int unset)
400 {
401         struct record_opts *opts = (struct record_opts *)opt->value;
402
403         if (unset) {
404                 opts->nr_cblocks = 0;
405         } else {
406                 if (str)
407                         opts->nr_cblocks = strtol(str, NULL, 0);
408                 if (!opts->nr_cblocks)
409                         opts->nr_cblocks = nr_cblocks_default;
410         }
411
412         return 0;
413 }
414 #else /* HAVE_AIO_SUPPORT */
415 static int nr_cblocks_max = 0;
416
417 static int record__aio_push(struct record *rec __maybe_unused, struct mmap *map __maybe_unused,
418                             off_t *off __maybe_unused)
419 {
420         return -1;
421 }
422
423 static off_t record__aio_get_pos(int trace_fd __maybe_unused)
424 {
425         return -1;
426 }
427
428 static void record__aio_set_pos(int trace_fd __maybe_unused, off_t pos __maybe_unused)
429 {
430 }
431
432 static void record__aio_mmap_read_sync(struct record *rec __maybe_unused)
433 {
434 }
435 #endif
436
437 static int record__aio_enabled(struct record *rec)
438 {
439         return rec->opts.nr_cblocks > 0;
440 }
441
442 #define MMAP_FLUSH_DEFAULT 1
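/*
 * Parse the --mmap-flush value: accept a plain number or a B/K/M/G suffixed
 * size, fall back to MMAP_FLUSH_DEFAULT and clamp the result to a quarter of
 * the mmap buffer size.
 */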
443 static int record__mmap_flush_parse(const struct option *opt,
444                                     const char *str,
445                                     int unset)
446 {
447         int flush_max;
448         struct record_opts *opts = (struct record_opts *)opt->value;
449         static struct parse_tag tags[] = {
450                         { .tag  = 'B', .mult = 1       },
451                         { .tag  = 'K', .mult = 1 << 10 },
452                         { .tag  = 'M', .mult = 1 << 20 },
453                         { .tag  = 'G', .mult = 1 << 30 },
454                         { .tag  = 0 },
455         };
456
457         if (unset)
458                 return 0;
459
460         if (str) {
461                 opts->mmap_flush = parse_tag_value(str, tags);
462                 if (opts->mmap_flush == (int)-1)
463                         opts->mmap_flush = strtol(str, NULL, 0);
464         }
465
466         if (!opts->mmap_flush)
467                 opts->mmap_flush = MMAP_FLUSH_DEFAULT;
468
469         flush_max = evlist__mmap_size(opts->mmap_pages);
470         flush_max /= 4;
471         if (opts->mmap_flush > flush_max)
472                 opts->mmap_flush = flush_max;
473
474         return 0;
475 }
476
477 #ifdef HAVE_ZSTD_SUPPORT
478 static unsigned int comp_level_default = 1;
479
480 static int record__parse_comp_level(const struct option *opt, const char *str, int unset)
481 {
482         struct record_opts *opts = opt->value;
483
484         if (unset) {
485                 opts->comp_level = 0;
486         } else {
487                 if (str)
488                         opts->comp_level = strtol(str, NULL, 0);
489                 if (!opts->comp_level)
490                         opts->comp_level = comp_level_default;
491         }
492
493         return 0;
494 }
495 #endif
496 static unsigned int comp_level_max = 22;
497
498 static int record__comp_enabled(struct record *rec)
499 {
500         return rec->opts.comp_level > 0;
501 }
502
503 static int process_synthesized_event(struct perf_tool *tool,
504                                      union perf_event *event,
505                                      struct perf_sample *sample __maybe_unused,
506                                      struct machine *machine __maybe_unused)
507 {
508         struct record *rec = container_of(tool, struct record, tool);
509         return record__write(rec, NULL, event, event->header.size);
510 }
511
512 static int process_locked_synthesized_event(struct perf_tool *tool,
513                                      union perf_event *event,
514                                      struct perf_sample *sample __maybe_unused,
515                                      struct machine *machine __maybe_unused)
516 {
517         static pthread_mutex_t synth_lock = PTHREAD_MUTEX_INITIALIZER;
518         int ret;
519
520         pthread_mutex_lock(&synth_lock);
521         ret = process_synthesized_event(tool, event, sample, machine);
522         pthread_mutex_unlock(&synth_lock);
523         return ret;
524 }
525
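/*
 * Synchronous push callback: optionally Zstd-compress the chunk into
 * map->data and append it to the perf.data file via record__write().
 */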
526 static int record__pushfn(struct mmap *map, void *to, void *bf, size_t size)
527 {
528         struct record *rec = to;
529
530         if (record__comp_enabled(rec)) {
531                 size = zstd_compress(rec->session, map->data, mmap__mmap_len(map), bf, size);
532                 bf   = map->data;
533         }
534
535         rec->samples++;
536         return record__write(rec, map, bf, size);
537 }
538
539 static volatile int signr = -1;
540 static volatile int child_finished;
541
542 static void sig_handler(int sig)
543 {
544         if (sig == SIGCHLD)
545                 child_finished = 1;
546         else
547                 signr = sig;
548
549         done = 1;
550 }
551
552 static void sigsegv_handler(int sig)
553 {
554         perf_hooks__recover();
555         sighandler_dump_stack(sig);
556 }
557
558 static void record__sig_exit(void)
559 {
560         if (signr == -1)
561                 return;
562
563         signal(signr, SIG_DFL);
564         raise(signr);
565 }
566
567 #ifdef HAVE_AUXTRACE_SUPPORT
568
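/*
 * Write one AUX area trace chunk to the output: the event header, the data
 * (possibly split in two pieces by the ring buffer wrap) and padding up to an
 * 8 byte boundary, indexing its file offset when recording to a single
 * regular file.
 */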
569 static int record__process_auxtrace(struct perf_tool *tool,
570                                     struct mmap *map,
571                                     union perf_event *event, void *data1,
572                                     size_t len1, void *data2, size_t len2)
573 {
574         struct record *rec = container_of(tool, struct record, tool);
575         struct perf_data *data = &rec->data;
576         size_t padding;
577         u8 pad[8] = {0};
578
579         if (!perf_data__is_pipe(data) && perf_data__is_single_file(data)) {
580                 off_t file_offset;
581                 int fd = perf_data__fd(data);
582                 int err;
583
584                 file_offset = lseek(fd, 0, SEEK_CUR);
585                 if (file_offset == -1)
586                         return -1;
587                 err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
588                                                      event, file_offset);
589                 if (err)
590                         return err;
591         }
592
593         /* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
594         padding = (len1 + len2) & 7;
595         if (padding)
596                 padding = 8 - padding;
597
598         record__write(rec, map, event, event->header.size);
599         record__write(rec, map, data1, len1);
600         if (len2)
601                 record__write(rec, map, data2, len2);
602         record__write(rec, map, &pad, padding);
603
604         return 0;
605 }
606
607 static int record__auxtrace_mmap_read(struct record *rec,
608                                       struct mmap *map)
609 {
610         int ret;
611
612         ret = auxtrace_mmap__read(map, rec->itr, &rec->tool,
613                                   record__process_auxtrace);
614         if (ret < 0)
615                 return ret;
616
617         if (ret)
618                 rec->samples++;
619
620         return 0;
621 }
622
623 static int record__auxtrace_mmap_read_snapshot(struct record *rec,
624                                                struct mmap *map)
625 {
626         int ret;
627
628         ret = auxtrace_mmap__read_snapshot(map, rec->itr, &rec->tool,
629                                            record__process_auxtrace,
630                                            rec->opts.auxtrace_snapshot_size);
631         if (ret < 0)
632                 return ret;
633
634         if (ret)
635                 rec->samples++;
636
637         return 0;
638 }
639
640 static int record__auxtrace_read_snapshot_all(struct record *rec)
641 {
642         int i;
643         int rc = 0;
644
645         for (i = 0; i < rec->evlist->core.nr_mmaps; i++) {
646                 struct mmap *map = &rec->evlist->mmap[i];
647
648                 if (!map->auxtrace_mmap.base)
649                         continue;
650
651                 if (record__auxtrace_mmap_read_snapshot(rec, map) != 0) {
652                         rc = -1;
653                         goto out;
654                 }
655         }
656 out:
657         return rc;
658 }
659
660 static void record__read_auxtrace_snapshot(struct record *rec, bool on_exit)
661 {
662         pr_debug("Recording AUX area tracing snapshot\n");
663         if (record__auxtrace_read_snapshot_all(rec) < 0) {
664                 trigger_error(&auxtrace_snapshot_trigger);
665         } else {
666                 if (auxtrace_record__snapshot_finish(rec->itr, on_exit))
667                         trigger_error(&auxtrace_snapshot_trigger);
668                 else
669                         trigger_ready(&auxtrace_snapshot_trigger);
670         }
671 }
672
673 static int record__auxtrace_snapshot_exit(struct record *rec)
674 {
675         if (trigger_is_error(&auxtrace_snapshot_trigger))
676                 return 0;
677
678         if (!auxtrace_record__snapshot_started &&
679             auxtrace_record__snapshot_start(rec->itr))
680                 return -1;
681
682         record__read_auxtrace_snapshot(rec, true);
683         if (trigger_is_error(&auxtrace_snapshot_trigger))
684                 return -1;
685
686         return 0;
687 }
688
689 static int record__auxtrace_init(struct record *rec)
690 {
691         int err;
692
693         if (!rec->itr) {
694                 rec->itr = auxtrace_record__init(rec->evlist, &err);
695                 if (err)
696                         return err;
697         }
698
699         err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
700                                               rec->opts.auxtrace_snapshot_opts);
701         if (err)
702                 return err;
703
704         err = auxtrace_parse_sample_options(rec->itr, rec->evlist, &rec->opts,
705                                             rec->opts.auxtrace_sample_opts);
706         if (err)
707                 return err;
708
709         return auxtrace_parse_filters(rec->evlist);
710 }
711
712 #else
713
714 static inline
715 int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
716                                struct mmap *map __maybe_unused)
717 {
718         return 0;
719 }
720
721 static inline
722 void record__read_auxtrace_snapshot(struct record *rec __maybe_unused,
723                                     bool on_exit __maybe_unused)
724 {
725 }
726
727 static inline
728 int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
729 {
730         return 0;
731 }
732
733 static inline
734 int record__auxtrace_snapshot_exit(struct record *rec __maybe_unused)
735 {
736         return 0;
737 }
738
739 static int record__auxtrace_init(struct record *rec __maybe_unused)
740 {
741         return 0;
742 }
743
744 #endif
745
746 static bool record__kcore_readable(struct machine *machine)
747 {
748         char kcore[PATH_MAX];
749         int fd;
750
751         scnprintf(kcore, sizeof(kcore), "%s/proc/kcore", machine->root_dir);
752
753         fd = open(kcore, O_RDONLY);
754         if (fd < 0)
755                 return false;
756
757         close(fd);
758
759         return true;
760 }
761
762 static int record__kcore_copy(struct machine *machine, struct perf_data *data)
763 {
764         char from_dir[PATH_MAX];
765         char kcore_dir[PATH_MAX];
766         int ret;
767
768         snprintf(from_dir, sizeof(from_dir), "%s/proc", machine->root_dir);
769
770         ret = perf_data__make_kcore_dir(data, kcore_dir, sizeof(kcore_dir));
771         if (ret)
772                 return ret;
773
774         return kcore_copy(from_dir, kcore_dir);
775 }
776
777 static int record__mmap_evlist(struct record *rec,
778                                struct evlist *evlist)
779 {
780         struct record_opts *opts = &rec->opts;
781         bool auxtrace_overwrite = opts->auxtrace_snapshot_mode ||
782                                   opts->auxtrace_sample_mode;
783         char msg[512];
784
785         if (opts->affinity != PERF_AFFINITY_SYS)
786                 cpu__setup_cpunode_map();
787
788         if (evlist__mmap_ex(evlist, opts->mmap_pages,
789                                  opts->auxtrace_mmap_pages,
790                                  auxtrace_overwrite,
791                                  opts->nr_cblocks, opts->affinity,
792                                  opts->mmap_flush, opts->comp_level) < 0) {
793                 if (errno == EPERM) {
794                         pr_err("Permission error mapping pages.\n"
795                                "Consider increasing "
796                                "/proc/sys/kernel/perf_event_mlock_kb,\n"
797                                "or try again with a smaller value of -m/--mmap_pages.\n"
798                                "(current value: %u,%u)\n",
799                                opts->mmap_pages, opts->auxtrace_mmap_pages);
800                         return -errno;
801                 } else {
802                         pr_err("failed to mmap with %d (%s)\n", errno,
803                                 str_error_r(errno, msg, sizeof(msg)));
804                         if (errno)
805                                 return -errno;
806                         else
807                                 return -EINVAL;
808                 }
809         }
810         return 0;
811 }
812
813 static int record__mmap(struct record *rec)
814 {
815         return record__mmap_evlist(rec, rec->evlist);
816 }
817
818 static int record__open(struct record *rec)
819 {
820         char msg[BUFSIZ];
821         struct evsel *pos;
822         struct evlist *evlist = rec->evlist;
823         struct perf_session *session = rec->session;
824         struct record_opts *opts = &rec->opts;
825         int rc = 0;
826
827         /*
828          * For initial_delay we need to add a dummy event so that we can track
829          * PERF_RECORD_MMAP while we wait for the initial delay to enable the
830          * real events, the ones asked for by the user.
831          */
832         if (opts->initial_delay) {
833                 if (perf_evlist__add_dummy(evlist))
834                         return -ENOMEM;
835
836                 pos = evlist__first(evlist);
837                 pos->tracking = 0;
838                 pos = evlist__last(evlist);
839                 pos->tracking = 1;
840                 pos->core.attr.enable_on_exec = 1;
841         }
842
843         perf_evlist__config(evlist, opts, &callchain_param);
844
845         evlist__for_each_entry(evlist, pos) {
846 try_again:
847                 if (evsel__open(pos, pos->core.cpus, pos->core.threads) < 0) {
848                         if (evsel__fallback(pos, errno, msg, sizeof(msg))) {
849                                 if (verbose > 0)
850                                         ui__warning("%s\n", msg);
851                                 goto try_again;
852                         }
853                         if ((errno == EINVAL || errno == EBADF) &&
854                             pos->leader != pos &&
855                             pos->weak_group) {
856                                 pos = perf_evlist__reset_weak_group(evlist, pos, true);
857                                 goto try_again;
858                         }
859                         rc = -errno;
860                         evsel__open_strerror(pos, &opts->target, errno, msg, sizeof(msg));
861                         ui__error("%s\n", msg);
862                         goto out;
863                 }
864
865                 pos->supported = true;
866         }
867
868         if (symbol_conf.kptr_restrict && !perf_evlist__exclude_kernel(evlist)) {
869                 pr_warning(
870 "WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
871 "check /proc/sys/kernel/kptr_restrict and /proc/sys/kernel/perf_event_paranoid.\n\n"
872 "Samples in kernel functions may not be resolved if a suitable vmlinux\n"
873 "file is not found in the buildid cache or in the vmlinux path.\n\n"
874 "Samples in kernel modules won't be resolved at all.\n\n"
875 "If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
876 "even with a suitable vmlinux or kallsyms file.\n\n");
877         }
878
879         if (perf_evlist__apply_filters(evlist, &pos)) {
880                 pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
881                         pos->filter, evsel__name(pos), errno,
882                         str_error_r(errno, msg, sizeof(msg)));
883                 rc = -1;
884                 goto out;
885         }
886
887         rc = record__mmap(rec);
888         if (rc)
889                 goto out;
890
891         session->evlist = evlist;
892         perf_session__set_id_hdr_size(session);
893 out:
894         return rc;
895 }
896
897 static int process_sample_event(struct perf_tool *tool,
898                                 union perf_event *event,
899                                 struct perf_sample *sample,
900                                 struct evsel *evsel,
901                                 struct machine *machine)
902 {
903         struct record *rec = container_of(tool, struct record, tool);
904
905         if (rec->evlist->first_sample_time == 0)
906                 rec->evlist->first_sample_time = sample->time;
907
908         rec->evlist->last_sample_time = sample->time;
909
910         if (rec->buildid_all)
911                 return 0;
912
913         rec->samples++;
914         return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
915 }
916
917 static int process_buildids(struct record *rec)
918 {
919         struct perf_session *session = rec->session;
920
921         if (perf_data__size(&rec->data) == 0)
922                 return 0;
923
924         /*
925          * During this process, it'll load the kernel map and replace the
926          * dso->long_name with the real pathname it found.  In this case
927          * we prefer the vmlinux path like
928          *   /lib/modules/3.16.4/build/vmlinux
929          *
930          * rather than build-id path (in debug directory).
931          *   $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
932          */
933         symbol_conf.ignore_vmlinux_buildid = true;
934
935         /*
936          * If --buildid-all is given, it marks all DSOs regardless of hits,
937          * so there is no need to process samples. But if timestamp_boundary is
938          * enabled, it still needs to walk all samples to get the timestamps of
939          * the first/last samples.
940          */
941         if (rec->buildid_all && !rec->timestamp_boundary)
942                 rec->tool.sample = NULL;
943
944         return perf_session__process_events(session);
945 }
946
947 static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
948 {
949         int err;
950         struct perf_tool *tool = data;
951         /*
952          * As for the guest kernel, when processing the record&report subcommands
953          * we arrange the module mmaps prior to the guest kernel mmap and trigger
954          * a dso preload, because by default guest module symbols are loaded
955          * from guest kallsyms instead of /lib/modules/XXX/XXX. This
956          * method is used to avoid missing symbols when the first address is
957          * in a module instead of in the guest kernel.
958          */
959         err = perf_event__synthesize_modules(tool, process_synthesized_event,
960                                              machine);
961         if (err < 0)
962                 pr_err("Couldn't record guest kernel [%d]'s reference"
963                        " relocation symbol.\n", machine->pid);
964
965         /*
966          * We use _stext for the guest kernel because the guest kernel's
967          * /proc/kallsyms sometimes has no _text.
968          */
969         err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
970                                                  machine);
971         if (err < 0)
972                 pr_err("Couldn't record guest kernel [%d]'s reference"
973                        " relocation symbol.\n", machine->pid);
974 }
975
976 static struct perf_event_header finished_round_event = {
977         .size = sizeof(struct perf_event_header),
978         .type = PERF_RECORD_FINISHED_ROUND,
979 };
980
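/*
 * With --affinity=node or --affinity=cpu, migrate the recording thread to the
 * CPU mask of the mmap buffer that is about to be read so the data is copied
 * on a CPU close to the memory that holds it.
 */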
981 static void record__adjust_affinity(struct record *rec, struct mmap *map)
982 {
983         if (rec->opts.affinity != PERF_AFFINITY_SYS &&
984             !bitmap_equal(rec->affinity_mask.bits, map->affinity_mask.bits,
985                           rec->affinity_mask.nbits)) {
986                 bitmap_zero(rec->affinity_mask.bits, rec->affinity_mask.nbits);
987                 bitmap_or(rec->affinity_mask.bits, rec->affinity_mask.bits,
988                           map->affinity_mask.bits, rec->affinity_mask.nbits);
989                 sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&rec->affinity_mask),
990                                   (cpu_set_t *)rec->affinity_mask.bits);
991                 if (verbose == 2)
992                         mmap_cpu_mask__scnprintf(&rec->affinity_mask, "thread");
993         }
994 }
995
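/*
 * Header callback for the Zstd streaming compressor: with a zero @increment
 * it initializes a PERF_RECORD_COMPRESSED header in @record, otherwise it
 * grows header.size by the bytes just added to the record.
 */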
996 static size_t process_comp_header(void *record, size_t increment)
997 {
998         struct perf_record_compressed *event = record;
999         size_t size = sizeof(*event);
1000
1001         if (increment) {
1002                 event->header.size += increment;
1003                 return increment;
1004         }
1005
1006         event->header.type = PERF_RECORD_COMPRESSED;
1007         event->header.size = size;
1008
1009         return size;
1010 }
1011
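/*
 * Compress @src_size bytes from @src into @dst as one or more
 * PERF_RECORD_COMPRESSED records and update the session's transferred and
 * compressed byte counters.
 */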
1012 static size_t zstd_compress(struct perf_session *session, void *dst, size_t dst_size,
1013                             void *src, size_t src_size)
1014 {
1015         size_t compressed;
1016         size_t max_record_size = PERF_SAMPLE_MAX_SIZE - sizeof(struct perf_record_compressed) - 1;
1017
1018         compressed = zstd_compress_stream_to_records(&session->zstd_data, dst, dst_size, src, src_size,
1019                                                      max_record_size, process_comp_header);
1020
1021         session->bytes_transferred += src_size;
1022         session->bytes_compressed  += compressed;
1023
1024         return compressed;
1025 }
1026
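/*
 * Drain the mmap buffers of @evlist (synchronously or via aio), read pending
 * AUX area data, and emit a PERF_RECORD_FINISHED_ROUND event if anything was
 * written during this pass.
 */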
1027 static int record__mmap_read_evlist(struct record *rec, struct evlist *evlist,
1028                                     bool overwrite, bool synch)
1029 {
1030         u64 bytes_written = rec->bytes_written;
1031         int i;
1032         int rc = 0;
1033         struct mmap *maps;
1034         int trace_fd = rec->data.file.fd;
1035         off_t off = 0;
1036
1037         if (!evlist)
1038                 return 0;
1039
1040         maps = overwrite ? evlist->overwrite_mmap : evlist->mmap;
1041         if (!maps)
1042                 return 0;
1043
1044         if (overwrite && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
1045                 return 0;
1046
1047         if (record__aio_enabled(rec))
1048                 off = record__aio_get_pos(trace_fd);
1049
1050         for (i = 0; i < evlist->core.nr_mmaps; i++) {
1051                 u64 flush = 0;
1052                 struct mmap *map = &maps[i];
1053
1054                 if (map->core.base) {
1055                         record__adjust_affinity(rec, map);
1056                         if (synch) {
1057                                 flush = map->core.flush;
1058                                 map->core.flush = 1;
1059                         }
1060                         if (!record__aio_enabled(rec)) {
1061                                 if (perf_mmap__push(map, rec, record__pushfn) < 0) {
1062                                         if (synch)
1063                                                 map->core.flush = flush;
1064                                         rc = -1;
1065                                         goto out;
1066                                 }
1067                         } else {
1068                                 if (record__aio_push(rec, map, &off) < 0) {
1069                                         record__aio_set_pos(trace_fd, off);
1070                                         if (synch)
1071                                                 map->core.flush = flush;
1072                                         rc = -1;
1073                                         goto out;
1074                                 }
1075                         }
1076                         if (synch)
1077                                 map->core.flush = flush;
1078                 }
1079
1080                 if (map->auxtrace_mmap.base && !rec->opts.auxtrace_snapshot_mode &&
1081                     !rec->opts.auxtrace_sample_mode &&
1082                     record__auxtrace_mmap_read(rec, map) != 0) {
1083                         rc = -1;
1084                         goto out;
1085                 }
1086         }
1087
1088         if (record__aio_enabled(rec))
1089                 record__aio_set_pos(trace_fd, off);
1090
1091         /*
1092          * Mark the round finished in case we wrote
1093          * at least one event.
1094          */
1095         if (bytes_written != rec->bytes_written)
1096                 rc = record__write(rec, NULL, &finished_round_event, sizeof(finished_round_event));
1097
1098         if (overwrite)
1099                 perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
1100 out:
1101         return rc;
1102 }
1103
1104 static int record__mmap_read_all(struct record *rec, bool synch)
1105 {
1106         int err;
1107
1108         err = record__mmap_read_evlist(rec, rec->evlist, false, synch);
1109         if (err)
1110                 return err;
1111
1112         return record__mmap_read_evlist(rec, rec->evlist, true, synch);
1113 }
1114
1115 static void record__init_features(struct record *rec)
1116 {
1117         struct perf_session *session = rec->session;
1118         int feat;
1119
1120         for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
1121                 perf_header__set_feat(&session->header, feat);
1122
1123         if (rec->no_buildid)
1124                 perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
1125
1126         if (!have_tracepoints(&rec->evlist->core.entries))
1127                 perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
1128
1129         if (!rec->opts.branch_stack)
1130                 perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
1131
1132         if (!rec->opts.full_auxtrace)
1133                 perf_header__clear_feat(&session->header, HEADER_AUXTRACE);
1134
1135         if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns))
1136                 perf_header__clear_feat(&session->header, HEADER_CLOCKID);
1137
1138         perf_header__clear_feat(&session->header, HEADER_DIR_FORMAT);
1139         if (!record__comp_enabled(rec))
1140                 perf_header__clear_feat(&session->header, HEADER_COMPRESSED);
1141
1142         perf_header__clear_feat(&session->header, HEADER_STAT);
1143 }
1144
1145 static void
1146 record__finish_output(struct record *rec)
1147 {
1148         struct perf_data *data = &rec->data;
1149         int fd = perf_data__fd(data);
1150
1151         if (data->is_pipe)
1152                 return;
1153
1154         rec->session->header.data_size += rec->bytes_written;
1155         data->file.size = lseek(perf_data__fd(data), 0, SEEK_CUR);
1156
1157         if (!rec->no_buildid) {
1158                 process_buildids(rec);
1159
1160                 if (rec->buildid_all)
1161                         dsos__hit_all(rec->session);
1162         }
1163         perf_session__write_header(rec->session, rec->evlist, fd, true);
1164
1165         return;
1166 }
1167
1168 static int record__synthesize_workload(struct record *rec, bool tail)
1169 {
1170         int err;
1171         struct perf_thread_map *thread_map;
1172
1173         if (rec->opts.tail_synthesize != tail)
1174                 return 0;
1175
1176         thread_map = thread_map__new_by_tid(rec->evlist->workload.pid);
1177         if (thread_map == NULL)
1178                 return -1;
1179
1180         err = perf_event__synthesize_thread_map(&rec->tool, thread_map,
1181                                                  process_synthesized_event,
1182                                                  &rec->session->machines.host,
1183                                                  rec->opts.sample_address);
1184         perf_thread_map__put(thread_map);
1185         return err;
1186 }
1187
1188 static int record__synthesize(struct record *rec, bool tail);
1189
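/*
 * Finish the current perf.data output and switch to a new, timestamped file,
 * re-synthesizing tracking events for it and rotating old files when a file
 * count limit is configured.
 */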
1190 static int
1191 record__switch_output(struct record *rec, bool at_exit)
1192 {
1193         struct perf_data *data = &rec->data;
1194         int fd, err;
1195         char *new_filename;
1196
1197         /* Same size as "2015122520103046" */
1198         char timestamp[] = "InvalidTimestamp";
1199
1200         record__aio_mmap_read_sync(rec);
1201
1202         record__synthesize(rec, true);
1203         if (target__none(&rec->opts.target))
1204                 record__synthesize_workload(rec, true);
1205
1206         rec->samples = 0;
1207         record__finish_output(rec);
1208         err = fetch_current_timestamp(timestamp, sizeof(timestamp));
1209         if (err) {
1210                 pr_err("Failed to get current timestamp\n");
1211                 return -EINVAL;
1212         }
1213
1214         fd = perf_data__switch(data, timestamp,
1215                                     rec->session->header.data_offset,
1216                                     at_exit, &new_filename);
1217         if (fd >= 0 && !at_exit) {
1218                 rec->bytes_written = 0;
1219                 rec->session->header.data_size = 0;
1220         }
1221
1222         if (!quiet)
1223                 fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
1224                         data->path, timestamp);
1225
1226         if (rec->switch_output.num_files) {
1227                 int n = rec->switch_output.cur_file + 1;
1228
1229                 if (n >= rec->switch_output.num_files)
1230                         n = 0;
1231                 rec->switch_output.cur_file = n;
1232                 if (rec->switch_output.filenames[n]) {
1233                         remove(rec->switch_output.filenames[n]);
1234                         zfree(&rec->switch_output.filenames[n]);
1235                 }
1236                 rec->switch_output.filenames[n] = new_filename;
1237         } else {
1238                 free(new_filename);
1239         }
1240
1241         /* Output tracking events */
1242         if (!at_exit) {
1243                 record__synthesize(rec, false);
1244
1245                 /*
1246                  * In 'perf record --switch-output' without -a,
1247                  * record__synthesize() in record__switch_output() won't
1248                  * generate tracking events because there's no thread_map
1249                  * in the evlist, so the newly created perf.data wouldn't
1250                  * contain map and comm information.
1251                  * Create a fake thread_map and directly call
1252                  * perf_event__synthesize_thread_map() for those events.
1253                  */
1254                 if (target__none(&rec->opts.target))
1255                         record__synthesize_workload(rec, false);
1256         }
1257         return fd;
1258 }
1259
1260 static volatile int workload_exec_errno;
1261
1262 /*
1263  * perf_evlist__prepare_workload will send a SIGUSR1
1264  * if the fork fails, since we asked for it by setting its
1265  * want_signal to true.
1266  */
1267 static void workload_exec_failed_signal(int signo __maybe_unused,
1268                                         siginfo_t *info,
1269                                         void *ucontext __maybe_unused)
1270 {
1271         workload_exec_errno = info->si_value.sival_int;
1272         done = 1;
1273         child_finished = 1;
1274 }
1275
1276 static void snapshot_sig_handler(int sig);
1277 static void alarm_sig_handler(int sig);
1278
1279 static const struct perf_event_mmap_page *
1280 perf_evlist__pick_pc(struct evlist *evlist)
1281 {
1282         if (evlist) {
1283                 if (evlist->mmap && evlist->mmap[0].core.base)
1284                         return evlist->mmap[0].core.base;
1285                 if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].core.base)
1286                         return evlist->overwrite_mmap[0].core.base;
1287         }
1288         return NULL;
1289 }
1290
1291 static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
1292 {
1293         const struct perf_event_mmap_page *pc;
1294
1295         pc = perf_evlist__pick_pc(rec->evlist);
1296         if (pc)
1297                 return pc;
1298         return NULL;
1299 }
1300
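/*
 * Emit the synthetic events describing the current system state: attrs,
 * features and tracing data in pipe mode, time conversion and auxtrace info,
 * kernel and module mmaps, existing threads, CPU maps, BPF and cgroup events.
 */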
1301 static int record__synthesize(struct record *rec, bool tail)
1302 {
1303         struct perf_session *session = rec->session;
1304         struct machine *machine = &session->machines.host;
1305         struct perf_data *data = &rec->data;
1306         struct record_opts *opts = &rec->opts;
1307         struct perf_tool *tool = &rec->tool;
1308         int fd = perf_data__fd(data);
1309         int err = 0;
1310         event_op f = process_synthesized_event;
1311
1312         if (rec->opts.tail_synthesize != tail)
1313                 return 0;
1314
1315         if (data->is_pipe) {
1316                 /*
1317                  * We need to synthesize events first, because some
1318                  * features work on top of them (on the report side).
1319                  */
1320                 err = perf_event__synthesize_attrs(tool, rec->evlist,
1321                                                    process_synthesized_event);
1322                 if (err < 0) {
1323                         pr_err("Couldn't synthesize attrs.\n");
1324                         goto out;
1325                 }
1326
1327                 err = perf_event__synthesize_features(tool, session, rec->evlist,
1328                                                       process_synthesized_event);
1329                 if (err < 0) {
1330                         pr_err("Couldn't synthesize features.\n");
1331                         return err;
1332                 }
1333
1334                 if (have_tracepoints(&rec->evlist->core.entries)) {
1335                         /*
1336                          * FIXME err <= 0 here actually means that
1337                          * there were no tracepoints, so it's not really
1338                          * an error, just that we don't need to
1339                          * synthesize anything.  We really have to
1340                          * return this more properly and also
1341                          * propagate errors that now are calling die()
1342                          */
1343                         err = perf_event__synthesize_tracing_data(tool, fd, rec->evlist,
1344                                                                   process_synthesized_event);
1345                         if (err <= 0) {
1346                                 pr_err("Couldn't record tracing data.\n");
1347                                 goto out;
1348                         }
1349                         rec->bytes_written += err;
1350                 }
1351         }
1352
1353         err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
1354                                           process_synthesized_event, machine);
1355         if (err)
1356                 goto out;
1357
1358         /* Synthesize id_index before auxtrace_info */
1359         if (rec->opts.auxtrace_sample_mode) {
1360                 err = perf_event__synthesize_id_index(tool,
1361                                                       process_synthesized_event,
1362                                                       session->evlist, machine);
1363                 if (err)
1364                         goto out;
1365         }
1366
1367         if (rec->opts.full_auxtrace) {
1368                 err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
1369                                         session, process_synthesized_event);
1370                 if (err)
1371                         goto out;
1372         }
1373
1374         if (!perf_evlist__exclude_kernel(rec->evlist)) {
1375                 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
1376                                                          machine);
1377                 WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
1378                                    "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
1379                                    "Check /proc/kallsyms permission or run as root.\n");
1380
1381                 err = perf_event__synthesize_modules(tool, process_synthesized_event,
1382                                                      machine);
1383                 WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
1384                                    "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
1385                                    "Check /proc/modules permission or run as root.\n");
1386         }
1387
1388         if (perf_guest) {
1389                 machines__process_guests(&session->machines,
1390                                          perf_event__synthesize_guest_os, tool);
1391         }
1392
1393         err = perf_event__synthesize_extra_attr(&rec->tool,
1394                                                 rec->evlist,
1395                                                 process_synthesized_event,
1396                                                 data->is_pipe);
1397         if (err)
1398                 goto out;
1399
1400         err = perf_event__synthesize_thread_map2(&rec->tool, rec->evlist->core.threads,
1401                                                  process_synthesized_event,
1402                                                 NULL);
1403         if (err < 0) {
1404                 pr_err("Couldn't synthesize thread map.\n");
1405                 return err;
1406         }
1407
1408         err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->core.cpus,
1409                                              process_synthesized_event, NULL);
1410         if (err < 0) {
1411                 pr_err("Couldn't synthesize cpu map.\n");
1412                 return err;
1413         }
1414
1415         err = perf_event__synthesize_bpf_events(session, process_synthesized_event,
1416                                                 machine, opts);
1417         if (err < 0)
1418                 pr_warning("Couldn't synthesize bpf events.\n");
1419
1420         err = perf_event__synthesize_cgroups(tool, process_synthesized_event,
1421                                              machine);
1422         if (err < 0)
1423                 pr_warning("Couldn't synthesize cgroup events.\n");
1424
1425         if (rec->opts.nr_threads_synthesize > 1) {
1426                 perf_set_multithreaded();
1427                 f = process_locked_synthesized_event;
1428         }
1429
1430         err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->core.threads,
1431                                             f, opts->sample_address,
1432                                             rec->opts.nr_threads_synthesize);
1433
1434         if (rec->opts.nr_threads_synthesize > 1)
1435                 perf_set_singlethreaded();
1436
1437 out:
1438         return err;
1439 }
1440
1441 static int record__process_signal_event(union perf_event *event __maybe_unused, void *data)
1442 {
1443         struct record *rec = data;
1444         pthread_kill(rec->thread_id, SIGUSR2);
1445         return 0;
1446 }
1447
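/*
 * Set up the side band event list: register a callback that sends SIGUSR2 to
 * the main thread when a --switch-output-event event arrives, add the BPF
 * side band event unless --no-bpf-event was given, and start the side band
 * thread.
 */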
1448 static int record__setup_sb_evlist(struct record *rec)
1449 {
1450         struct record_opts *opts = &rec->opts;
1451
1452         if (rec->sb_evlist != NULL) {
1453                 /*
1454                  * We get here if --switch-output-event populated the
1455                  * sb_evlist, so associate a callback that will send a SIGUSR2
1456                  * to the main thread.
1457                  */
1458                 evlist__set_cb(rec->sb_evlist, record__process_signal_event, rec);
1459                 rec->thread_id = pthread_self();
1460         }
1461
1462         if (!opts->no_bpf_event) {
1463                 if (rec->sb_evlist == NULL) {
1464                         rec->sb_evlist = evlist__new();
1465
1466                         if (rec->sb_evlist == NULL) {
1467                                 pr_err("Couldn't create side band evlist.\n");
1468                                 return -1;
1469                         }
1470                 }
1471
1472                 if (evlist__add_bpf_sb_event(rec->sb_evlist, &rec->session->header.env)) {
1473                         pr_err("Couldn't ask for PERF_RECORD_BPF_EVENT side band events.\n");
1474                         return -1;
1475                 }
1476         }
1477
1478         if (perf_evlist__start_sb_thread(rec->sb_evlist, &rec->opts.target)) {
1479                 pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n");
1480                 opts->no_bpf_event = true;
1481         }
1482
1483         return 0;
1484 }
1485
1486 static int __cmd_record(struct record *rec, int argc, const char **argv)
1487 {
1488         int err;
1489         int status = 0;
1490         unsigned long waking = 0;
1491         const bool forks = argc > 0;
1492         struct perf_tool *tool = &rec->tool;
1493         struct record_opts *opts = &rec->opts;
1494         struct perf_data *data = &rec->data;
1495         struct perf_session *session;
1496         bool disabled = false, draining = false;
1497         int fd;
1498         float ratio = 0;
1499
1500         atexit(record__sig_exit);
1501         signal(SIGCHLD, sig_handler);
1502         signal(SIGINT, sig_handler);
1503         signal(SIGTERM, sig_handler);
1504         signal(SIGSEGV, sigsegv_handler);
1505
1506         if (rec->opts.record_namespaces)
1507                 tool->namespace_events = true;
1508
1509         if (rec->opts.record_cgroup) {
1510 #ifdef HAVE_FILE_HANDLE
1511                 tool->cgroup_events = true;
1512 #else
1513                 pr_err("cgroup tracking is not supported\n");
1514                 return -1;
1515 #endif
1516         }
1517
1518         if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) {
1519                 signal(SIGUSR2, snapshot_sig_handler);
1520                 if (rec->opts.auxtrace_snapshot_mode)
1521                         trigger_on(&auxtrace_snapshot_trigger);
1522                 if (rec->switch_output.enabled)
1523                         trigger_on(&switch_output_trigger);
1524         } else {
1525                 signal(SIGUSR2, SIG_IGN);
1526         }
1527
1528         session = perf_session__new(data, false, tool);
1529         if (IS_ERR(session)) {
1530                 pr_err("Perf session creation failed.\n");
1531                 return PTR_ERR(session);
1532         }
1533
1534         fd = perf_data__fd(data);
1535         rec->session = session;
1536
1537         if (zstd_init(&session->zstd_data, rec->opts.comp_level) < 0) {
1538                 pr_err("Compression initialization failed.\n");
1539                 return -1;
1540         }
1541
1542         session->header.env.comp_type  = PERF_COMP_ZSTD;
1543         session->header.env.comp_level = rec->opts.comp_level;
1544
1545         if (rec->opts.kcore &&
1546             !record__kcore_readable(&session->machines.host)) {
1547                 pr_err("ERROR: kcore is not readable.\n");
1548                 return -1;
1549         }
1550
1551         record__init_features(rec);
1552
1553         if (rec->opts.use_clockid && rec->opts.clockid_res_ns)
1554                 session->header.env.clockid_res_ns = rec->opts.clockid_res_ns;
1555
1556         if (forks) {
1557                 err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
1558                                                     argv, data->is_pipe,
1559                                                     workload_exec_failed_signal);
1560                 if (err < 0) {
1561                         pr_err("Couldn't run the workload!\n");
1562                         status = err;
1563                         goto out_delete_session;
1564                 }
1565         }
1566
1567         /*
1568          * If we have just a single event and are sending data
1569          * through a pipe, we need to force the id allocation,
1570          * because we synthesize the event name through the pipe
1571          * and need the id for that.
1572          */
1573         if (data->is_pipe && rec->evlist->core.nr_entries == 1)
1574                 rec->opts.sample_id = true;
1575
1576         if (record__open(rec) != 0) {
1577                 err = -1;
1578                 goto out_child;
1579         }
1580         session->header.env.comp_mmap_len = session->evlist->core.mmap_len;
1581
1582         if (rec->opts.kcore) {
1583                 err = record__kcore_copy(&session->machines.host, data);
1584                 if (err) {
1585                         pr_err("ERROR: Failed to copy kcore\n");
1586                         goto out_child;
1587                 }
1588         }
1589
1590         err = bpf__apply_obj_config();
1591         if (err) {
1592                 char errbuf[BUFSIZ];
1593
1594                 bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
1595                 pr_err("ERROR: Apply config to BPF failed: %s\n",
1596                          errbuf);
1597                 goto out_child;
1598         }
1599
1600         /*
1601          * Normally perf_session__new would do this, but it doesn't have the
1602          * evlist.
1603          */
1604         if (rec->tool.ordered_events && !perf_evlist__sample_id_all(rec->evlist)) {
1605                 pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
1606                 rec->tool.ordered_events = false;
1607         }
1608
1609         if (!rec->evlist->nr_groups)
1610                 perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);
1611
1612         if (data->is_pipe) {
1613                 err = perf_header__write_pipe(fd);
1614                 if (err < 0)
1615                         goto out_child;
1616         } else {
1617                 err = perf_session__write_header(session, rec->evlist, fd, false);
1618                 if (err < 0)
1619                         goto out_child;
1620         }
1621
1622         err = -1;
1623         if (!rec->no_buildid
1624             && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
1625                 pr_err("Couldn't generate buildids. "
1626                        "Use --no-buildid to profile anyway.\n");
1627                 goto out_child;
1628         }
1629
1630         err = record__setup_sb_evlist(rec);
1631         if (err)
1632                 goto out_child;
1633
1634         err = record__synthesize(rec, false);
1635         if (err < 0)
1636                 goto out_child;
1637
1638         if (rec->realtime_prio) {
1639                 struct sched_param param;
1640
1641                 param.sched_priority = rec->realtime_prio;
1642                 if (sched_setscheduler(0, SCHED_FIFO, &param)) {
1643                         pr_err("Could not set realtime priority.\n");
1644                         err = -1;
1645                         goto out_child;
1646                 }
1647         }
1648
1649         /*
1650          * When perf is starting the traced process, all the events
1651          * (apart from group members) have enable_on_exec=1 set,
1652          * so don't spoil it by prematurely enabling them.
1653          */
1654         if (!target__none(&opts->target) && !opts->initial_delay)
1655                 evlist__enable(rec->evlist);
1656
1657         /*
1658          * Let the child rip
1659          */
1660         if (forks) {
1661                 struct machine *machine = &session->machines.host;
1662                 union perf_event *event;
1663                 pid_t tgid;
1664
1665                 event = malloc(sizeof(event->comm) + machine->id_hdr_size);
1666                 if (event == NULL) {
1667                         err = -ENOMEM;
1668                         goto out_child;
1669                 }
1670
1671                 /*
1672                  * Some H/W events are generated before the COMM event,
1673                  * which is emitted during exec(), so perf script
1674                  * cannot see a correct process name for those events.
1675                  * Synthesize a COMM event to prevent it.
1676                  */
1677                 tgid = perf_event__synthesize_comm(tool, event,
1678                                                    rec->evlist->workload.pid,
1679                                                    process_synthesized_event,
1680                                                    machine);
1681                 free(event);
1682
1683                 if (tgid == -1)
1684                         goto out_child;
1685
1686                 event = malloc(sizeof(event->namespaces) +
1687                                (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
1688                                machine->id_hdr_size);
1689                 if (event == NULL) {
1690                         err = -ENOMEM;
1691                         goto out_child;
1692                 }
1693
1694                 /*
1695                  * Synthesize NAMESPACES event for the command specified.
1696                  */
1697                 perf_event__synthesize_namespaces(tool, event,
1698                                                   rec->evlist->workload.pid,
1699                                                   tgid, process_synthesized_event,
1700                                                   machine);
1701                 free(event);
1702
1703                 perf_evlist__start_workload(rec->evlist);
1704         }
1705
1706         if (opts->initial_delay) {
1707                 usleep(opts->initial_delay * USEC_PER_MSEC);
1708                 evlist__enable(rec->evlist);
1709         }
1710
1711         trigger_ready(&auxtrace_snapshot_trigger);
1712         trigger_ready(&switch_output_trigger);
1713         perf_hooks__invoke_record_start();
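        /*
         * Main event loop: flush the mmap ring buffers, service auxtrace
         * snapshot and switch-output triggers, and poll for new data until
         * the workload exits or recording is interrupted.
         */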
1714         for (;;) {
1715                 unsigned long long hits = rec->samples;
1716
1717                 /*
1718                  * rec->evlist->bkw_mmap_state may be BKW_MMAP_EMPTY
1719                  * here: when done == true and hits != rec->samples
1720                  * in the previous round.
1721                  *
1722                  * perf_evlist__toggle_bkw_mmap ensures we never
1723                  * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING.
1724                  */
1725                 if (trigger_is_hit(&switch_output_trigger) || done || draining)
1726                         perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING);
1727
1728                 if (record__mmap_read_all(rec, false) < 0) {
1729                         trigger_error(&auxtrace_snapshot_trigger);
1730                         trigger_error(&switch_output_trigger);
1731                         err = -1;
1732                         goto out_child;
1733                 }
1734
1735                 if (auxtrace_record__snapshot_started) {
1736                         auxtrace_record__snapshot_started = 0;
1737                         if (!trigger_is_error(&auxtrace_snapshot_trigger))
1738                                 record__read_auxtrace_snapshot(rec, false);
1739                         if (trigger_is_error(&auxtrace_snapshot_trigger)) {
1740                                 pr_err("AUX area tracing snapshot failed\n");
1741                                 err = -1;
1742                                 goto out_child;
1743                         }
1744                 }
1745
1746                 if (trigger_is_hit(&switch_output_trigger)) {
1747                         /*
1748                          * If switch_output_trigger is hit, the data in
1749                          * overwritable ring buffer should have been collected,
1750                          * so bkw_mmap_state should be set to BKW_MMAP_EMPTY.
1751                          *
1752                  * If SIGUSR2 is raised after or during record__mmap_read_all(),
1753                  * record__mmap_read_all() didn't collect data from the
1754                  * overwritable ring buffer. Read again.
1755                          */
1756                         if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING)
1757                                 continue;
1758                         trigger_ready(&switch_output_trigger);
1759
1760                         /*
1761                  * Re-enable events in the overwrite ring buffer after
1762                  * record__mmap_read_all(): we should have collected
1763                  * data from it.
1764                          */
1765                         perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING);
1766
1767                         if (!quiet)
1768                                 fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
1769                                         waking);
1770                         waking = 0;
1771                         fd = record__switch_output(rec, false);
1772                         if (fd < 0) {
1773                                 pr_err("Failed to switch to new file\n");
1774                                 trigger_error(&switch_output_trigger);
1775                                 err = fd;
1776                                 goto out_child;
1777                         }
1778
1779                         /* re-arm the alarm */
1780                         if (rec->switch_output.time)
1781                                 alarm(rec->switch_output.time);
1782                 }
1783
1784                 if (hits == rec->samples) {
1785                         if (done || draining)
1786                                 break;
1787                         err = evlist__poll(rec->evlist, -1);
1788                         /*
1789                          * Propagate error, only if there's any. Ignore positive
1790                          * number of returned events and interrupt error.
1791                          */
1792                         if (err > 0 || (err < 0 && errno == EINTR))
1793                                 err = 0;
1794                         waking++;
1795
1796                         if (evlist__filter_pollfd(rec->evlist, POLLERR | POLLHUP) == 0)
1797                                 draining = true;
1798                 }
1799
1800                 /*
1801                  * When perf is starting the traced process, the events die with
1802                  * the process at the end and we wait for that. Thus there is no
1803                  * need to disable events in this case.
1804                  */
1805                 if (done && !disabled && !target__none(&opts->target)) {
1806                         trigger_off(&auxtrace_snapshot_trigger);
1807                         evlist__disable(rec->evlist);
1808                         disabled = true;
1809                 }
1810         }
1811
1812         trigger_off(&auxtrace_snapshot_trigger);
1813         trigger_off(&switch_output_trigger);
1814
1815         if (opts->auxtrace_snapshot_on_exit)
1816                 record__auxtrace_snapshot_exit(rec);
1817
1818         if (forks && workload_exec_errno) {
1819                 char msg[STRERR_BUFSIZE];
1820                 const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
1821                 pr_err("Workload failed: %s\n", emsg);
1822                 err = -1;
1823                 goto out_child;
1824         }
1825
1826         if (!quiet)
1827                 fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);
1828
1829         if (target__none(&rec->opts.target))
1830                 record__synthesize_workload(rec, true);
1831
1832 out_child:
1833         record__mmap_read_all(rec, true);
1834         record__aio_mmap_read_sync(rec);
1835
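        /*
         * If compression was used, store the achieved ratio in the header
         * env; the + 0.5 rounds it to the nearest integer.
         */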
1836         if (rec->session->bytes_transferred && rec->session->bytes_compressed) {
1837                 ratio = (float)rec->session->bytes_transferred/(float)rec->session->bytes_compressed;
1838                 session->header.env.comp_ratio = ratio + 0.5;
1839         }
1840
1841         if (forks) {
1842                 int exit_status;
1843
1844                 if (!child_finished)
1845                         kill(rec->evlist->workload.pid, SIGTERM);
1846
1847                 wait(&exit_status);
1848
1849                 if (err < 0)
1850                         status = err;
1851                 else if (WIFEXITED(exit_status))
1852                         status = WEXITSTATUS(exit_status);
1853                 else if (WIFSIGNALED(exit_status))
1854                         signr = WTERMSIG(exit_status);
1855         } else
1856                 status = err;
1857
1858         record__synthesize(rec, true);
1859         /* this will be recalculated during process_buildids() */
1860         rec->samples = 0;
1861
1862         if (!err) {
1863                 if (!rec->timestamp_filename) {
1864                         record__finish_output(rec);
1865                 } else {
1866                         fd = record__switch_output(rec, true);
1867                         if (fd < 0) {
1868                                 status = fd;
1869                                 goto out_delete_session;
1870                         }
1871                 }
1872         }
1873
1874         perf_hooks__invoke_record_end();
1875
1876         if (!err && !quiet) {
1877                 char samples[128];
1878                 const char *postfix = rec->timestamp_filename ?
1879                                         ".<timestamp>" : "";
1880
1881                 if (rec->samples && !rec->opts.full_auxtrace)
1882                         scnprintf(samples, sizeof(samples),
1883                                   " (%" PRIu64 " samples)", rec->samples);
1884                 else
1885                         samples[0] = '\0';
1886
1887                 fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s",
1888                         perf_data__size(data) / 1024.0 / 1024.0,
1889                         data->path, postfix, samples);
1890                 if (ratio) {
1891                         fprintf(stderr, ", compressed (original %.3f MB, ratio is %.3f)",
1892                                         rec->session->bytes_transferred / 1024.0 / 1024.0,
1893                                         ratio);
1894                 }
1895                 fprintf(stderr, " ]\n");
1896         }
1897
1898 out_delete_session:
1899         zstd_fini(&session->zstd_data);
1900         perf_session__delete(session);
1901
1902         if (!opts->no_bpf_event)
1903                 perf_evlist__stop_sb_thread(rec->sb_evlist);
1904         return status;
1905 }
1906
1907 static void callchain_debug(struct callchain_param *callchain)
1908 {
1909         static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };
1910
1911         pr_debug("callchain: type %s\n", str[callchain->record_mode]);
1912
1913         if (callchain->record_mode == CALLCHAIN_DWARF)
1914                 pr_debug("callchain: stack dump size %d\n",
1915                          callchain->dump_size);
1916 }
1917
1918 int record_opts__parse_callchain(struct record_opts *record,
1919                                  struct callchain_param *callchain,
1920                                  const char *arg, bool unset)
1921 {
1922         int ret;
1923         callchain->enabled = !unset;
1924
1925         /* --no-call-graph */
1926         if (unset) {
1927                 callchain->record_mode = CALLCHAIN_NONE;
1928                 pr_debug("callchain: disabled\n");
1929                 return 0;
1930         }
1931
1932         ret = parse_callchain_record_opt(arg, callchain);
1933         if (!ret) {
1934                 /* Enable data address sampling for DWARF unwind. */
1935                 if (callchain->record_mode == CALLCHAIN_DWARF)
1936                         record->sample_address = true;
1937                 callchain_debug(callchain);
1938         }
1939
1940         return ret;
1941 }
1942
1943 int record_parse_callchain_opt(const struct option *opt,
1944                                const char *arg,
1945                                int unset)
1946 {
1947         return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
1948 }
1949
1950 int record_callchain_opt(const struct option *opt,
1951                          const char *arg __maybe_unused,
1952                          int unset __maybe_unused)
1953 {
1954         struct callchain_param *callchain = opt->value;
1955
1956         callchain->enabled = true;
1957
1958         if (callchain->record_mode == CALLCHAIN_NONE)
1959                 callchain->record_mode = CALLCHAIN_FP;
1960
1961         callchain_debug(callchain);
1962         return 0;
1963 }
1964
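/*
 * perf_config() callback handling the 'record.*' perfconfig variables:
 * record.build-id, record.call-graph and, with AIO support, record.aio.
 */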
1965 static int perf_record_config(const char *var, const char *value, void *cb)
1966 {
1967         struct record *rec = cb;
1968
1969         if (!strcmp(var, "record.build-id")) {
1970                 if (!strcmp(value, "cache"))
1971                         rec->no_buildid_cache = false;
1972                 else if (!strcmp(value, "no-cache"))
1973                         rec->no_buildid_cache = true;
1974                 else if (!strcmp(value, "skip"))
1975                         rec->no_buildid = true;
1976                 else
1977                         return -1;
1978                 return 0;
1979         }
1980         if (!strcmp(var, "record.call-graph")) {
1981                 var = "call-graph.record-mode";
1982                 return perf_default_config(var, value, cb);
1983         }
1984 #ifdef HAVE_AIO_SUPPORT
1985         if (!strcmp(var, "record.aio")) {
1986                 rec->opts.nr_cblocks = strtol(value, NULL, 0);
1987                 if (!rec->opts.nr_cblocks)
1988                         rec->opts.nr_cblocks = nr_cblocks_default;
1989         }
1990 #endif
1991
1992         return 0;
1993 }
1994
1995 struct clockid_map {
1996         const char *name;
1997         int clockid;
1998 };
1999
2000 #define CLOCKID_MAP(n, c)       \
2001         { .name = n, .clockid = (c), }
2002
2003 #define CLOCKID_END     { .name = NULL, }
2004
2005
2006 /*
2007  * Add the missing ones; we need to build on many distros...
2008  */
2009 #ifndef CLOCK_MONOTONIC_RAW
2010 #define CLOCK_MONOTONIC_RAW 4
2011 #endif
2012 #ifndef CLOCK_BOOTTIME
2013 #define CLOCK_BOOTTIME 7
2014 #endif
2015 #ifndef CLOCK_TAI
2016 #define CLOCK_TAI 11
2017 #endif
2018
2019 static const struct clockid_map clockids[] = {
2020         /* available for all events, NMI safe */
2021         CLOCKID_MAP("monotonic", CLOCK_MONOTONIC),
2022         CLOCKID_MAP("monotonic_raw", CLOCK_MONOTONIC_RAW),
2023
2024         /* available for some events */
2025         CLOCKID_MAP("realtime", CLOCK_REALTIME),
2026         CLOCKID_MAP("boottime", CLOCK_BOOTTIME),
2027         CLOCKID_MAP("tai", CLOCK_TAI),
2028
2029         /* available for the lazy */
2030         CLOCKID_MAP("mono", CLOCK_MONOTONIC),
2031         CLOCKID_MAP("raw", CLOCK_MONOTONIC_RAW),
2032         CLOCKID_MAP("real", CLOCK_REALTIME),
2033         CLOCKID_MAP("boot", CLOCK_BOOTTIME),
2034
2035         CLOCKID_END,
2036 };
2037
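/*
 * Query the resolution of the selected clock so it can be saved as
 * clockid_res_ns in the perf.data header; failure is only a warning.
 */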
2038 static int get_clockid_res(clockid_t clk_id, u64 *res_ns)
2039 {
2040         struct timespec res;
2041
2042         *res_ns = 0;
2043         if (!clock_getres(clk_id, &res))
2044                 *res_ns = res.tv_nsec + res.tv_sec * NSEC_PER_SEC;
2045         else
2046                 pr_warning("WARNING: Failed to determine specified clock resolution.\n");
2047
2048         return 0;
2049 }
2050
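/*
 * Parse --clockid: accept either a raw clockid number or a case-insensitive
 * name from the clockids[] table, optionally prefixed with "CLOCK_".
 */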
2051 static int parse_clockid(const struct option *opt, const char *str, int unset)
2052 {
2053         struct record_opts *opts = (struct record_opts *)opt->value;
2054         const struct clockid_map *cm;
2055         const char *ostr = str;
2056
2057         if (unset) {
2058                 opts->use_clockid = 0;
2059                 return 0;
2060         }
2061
2062         /* no arg passed */
2063         if (!str)
2064                 return 0;
2065
2066         /* no setting it twice */
2067         if (opts->use_clockid)
2068                 return -1;
2069
2070         opts->use_clockid = true;
2071
2072         /* if it's a number, we're done */
2073         if (sscanf(str, "%d", &opts->clockid) == 1)
2074                 return get_clockid_res(opts->clockid, &opts->clockid_res_ns);
2075
2076         /* allow a "CLOCK_" prefix to the name */
2077         if (!strncasecmp(str, "CLOCK_", 6))
2078                 str += 6;
2079
2080         for (cm = clockids; cm->name; cm++) {
2081                 if (!strcasecmp(str, cm->name)) {
2082                         opts->clockid = cm->clockid;
2083                         return get_clockid_res(opts->clockid,
2084                                                &opts->clockid_res_ns);
2085                 }
2086         }
2087
2088         opts->use_clockid = false;
2089         ui__warning("unknown clockid %s, check man page\n", ostr);
2090         return -1;
2091 }
2092
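/* Parse --affinity=node|cpu; anything else keeps the default system affinity. */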
2093 static int record__parse_affinity(const struct option *opt, const char *str, int unset)
2094 {
2095         struct record_opts *opts = (struct record_opts *)opt->value;
2096
2097         if (unset || !str)
2098                 return 0;
2099
2100         if (!strcasecmp(str, "node"))
2101                 opts->affinity = PERF_AFFINITY_NODE;
2102         else if (!strcasecmp(str, "cpu"))
2103                 opts->affinity = PERF_AFFINITY_CPU;
2104
2105         return 0;
2106 }
2107
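/* Parse the --max-size limit, accepting B/K/M/G suffixes. */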
2108 static int parse_output_max_size(const struct option *opt,
2109                                  const char *str, int unset)
2110 {
2111         unsigned long *s = (unsigned long *)opt->value;
2112         static struct parse_tag tags_size[] = {
2113                 { .tag  = 'B', .mult = 1       },
2114                 { .tag  = 'K', .mult = 1 << 10 },
2115                 { .tag  = 'M', .mult = 1 << 20 },
2116                 { .tag  = 'G', .mult = 1 << 30 },
2117                 { .tag  = 0 },
2118         };
2119         unsigned long val;
2120
2121         if (unset) {
2122                 *s = 0;
2123                 return 0;
2124         }
2125
2126         val = parse_tag_value(str, tags_size);
2127         if (val != (unsigned long) -1) {
2128                 *s = val;
2129                 return 0;
2130         }
2131
2132         return -1;
2133 }
2134
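/*
 * Parse --mmap-pages=pages[,pages]: the first value sets the data mmap size,
 * the optional second one the AUX area tracing mmap size.
 */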
2135 static int record__parse_mmap_pages(const struct option *opt,
2136                                     const char *str,
2137                                     int unset __maybe_unused)
2138 {
2139         struct record_opts *opts = opt->value;
2140         char *s, *p;
2141         unsigned int mmap_pages;
2142         int ret;
2143
2144         if (!str)
2145                 return -EINVAL;
2146
2147         s = strdup(str);
2148         if (!s)
2149                 return -ENOMEM;
2150
2151         p = strchr(s, ',');
2152         if (p)
2153                 *p = '\0';
2154
2155         if (*s) {
2156                 ret = __perf_evlist__parse_mmap_pages(&mmap_pages, s);
2157                 if (ret)
2158                         goto out_free;
2159                 opts->mmap_pages = mmap_pages;
2160         }
2161
2162         if (!p) {
2163                 ret = 0;
2164                 goto out_free;
2165         }
2166
2167         ret = __perf_evlist__parse_mmap_pages(&mmap_pages, p + 1);
2168         if (ret)
2169                 goto out_free;
2170
2171         opts->auxtrace_mmap_pages = mmap_pages;
2172
2173 out_free:
2174         free(s);
2175         return ret;
2176 }
2177
2178 static void switch_output_size_warn(struct record *rec)
2179 {
2180         u64 wakeup_size = evlist__mmap_size(rec->opts.mmap_pages);
2181         struct switch_output *s = &rec->switch_output;
2182
2183         wakeup_size /= 2;
2184
2185         if (s->size < wakeup_size) {
2186                 char buf[100];
2187
2188                 unit_number__scnprintf(buf, sizeof(buf), wakeup_size);
2189                 pr_warning("WARNING: switch-output data size lower than "
2190                            "wakeup kernel buffer size (%s), "
2191                            "expect bigger perf.data sizes\n", buf);
2192         }
2193 }
2194
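/*
 * Interpret --switch-output: "signal" arms switching on SIGUSR2, while a
 * size[BKMG] or time[smhd] value switches output when that threshold is
 * crossed. Enabling any of these also turns on timestamped output filenames.
 */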
2195 static int switch_output_setup(struct record *rec)
2196 {
2197         struct switch_output *s = &rec->switch_output;
2198         static struct parse_tag tags_size[] = {
2199                 { .tag  = 'B', .mult = 1       },
2200                 { .tag  = 'K', .mult = 1 << 10 },
2201                 { .tag  = 'M', .mult = 1 << 20 },
2202                 { .tag  = 'G', .mult = 1 << 30 },
2203                 { .tag  = 0 },
2204         };
2205         static struct parse_tag tags_time[] = {
2206                 { .tag  = 's', .mult = 1        },
2207                 { .tag  = 'm', .mult = 60       },
2208                 { .tag  = 'h', .mult = 60*60    },
2209                 { .tag  = 'd', .mult = 60*60*24 },
2210                 { .tag  = 0 },
2211         };
2212         unsigned long val;
2213
2214         /*
2215          * If we're using --switch-output-event, then we imply
2216          * --switch-output=signal, as we'll send a SIGUSR2 from the side band
2217          * thread to its parent.
2218          */
2219         if (rec->switch_output_event_set)
2220                 goto do_signal;
2221
2222         if (!s->set)
2223                 return 0;
2224
2225         if (!strcmp(s->str, "signal")) {
2226 do_signal:
2227                 s->signal = true;
2228                 pr_debug("switch-output with SIGUSR2 signal\n");
2229                 goto enabled;
2230         }
2231
2232         val = parse_tag_value(s->str, tags_size);
2233         if (val != (unsigned long) -1) {
2234                 s->size = val;
2235                 pr_debug("switch-output with %s size threshold\n", s->str);
2236                 goto enabled;
2237         }
2238
2239         val = parse_tag_value(s->str, tags_time);
2240         if (val != (unsigned long) -1) {
2241                 s->time = val;
2242                 pr_debug("switch-output with %s time threshold (%lu seconds)\n",
2243                          s->str, s->time);
2244                 goto enabled;
2245         }
2246
2247         return -1;
2248
2249 enabled:
2250         rec->timestamp_filename = true;
2251         s->enabled              = true;
2252
2253         if (s->size && !rec->opts.no_buffering)
2254                 switch_output_size_warn(rec);
2255
2256         return 0;
2257 }
2258
2259 static const char * const __record_usage[] = {
2260         "perf record [<options>] [<command>]",
2261         "perf record [<options>] -- <command> [<options>]",
2262         NULL
2263 };
2264 const char * const *record_usage = __record_usage;
2265
2266 static int build_id__process_mmap(struct perf_tool *tool, union perf_event *event,
2267                                   struct perf_sample *sample, struct machine *machine)
2268 {
2269         /*
2270          * We already have the kernel maps, put in place via perf_session__create_kernel_maps(),
2271          * so there is no need to add them twice.
2272          */
2273         if (!(event->header.misc & PERF_RECORD_MISC_USER))
2274                 return 0;
2275         return perf_event__process_mmap(tool, event, sample, machine);
2276 }
2277
2278 static int build_id__process_mmap2(struct perf_tool *tool, union perf_event *event,
2279                                    struct perf_sample *sample, struct machine *machine)
2280 {
2281         /*
2282          * We already have the kernel maps, put in place via perf_session__create_kernel_maps(),
2283          * so there is no need to add them twice.
2284          */
2285         if (!(event->header.misc & PERF_RECORD_MISC_USER))
2286                 return 0;
2287
2288         return perf_event__process_mmap2(tool, event, sample, machine);
2289 }
2290
2291 /*
2292  * XXX Ideally this would be local to cmd_record() and passed to a record__new
2293  * because we need to have access to it in record__exit, which is called
2294  * after cmd_record() exits, but since record_options needs to be accessible to
2295  * builtin-script, leave it here.
2296  *
2297  * At least we don't touch it in all the other functions here directly.
2298  *
2299  * Just say no to tons of global variables, sigh.
2300  */
2301 static struct record record = {
2302         .opts = {
2303                 .sample_time         = true,
2304                 .mmap_pages          = UINT_MAX,
2305                 .user_freq           = UINT_MAX,
2306                 .user_interval       = ULLONG_MAX,
2307                 .freq                = 4000,
2308                 .target              = {
2309                         .uses_mmap   = true,
2310                         .default_per_cpu = true,
2311                 },
2312                 .mmap_flush          = MMAP_FLUSH_DEFAULT,
2313                 .nr_threads_synthesize = 1,
2314         },
2315         .tool = {
2316                 .sample         = process_sample_event,
2317                 .fork           = perf_event__process_fork,
2318                 .exit           = perf_event__process_exit,
2319                 .comm           = perf_event__process_comm,
2320                 .namespaces     = perf_event__process_namespaces,
2321                 .mmap           = build_id__process_mmap,
2322                 .mmap2          = build_id__process_mmap2,
2323                 .ordered_events = true,
2324         },
2325 };
2326
2327 const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
2328         "\n\t\t\t\tDefault: fp";
2329
2330 static bool dry_run;
2331
2332 /*
2333  * XXX Will stay a global variable till we fix builtin-script.c to stop messing
2334  * with it and switch to using the library functions in perf_evlist that came
2335  * from builtin-record.c, i.e. use record_opts,
2336  * perf_evlist__prepare_workload, etc. instead of fork+exec'ing 'perf record',
2337  * using pipes, etc.
2338  */
2339 static struct option __record_options[] = {
2340         OPT_CALLBACK('e', "event", &record.evlist, "event",
2341                      "event selector. use 'perf list' to list available events",
2342                      parse_events_option),
2343         OPT_CALLBACK(0, "filter", &record.evlist, "filter",
2344                      "event filter", parse_filter),
2345         OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
2346                            NULL, "don't record events from perf itself",
2347                            exclude_perf),
2348         OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
2349                     "record events on existing process id"),
2350         OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
2351                     "record events on existing thread id"),
2352         OPT_INTEGER('r', "realtime", &record.realtime_prio,
2353                     "collect data with this RT SCHED_FIFO priority"),
2354         OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
2355                     "collect data without buffering"),
2356         OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
2357                     "collect raw sample records from all opened counters"),
2358         OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
2359                             "system-wide collection from all CPUs"),
2360         OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
2361                     "list of cpus to monitor"),
2362         OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
2363         OPT_STRING('o', "output", &record.data.path, "file",
2364                     "output file name"),
2365         OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
2366                         &record.opts.no_inherit_set,
2367                         "child tasks do not inherit counters"),
2368         OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize,
2369                     "synthesize non-sample events at the end of output"),
2370         OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"),
2371         OPT_BOOLEAN(0, "no-bpf-event", &record.opts.no_bpf_event, "do not record bpf events"),
2372         OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq,
2373                     "Fail if the specified frequency can't be used"),
2374         OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'",
2375                      "profile at this frequency",
2376                       record__parse_freq),
2377         OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
2378                      "number of mmap data pages and AUX area tracing mmap pages",
2379                      record__parse_mmap_pages),
2380         OPT_CALLBACK(0, "mmap-flush", &record.opts, "number",
2381                      "Minimal number of bytes that is extracted from mmap data pages (default: 1)",
2382                      record__mmap_flush_parse),
2383         OPT_BOOLEAN(0, "group", &record.opts.group,
2384                     "put the counters into a counter group"),
2385         OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
2386                            NULL, "enables call-graph recording" ,
2387                            &record_callchain_opt),
2388         OPT_CALLBACK(0, "call-graph", &record.opts,
2389                      "record_mode[,record_size]", record_callchain_help,
2390                      &record_parse_callchain_opt),
2391         OPT_INCR('v', "verbose", &verbose,
2392                     "be more verbose (show counter open errors, etc)"),
2393         OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
2394         OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
2395                     "per thread counts"),
2396         OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
2397         OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr,
2398                     "Record the sample physical addresses"),
2399         OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"),
2400         OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
2401                         &record.opts.sample_time_set,
2402                         "Record the sample timestamps"),
2403         OPT_BOOLEAN_SET('P', "period", &record.opts.period, &record.opts.period_set,
2404                         "Record the sample period"),
2405         OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
2406                     "don't sample"),
2407         OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache,
2408                         &record.no_buildid_cache_set,
2409                         "do not update the buildid cache"),
2410         OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid,
2411                         &record.no_buildid_set,
2412                         "do not collect buildids in perf.data"),
2413         OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
2414                      "monitor event in cgroup name only",
2415                      parse_cgroups),
2416         OPT_UINTEGER('D', "delay", &record.opts.initial_delay,
2417                   "ms to wait before starting measurement after program start"),
2418         OPT_BOOLEAN(0, "kcore", &record.opts.kcore, "copy /proc/kcore"),
2419         OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
2420                    "user to profile"),
2421
2422         OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
2423                      "branch any", "sample any taken branches",
2424                      parse_branch_stack),
2425
2426         OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
2427                      "branch filter mask", "branch stack filter modes",
2428                      parse_branch_stack),
2429         OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
2430                     "sample by weight (on special events only)"),
2431         OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
2432                     "sample transaction flags (special events only)"),
2433         OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
2434                     "use per-thread mmaps"),
2435         OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
2436                     "sample selected machine registers on interrupt,"
2437                     " use '-I?' to list register names", parse_intr_regs),
2438         OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register",
2439                     "sample selected machine registers on interrupt,"
2440                     " use '--user-regs=?' to list register names", parse_user_regs),
2441         OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
2442                     "Record running/enabled time of read (:S) events"),
2443         OPT_CALLBACK('k', "clockid", &record.opts,
2444         "clockid", "clockid to use for events, see clock_gettime()",
2445         parse_clockid),
2446         OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
2447                           "opts", "AUX area tracing Snapshot Mode", ""),
2448         OPT_STRING_OPTARG(0, "aux-sample", &record.opts.auxtrace_sample_opts,
2449                           "opts", "sample AUX area", ""),
2450         OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout,
2451                         "per thread proc mmap processing timeout in ms"),
2452         OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces,
2453                     "Record namespaces events"),
2454         OPT_BOOLEAN(0, "all-cgroups", &record.opts.record_cgroup,
2455                     "Record cgroup events"),
2456         OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events,
2457                     "Record context switch events"),
2458         OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
2459                          "Configure all used events to run in kernel space.",
2460                          PARSE_OPT_EXCLUSIVE),
2461         OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user,
2462                          "Configure all used events to run in user space.",
2463                          PARSE_OPT_EXCLUSIVE),
2464         OPT_BOOLEAN(0, "kernel-callchains", &record.opts.kernel_callchains,
2465                     "collect kernel callchains"),
2466         OPT_BOOLEAN(0, "user-callchains", &record.opts.user_callchains,
2467                     "collect user callchains"),
2468         OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path",
2469                    "clang binary to use for compiling BPF scriptlets"),
2470         OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options",
2471                    "options passed to clang when compiling BPF scriptlets"),
2472         OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
2473                    "file", "vmlinux pathname"),
2474         OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
2475                     "Record build-id of all DSOs regardless of hits"),
2476         OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
2477                     "append timestamp to output filename"),
2478         OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary,
2479                     "Record timestamp boundary (time of first/last samples)"),
2480         OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str,
2481                           &record.switch_output.set, "signal or size[BKMG] or time[smhd]",
2482                           "Switch output when receiving SIGUSR2 (signal) or when crossing a size or time threshold",
2483                           "signal"),
2484         OPT_CALLBACK_SET(0, "switch-output-event", &record.sb_evlist, &record.switch_output_event_set, "switch output event",
2485                          "switch output event selector. use 'perf list' to list available events",
2486                          parse_events_option_new_evlist),
2487         OPT_INTEGER(0, "switch-max-files", &record.switch_output.num_files,
2488                    "Limit number of switch output generated files"),
2489         OPT_BOOLEAN(0, "dry-run", &dry_run,
2490                     "Parse options then exit"),
2491 #ifdef HAVE_AIO_SUPPORT
2492         OPT_CALLBACK_OPTARG(0, "aio", &record.opts,
2493                      &nr_cblocks_default, "n", "Use <n> control blocks in asynchronous trace writing mode (default: 1, max: 4)",
2494                      record__aio_parse),
2495 #endif
2496         OPT_CALLBACK(0, "affinity", &record.opts, "node|cpu",
2497                      "Set affinity mask of trace reading thread to NUMA node cpu mask or cpu of processed mmap buffer",
2498                      record__parse_affinity),
2499 #ifdef HAVE_ZSTD_SUPPORT
2500         OPT_CALLBACK_OPTARG('z', "compression-level", &record.opts, &comp_level_default,
2501                             "n", "Compress records using the specified level (default: 1 - fastest compression, 22 - greatest compression)",
2502                             record__parse_comp_level),
2503 #endif
2504         OPT_CALLBACK(0, "max-size", &record.output_max_size,
2505                      "size", "Limit the maximum size of the output file", parse_output_max_size),
2506         OPT_UINTEGER(0, "num-thread-synthesize",
2507                      &record.opts.nr_threads_synthesize,
2508                      "number of threads to run for event synthesis"),
2509         OPT_END()
2510 };
2511
2512 struct option *record_options = __record_options;
2513
2514 int cmd_record(int argc, const char **argv)
2515 {
2516         int err;
2517         struct record *rec = &record;
2518         char errbuf[BUFSIZ];
2519
2520         setlocale(LC_ALL, "");
2521
2522 #ifndef HAVE_LIBBPF_SUPPORT
2523 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c)
2524         set_nobuild('\0', "clang-path", true);
2525         set_nobuild('\0', "clang-opt", true);
2526 # undef set_nobuild
2527 #endif
2528
2529 #ifndef HAVE_BPF_PROLOGUE
2530 # if !defined (HAVE_DWARF_SUPPORT)
2531 #  define REASON  "NO_DWARF=1"
2532 # elif !defined (HAVE_LIBBPF_SUPPORT)
2533 #  define REASON  "NO_LIBBPF=1"
2534 # else
2535 #  define REASON  "this architecture doesn't support BPF prologue"
2536 # endif
2537 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c)
2538         set_nobuild('\0', "vmlinux", true);
2539 # undef set_nobuild
2540 # undef REASON
2541 #endif
2542
2543         rec->opts.affinity = PERF_AFFINITY_SYS;
2544
2545         rec->evlist = evlist__new();
2546         if (rec->evlist == NULL)
2547                 return -ENOMEM;
2548
2549         err = perf_config(perf_record_config, rec);
2550         if (err)
2551                 return err;
2552
2553         argc = parse_options(argc, argv, record_options, record_usage,
2554                             PARSE_OPT_STOP_AT_NON_OPTION);
2555         if (quiet)
2556                 perf_quiet_option();
2557
2558         /* Make system wide (-a) the default target. */
2559         if (!argc && target__none(&rec->opts.target))
2560                 rec->opts.target.system_wide = true;
2561
2562         if (nr_cgroups && !rec->opts.target.system_wide) {
2563                 usage_with_options_msg(record_usage, record_options,
2564                         "cgroup monitoring only available in system-wide mode");
2565
2566         }
2567
2568         if (rec->opts.kcore)
2569                 rec->data.is_dir = true;
2570
2571         if (rec->opts.comp_level != 0) {
2572                 pr_debug("Compression enabled, disabling build id collection at the end of the session.\n");
2573                 rec->no_buildid = true;
2574         }
2575
2576         if (rec->opts.record_switch_events &&
2577             !perf_can_record_switch_events()) {
2578                 ui__error("kernel does not support recording context switch events\n");
2579                 parse_options_usage(record_usage, record_options, "switch-events", 0);
2580                 return -EINVAL;
2581         }
2582
2583         if (switch_output_setup(rec)) {
2584                 parse_options_usage(record_usage, record_options, "switch-output", 0);
2585                 return -EINVAL;
2586         }
2587
2588         if (rec->switch_output.time) {
2589                 signal(SIGALRM, alarm_sig_handler);
2590                 alarm(rec->switch_output.time);
2591         }
2592
2593         if (rec->switch_output.num_files) {
2594                 rec->switch_output.filenames = calloc(rec->switch_output.num_files,
2595                                                       sizeof(char *));
2596                 if (!rec->switch_output.filenames)
2597                         return -EINVAL;
2598         }
2599
2600         /*
2601          * Allow aliases to facilitate the lookup of symbols for address
2602          * filters. Refer to auxtrace_parse_filters().
2603          */
2604         symbol_conf.allow_aliases = true;
2605
2606         symbol__init(NULL);
2607
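        /*
         * With --affinity=node|cpu the trace reading thread adjusts its CPU
         * affinity per processed mmap buffer, so pre-allocate the cpu bitmap
         * used to track that mask.
         */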
2608         if (rec->opts.affinity != PERF_AFFINITY_SYS) {
2609                 rec->affinity_mask.nbits = cpu__max_cpu();
2610                 rec->affinity_mask.bits = bitmap_alloc(rec->affinity_mask.nbits);
2611                 if (!rec->affinity_mask.bits) {
2612                         pr_err("Failed to allocate thread mask for %zd cpus\n", rec->affinity_mask.nbits);
2613                         return -ENOMEM;
2614                 }
2615                 pr_debug2("thread mask[%zd]: empty\n", rec->affinity_mask.nbits);
2616         }
2617
2618         err = record__auxtrace_init(rec);
2619         if (err)
2620                 goto out;
2621
2622         if (dry_run)
2623                 goto out;
2624
2625         err = bpf__setup_stdout(rec->evlist);
2626         if (err) {
2627                 bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf));
2628                 pr_err("ERROR: Setup BPF stdout failed: %s\n",
2629                          errbuf);
2630                 goto out;
2631         }
2632
2633         err = -ENOMEM;
2634
2635         if (rec->no_buildid_cache || rec->no_buildid) {
2636                 disable_buildid_cache();
2637         } else if (rec->switch_output.enabled) {
2638                 /*
2639                  * In 'perf record --switch-output', disable buildid
2640                  * generation by default to reduce data file switching
2641                  * overhead. Still generate buildid if they are required
2642                  * explicitly using
2643                  *
2644                  *  perf record --switch-output --no-no-buildid \
2645                  *              --no-no-buildid-cache
2646                  *
2647                  * Following code equals to:
2648                  *
2649                  * if ((rec->no_buildid || !rec->no_buildid_set) &&
2650                  *     (rec->no_buildid_cache || !rec->no_buildid_cache_set))
2651                  *         disable_buildid_cache();
2652                  */
2653                 bool disable = true;
2654
2655                 if (rec->no_buildid_set && !rec->no_buildid)
2656                         disable = false;
2657                 if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
2658                         disable = false;
2659                 if (disable) {
2660                         rec->no_buildid = true;
2661                         rec->no_buildid_cache = true;
2662                         disable_buildid_cache();
2663                 }
2664         }
2665
2666         if (record.opts.overwrite)
2667                 record.opts.tail_synthesize = true;
2668
2669         if (rec->evlist->core.nr_entries == 0 &&
2670             __perf_evlist__add_default(rec->evlist, !record.opts.no_samples) < 0) {
2671                 pr_err("Not enough memory for event selector list\n");
2672                 goto out;
2673         }
2674
2675         if (rec->opts.target.tid && !rec->opts.no_inherit_set)
2676                 rec->opts.no_inherit = true;
2677
2678         err = target__validate(&rec->opts.target);
2679         if (err) {
2680                 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
2681                 ui__warning("%s\n", errbuf);
2682         }
2683
2684         err = target__parse_uid(&rec->opts.target);
2685         if (err) {
2686                 int saved_errno = errno;
2687
2688                 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
2689                 ui__error("%s", errbuf);
2690
2691                 err = -saved_errno;
2692                 goto out;
2693         }
2694
2695         /* Enable ignoring missing threads when -u/-p option is defined. */
2696         rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX || rec->opts.target.pid;
2697
2698         err = -ENOMEM;
2699         if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
2700                 usage_with_options(record_usage, record_options);
2701
2702         err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
2703         if (err)
2704                 goto out;
2705
2706         /*
2707          * We take all buildids when the file contains
2708          * AUX area tracing data because we do not decode the
2709          * trace, as that would take too long.
2710          */
2711         if (rec->opts.full_auxtrace)
2712                 rec->buildid_all = true;
2713
2714         if (record_opts__config(&rec->opts)) {
2715                 err = -EINVAL;
2716                 goto out;
2717         }
2718
2719         if (rec->opts.nr_cblocks > nr_cblocks_max)
2720                 rec->opts.nr_cblocks = nr_cblocks_max;
2721         pr_debug("nr_cblocks: %d\n", rec->opts.nr_cblocks);
2722
2723         pr_debug("affinity: %s\n", affinity_tags[rec->opts.affinity]);
2724         pr_debug("mmap flush: %d\n", rec->opts.mmap_flush);
2725
2726         if (rec->opts.comp_level > comp_level_max)
2727                 rec->opts.comp_level = comp_level_max;
2728         pr_debug("comp level: %d\n", rec->opts.comp_level);
2729
2730         err = __cmd_record(&record, argc, argv);
2731 out:
2732         bitmap_free(rec->affinity_mask.bits);
2733         evlist__delete(rec->evlist);
2734         symbol__exit();
2735         auxtrace_record__free(rec->itr);
2736         return err;
2737 }
2738
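/*
 * SIGUSR2 handler: start an AUX area tracing snapshot and/or flag a
 * switch-output, depending on which triggers are currently armed.
 */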
2739 static void snapshot_sig_handler(int sig __maybe_unused)
2740 {
2741         struct record *rec = &record;
2742
2743         if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
2744                 trigger_hit(&auxtrace_snapshot_trigger);
2745                 auxtrace_record__snapshot_started = 1;
2746                 if (auxtrace_record__snapshot_start(record.itr))
2747                         trigger_error(&auxtrace_snapshot_trigger);
2748         }
2749
2750         if (switch_output_signal(rec))
2751                 trigger_hit(&switch_output_trigger);
2752 }
2753
2754 static void alarm_sig_handler(int sig __maybe_unused)
2755 {
2756         struct record *rec = &record;
2757
2758         if (switch_output_time(rec))
2759                 trigger_hit(&switch_output_trigger);
2760 }