// SPDX-License-Identifier: GPL-2.0
/*
 * builtin-record.c
 *
 * Builtin record command: Record the profile of a workload
 * (or a CPU, or a PID) into the perf.data output file - for
 * later analysis via perf report.
 */
#include "builtin.h"

#include "util/build-id.h"
#include <subcmd/parse-options.h>
#include "util/parse-events.h"
#include "util/config.h"

#include "util/callchain.h"
#include "util/cgroup.h"
#include "util/header.h"
#include "util/event.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/debug.h"
#include "util/mmap.h"
#include "util/target.h"
#include "util/session.h"
#include "util/tool.h"
#include "util/symbol.h"
#include "util/record.h"
#include "util/cpumap.h"
#include "util/thread_map.h"
#include "util/data.h"
#include "util/perf_regs.h"
#include "util/auxtrace.h"
#include "util/tsc.h"
#include "util/parse-branch-options.h"
#include "util/parse-regs-options.h"
#include "util/perf_api_probe.h"
#include "util/llvm-utils.h"
#include "util/bpf-loader.h"
#include "util/trigger.h"
#include "util/perf-hooks.h"
#include "util/cpu-set-sched.h"
#include "util/synthetic-events.h"
#include "util/time-utils.h"
#include "util/units.h"
#include "util/bpf-event.h"
#include "util/util.h"
#include "util/pfm.h"
#include "asm/bug.h"
#include "perf.h"

#include <errno.h>
#include <inttypes.h>
#include <locale.h>
#include <poll.h>
#include <pthread.h>
#include <unistd.h>
#include <sched.h>
#include <signal.h>
#ifdef HAVE_EVENTFD_SUPPORT
#include <sys/eventfd.h>
#endif
#include <sys/mman.h>
#include <sys/wait.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <linux/err.h>
#include <linux/string.h>
#include <linux/time64.h>
#include <linux/zalloc.h>
#include <linux/bitmap.h>
struct switch_output {
        bool             enabled;
        bool             signal;
        unsigned long    size;
        unsigned long    time;
        const char      *str;
        bool             set;
        char             **filenames;
        int              num_files;
        int              cur_file;
};

struct record {
        struct perf_tool        tool;
        struct record_opts      opts;
        u64                     bytes_written;
        struct perf_data        data;
        struct auxtrace_record  *itr;
        struct evlist   *evlist;
        struct perf_session     *session;
        struct evlist           *sb_evlist;
        pthread_t               thread_id;
        int                     realtime_prio;
        bool                    switch_output_event_set;
        bool                    no_buildid;
        bool                    no_buildid_set;
        bool                    no_buildid_cache;
        bool                    no_buildid_cache_set;
        bool                    buildid_all;
        bool                    timestamp_filename;
        bool                    timestamp_boundary;
        struct switch_output    switch_output;
        unsigned long long      samples;
        struct mmap_cpu_mask    affinity_mask;
        unsigned long           output_max_size;        /* = 0: unlimited */
};

static volatile int done;

static volatile int auxtrace_record__snapshot_started;
static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
static DEFINE_TRIGGER(switch_output_trigger);

static const char *affinity_tags[PERF_AFFINITY_MAX] = {
        "SYS", "NODE", "CPU"
};

static bool switch_output_signal(struct record *rec)
{
        return rec->switch_output.signal &&
               trigger_is_ready(&switch_output_trigger);
}

static bool switch_output_size(struct record *rec)
{
        return rec->switch_output.size &&
               trigger_is_ready(&switch_output_trigger) &&
               (rec->bytes_written >= rec->switch_output.size);
}

static bool switch_output_time(struct record *rec)
{
        return rec->switch_output.time &&
               trigger_is_ready(&switch_output_trigger);
}

static bool record__output_max_size_exceeded(struct record *rec)
{
        return rec->output_max_size &&
               (rec->bytes_written >= rec->output_max_size);
}

static int record__write(struct record *rec, struct mmap *map __maybe_unused,
                         void *bf, size_t size)
{
        struct perf_data_file *file = &rec->session->data->file;

        if (perf_data_file__write(file, bf, size) < 0) {
                pr_err("failed to write perf data, error: %m\n");
                return -1;
        }

        rec->bytes_written += size;

        if (record__output_max_size_exceeded(rec) && !done) {
                fprintf(stderr, "[ perf record: perf size limit reached (%" PRIu64 " KB),"
                                " stopping session ]\n",
                                rec->bytes_written >> 10);
                done = 1;
        }

        if (switch_output_size(rec))
                trigger_hit(&switch_output_trigger);

        return 0;
}
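
/*
 * Note: record__write() is the single synchronous write path into
 * perf.data; rec->bytes_written is the one counter that drives both the
 * output size limit above and the size-based switch-output trigger.
 * The AIO path below updates the same counter in record__aio_push().
 */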

static int record__aio_enabled(struct record *rec);
static int record__comp_enabled(struct record *rec);
static size_t zstd_compress(struct perf_session *session, void *dst, size_t dst_size,
                            void *src, size_t src_size);

#ifdef HAVE_AIO_SUPPORT
static int record__aio_write(struct aiocb *cblock, int trace_fd,
                void *buf, size_t size, off_t off)
{
        int rc;

        cblock->aio_fildes = trace_fd;
        cblock->aio_buf    = buf;
        cblock->aio_nbytes = size;
        cblock->aio_offset = off;
        cblock->aio_sigevent.sigev_notify = SIGEV_NONE;

        do {
                rc = aio_write(cblock);
                if (rc == 0) {
                        break;
                } else if (errno != EAGAIN) {
                        cblock->aio_fildes = -1;
                        pr_err("failed to queue perf data, error: %m\n");
                        break;
                }
        } while (1);

        return rc;
}

static int record__aio_complete(struct mmap *md, struct aiocb *cblock)
{
        void *rem_buf;
        off_t rem_off;
        size_t rem_size;
        int rc, aio_errno;
        ssize_t aio_ret, written;

        aio_errno = aio_error(cblock);
        if (aio_errno == EINPROGRESS)
                return 0;

        written = aio_ret = aio_return(cblock);
        if (aio_ret < 0) {
                if (aio_errno != EINTR)
                        pr_err("failed to write perf data, error: %m\n");
                written = 0;
        }

        rem_size = cblock->aio_nbytes - written;

        if (rem_size == 0) {
                cblock->aio_fildes = -1;
                /*
                 * md->refcount is incremented in record__aio_pushfn() for
                 * every aio write request started in record__aio_push() so
                 * decrement it because the request is now complete.
                 */
                perf_mmap__put(&md->core);
                rc = 1;
        } else {
                /*
                 * The aio write request may need to be restarted with the
                 * remainder if the kernel didn't write the whole chunk
                 * at once.
                 */
                rem_off = cblock->aio_offset + written;
                rem_buf = (void *)(cblock->aio_buf + written);
                record__aio_write(cblock, cblock->aio_fildes,
                                rem_buf, rem_size, rem_off);
                rc = 0;
        }

        return rc;
}
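
/*
 * Return value convention: 1 means the block completed fully and its
 * mmap reference was dropped; 0 means the write is still in flight
 * (either EINPROGRESS or restarted above with the remaining bytes).
 * record__aio_sync() relies on this to decide whether a cblock slot is
 * free for reuse.
 */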

static int record__aio_sync(struct mmap *md, bool sync_all)
{
        struct aiocb **aiocb = md->aio.aiocb;
        struct aiocb *cblocks = md->aio.cblocks;
        struct timespec timeout = { 0, 1000 * 1000 * 1 }; /* 1ms */
        int i, do_suspend;

        do {
                do_suspend = 0;
                for (i = 0; i < md->aio.nr_cblocks; ++i) {
                        if (cblocks[i].aio_fildes == -1 || record__aio_complete(md, &cblocks[i])) {
                                if (sync_all)
                                        aiocb[i] = NULL;
                                else
                                        return i;
                        } else {
                                /*
                                 * The started aio write is not complete yet,
                                 * so it has to be waited on before the
                                 * next allocation.
                                 */
                                aiocb[i] = &cblocks[i];
                                do_suspend = 1;
                        }
                }
                if (!do_suspend)
                        return -1;

                while (aio_suspend((const struct aiocb **)aiocb, md->aio.nr_cblocks, &timeout)) {
                        if (!(errno == EAGAIN || errno == EINTR))
                                pr_err("failed to sync perf data, error: %m\n");
                }
        } while (1);
}
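
/*
 * Usage note: with sync_all == false this returns the index of the
 * first free cblock, blocking in aio_suspend() until one completes
 * (see record__aio_push()); with sync_all == true it drains every
 * in-flight write, which record__aio_mmap_read_sync() uses before the
 * output file is finalized or switched.
 */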

struct record_aio {
        struct record   *rec;
        void            *data;
        size_t          size;
};

static int record__aio_pushfn(struct mmap *map, void *to, void *buf, size_t size)
{
        struct record_aio *aio = to;

        /*
         * map->core.base data pointed to by buf is copied into the free
         * map->aio.data[] buffer to release space in the kernel buffer as
         * fast as possible, via perf_mmap__consume() called from
         * perf_mmap__push().
         *
         * That lets the kernel proceed with storing more profiling data into
         * the kernel buffer earlier than other per-cpu kernel buffers are handled.
         *
         * Copying can be done in two steps in case the chunk of profiling data
         * crosses the upper bound of the kernel buffer. In this case we first move
         * part of the data from map->start till the upper bound and then the
         * remainder from the beginning of the kernel buffer till the end of
         * the data chunk.
         */

        if (record__comp_enabled(aio->rec)) {
                size = zstd_compress(aio->rec->session, aio->data + aio->size,
                                     mmap__mmap_len(map) - aio->size,
                                     buf, size);
        } else {
                memcpy(aio->data + aio->size, buf, size);
        }

        if (!aio->size) {
                /*
                 * Increment map->refcount to guard the map->aio.data[] buffer
                 * from premature deallocation, because the map object can be
                 * released earlier than the aio write request started on
                 * the map->aio.data[] buffer is complete.
                 *
                 * perf_mmap__put() is done at record__aio_complete()
                 * after started aio request completion or at record__aio_push()
                 * if the request failed to start.
                 */
                perf_mmap__get(&map->core);
        }

        aio->size += size;

        return size;
}

static int record__aio_push(struct record *rec, struct mmap *map, off_t *off)
{
        int ret, idx;
        int trace_fd = rec->session->data->file.fd;
        struct record_aio aio = { .rec = rec, .size = 0 };

        /*
         * Call record__aio_sync() to wait till map->aio.data[] buffer
         * becomes available after previous aio write operation.
         */

        idx = record__aio_sync(map, false);
        aio.data = map->aio.data[idx];
        ret = perf_mmap__push(map, &aio, record__aio_pushfn);
        if (ret != 0) /* ret > 0 - no data, ret < 0 - error */
                return ret;

        rec->samples++;
        ret = record__aio_write(&(map->aio.cblocks[idx]), trace_fd, aio.data, aio.size, *off);
        if (!ret) {
                *off += aio.size;
                rec->bytes_written += aio.size;
                if (switch_output_size(rec))
                        trigger_hit(&switch_output_trigger);
        } else {
                /*
                 * Decrement map->refcount incremented in record__aio_pushfn()
                 * back if record__aio_write() operation failed to start, otherwise
                 * map->refcount is decremented in record__aio_complete() after
                 * aio write operation finishes successfully.
                 */
                perf_mmap__put(&map->core);
        }

        return ret;
}
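
/*
 * The *off parameter exists because aio writes carry an explicit file
 * offset instead of advancing the descriptor's position: the caller
 * reads the current position once via record__aio_get_pos(), advances
 * *off after each queued write, and stores it back with
 * record__aio_set_pos() so later synchronous writes land after the
 * queued data (see record__mmap_read_evlist()).
 */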

static off_t record__aio_get_pos(int trace_fd)
{
        return lseek(trace_fd, 0, SEEK_CUR);
}

static void record__aio_set_pos(int trace_fd, off_t pos)
{
        lseek(trace_fd, pos, SEEK_SET);
}

static void record__aio_mmap_read_sync(struct record *rec)
{
        int i;
        struct evlist *evlist = rec->evlist;
        struct mmap *maps = evlist->mmap;

        if (!record__aio_enabled(rec))
                return;

        for (i = 0; i < evlist->core.nr_mmaps; i++) {
                struct mmap *map = &maps[i];

                if (map->core.base)
                        record__aio_sync(map, true);
        }
}

static int nr_cblocks_default = 1;
static int nr_cblocks_max = 4;

static int record__aio_parse(const struct option *opt,
                             const char *str,
                             int unset)
{
        struct record_opts *opts = (struct record_opts *)opt->value;

        if (unset) {
                opts->nr_cblocks = 0;
        } else {
                if (str)
                        opts->nr_cblocks = strtol(str, NULL, 0);
                if (!opts->nr_cblocks)
                        opts->nr_cblocks = nr_cblocks_default;
        }

        return 0;
}
#else /* HAVE_AIO_SUPPORT */
static int nr_cblocks_max = 0;

static int record__aio_push(struct record *rec __maybe_unused, struct mmap *map __maybe_unused,
                            off_t *off __maybe_unused)
{
        return -1;
}

static off_t record__aio_get_pos(int trace_fd __maybe_unused)
{
        return -1;
}

static void record__aio_set_pos(int trace_fd __maybe_unused, off_t pos __maybe_unused)
{
}

static void record__aio_mmap_read_sync(struct record *rec __maybe_unused)
{
}
#endif

static int record__aio_enabled(struct record *rec)
{
        return rec->opts.nr_cblocks > 0;
}

#define MMAP_FLUSH_DEFAULT 1
static int record__mmap_flush_parse(const struct option *opt,
                                    const char *str,
                                    int unset)
{
        int flush_max;
        struct record_opts *opts = (struct record_opts *)opt->value;
        static struct parse_tag tags[] = {
                        { .tag  = 'B', .mult = 1       },
                        { .tag  = 'K', .mult = 1 << 10 },
                        { .tag  = 'M', .mult = 1 << 20 },
                        { .tag  = 'G', .mult = 1 << 30 },
                        { .tag  = 0 },
        };

        if (unset)
                return 0;

        if (str) {
                opts->mmap_flush = parse_tag_value(str, tags);
                if (opts->mmap_flush == (int)-1)
                        opts->mmap_flush = strtol(str, NULL, 0);
        }

        if (!opts->mmap_flush)
                opts->mmap_flush = MMAP_FLUSH_DEFAULT;

        flush_max = evlist__mmap_size(opts->mmap_pages);
        flush_max /= 4;
        if (opts->mmap_flush > flush_max)
                opts->mmap_flush = flush_max;

        return 0;
}
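
/*
 * Example: a value such as "16K" parses via the tag table above to
 * 16384 bytes, while a bare number such as "1024" falls through to
 * strtol(). Whatever the user asks for is clamped to a quarter of the
 * mmap buffer size, so the ring can never be required to fill entirely
 * before it is flushed.
 */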

#ifdef HAVE_ZSTD_SUPPORT
static unsigned int comp_level_default = 1;

static int record__parse_comp_level(const struct option *opt, const char *str, int unset)
{
        struct record_opts *opts = opt->value;

        if (unset) {
                opts->comp_level = 0;
        } else {
                if (str)
                        opts->comp_level = strtol(str, NULL, 0);
                if (!opts->comp_level)
                        opts->comp_level = comp_level_default;
        }

        return 0;
}
#endif
static unsigned int comp_level_max = 22;

static int record__comp_enabled(struct record *rec)
{
        return rec->opts.comp_level > 0;
}

static int process_synthesized_event(struct perf_tool *tool,
                                     union perf_event *event,
                                     struct perf_sample *sample __maybe_unused,
                                     struct machine *machine __maybe_unused)
{
        struct record *rec = container_of(tool, struct record, tool);
        return record__write(rec, NULL, event, event->header.size);
}

static int process_locked_synthesized_event(struct perf_tool *tool,
                                     union perf_event *event,
                                     struct perf_sample *sample __maybe_unused,
                                     struct machine *machine __maybe_unused)
{
        static pthread_mutex_t synth_lock = PTHREAD_MUTEX_INITIALIZER;
        int ret;

        pthread_mutex_lock(&synth_lock);
        ret = process_synthesized_event(tool, event, sample, machine);
        pthread_mutex_unlock(&synth_lock);
        return ret;
}

static int record__pushfn(struct mmap *map, void *to, void *bf, size_t size)
{
        struct record *rec = to;

        if (record__comp_enabled(rec)) {
                size = zstd_compress(rec->session, map->data, mmap__mmap_len(map), bf, size);
                bf   = map->data;
        }

        rec->samples++;
        return record__write(rec, map, bf, size);
}

static volatile int signr = -1;
static volatile int child_finished;
#ifdef HAVE_EVENTFD_SUPPORT
static int done_fd = -1;
#endif

static void sig_handler(int sig)
{
        if (sig == SIGCHLD)
                child_finished = 1;
        else
                signr = sig;

        done = 1;
#ifdef HAVE_EVENTFD_SUPPORT
{
        u64 tmp = 1;
        /*
         * It is possible for this signal handler to run after done is checked
         * in the main loop, but before the perf counter fds are polled. If this
         * happens, the poll() will continue to wait even though done is set,
         * and will only break out if either another signal is received, or the
         * counters are ready for read. To ensure the poll() doesn't sleep when
         * done is set, use an eventfd (done_fd) to wake up the poll().
         */
        if (write(done_fd, &tmp, sizeof(tmp)) < 0)
                pr_err("failed to signal wakeup fd, error: %m\n");
}
#endif // HAVE_EVENTFD_SUPPORT
}

static void sigsegv_handler(int sig)
{
        perf_hooks__recover();
        sighandler_dump_stack(sig);
}

static void record__sig_exit(void)
{
        if (signr == -1)
                return;

        signal(signr, SIG_DFL);
        raise(signr);
}
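
/*
 * record__sig_exit() runs from atexit() (see __cmd_record()):
 * re-raising the fatal signal with its default disposition restored
 * makes the process terminate with the proper "killed by signal"
 * status after cleanup, instead of hiding e.g. SIGTERM behind a normal
 * exit code.
 */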

#ifdef HAVE_AUXTRACE_SUPPORT

static int record__process_auxtrace(struct perf_tool *tool,
                                    struct mmap *map,
                                    union perf_event *event, void *data1,
                                    size_t len1, void *data2, size_t len2)
{
        struct record *rec = container_of(tool, struct record, tool);
        struct perf_data *data = &rec->data;
        size_t padding;
        u8 pad[8] = {0};

        if (!perf_data__is_pipe(data) && perf_data__is_single_file(data)) {
                off_t file_offset;
                int fd = perf_data__fd(data);
                int err;

                file_offset = lseek(fd, 0, SEEK_CUR);
                if (file_offset == -1)
                        return -1;
                err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
                                                     event, file_offset);
                if (err)
                        return err;
        }

        /* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
        padding = (len1 + len2) & 7;
        if (padding)
                padding = 8 - padding;

        record__write(rec, map, event, event->header.size);
        record__write(rec, map, data1, len1);
        if (len2)
                record__write(rec, map, data2, len2);
        record__write(rec, map, &pad, padding);

        return 0;
}
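
/*
 * The padding arithmetic rounds the payload up to an 8-byte boundary:
 * e.g. len1 = 5, len2 = 0 gives (5 & 7) = 5, so 3 bytes of zero pad are
 * appended. data1/data2 are the two pieces of an AUX data chunk that
 * may wrap around the end of the ring buffer, hence the two writes.
 */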

static int record__auxtrace_mmap_read(struct record *rec,
                                      struct mmap *map)
{
        int ret;

        ret = auxtrace_mmap__read(map, rec->itr, &rec->tool,
                                  record__process_auxtrace);
        if (ret < 0)
                return ret;

        if (ret)
                rec->samples++;

        return 0;
}

static int record__auxtrace_mmap_read_snapshot(struct record *rec,
                                               struct mmap *map)
{
        int ret;

        ret = auxtrace_mmap__read_snapshot(map, rec->itr, &rec->tool,
                                           record__process_auxtrace,
                                           rec->opts.auxtrace_snapshot_size);
        if (ret < 0)
                return ret;

        if (ret)
                rec->samples++;

        return 0;
}

static int record__auxtrace_read_snapshot_all(struct record *rec)
{
        int i;
        int rc = 0;

        for (i = 0; i < rec->evlist->core.nr_mmaps; i++) {
                struct mmap *map = &rec->evlist->mmap[i];

                if (!map->auxtrace_mmap.base)
                        continue;

                if (record__auxtrace_mmap_read_snapshot(rec, map) != 0) {
                        rc = -1;
                        goto out;
                }
        }
out:
        return rc;
}

static void record__read_auxtrace_snapshot(struct record *rec, bool on_exit)
{
        pr_debug("Recording AUX area tracing snapshot\n");
        if (record__auxtrace_read_snapshot_all(rec) < 0) {
                trigger_error(&auxtrace_snapshot_trigger);
        } else {
                if (auxtrace_record__snapshot_finish(rec->itr, on_exit))
                        trigger_error(&auxtrace_snapshot_trigger);
                else
                        trigger_ready(&auxtrace_snapshot_trigger);
        }
}

static int record__auxtrace_snapshot_exit(struct record *rec)
{
        if (trigger_is_error(&auxtrace_snapshot_trigger))
                return 0;

        if (!auxtrace_record__snapshot_started &&
            auxtrace_record__snapshot_start(rec->itr))
                return -1;

        record__read_auxtrace_snapshot(rec, true);
        if (trigger_is_error(&auxtrace_snapshot_trigger))
                return -1;

        return 0;
}

static int record__auxtrace_init(struct record *rec)
{
        int err;

        if (!rec->itr) {
                rec->itr = auxtrace_record__init(rec->evlist, &err);
                if (err)
                        return err;
        }

        err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
                                              rec->opts.auxtrace_snapshot_opts);
        if (err)
                return err;

        err = auxtrace_parse_sample_options(rec->itr, rec->evlist, &rec->opts,
                                            rec->opts.auxtrace_sample_opts);
        if (err)
                return err;

        return auxtrace_parse_filters(rec->evlist);
}

#else

static inline
int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
                               struct mmap *map __maybe_unused)
{
        return 0;
}

static inline
void record__read_auxtrace_snapshot(struct record *rec __maybe_unused,
                                    bool on_exit __maybe_unused)
{
}

static inline
int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
{
        return 0;
}

static inline
int record__auxtrace_snapshot_exit(struct record *rec __maybe_unused)
{
        return 0;
}

static int record__auxtrace_init(struct record *rec __maybe_unused)
{
        return 0;
}

#endif

static bool record__kcore_readable(struct machine *machine)
{
        char kcore[PATH_MAX];
        int fd;

        scnprintf(kcore, sizeof(kcore), "%s/proc/kcore", machine->root_dir);

        fd = open(kcore, O_RDONLY);
        if (fd < 0)
                return false;

        close(fd);

        return true;
}

static int record__kcore_copy(struct machine *machine, struct perf_data *data)
{
        char from_dir[PATH_MAX];
        char kcore_dir[PATH_MAX];
        int ret;

        snprintf(from_dir, sizeof(from_dir), "%s/proc", machine->root_dir);

        ret = perf_data__make_kcore_dir(data, kcore_dir, sizeof(kcore_dir));
        if (ret)
                return ret;

        return kcore_copy(from_dir, kcore_dir);
}

static int record__mmap_evlist(struct record *rec,
                               struct evlist *evlist)
{
        struct record_opts *opts = &rec->opts;
        bool auxtrace_overwrite = opts->auxtrace_snapshot_mode ||
                                  opts->auxtrace_sample_mode;
        char msg[512];

        if (opts->affinity != PERF_AFFINITY_SYS)
                cpu__setup_cpunode_map();

        if (evlist__mmap_ex(evlist, opts->mmap_pages,
                                 opts->auxtrace_mmap_pages,
                                 auxtrace_overwrite,
                                 opts->nr_cblocks, opts->affinity,
                                 opts->mmap_flush, opts->comp_level) < 0) {
                if (errno == EPERM) {
                        pr_err("Permission error mapping pages.\n"
                               "Consider increasing "
                               "/proc/sys/kernel/perf_event_mlock_kb,\n"
                               "or try again with a smaller value of -m/--mmap_pages.\n"
                               "(current value: %u,%u)\n",
                               opts->mmap_pages, opts->auxtrace_mmap_pages);
                        return -errno;
                } else {
                        pr_err("failed to mmap with %d (%s)\n", errno,
                                str_error_r(errno, msg, sizeof(msg)));
                        if (errno)
                                return -errno;
                        else
                                return -EINVAL;
                }
        }
        return 0;
}

static int record__mmap(struct record *rec)
{
        return record__mmap_evlist(rec, rec->evlist);
}

static int record__open(struct record *rec)
{
        char msg[BUFSIZ];
        struct evsel *pos;
        struct evlist *evlist = rec->evlist;
        struct perf_session *session = rec->session;
        struct record_opts *opts = &rec->opts;
        int rc = 0;

        /*
         * For initial_delay or system wide, we need to add a dummy event so
         * that we can track PERF_RECORD_MMAP to cover the delay of waiting or
         * event synthesis.
         */
        if (opts->initial_delay || target__has_cpu(&opts->target)) {
                if (perf_evlist__add_dummy(evlist))
                        return -ENOMEM;

                /* Disable tracking of mmaps on lead event. */
                pos = evlist__first(evlist);
                pos->tracking = 0;
                /* Set up dummy event. */
                pos = evlist__last(evlist);
                pos->tracking = 1;
                /*
                 * Enable the dummy event when the process is forked for
                 * initial_delay, immediately for system wide.
                 */
                if (opts->initial_delay)
                        pos->core.attr.enable_on_exec = 1;
                else
                        pos->immediate = 1;
        }

        perf_evlist__config(evlist, opts, &callchain_param);

        evlist__for_each_entry(evlist, pos) {
try_again:
                if (evsel__open(pos, pos->core.cpus, pos->core.threads) < 0) {
                        if (evsel__fallback(pos, errno, msg, sizeof(msg))) {
                                if (verbose > 0)
                                        ui__warning("%s\n", msg);
                                goto try_again;
                        }
                        if ((errno == EINVAL || errno == EBADF) &&
                            pos->leader != pos &&
                            pos->weak_group) {
                                pos = perf_evlist__reset_weak_group(evlist, pos, true);
                                goto try_again;
                        }
                        rc = -errno;
                        evsel__open_strerror(pos, &opts->target, errno, msg, sizeof(msg));
                        ui__error("%s\n", msg);
                        goto out;
                }

                pos->supported = true;
        }

        if (symbol_conf.kptr_restrict && !perf_evlist__exclude_kernel(evlist)) {
                pr_warning(
"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
"check /proc/sys/kernel/kptr_restrict and /proc/sys/kernel/perf_event_paranoid.\n\n"
"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
"file is not found in the buildid cache or in the vmlinux path.\n\n"
"Samples in kernel modules won't be resolved at all.\n\n"
"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
"even with a suitable vmlinux or kallsyms file.\n\n");
        }

        if (perf_evlist__apply_filters(evlist, &pos)) {
                pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
                        pos->filter, evsel__name(pos), errno,
                        str_error_r(errno, msg, sizeof(msg)));
                rc = -1;
                goto out;
        }

        rc = record__mmap(rec);
        if (rc)
                goto out;

        session->evlist = evlist;
        perf_session__set_id_hdr_size(session);
out:
        return rc;
}

static int process_sample_event(struct perf_tool *tool,
                                union perf_event *event,
                                struct perf_sample *sample,
                                struct evsel *evsel,
                                struct machine *machine)
{
        struct record *rec = container_of(tool, struct record, tool);

        if (rec->evlist->first_sample_time == 0)
                rec->evlist->first_sample_time = sample->time;

        rec->evlist->last_sample_time = sample->time;

        if (rec->buildid_all)
                return 0;

        rec->samples++;
        return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
}

static int process_buildids(struct record *rec)
{
        struct perf_session *session = rec->session;

        if (perf_data__size(&rec->data) == 0)
                return 0;

        /*
         * During this process, it'll load the kernel map and replace
         * dso->long_name with the real pathname it found.  In this case
         * we prefer the vmlinux path like
         *   /lib/modules/3.16.4/build/vmlinux
         *
         * rather than the build-id path (in the debug directory):
         *   $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
         */
        symbol_conf.ignore_vmlinux_buildid = true;

        /*
         * If --buildid-all is given, it marks all DSOs regardless of hits,
         * so there is no need to process samples. But if timestamp_boundary
         * is enabled, it still needs to walk all samples to get the
         * timestamps of the first/last samples.
         */
        if (rec->buildid_all && !rec->timestamp_boundary)
                rec->tool.sample = NULL;

        return perf_session__process_events(session);
}

static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
{
        int err;
        struct perf_tool *tool = data;
        /*
         * As for the guest kernel when processing the record & report
         * subcommands, we arrange module mmaps prior to the guest kernel
         * mmap and trigger a dso preload, because default guest module
         * symbols are loaded from the guest kallsyms instead of
         * /lib/modules/XXX/XXX. This method is used to avoid missing
         * symbols when the first addr is in a module instead of in the
         * guest kernel.
         */
        err = perf_event__synthesize_modules(tool, process_synthesized_event,
                                             machine);
        if (err < 0)
                pr_err("Couldn't record guest kernel [%d]'s reference"
                       " relocation symbol.\n", machine->pid);

        /*
         * We use _stext for the guest kernel because the guest kernel's
         * /proc/kallsyms sometimes has no _text.
         */
        err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
                                                 machine);
        if (err < 0)
                pr_err("Couldn't record guest kernel [%d]'s reference"
                       " relocation symbol.\n", machine->pid);
}

static struct perf_event_header finished_round_event = {
        .size = sizeof(struct perf_event_header),
        .type = PERF_RECORD_FINISHED_ROUND,
};

static void record__adjust_affinity(struct record *rec, struct mmap *map)
{
        if (rec->opts.affinity != PERF_AFFINITY_SYS &&
            !bitmap_equal(rec->affinity_mask.bits, map->affinity_mask.bits,
                          rec->affinity_mask.nbits)) {
                bitmap_zero(rec->affinity_mask.bits, rec->affinity_mask.nbits);
                bitmap_or(rec->affinity_mask.bits, rec->affinity_mask.bits,
                          map->affinity_mask.bits, rec->affinity_mask.nbits);
                sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&rec->affinity_mask),
                                  (cpu_set_t *)rec->affinity_mask.bits);
                if (verbose == 2)
                        mmap_cpu_mask__scnprintf(&rec->affinity_mask, "thread");
        }
}
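
/*
 * With the NODE or CPU affinity modes (see affinity_tags above), the
 * recording thread is migrated so it reads each kernel buffer from a
 * CPU close to where that buffer lives, avoiding cross-node memory
 * traffic; sched_setaffinity() is only issued when the target mask
 * actually differs from the current one.
 */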

static size_t process_comp_header(void *record, size_t increment)
{
        struct perf_record_compressed *event = record;
        size_t size = sizeof(*event);

        if (increment) {
                event->header.size += increment;
                return increment;
        }

        event->header.type = PERF_RECORD_COMPRESSED;
        event->header.size = size;

        return size;
}
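
/*
 * This callback is used by zstd_compress() below in two modes: called
 * with increment == 0 it initializes a fresh PERF_RECORD_COMPRESSED
 * header and returns its size; called with a non-zero increment it
 * grows header.size by the bytes just compressed into the record.
 */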

static size_t zstd_compress(struct perf_session *session, void *dst, size_t dst_size,
                            void *src, size_t src_size)
{
        size_t compressed;
        size_t max_record_size = PERF_SAMPLE_MAX_SIZE - sizeof(struct perf_record_compressed) - 1;

        compressed = zstd_compress_stream_to_records(&session->zstd_data, dst, dst_size, src, src_size,
                                                     max_record_size, process_comp_header);

        session->bytes_transferred += src_size;
        session->bytes_compressed  += compressed;

        return compressed;
}

static int record__mmap_read_evlist(struct record *rec, struct evlist *evlist,
                                    bool overwrite, bool synch)
{
        u64 bytes_written = rec->bytes_written;
        int i;
        int rc = 0;
        struct mmap *maps;
        int trace_fd = rec->data.file.fd;
        off_t off = 0;

        if (!evlist)
                return 0;

        maps = overwrite ? evlist->overwrite_mmap : evlist->mmap;
        if (!maps)
                return 0;

        if (overwrite && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
                return 0;

        if (record__aio_enabled(rec))
                off = record__aio_get_pos(trace_fd);

        for (i = 0; i < evlist->core.nr_mmaps; i++) {
                u64 flush = 0;
                struct mmap *map = &maps[i];

                if (map->core.base) {
                        record__adjust_affinity(rec, map);
                        if (synch) {
                                flush = map->core.flush;
                                map->core.flush = 1;
                        }
                        if (!record__aio_enabled(rec)) {
                                if (perf_mmap__push(map, rec, record__pushfn) < 0) {
                                        if (synch)
                                                map->core.flush = flush;
                                        rc = -1;
                                        goto out;
                                }
                        } else {
                                if (record__aio_push(rec, map, &off) < 0) {
                                        record__aio_set_pos(trace_fd, off);
                                        if (synch)
                                                map->core.flush = flush;
                                        rc = -1;
                                        goto out;
                                }
                        }
                        if (synch)
                                map->core.flush = flush;
                }

                if (map->auxtrace_mmap.base && !rec->opts.auxtrace_snapshot_mode &&
                    !rec->opts.auxtrace_sample_mode &&
                    record__auxtrace_mmap_read(rec, map) != 0) {
                        rc = -1;
                        goto out;
                }
        }

        if (record__aio_enabled(rec))
                record__aio_set_pos(trace_fd, off);

        /*
         * Mark the round finished in case we wrote
         * at least one event.
         */
        if (bytes_written != rec->bytes_written)
                rc = record__write(rec, NULL, &finished_round_event, sizeof(finished_round_event));

        if (overwrite)
                perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
out:
        return rc;
}
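
/*
 * The synch flag is set on the final drain: temporarily forcing
 * map->core.flush to 1 makes perf_mmap__push() hand over whatever is
 * left in each ring, even amounts below the configured flush
 * threshold, before the original flush value is restored.
 */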

static int record__mmap_read_all(struct record *rec, bool synch)
{
        int err;

        err = record__mmap_read_evlist(rec, rec->evlist, false, synch);
        if (err)
                return err;

        return record__mmap_read_evlist(rec, rec->evlist, true, synch);
}

static void record__init_features(struct record *rec)
{
        struct perf_session *session = rec->session;
        int feat;

        for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
                perf_header__set_feat(&session->header, feat);

        if (rec->no_buildid)
                perf_header__clear_feat(&session->header, HEADER_BUILD_ID);

        if (!have_tracepoints(&rec->evlist->core.entries))
                perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);

        if (!rec->opts.branch_stack)
                perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);

        if (!rec->opts.full_auxtrace)
                perf_header__clear_feat(&session->header, HEADER_AUXTRACE);

        if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns))
                perf_header__clear_feat(&session->header, HEADER_CLOCKID);

        perf_header__clear_feat(&session->header, HEADER_DIR_FORMAT);
        if (!record__comp_enabled(rec))
                perf_header__clear_feat(&session->header, HEADER_COMPRESSED);

        perf_header__clear_feat(&session->header, HEADER_STAT);
}

static void
record__finish_output(struct record *rec)
{
        struct perf_data *data = &rec->data;
        int fd = perf_data__fd(data);

        if (data->is_pipe)
                return;

        rec->session->header.data_size += rec->bytes_written;
        data->file.size = lseek(perf_data__fd(data), 0, SEEK_CUR);

        if (!rec->no_buildid) {
                process_buildids(rec);

                if (rec->buildid_all)
                        dsos__hit_all(rec->session);
        }
        perf_session__write_header(rec->session, rec->evlist, fd, true);

        return;
}

static int record__synthesize_workload(struct record *rec, bool tail)
{
        int err;
        struct perf_thread_map *thread_map;

        if (rec->opts.tail_synthesize != tail)
                return 0;

        thread_map = thread_map__new_by_tid(rec->evlist->workload.pid);
        if (thread_map == NULL)
                return -1;

        err = perf_event__synthesize_thread_map(&rec->tool, thread_map,
                                                 process_synthesized_event,
                                                 &rec->session->machines.host,
                                                 rec->opts.sample_address);
        perf_thread_map__put(thread_map);
        return err;
}

static int record__synthesize(struct record *rec, bool tail);

static int
record__switch_output(struct record *rec, bool at_exit)
{
        struct perf_data *data = &rec->data;
        int fd, err;
        char *new_filename;

        /* Same size: "2015122520103046" */
        char timestamp[] = "InvalidTimestamp";

        record__aio_mmap_read_sync(rec);

        record__synthesize(rec, true);
        if (target__none(&rec->opts.target))
                record__synthesize_workload(rec, true);

        rec->samples = 0;
        record__finish_output(rec);
        err = fetch_current_timestamp(timestamp, sizeof(timestamp));
        if (err) {
                pr_err("Failed to get current timestamp\n");
                return -EINVAL;
        }

        fd = perf_data__switch(data, timestamp,
                                    rec->session->header.data_offset,
                                    at_exit, &new_filename);
        if (fd >= 0 && !at_exit) {
                rec->bytes_written = 0;
                rec->session->header.data_size = 0;
        }

        if (!quiet)
                fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
                        data->path, timestamp);

        if (rec->switch_output.num_files) {
                int n = rec->switch_output.cur_file + 1;

                if (n >= rec->switch_output.num_files)
                        n = 0;
                rec->switch_output.cur_file = n;
                if (rec->switch_output.filenames[n]) {
                        remove(rec->switch_output.filenames[n]);
                        zfree(&rec->switch_output.filenames[n]);
                }
                rec->switch_output.filenames[n] = new_filename;
        } else {
                free(new_filename);
        }

        /* Output tracking events */
        if (!at_exit) {
                record__synthesize(rec, false);

                /*
                 * In 'perf record --switch-output' without -a,
                 * record__synthesize() in record__switch_output() won't
                 * generate tracking events because there's no thread_map
                 * in the evlist, which causes the newly created perf.data
                 * to contain no map and comm information.
                 * Create a fake thread_map and directly call
                 * perf_event__synthesize_thread_map() for those events.
                 */
                if (target__none(&rec->opts.target))
                        record__synthesize_workload(rec, false);
        }
        return fd;
}
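
/*
 * When a maximum number of switch-output files is configured
 * (switch_output.num_files), the filenames array acts as a ring of N
 * slots: cur_file wraps around and the oldest dump is removed before
 * its slot is reused, so at most N data files are kept on disk.
 */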

static volatile int workload_exec_errno;

/*
 * perf_evlist__prepare_workload will send a SIGUSR1
 * if the fork fails, since we asked by setting its
 * want_signal to true.
 */
static void workload_exec_failed_signal(int signo __maybe_unused,
                                        siginfo_t *info,
                                        void *ucontext __maybe_unused)
{
        workload_exec_errno = info->si_value.sival_int;
        done = 1;
        child_finished = 1;
}

static void snapshot_sig_handler(int sig);
static void alarm_sig_handler(int sig);

static const struct perf_event_mmap_page *
perf_evlist__pick_pc(struct evlist *evlist)
{
        if (evlist) {
                if (evlist->mmap && evlist->mmap[0].core.base)
                        return evlist->mmap[0].core.base;
                if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].core.base)
                        return evlist->overwrite_mmap[0].core.base;
        }
        return NULL;
}

static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
{
        const struct perf_event_mmap_page *pc;

        pc = perf_evlist__pick_pc(rec->evlist);
        if (pc)
                return pc;
        return NULL;
}

static int record__synthesize(struct record *rec, bool tail)
{
        struct perf_session *session = rec->session;
        struct machine *machine = &session->machines.host;
        struct perf_data *data = &rec->data;
        struct record_opts *opts = &rec->opts;
        struct perf_tool *tool = &rec->tool;
        int fd = perf_data__fd(data);
        int err = 0;
        event_op f = process_synthesized_event;

        if (rec->opts.tail_synthesize != tail)
                return 0;

        if (data->is_pipe) {
                /*
                 * We need to synthesize events first, because some
                 * features work on top of them (on the report side).
                 */
                err = perf_event__synthesize_attrs(tool, rec->evlist,
                                                   process_synthesized_event);
                if (err < 0) {
                        pr_err("Couldn't synthesize attrs.\n");
                        goto out;
                }

                err = perf_event__synthesize_features(tool, session, rec->evlist,
                                                      process_synthesized_event);
                if (err < 0) {
                        pr_err("Couldn't synthesize features.\n");
                        return err;
                }

                if (have_tracepoints(&rec->evlist->core.entries)) {
                        /*
                         * FIXME err <= 0 here actually means that
                         * there were no tracepoints so it's not really
                         * an error, just that we don't need to
                         * synthesize anything.  We really have to
                         * return this more properly and also
                         * propagate errors that now are calling die()
                         */
                        err = perf_event__synthesize_tracing_data(tool, fd, rec->evlist,
                                                                  process_synthesized_event);
                        if (err <= 0) {
                                pr_err("Couldn't record tracing data.\n");
                                goto out;
                        }
                        rec->bytes_written += err;
                }
        }

        err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
                                          process_synthesized_event, machine);
        if (err)
                goto out;

        /* Synthesize id_index before auxtrace_info */
        if (rec->opts.auxtrace_sample_mode) {
                err = perf_event__synthesize_id_index(tool,
                                                      process_synthesized_event,
                                                      session->evlist, machine);
                if (err)
                        goto out;
        }

        if (rec->opts.full_auxtrace) {
                err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
                                        session, process_synthesized_event);
                if (err)
                        goto out;
        }

        if (!perf_evlist__exclude_kernel(rec->evlist)) {
                err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
                                                         machine);
                WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
                                   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
                                   "Check /proc/kallsyms permission or run as root.\n");

                err = perf_event__synthesize_modules(tool, process_synthesized_event,
                                                     machine);
                WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
                                   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
                                   "Check /proc/modules permission or run as root.\n");
        }

        if (perf_guest) {
                machines__process_guests(&session->machines,
                                         perf_event__synthesize_guest_os, tool);
        }

        err = perf_event__synthesize_extra_attr(&rec->tool,
                                                rec->evlist,
                                                process_synthesized_event,
                                                data->is_pipe);
        if (err)
                goto out;

        err = perf_event__synthesize_thread_map2(&rec->tool, rec->evlist->core.threads,
                                                 process_synthesized_event,
                                                 NULL);
        if (err < 0) {
                pr_err("Couldn't synthesize thread map.\n");
                return err;
        }

        err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->core.cpus,
                                             process_synthesized_event, NULL);
        if (err < 0) {
                pr_err("Couldn't synthesize cpu map.\n");
                return err;
        }

        err = perf_event__synthesize_bpf_events(session, process_synthesized_event,
                                                machine, opts);
        if (err < 0)
                pr_warning("Couldn't synthesize bpf events.\n");

        err = perf_event__synthesize_cgroups(tool, process_synthesized_event,
                                             machine);
        if (err < 0)
                pr_warning("Couldn't synthesize cgroup events.\n");

        if (rec->opts.nr_threads_synthesize > 1) {
                perf_set_multithreaded();
                f = process_locked_synthesized_event;
        }

        err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->core.threads,
                                            f, opts->sample_address,
                                            rec->opts.nr_threads_synthesize);

        if (rec->opts.nr_threads_synthesize > 1)
                perf_set_singlethreaded();

out:
        return err;
}
1471
1472 static int record__process_signal_event(union perf_event *event __maybe_unused, void *data)
1473 {
1474         struct record *rec = data;
1475         pthread_kill(rec->thread_id, SIGUSR2);
1476         return 0;
1477 }
1478
1479 static int record__setup_sb_evlist(struct record *rec)
1480 {
1481         struct record_opts *opts = &rec->opts;
1482
1483         if (rec->sb_evlist != NULL) {
1484                 /*
1485                  * We get here if --switch-output-event populated the
1486                  * sb_evlist, so associate a callback that will send a SIGUSR2
1487                  * to the main thread.
1488                  */
1489                 evlist__set_cb(rec->sb_evlist, record__process_signal_event, rec);
1490                 rec->thread_id = pthread_self();
1491         }
1492
1493         if (!opts->no_bpf_event) {
1494                 if (rec->sb_evlist == NULL) {
1495                         rec->sb_evlist = evlist__new();
1496
1497                         if (rec->sb_evlist == NULL) {
1498                                 pr_err("Couldn't create side band evlist.\n");
1499                                 return -1;
1500                         }
1501                 }
1502
1503                 if (evlist__add_bpf_sb_event(rec->sb_evlist, &rec->session->header.env)) {
1504                         pr_err("Couldn't ask for PERF_RECORD_BPF_EVENT side band events.\n");
1505                         return -1;
1506                 }
1507         }
1508
1509         if (perf_evlist__start_sb_thread(rec->sb_evlist, &rec->opts.target)) {
1510                 pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n");
1511                 opts->no_bpf_event = true;
1512         }
1513
1514         return 0;
1515 }
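/*
 * Illustrative flow, assuming a tracepoint 'probe:foo' was set up beforehand:
 *
 *   perf record --switch-output-event=probe:foo -a ...
 *
 * populates sb_evlist above, so a 'probe:foo' record arriving on the side band
 * thread reaches record__process_signal_event(), which sends SIGUSR2 to the
 * main thread to trigger an output file switch.
 */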
1516
1517 static int __cmd_record(struct record *rec, int argc, const char **argv)
1518 {
1519         int err;
1520         int status = 0;
1521         unsigned long waking = 0;
1522         const bool forks = argc > 0;
1523         struct perf_tool *tool = &rec->tool;
1524         struct record_opts *opts = &rec->opts;
1525         struct perf_data *data = &rec->data;
1526         struct perf_session *session;
1527         bool disabled = false, draining = false;
1528         int fd;
1529         float ratio = 0;
1530
1531         atexit(record__sig_exit);
1532         signal(SIGCHLD, sig_handler);
1533         signal(SIGINT, sig_handler);
1534         signal(SIGTERM, sig_handler);
1535         signal(SIGSEGV, sigsegv_handler);
1536
1537         if (rec->opts.record_namespaces)
1538                 tool->namespace_events = true;
1539
1540         if (rec->opts.record_cgroup) {
1541 #ifdef HAVE_FILE_HANDLE
1542                 tool->cgroup_events = true;
1543 #else
1544                 pr_err("cgroup tracking is not supported\n");
1545                 return -1;
1546 #endif
1547         }
1548
1549         if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) {
1550                 signal(SIGUSR2, snapshot_sig_handler);
1551                 if (rec->opts.auxtrace_snapshot_mode)
1552                         trigger_on(&auxtrace_snapshot_trigger);
1553                 if (rec->switch_output.enabled)
1554                         trigger_on(&switch_output_trigger);
1555         } else {
1556                 signal(SIGUSR2, SIG_IGN);
1557         }
1558
1559         session = perf_session__new(data, false, tool);
1560         if (IS_ERR(session)) {
1561                 pr_err("Perf session creation failed.\n");
1562                 return PTR_ERR(session);
1563         }
1564
1565         fd = perf_data__fd(data);
1566         rec->session = session;
1567
1568         if (zstd_init(&session->zstd_data, rec->opts.comp_level) < 0) {
1569                 pr_err("Compression initialization failed.\n");
1570                 return -1;
1571         }
1572 #ifdef HAVE_EVENTFD_SUPPORT
1573         done_fd = eventfd(0, EFD_NONBLOCK);
1574         if (done_fd < 0) {
1575                 pr_err("Failed to create wakeup eventfd, error: %m\n");
1576                 status = -1;
1577                 goto out_delete_session;
1578         }
1579         err = evlist__add_pollfd(rec->evlist, done_fd);
1580         if (err < 0) {
1581                 pr_err("Failed to add wakeup eventfd to poll list\n");
1582                 status = err;
1583                 goto out_delete_session;
1584         }
1585 #endif // HAVE_EVENTFD_SUPPORT
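        /*
         * done_fd is added to the evlist pollfd set so that evlist__poll()
         * in the main loop below can be woken up when recording is asked to
         * stop, instead of waiting for the next ring buffer wakeup.
         */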
1586
1587         session->header.env.comp_type  = PERF_COMP_ZSTD;
1588         session->header.env.comp_level = rec->opts.comp_level;
1589
1590         if (rec->opts.kcore &&
1591             !record__kcore_readable(&session->machines.host)) {
1592                 pr_err("ERROR: kcore is not readable.\n");
1593                 return -1;
1594         }
1595
1596         record__init_features(rec);
1597
1598         if (rec->opts.use_clockid && rec->opts.clockid_res_ns)
1599                 session->header.env.clockid_res_ns = rec->opts.clockid_res_ns;
1600
1601         if (forks) {
1602                 err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
1603                                                     argv, data->is_pipe,
1604                                                     workload_exec_failed_signal);
1605                 if (err < 0) {
1606                         pr_err("Couldn't run the workload!\n");
1607                         status = err;
1608                         goto out_delete_session;
1609                 }
1610         }
1611
1612         /*
1613          * If we have just a single event and are sending data
1614          * through a pipe, we need to force sample id allocation,
1615          * because we synthesize the event name through the pipe
1616          * and need the id for that.
1617          */
1618         if (data->is_pipe && rec->evlist->core.nr_entries == 1)
1619                 rec->opts.sample_id = true;
1620
1621         if (record__open(rec) != 0) {
1622                 err = -1;
1623                 goto out_child;
1624         }
1625         session->header.env.comp_mmap_len = session->evlist->core.mmap_len;
1626
1627         if (rec->opts.kcore) {
1628                 err = record__kcore_copy(&session->machines.host, data);
1629                 if (err) {
1630                         pr_err("ERROR: Failed to copy kcore\n");
1631                         goto out_child;
1632                 }
1633         }
1634
1635         err = bpf__apply_obj_config();
1636         if (err) {
1637                 char errbuf[BUFSIZ];
1638
1639                 bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
1640                 pr_err("ERROR: Apply config to BPF failed: %s\n",
1641                          errbuf);
1642                 goto out_child;
1643         }
1644
1645         /*
1646          * Normally perf_session__new would do this, but it doesn't have the
1647          * evlist.
1648          */
1649         if (rec->tool.ordered_events && !perf_evlist__sample_id_all(rec->evlist)) {
1650                 pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
1651                 rec->tool.ordered_events = false;
1652         }
1653
1654         if (!rec->evlist->nr_groups)
1655                 perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);
1656
1657         if (data->is_pipe) {
1658                 err = perf_header__write_pipe(fd);
1659                 if (err < 0)
1660                         goto out_child;
1661         } else {
1662                 err = perf_session__write_header(session, rec->evlist, fd, false);
1663                 if (err < 0)
1664                         goto out_child;
1665         }
1666
1667         err = -1;
1668         if (!rec->no_buildid
1669             && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
1670                 pr_err("Couldn't generate buildids. "
1671                        "Use --no-buildid to profile anyway.\n");
1672                 goto out_child;
1673         }
1674
1675         err = record__setup_sb_evlist(rec);
1676         if (err)
1677                 goto out_child;
1678
1679         err = record__synthesize(rec, false);
1680         if (err < 0)
1681                 goto out_child;
1682
1683         if (rec->realtime_prio) {
1684                 struct sched_param param;
1685
1686                 param.sched_priority = rec->realtime_prio;
1687                 if (sched_setscheduler(0, SCHED_FIFO, &param)) {
1688                         pr_err("Could not set realtime priority.\n");
1689                         err = -1;
1690                         goto out_child;
1691                 }
1692         }
1693
1694         /*
1695          * When perf is starting the traced process, all the events
1696          * (apart from group members) have enable_on_exec=1 set,
1697          * so don't spoil it by prematurely enabling them.
1698          */
1699         if (!target__none(&opts->target) && !opts->initial_delay)
1700                 evlist__enable(rec->evlist);
1701
1702         /*
1703          * Let the child rip
1704          */
1705         if (forks) {
1706                 struct machine *machine = &session->machines.host;
1707                 union perf_event *event;
1708                 pid_t tgid;
1709
1710                 event = malloc(sizeof(event->comm) + machine->id_hdr_size);
1711                 if (event == NULL) {
1712                         err = -ENOMEM;
1713                         goto out_child;
1714                 }
1715
1716                 /*
1717                  * Some H/W events are generated before the COMM event,
1718                  * which is emitted during exec(), so perf script
1719                  * cannot see a correct process name for those events.
1720                  * Synthesize a COMM event to prevent it.
1721                  */
1722                 tgid = perf_event__synthesize_comm(tool, event,
1723                                                    rec->evlist->workload.pid,
1724                                                    process_synthesized_event,
1725                                                    machine);
1726                 free(event);
1727
1728                 if (tgid == -1)
1729                         goto out_child;
1730
1731                 event = malloc(sizeof(event->namespaces) +
1732                                (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
1733                                machine->id_hdr_size);
1734                 if (event == NULL) {
1735                         err = -ENOMEM;
1736                         goto out_child;
1737                 }
1738
1739                 /*
1740                  * Synthesize NAMESPACES event for the command specified.
1741                  */
1742                 perf_event__synthesize_namespaces(tool, event,
1743                                                   rec->evlist->workload.pid,
1744                                                   tgid, process_synthesized_event,
1745                                                   machine);
1746                 free(event);
1747
1748                 perf_evlist__start_workload(rec->evlist);
1749         }
1750
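        /*
         * With -D/--delay, the enabling above was skipped (note the
         * !opts->initial_delay check), so the events stay disabled until
         * the requested number of milliseconds has elapsed.
         */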
1751         if (opts->initial_delay) {
1752                 usleep(opts->initial_delay * USEC_PER_MSEC);
1753                 evlist__enable(rec->evlist);
1754         }
1755
1756         trigger_ready(&auxtrace_snapshot_trigger);
1757         trigger_ready(&switch_output_trigger);
1758         perf_hooks__invoke_record_start();
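        /*
         * Main read loop: drain the mmap ring buffers, service the auxtrace
         * snapshot and switch-output triggers, and, when nothing new arrived
         * (hits == rec->samples), poll until woken up or until done/draining.
         */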
1759         for (;;) {
1760                 unsigned long long hits = rec->samples;
1761
1762                 /*
1763                  * rec->evlist->bkw_mmap_state may be BKW_MMAP_EMPTY
1764                  * here: when done == true and hits != rec->samples
1765                  * in the previous round.
1766                  *
1767                  * perf_evlist__toggle_bkw_mmap() ensures we never
1768                  * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING.
1769                  */
1770                 if (trigger_is_hit(&switch_output_trigger) || done || draining)
1771                         perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING);
1772
1773                 if (record__mmap_read_all(rec, false) < 0) {
1774                         trigger_error(&auxtrace_snapshot_trigger);
1775                         trigger_error(&switch_output_trigger);
1776                         err = -1;
1777                         goto out_child;
1778                 }
1779
1780                 if (auxtrace_record__snapshot_started) {
1781                         auxtrace_record__snapshot_started = 0;
1782                         if (!trigger_is_error(&auxtrace_snapshot_trigger))
1783                                 record__read_auxtrace_snapshot(rec, false);
1784                         if (trigger_is_error(&auxtrace_snapshot_trigger)) {
1785                                 pr_err("AUX area tracing snapshot failed\n");
1786                                 err = -1;
1787                                 goto out_child;
1788                         }
1789                 }
1790
1791                 if (trigger_is_hit(&switch_output_trigger)) {
1792                         /*
1793                          * If switch_output_trigger is hit, the data in the
1794                          * overwritable ring buffer should have been collected,
1795                          * so bkw_mmap_state should be set to BKW_MMAP_EMPTY.
1796                          *
1797                          * If SIGUSR2 was raised after or during record__mmap_read_all(),
1798                          * it didn't collect data from the overwritable
1799                          * ring buffer. Read again.
1800                          */
1801                         if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING)
1802                                 continue;
1803                         trigger_ready(&switch_output_trigger);
1804
1805                         /*
1806                          * Reenable events in overwrite ring buffer after
1807                          * record__mmap_read_all(): we should have collected
1808                          * data from it.
1809                          */
1810                         perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING);
1811
1812                         if (!quiet)
1813                                 fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
1814                                         waking);
1815                         waking = 0;
1816                         fd = record__switch_output(rec, false);
1817                         if (fd < 0) {
1818                                 pr_err("Failed to switch to new file\n");
1819                                 trigger_error(&switch_output_trigger);
1820                                 err = fd;
1821                                 goto out_child;
1822                         }
1823
1824                         /* re-arm the alarm */
1825                         if (rec->switch_output.time)
1826                                 alarm(rec->switch_output.time);
1827                 }
1828
1829                 if (hits == rec->samples) {
1830                         if (done || draining)
1831                                 break;
1832                         err = evlist__poll(rec->evlist, -1);
1833                         /*
1834                          * Propagate the error only if there is one. Ignore a
1835                          * positive number of returned events and the interrupt error.
1836                          */
1837                         if (err > 0 || (err < 0 && errno == EINTR))
1838                                 err = 0;
1839                         waking++;
1840
1841                         if (evlist__filter_pollfd(rec->evlist, POLLERR | POLLHUP) == 0)
1842                                 draining = true;
1843                 }
1844
1845                 /*
1846          * When perf is starting the traced process, the events die
1847          * with the process at the end and we wait for that. Thus
1848          * there is no need to disable events in this case.
1849                  */
1850                 if (done && !disabled && !target__none(&opts->target)) {
1851                         trigger_off(&auxtrace_snapshot_trigger);
1852                         evlist__disable(rec->evlist);
1853                         disabled = true;
1854                 }
1855         }
1856
1857         trigger_off(&auxtrace_snapshot_trigger);
1858         trigger_off(&switch_output_trigger);
1859
1860         if (opts->auxtrace_snapshot_on_exit)
1861                 record__auxtrace_snapshot_exit(rec);
1862
1863         if (forks && workload_exec_errno) {
1864                 char msg[STRERR_BUFSIZE];
1865                 const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
1866                 pr_err("Workload failed: %s\n", emsg);
1867                 err = -1;
1868                 goto out_child;
1869         }
1870
1871         if (!quiet)
1872                 fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);
1873
1874         if (target__none(&rec->opts.target))
1875                 record__synthesize_workload(rec, true);
1876
1877 out_child:
1878         record__mmap_read_all(rec, true);
1879         record__aio_mmap_read_sync(rec);
1880
1881         if (rec->session->bytes_transferred && rec->session->bytes_compressed) {
1882                 ratio = (float)rec->session->bytes_transferred/(float)rec->session->bytes_compressed;
1883                 session->header.env.comp_ratio = ratio + 0.5;
1884         }
1885
1886         if (forks) {
1887                 int exit_status;
1888
1889                 if (!child_finished)
1890                         kill(rec->evlist->workload.pid, SIGTERM);
1891
1892                 wait(&exit_status);
1893
1894                 if (err < 0)
1895                         status = err;
1896                 else if (WIFEXITED(exit_status))
1897                         status = WEXITSTATUS(exit_status);
1898                 else if (WIFSIGNALED(exit_status))
1899                         signr = WTERMSIG(exit_status);
1900         } else
1901                 status = err;
1902
1903         record__synthesize(rec, true);
1904         /* this will be recalculated during process_buildids() */
1905         rec->samples = 0;
1906
1907         if (!err) {
1908                 if (!rec->timestamp_filename) {
1909                         record__finish_output(rec);
1910                 } else {
1911                         fd = record__switch_output(rec, true);
1912                         if (fd < 0) {
1913                                 status = fd;
1914                                 goto out_delete_session;
1915                         }
1916                 }
1917         }
1918
1919         perf_hooks__invoke_record_end();
1920
1921         if (!err && !quiet) {
1922                 char samples[128];
1923                 const char *postfix = rec->timestamp_filename ?
1924                                         ".<timestamp>" : "";
1925
1926                 if (rec->samples && !rec->opts.full_auxtrace)
1927                         scnprintf(samples, sizeof(samples),
1928                                   " (%" PRIu64 " samples)", rec->samples);
1929                 else
1930                         samples[0] = '\0';
1931
1932                 fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s",
1933                         perf_data__size(data) / 1024.0 / 1024.0,
1934                         data->path, postfix, samples);
1935                 if (ratio) {
1936                         fprintf(stderr, ", compressed (original %.3f MB, ratio is %.3f)",
1937                                         rec->session->bytes_transferred / 1024.0 / 1024.0,
1938                                         ratio);
1939                 }
1940                 fprintf(stderr, " ]\n");
1941         }
1942
1943 out_delete_session:
1944 #ifdef HAVE_EVENTFD_SUPPORT
1945         if (done_fd >= 0)
1946                 close(done_fd);
1947 #endif
1948         zstd_fini(&session->zstd_data);
1949         perf_session__delete(session);
1950
1951         if (!opts->no_bpf_event)
1952                 perf_evlist__stop_sb_thread(rec->sb_evlist);
1953         return status;
1954 }
1955
1956 static void callchain_debug(struct callchain_param *callchain)
1957 {
1958         static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };
1959
1960         pr_debug("callchain: type %s\n", str[callchain->record_mode]);
1961
1962         if (callchain->record_mode == CALLCHAIN_DWARF)
1963                 pr_debug("callchain: stack dump size %d\n",
1964                          callchain->dump_size);
1965 }
1966
1967 int record_opts__parse_callchain(struct record_opts *record,
1968                                  struct callchain_param *callchain,
1969                                  const char *arg, bool unset)
1970 {
1971         int ret;
1972         callchain->enabled = !unset;
1973
1974         /* --no-call-graph */
1975         if (unset) {
1976                 callchain->record_mode = CALLCHAIN_NONE;
1977                 pr_debug("callchain: disabled\n");
1978                 return 0;
1979         }
1980
1981         ret = parse_callchain_record_opt(arg, callchain);
1982         if (!ret) {
1983                 /* Enable data address sampling for DWARF unwind. */
1984                 if (callchain->record_mode == CALLCHAIN_DWARF)
1985                         record->sample_address = true;
1986                 callchain_debug(callchain);
1987         }
1988
1989         return ret;
1990 }
1991
1992 int record_parse_callchain_opt(const struct option *opt,
1993                                const char *arg,
1994                                int unset)
1995 {
1996         return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
1997 }
1998
1999 int record_callchain_opt(const struct option *opt,
2000                          const char *arg __maybe_unused,
2001                          int unset __maybe_unused)
2002 {
2003         struct callchain_param *callchain = opt->value;
2004
2005         callchain->enabled = true;
2006
2007         if (callchain->record_mode == CALLCHAIN_NONE)
2008                 callchain->record_mode = CALLCHAIN_FP;
2009
2010         callchain_debug(callchain);
2011         return 0;
2012 }
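/*
 * Illustrative difference between the two callbacks above:
 *
 *   perf record -g ...                    # record_callchain_opt(): no argument,
 *                                         # record_mode defaults to FP
 *   perf record --call-graph dwarf,4096   # record_parse_callchain_opt(): parses
 *                                         # the mode and the stack dump size
 */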
2013
2014 static int perf_record_config(const char *var, const char *value, void *cb)
2015 {
2016         struct record *rec = cb;
2017
2018         if (!strcmp(var, "record.build-id")) {
2019                 if (!strcmp(value, "cache"))
2020                         rec->no_buildid_cache = false;
2021                 else if (!strcmp(value, "no-cache"))
2022                         rec->no_buildid_cache = true;
2023                 else if (!strcmp(value, "skip"))
2024                         rec->no_buildid = true;
2025                 else
2026                         return -1;
2027                 return 0;
2028         }
2029         if (!strcmp(var, "record.call-graph")) {
2030                 var = "call-graph.record-mode";
2031                 return perf_default_config(var, value, cb);
2032         }
2033 #ifdef HAVE_AIO_SUPPORT
2034         if (!strcmp(var, "record.aio")) {
2035                 rec->opts.nr_cblocks = strtol(value, NULL, 0);
2036                 if (!rec->opts.nr_cblocks)
2037                         rec->opts.nr_cblocks = nr_cblocks_default;
2038         }
2039 #endif
2040
2041         return 0;
2042 }
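/*
 * Illustrative ~/.perfconfig snippet handled by perf_record_config() above:
 *
 *   [record]
 *       build-id = no-cache    # one of: cache, no-cache, skip
 *       call-graph = dwarf     # forwarded as call-graph.record-mode
 *       aio = 2                # only with HAVE_AIO_SUPPORT
 */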
2043
2044 struct clockid_map {
2045         const char *name;
2046         int clockid;
2047 };
2048
2049 #define CLOCKID_MAP(n, c)       \
2050         { .name = n, .clockid = (c), }
2051
2052 #define CLOCKID_END     { .name = NULL, }
2053
2054
2055 /*
2056  * Add the missing ones, we need to build on many distros...
2057  */
2058 #ifndef CLOCK_MONOTONIC_RAW
2059 #define CLOCK_MONOTONIC_RAW 4
2060 #endif
2061 #ifndef CLOCK_BOOTTIME
2062 #define CLOCK_BOOTTIME 7
2063 #endif
2064 #ifndef CLOCK_TAI
2065 #define CLOCK_TAI 11
2066 #endif
2067
2068 static const struct clockid_map clockids[] = {
2069         /* available for all events, NMI safe */
2070         CLOCKID_MAP("monotonic", CLOCK_MONOTONIC),
2071         CLOCKID_MAP("monotonic_raw", CLOCK_MONOTONIC_RAW),
2072
2073         /* available for some events */
2074         CLOCKID_MAP("realtime", CLOCK_REALTIME),
2075         CLOCKID_MAP("boottime", CLOCK_BOOTTIME),
2076         CLOCKID_MAP("tai", CLOCK_TAI),
2077
2078         /* available for the lazy */
2079         CLOCKID_MAP("mono", CLOCK_MONOTONIC),
2080         CLOCKID_MAP("raw", CLOCK_MONOTONIC_RAW),
2081         CLOCKID_MAP("real", CLOCK_REALTIME),
2082         CLOCKID_MAP("boot", CLOCK_BOOTTIME),
2083
2084         CLOCKID_END,
2085 };
2086
2087 static int get_clockid_res(clockid_t clk_id, u64 *res_ns)
2088 {
2089         struct timespec res;
2090
2091         *res_ns = 0;
2092         if (!clock_getres(clk_id, &res))
2093                 *res_ns = res.tv_nsec + res.tv_sec * NSEC_PER_SEC;
2094         else
2095                 pr_warning("WARNING: Failed to determine specified clock resolution.\n");
2096
2097         return 0;
2098 }
2099
2100 static int parse_clockid(const struct option *opt, const char *str, int unset)
2101 {
2102         struct record_opts *opts = (struct record_opts *)opt->value;
2103         const struct clockid_map *cm;
2104         const char *ostr = str;
2105
2106         if (unset) {
2107                 opts->use_clockid = 0;
2108                 return 0;
2109         }
2110
2111         /* no arg passed */
2112         if (!str)
2113                 return 0;
2114
2115         /* no setting it twice */
2116         if (opts->use_clockid)
2117                 return -1;
2118
2119         opts->use_clockid = true;
2120
2121         /* if it's a number, we're done */
2122         if (sscanf(str, "%d", &opts->clockid) == 1)
2123                 return get_clockid_res(opts->clockid, &opts->clockid_res_ns);
2124
2125         /* allow a "CLOCK_" prefix to the name */
2126         if (!strncasecmp(str, "CLOCK_", 6))
2127                 str += 6;
2128
2129         for (cm = clockids; cm->name; cm++) {
2130                 if (!strcasecmp(str, cm->name)) {
2131                         opts->clockid = cm->clockid;
2132                         return get_clockid_res(opts->clockid,
2133                                                &opts->clockid_res_ns);
2134                 }
2135         }
2136
2137         opts->use_clockid = false;
2138         ui__warning("unknown clockid %s, check man page\n", ostr);
2139         return -1;
2140 }
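/*
 * Accepted forms for -k/--clockid, per the parsing above: a raw clockid
 * number (-k 4), a name from the clockids[] map (-k monotonic_raw), or the
 * same name with a CLOCK_ prefix (-k CLOCK_MONOTONIC_RAW).
 */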
2141
2142 static int record__parse_affinity(const struct option *opt, const char *str, int unset)
2143 {
2144         struct record_opts *opts = (struct record_opts *)opt->value;
2145
2146         if (unset || !str)
2147                 return 0;
2148
2149         if (!strcasecmp(str, "node"))
2150                 opts->affinity = PERF_AFFINITY_NODE;
2151         else if (!strcasecmp(str, "cpu"))
2152                 opts->affinity = PERF_AFFINITY_CPU;
2153
2154         return 0;
2155 }
2156
2157 static int parse_output_max_size(const struct option *opt,
2158                                  const char *str, int unset)
2159 {
2160         unsigned long *s = (unsigned long *)opt->value;
2161         static struct parse_tag tags_size[] = {
2162                 { .tag  = 'B', .mult = 1       },
2163                 { .tag  = 'K', .mult = 1 << 10 },
2164                 { .tag  = 'M', .mult = 1 << 20 },
2165                 { .tag  = 'G', .mult = 1 << 30 },
2166                 { .tag  = 0 },
2167         };
2168         unsigned long val;
2169
2170         if (unset) {
2171                 *s = 0;
2172                 return 0;
2173         }
2174
2175         val = parse_tag_value(str, tags_size);
2176         if (val != (unsigned long) -1) {
2177                 *s = val;
2178                 return 0;
2179         }
2180
2181         return -1;
2182 }
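/*
 * Example: 'perf record --max-size=1G ...' limits the output file to ~1GB,
 * using the B/K/M/G suffixes parsed above.
 */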
2183
2184 static int record__parse_mmap_pages(const struct option *opt,
2185                                     const char *str,
2186                                     int unset __maybe_unused)
2187 {
2188         struct record_opts *opts = opt->value;
2189         char *s, *p;
2190         unsigned int mmap_pages;
2191         int ret;
2192
2193         if (!str)
2194                 return -EINVAL;
2195
2196         s = strdup(str);
2197         if (!s)
2198                 return -ENOMEM;
2199
2200         p = strchr(s, ',');
2201         if (p)
2202                 *p = '\0';
2203
2204         if (*s) {
2205                 ret = __perf_evlist__parse_mmap_pages(&mmap_pages, s);
2206                 if (ret)
2207                         goto out_free;
2208                 opts->mmap_pages = mmap_pages;
2209         }
2210
2211         if (!p) {
2212                 ret = 0;
2213                 goto out_free;
2214         }
2215
2216         ret = __perf_evlist__parse_mmap_pages(&mmap_pages, p + 1);
2217         if (ret)
2218                 goto out_free;
2219
2220         opts->auxtrace_mmap_pages = mmap_pages;
2221
2222 out_free:
2223         free(s);
2224         return ret;
2225 }
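/*
 * Example: 'perf record -m 512,128 ...' uses 512 pages for the data mmaps
 * and 128 pages for the AUX area tracing mmaps, split on the comma above.
 */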
2226
2227 static void switch_output_size_warn(struct record *rec)
2228 {
2229         u64 wakeup_size = evlist__mmap_size(rec->opts.mmap_pages);
2230         struct switch_output *s = &rec->switch_output;
2231
2232         wakeup_size /= 2;
2233
2234         if (s->size < wakeup_size) {
2235                 char buf[100];
2236
2237                 unit_number__scnprintf(buf, sizeof(buf), wakeup_size);
2238                 pr_warning("WARNING: switch-output data size lower than "
2239                            "wakeup kernel buffer size (%s), "
2240                            "expect bigger perf.data sizes\n", buf);
2241         }
2242 }
2243
2244 static int switch_output_setup(struct record *rec)
2245 {
2246         struct switch_output *s = &rec->switch_output;
2247         static struct parse_tag tags_size[] = {
2248                 { .tag  = 'B', .mult = 1       },
2249                 { .tag  = 'K', .mult = 1 << 10 },
2250                 { .tag  = 'M', .mult = 1 << 20 },
2251                 { .tag  = 'G', .mult = 1 << 30 },
2252                 { .tag  = 0 },
2253         };
2254         static struct parse_tag tags_time[] = {
2255                 { .tag  = 's', .mult = 1        },
2256                 { .tag  = 'm', .mult = 60       },
2257                 { .tag  = 'h', .mult = 60*60    },
2258                 { .tag  = 'd', .mult = 60*60*24 },
2259                 { .tag  = 0 },
2260         };
2261         unsigned long val;
2262
2263         /*
2264          * If we're using --switch-output-event, then we imply
2265          * --switch-output=signal, as we'll send a SIGUSR2 from the side band
2266          * thread to its parent.
2267          */
2268         if (rec->switch_output_event_set)
2269                 goto do_signal;
2270
2271         if (!s->set)
2272                 return 0;
2273
2274         if (!strcmp(s->str, "signal")) {
2275 do_signal:
2276                 s->signal = true;
2277                 pr_debug("switch-output with SIGUSR2 signal\n");
2278                 goto enabled;
2279         }
2280
2281         val = parse_tag_value(s->str, tags_size);
2282         if (val != (unsigned long) -1) {
2283                 s->size = val;
2284                 pr_debug("switch-output with %s size threshold\n", s->str);
2285                 goto enabled;
2286         }
2287
2288         val = parse_tag_value(s->str, tags_time);
2289         if (val != (unsigned long) -1) {
2290                 s->time = val;
2291                 pr_debug("switch-output with %s time threshold (%lu seconds)\n",
2292                          s->str, s->time);
2293                 goto enabled;
2294         }
2295
2296         return -1;
2297
2298 enabled:
2299         rec->timestamp_filename = true;
2300         s->enabled              = true;
2301
2302         if (s->size && !rec->opts.no_buffering)
2303                 switch_output_size_warn(rec);
2304
2305         return 0;
2306 }
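/*
 * Illustrative --switch-output arguments accepted above:
 *
 *   perf record --switch-output ...        # or =signal: rotate on SIGUSR2
 *   perf record --switch-output=100M ...   # rotate after ~100MB of data
 *   perf record --switch-output=30s ...    # rotate every 30 seconds
 *
 * All of them imply --timestamp-filename, so each output file gets a
 * timestamp suffix.
 */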
2307
2308 static const char * const __record_usage[] = {
2309         "perf record [<options>] [<command>]",
2310         "perf record [<options>] -- <command> [<options>]",
2311         NULL
2312 };
2313 const char * const *record_usage = __record_usage;
2314
2315 static int build_id__process_mmap(struct perf_tool *tool, union perf_event *event,
2316                                   struct perf_sample *sample, struct machine *machine)
2317 {
2318         /*
2319          * We already have the kernel maps, put in place via perf_session__create_kernel_maps(),
2320          * so there is no need to add them twice.
2321          */
2322         if (!(event->header.misc & PERF_RECORD_MISC_USER))
2323                 return 0;
2324         return perf_event__process_mmap(tool, event, sample, machine);
2325 }
2326
2327 static int build_id__process_mmap2(struct perf_tool *tool, union perf_event *event,
2328                                    struct perf_sample *sample, struct machine *machine)
2329 {
2330         /*
2331          * We already have the kernel maps, put in place via perf_session__create_kernel_maps(),
2332          * so there is no need to add them twice.
2333          */
2334         if (!(event->header.misc & PERF_RECORD_MISC_USER))
2335                 return 0;
2336
2337         return perf_event__process_mmap2(tool, event, sample, machine);
2338 }
2339
2340 /*
2341  * XXX Ideally this would be local to cmd_record() and passed to record__new(),
2342  * because we need to have access to it in record__exit(), which is called
2343  * after cmd_record() exits, but since record_options needs to be accessible to
2344  * builtin-script, leave it here.
2345  *
2346  * At least we don't touch it in all the other functions here directly.
2347  *
2348  * Just say no to tons of global variables, sigh.
2349  */
2350 static struct record record = {
2351         .opts = {
2352                 .sample_time         = true,
2353                 .mmap_pages          = UINT_MAX,
2354                 .user_freq           = UINT_MAX,
2355                 .user_interval       = ULLONG_MAX,
2356                 .freq                = 4000,
2357                 .target              = {
2358                         .uses_mmap   = true,
2359                         .default_per_cpu = true,
2360                 },
2361                 .mmap_flush          = MMAP_FLUSH_DEFAULT,
2362                 .nr_threads_synthesize = 1,
2363         },
2364         .tool = {
2365                 .sample         = process_sample_event,
2366                 .fork           = perf_event__process_fork,
2367                 .exit           = perf_event__process_exit,
2368                 .comm           = perf_event__process_comm,
2369                 .namespaces     = perf_event__process_namespaces,
2370                 .mmap           = build_id__process_mmap,
2371                 .mmap2          = build_id__process_mmap2,
2372                 .ordered_events = true,
2373         },
2374 };
2375
2376 const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
2377         "\n\t\t\t\tDefault: fp";
2378
2379 static bool dry_run;
2380
2381 /*
2382  * XXX Will stay a global variable till we fix builtin-script.c to stop messing
2383  * with it and switch to use the library functions in perf_evlist that came
2384  * with it and switch to using the library functions in perf_evlist that came
2385  * from builtin-record.c, i.e. use record_opts,
2386  * perf_evlist__prepare_workload, etc. instead of fork+exec'ing 'perf record',
2387  */
2388 static struct option __record_options[] = {
2389         OPT_CALLBACK('e', "event", &record.evlist, "event",
2390                      "event selector. use 'perf list' to list available events",
2391                      parse_events_option),
2392         OPT_CALLBACK(0, "filter", &record.evlist, "filter",
2393                      "event filter", parse_filter),
2394         OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
2395                            NULL, "don't record events from perf itself",
2396                            exclude_perf),
2397         OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
2398                     "record events on existing process id"),
2399         OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
2400                     "record events on existing thread id"),
2401         OPT_INTEGER('r', "realtime", &record.realtime_prio,
2402                     "collect data with this RT SCHED_FIFO priority"),
2403         OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
2404                     "collect data without buffering"),
2405         OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
2406                     "collect raw sample records from all opened counters"),
2407         OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
2408                             "system-wide collection from all CPUs"),
2409         OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
2410                     "list of cpus to monitor"),
2411         OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
2412         OPT_STRING('o', "output", &record.data.path, "file",
2413                     "output file name"),
2414         OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
2415                         &record.opts.no_inherit_set,
2416                         "child tasks do not inherit counters"),
2417         OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize,
2418                     "synthesize non-sample events at the end of output"),
2419         OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"),
2420         OPT_BOOLEAN(0, "no-bpf-event", &record.opts.no_bpf_event, "do not record bpf events"),
2421         OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq,
2422                     "Fail if the specified frequency can't be used"),
2423         OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'",
2424                      "profile at this frequency",
2425                       record__parse_freq),
2426         OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
2427                      "number of mmap data pages and AUX area tracing mmap pages",
2428                      record__parse_mmap_pages),
2429         OPT_CALLBACK(0, "mmap-flush", &record.opts, "number",
2430                      "Minimum number of bytes extracted from mmap data pages (default: 1)",
2431                      record__mmap_flush_parse),
2432         OPT_BOOLEAN(0, "group", &record.opts.group,
2433                     "put the counters into a counter group"),
2434         OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
2435                            NULL, "enables call-graph recording" ,
2436                            &record_callchain_opt),
2437         OPT_CALLBACK(0, "call-graph", &record.opts,
2438                      "record_mode[,record_size]", record_callchain_help,
2439                      &record_parse_callchain_opt),
2440         OPT_INCR('v', "verbose", &verbose,
2441                     "be more verbose (show counter open errors, etc)"),
2442         OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
2443         OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
2444                     "per thread counts"),
2445         OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
2446         OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr,
2447                     "Record the sample physical addresses"),
2448         OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"),
2449         OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
2450                         &record.opts.sample_time_set,
2451                         "Record the sample timestamps"),
2452         OPT_BOOLEAN_SET('P', "period", &record.opts.period, &record.opts.period_set,
2453                         "Record the sample period"),
2454         OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
2455                     "don't sample"),
2456         OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache,
2457                         &record.no_buildid_cache_set,
2458                         "do not update the buildid cache"),
2459         OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid,
2460                         &record.no_buildid_set,
2461                         "do not collect buildids in perf.data"),
2462         OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
2463                      "monitor event in cgroup name only",
2464                      parse_cgroups),
2465         OPT_UINTEGER('D', "delay", &record.opts.initial_delay,
2466                   "ms to wait before starting measurement after program start"),
2467         OPT_BOOLEAN(0, "kcore", &record.opts.kcore, "copy /proc/kcore"),
2468         OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
2469                    "user to profile"),
2470
2471         OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
2472                      "branch any", "sample any taken branches",
2473                      parse_branch_stack),
2474
2475         OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
2476                      "branch filter mask", "branch stack filter modes",
2477                      parse_branch_stack),
2478         OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
2479                     "sample by weight (on special events only)"),
2480         OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
2481                     "sample transaction flags (special events only)"),
2482         OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
2483                     "use per-thread mmaps"),
2484         OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
2485                     "sample selected machine registers on interrupt,"
2486                     " use '-I?' to list register names", parse_intr_regs),
2487         OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register",
2488                     "sample selected machine registers in user space,"
2489                     " use '--user-regs=?' to list register names", parse_user_regs),
2490         OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
2491                     "Record running/enabled time of read (:S) events"),
2492         OPT_CALLBACK('k', "clockid", &record.opts,
2493         "clockid", "clockid to use for events, see clock_gettime()",
2494         parse_clockid),
2495         OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
2496                           "opts", "AUX area tracing Snapshot Mode", ""),
2497         OPT_STRING_OPTARG(0, "aux-sample", &record.opts.auxtrace_sample_opts,
2498                           "opts", "sample AUX area", ""),
2499         OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout,
2500                         "per thread proc mmap processing timeout in ms"),
2501         OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces,
2502                     "Record namespaces events"),
2503         OPT_BOOLEAN(0, "all-cgroups", &record.opts.record_cgroup,
2504                     "Record cgroup events"),
2505         OPT_BOOLEAN_SET(0, "switch-events", &record.opts.record_switch_events,
2506                         &record.opts.record_switch_events_set,
2507                         "Record context switch events"),
2508         OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
2509                          "Configure all used events to run in kernel space.",
2510                          PARSE_OPT_EXCLUSIVE),
2511         OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user,
2512                          "Configure all used events to run in user space.",
2513                          PARSE_OPT_EXCLUSIVE),
2514         OPT_BOOLEAN(0, "kernel-callchains", &record.opts.kernel_callchains,
2515                     "collect kernel callchains"),
2516         OPT_BOOLEAN(0, "user-callchains", &record.opts.user_callchains,
2517                     "collect user callchains"),
2518         OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path",
2519                    "clang binary to use for compiling BPF scriptlets"),
2520         OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options",
2521                    "options passed to clang when compiling BPF scriptlets"),
2522         OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
2523                    "file", "vmlinux pathname"),
2524         OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
2525                     "Record build-id of all DSOs regardless of hits"),
2526         OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
2527                     "append timestamp to output filename"),
2528         OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary,
2529                     "Record timestamp boundary (time of first/last samples)"),
2530         OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str,
2531                           &record.switch_output.set, "signal or size[BKMG] or time[smhd]",
2532                           "Switch output when receiving SIGUSR2 (signal) or cross a size or time threshold",
2533                           "signal"),
2534         OPT_CALLBACK_SET(0, "switch-output-event", &record.sb_evlist, &record.switch_output_event_set, "switch output event",
2535                          "switch output event selector. use 'perf list' to list available events",
2536                          parse_events_option_new_evlist),
2537         OPT_INTEGER(0, "switch-max-files", &record.switch_output.num_files,
2538                    "Limit number of switch output generated files"),
2539         OPT_BOOLEAN(0, "dry-run", &dry_run,
2540                     "Parse options then exit"),
2541 #ifdef HAVE_AIO_SUPPORT
2542         OPT_CALLBACK_OPTARG(0, "aio", &record.opts,
2543                      &nr_cblocks_default, "n", "Use <n> control blocks in asynchronous trace writing mode (default: 1, max: 4)",
2544                      record__aio_parse),
2545 #endif
2546         OPT_CALLBACK(0, "affinity", &record.opts, "node|cpu",
2547                      "Set affinity mask of trace reading thread to NUMA node cpu mask or cpu of processed mmap buffer",
2548                      record__parse_affinity),
2549 #ifdef HAVE_ZSTD_SUPPORT
2550         OPT_CALLBACK_OPTARG('z', "compression-level", &record.opts, &comp_level_default,
2551                             "n", "Compress records using the specified level (default: 1 - fastest compression, 22 - greatest compression)",
2552                             record__parse_comp_level),
2553 #endif
2554         OPT_CALLBACK(0, "max-size", &record.output_max_size,
2555                      "size", "Limit the maximum size of the output file", parse_output_max_size),
2556         OPT_UINTEGER(0, "num-thread-synthesize",
2557                      &record.opts.nr_threads_synthesize,
2558                      "number of threads to run for event synthesis"),
2559 #ifdef HAVE_LIBPFM
2560         OPT_CALLBACK(0, "pfm-events", &record.evlist, "event",
2561                 "libpfm4 event selector. use 'perf list' to list available events",
2562                 parse_libpfm_events_option),
2563 #endif
2564         OPT_END()
2565 };
2566
2567 struct option *record_options = __record_options;
2568
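/*
 * Typical invocations handled by cmd_record() (illustrative):
 *
 *   perf record -g -- ./workload     # profile a forked workload with call
 *                                    # graphs (FP mode by default)
 *   perf record -a sleep 10          # system-wide for ~10 seconds
 *   perf record -p 1234 -o out.data  # attach to an existing pid
 */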
2569 int cmd_record(int argc, const char **argv)
2570 {
2571         int err;
2572         struct record *rec = &record;
2573         char errbuf[BUFSIZ];
2574
2575         setlocale(LC_ALL, "");
2576
2577 #ifndef HAVE_LIBBPF_SUPPORT
2578 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c)
2579         set_nobuild('\0', "clang-path", true);
2580         set_nobuild('\0', "clang-opt", true);
2581 # undef set_nobuild
2582 #endif
2583
2584 #ifndef HAVE_BPF_PROLOGUE
2585 # if !defined (HAVE_DWARF_SUPPORT)
2586 #  define REASON  "NO_DWARF=1"
2587 # elif !defined (HAVE_LIBBPF_SUPPORT)
2588 #  define REASON  "NO_LIBBPF=1"
2589 # else
2590 #  define REASON  "this architecture doesn't support BPF prologue"
2591 # endif
2592 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c)
2593         set_nobuild('\0', "vmlinux", true);
2594 # undef set_nobuild
2595 # undef REASON
2596 #endif
2597
2598         rec->opts.affinity = PERF_AFFINITY_SYS;
2599
2600         rec->evlist = evlist__new();
2601         if (rec->evlist == NULL)
2602                 return -ENOMEM;
2603
2604         err = perf_config(perf_record_config, rec);
2605         if (err)
2606                 return err;
2607
2608         argc = parse_options(argc, argv, record_options, record_usage,
2609                             PARSE_OPT_STOP_AT_NON_OPTION);
2610         if (quiet)
2611                 perf_quiet_option();
2612
2613         /* Make system wide (-a) the default target. */
2614         if (!argc && target__none(&rec->opts.target))
2615                 rec->opts.target.system_wide = true;
2616
2617         if (nr_cgroups && !rec->opts.target.system_wide) {
2618                 usage_with_options_msg(record_usage, record_options,
2619                         "cgroup monitoring only available in system-wide mode");
2620
2621         }
2622
2623         if (rec->opts.kcore)
2624                 rec->data.is_dir = true;
2625
2626         if (rec->opts.comp_level != 0) {
2627                 pr_debug("Compression enabled, disabling build id collection at the end of the session.\n");
2628                 rec->no_buildid = true;
2629         }
2630
2631         if (rec->opts.record_switch_events &&
2632             !perf_can_record_switch_events()) {
2633                 ui__error("kernel does not support recording context switch events\n");
2634                 parse_options_usage(record_usage, record_options, "switch-events", 0);
2635                 return -EINVAL;
2636         }
2637
2638         if (switch_output_setup(rec)) {
2639                 parse_options_usage(record_usage, record_options, "switch-output", 0);
2640                 return -EINVAL;
2641         }
2642
2643         if (rec->switch_output.time) {
2644                 signal(SIGALRM, alarm_sig_handler);
2645                 alarm(rec->switch_output.time);
2646         }
2647
2648         if (rec->switch_output.num_files) {
2649                 rec->switch_output.filenames = calloc(rec->switch_output.num_files,
2650                                                       sizeof(char *));
2651                 if (!rec->switch_output.filenames)
2652                         return -EINVAL;
2653         }
2654
2655         /*
2656          * Allow aliases to facilitate the lookup of symbols for address
2657          * filters. Refer to auxtrace_parse_filters().
2658          */
2659         symbol_conf.allow_aliases = true;
2660
2661         symbol__init(NULL);
2662
2663         if (rec->opts.affinity != PERF_AFFINITY_SYS) {
2664                 rec->affinity_mask.nbits = cpu__max_cpu();
2665                 rec->affinity_mask.bits = bitmap_alloc(rec->affinity_mask.nbits);
2666                 if (!rec->affinity_mask.bits) {
2667                         pr_err("Failed to allocate thread mask for %zd cpus\n", rec->affinity_mask.nbits);
2668                         return -ENOMEM;
2669                 }
2670                 pr_debug2("thread mask[%zd]: empty\n", rec->affinity_mask.nbits);
2671         }
2672
2673         err = record__auxtrace_init(rec);
2674         if (err)
2675                 goto out;
2676
2677         if (dry_run)
2678                 goto out;
2679
2680         err = bpf__setup_stdout(rec->evlist);
2681         if (err) {
2682                 bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf));
2683                 pr_err("ERROR: Setup BPF stdout failed: %s\n",
2684                          errbuf);
2685                 goto out;
2686         }
2687
2688         err = -ENOMEM;
2689
2690         if (rec->no_buildid_cache || rec->no_buildid) {
2691                 disable_buildid_cache();
2692         } else if (rec->switch_output.enabled) {
2693                 /*
2694                  * In 'perf record --switch-output', disable buildid
2695                  * generation by default to reduce data file switching
2696                  * overhead. Still generate buildids if they are explicitly
2697                  * required using
2698                  *
2699                  *  perf record --switch-output --no-no-buildid \
2700                  *              --no-no-buildid-cache
2701                  *
2702                  * Following code equals to:
2703                  *
2704                  * if ((rec->no_buildid || !rec->no_buildid_set) &&
2705                  *     (rec->no_buildid_cache || !rec->no_buildid_cache_set))
2706                  *         disable_buildid_cache();
2707                  */
2708                 bool disable = true;
2709
2710                 if (rec->no_buildid_set && !rec->no_buildid)
2711                         disable = false;
2712                 if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
2713                         disable = false;
2714                 if (disable) {
2715                         rec->no_buildid = true;
2716                         rec->no_buildid_cache = true;
2717                         disable_buildid_cache();
2718                 }
2719         }
2720
2721         if (record.opts.overwrite)
2722                 record.opts.tail_synthesize = true;
2723
2724         if (rec->evlist->core.nr_entries == 0 &&
2725             __perf_evlist__add_default(rec->evlist, !record.opts.no_samples) < 0) {
2726                 pr_err("Not enough memory for event selector list\n");
2727                 goto out;
2728         }
2729
2730         if (rec->opts.target.tid && !rec->opts.no_inherit_set)
2731                 rec->opts.no_inherit = true;
2732
2733         err = target__validate(&rec->opts.target);
2734         if (err) {
2735                 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
2736                 ui__warning("%s\n", errbuf);
2737         }
2738
2739         err = target__parse_uid(&rec->opts.target);
2740         if (err) {
2741                 int saved_errno = errno;
2742
2743                 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
2744                 ui__error("%s", errbuf);
2745
2746                 err = -saved_errno;
2747                 goto out;
2748         }
2749
2750         /* Enable ignoring missing threads when -u/-p option is defined. */
2751         rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX || rec->opts.target.pid;
2752
2753         err = -ENOMEM;
2754         if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
2755                 usage_with_options(record_usage, record_options);
2756
2757         err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
2758         if (err)
2759                 goto out;
2760
2761         /*
2762          * We take all buildids when the file contains
2763          * AUX area tracing data, because we do not decode the
2764          * trace, as that would take too long.
2765          */
2766         if (rec->opts.full_auxtrace)
2767                 rec->buildid_all = true;
2768
2769         if (record_opts__config(&rec->opts)) {
2770                 err = -EINVAL;
2771                 goto out;
2772         }
2773
2774         if (rec->opts.nr_cblocks > nr_cblocks_max)
2775                 rec->opts.nr_cblocks = nr_cblocks_max;
2776         pr_debug("nr_cblocks: %d\n", rec->opts.nr_cblocks);
2777
2778         pr_debug("affinity: %s\n", affinity_tags[rec->opts.affinity]);
2779         pr_debug("mmap flush: %d\n", rec->opts.mmap_flush);
2780
2781         if (rec->opts.comp_level > comp_level_max)
2782                 rec->opts.comp_level = comp_level_max;
2783         pr_debug("comp level: %d\n", rec->opts.comp_level);
2784
2785         err = __cmd_record(&record, argc, argv);
2786 out:
2787         bitmap_free(rec->affinity_mask.bits);
2788         evlist__delete(rec->evlist);
2789         symbol__exit();
2790         auxtrace_record__free(rec->itr);
2791         return err;
2792 }
2793
2794 static void snapshot_sig_handler(int sig __maybe_unused)
2795 {
2796         struct record *rec = &record;
2797
2798         if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
2799                 trigger_hit(&auxtrace_snapshot_trigger);
2800                 auxtrace_record__snapshot_started = 1;
2801                 if (auxtrace_record__snapshot_start(record.itr))
2802                         trigger_error(&auxtrace_snapshot_trigger);
2803         }
2804
2805         if (switch_output_signal(rec))
2806                 trigger_hit(&switch_output_trigger);
2807 }
2808
2809 static void alarm_sig_handler(int sig __maybe_unused)
2810 {
2811         struct record *rec = &record;
2812
2813         if (switch_output_time(rec))
2814                 trigger_hit(&switch_output_trigger);
2815 }