// SPDX-License-Identifier: GPL-2.0
/*
 * builtin-record.c
 *
 * Builtin record command: Record the profile of a workload
 * (or a CPU, or a PID) into the perf.data output file - for
 * later analysis via perf report.
 */
#include "builtin.h"

#include "util/build-id.h"
#include <subcmd/parse-options.h>
#include "util/parse-events.h"
#include "util/config.h"

#include "util/callchain.h"
#include "util/cgroup.h"
#include "util/header.h"
#include "util/event.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/debug.h"
#include "util/mmap.h"
#include "util/target.h"
#include "util/session.h"
#include "util/tool.h"
#include "util/symbol.h"
#include "util/record.h"
#include "util/cpumap.h"
#include "util/thread_map.h"
#include "util/data.h"
#include "util/perf_regs.h"
#include "util/auxtrace.h"
#include "util/tsc.h"
#include "util/parse-branch-options.h"
#include "util/parse-regs-options.h"
#include "util/perf_api_probe.h"
#include "util/llvm-utils.h"
#include "util/bpf-loader.h"
#include "util/trigger.h"
#include "util/perf-hooks.h"
#include "util/cpu-set-sched.h"
#include "util/synthetic-events.h"
#include "util/time-utils.h"
#include "util/units.h"
#include "util/bpf-event.h"
#include "util/util.h"
#include "perf.h"

#include <errno.h>
#include <inttypes.h>
#include <locale.h>
#include <poll.h>
#include <pthread.h>
#include <unistd.h>
#include <sched.h>
#include <signal.h>
#ifdef HAVE_EVENTFD_SUPPORT
#include <sys/eventfd.h>
#endif
#include <sys/mman.h>
#include <sys/wait.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <linux/err.h>
#include <linux/string.h>
#include <linux/time64.h>
#include <linux/zalloc.h>
#include <linux/bitmap.h>
struct switch_output {
	bool		 enabled;
	bool		 signal;
	unsigned long	 size;
	unsigned long	 time;
	const char	*str;
	bool		 set;
	char		 **filenames;
	int		 num_files;
	int		 cur_file;
};

struct record {
	struct perf_tool	tool;
	struct record_opts	opts;
	u64			bytes_written;
	struct perf_data	data;
	struct auxtrace_record	*itr;
	struct evlist	*evlist;
	struct perf_session	*session;
	struct evlist		*sb_evlist;
	pthread_t		thread_id;
	int			realtime_prio;
	bool			switch_output_event_set;
	bool			no_buildid;
	bool			no_buildid_set;
	bool			no_buildid_cache;
	bool			no_buildid_cache_set;
	bool			buildid_all;
	bool			timestamp_filename;
	bool			timestamp_boundary;
	struct switch_output	switch_output;
	unsigned long long	samples;
	struct mmap_cpu_mask	affinity_mask;
	unsigned long		output_max_size;	/* = 0: unlimited */
};
static volatile int done;

static volatile int auxtrace_record__snapshot_started;
static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
static DEFINE_TRIGGER(switch_output_trigger);

static const char *affinity_tags[PERF_AFFINITY_MAX] = {
	"SYS", "NODE", "CPU"
};
static bool switch_output_signal(struct record *rec)
{
	return rec->switch_output.signal &&
	       trigger_is_ready(&switch_output_trigger);
}

static bool switch_output_size(struct record *rec)
{
	return rec->switch_output.size &&
	       trigger_is_ready(&switch_output_trigger) &&
	       (rec->bytes_written >= rec->switch_output.size);
}

static bool switch_output_time(struct record *rec)
{
	return rec->switch_output.time &&
	       trigger_is_ready(&switch_output_trigger);
}

static bool record__output_max_size_exceeded(struct record *rec)
{
	return rec->output_max_size &&
	       (rec->bytes_written >= rec->output_max_size);
}
static int record__write(struct record *rec, struct mmap *map __maybe_unused,
			 void *bf, size_t size)
{
	struct perf_data_file *file = &rec->session->data->file;

	if (perf_data_file__write(file, bf, size) < 0) {
		pr_err("failed to write perf data, error: %m\n");
		return -1;
	}

	rec->bytes_written += size;

	if (record__output_max_size_exceeded(rec) && !done) {
		fprintf(stderr, "[ perf record: perf size limit reached (%" PRIu64 " KB),"
				" stopping session ]\n",
				rec->bytes_written >> 10);
		done = 1;
	}

	if (switch_output_size(rec))
		trigger_hit(&switch_output_trigger);

	return 0;
}
static int record__aio_enabled(struct record *rec);
static int record__comp_enabled(struct record *rec);
static size_t zstd_compress(struct perf_session *session, void *dst, size_t dst_size,
			    void *src, size_t src_size);
#ifdef HAVE_AIO_SUPPORT
static int record__aio_write(struct aiocb *cblock, int trace_fd,
		void *buf, size_t size, off_t off)
{
	int rc;

	cblock->aio_fildes = trace_fd;
	cblock->aio_buf    = buf;
	cblock->aio_nbytes = size;
	cblock->aio_offset = off;
	cblock->aio_sigevent.sigev_notify = SIGEV_NONE;

	do {
		rc = aio_write(cblock);
		if (rc == 0) {
			break;
		} else if (errno != EAGAIN) {
			cblock->aio_fildes = -1;
			pr_err("failed to queue perf data, error: %m\n");
			break;
		}
	} while (1);

	return rc;
}
static int record__aio_complete(struct mmap *md, struct aiocb *cblock)
{
	void *rem_buf;
	off_t rem_off;
	size_t rem_size;
	int rc, aio_errno;
	ssize_t aio_ret, written;

	aio_errno = aio_error(cblock);
	if (aio_errno == EINPROGRESS)
		return 0;

	written = aio_ret = aio_return(cblock);
	if (aio_ret < 0) {
		if (aio_errno != EINTR)
			pr_err("failed to write perf data, error: %m\n");
		written = 0;
	}

	rem_size = cblock->aio_nbytes - written;

	if (rem_size == 0) {
		cblock->aio_fildes = -1;
		/*
		 * md->refcount is incremented in record__aio_pushfn() for
		 * every aio write request started in record__aio_push() so
		 * decrement it because the request is now complete.
		 */
		perf_mmap__put(&md->core);
		rc = 1;
	} else {
		/*
		 * aio write request may require restart with the
		 * remainder if the kernel didn't write the whole
		 * chunk at once.
		 */
		rem_off = cblock->aio_offset + written;
		rem_buf = (void *)(cblock->aio_buf + written);
		record__aio_write(cblock, cblock->aio_fildes,
				rem_buf, rem_size, rem_off);
		rc = 0;
	}

	return rc;
}
static int record__aio_sync(struct mmap *md, bool sync_all)
{
	struct aiocb **aiocb = md->aio.aiocb;
	struct aiocb *cblocks = md->aio.cblocks;
	struct timespec timeout = { 0, 1000 * 1000 * 1 }; /* 1ms */
	int i, do_suspend;

	do {
		do_suspend = 0;
		for (i = 0; i < md->aio.nr_cblocks; ++i) {
			if (cblocks[i].aio_fildes == -1 || record__aio_complete(md, &cblocks[i])) {
				if (sync_all)
					aiocb[i] = NULL;
				else
					return i;
			} else {
				/*
				 * Started aio write is not complete yet
				 * so it has to be waited before the
				 * next allocation.
				 */
				aiocb[i] = &cblocks[i];
				do_suspend = 1;
			}
		}
		if (!do_suspend)
			return -1;

		while (aio_suspend((const struct aiocb **)aiocb, md->aio.nr_cblocks, &timeout)) {
			if (!(errno == EAGAIN || errno == EINTR))
				pr_err("failed to sync perf data, error: %m\n");
		}
	} while (1);
}
struct record_aio {
	struct record	*rec;
	void		*data;
	size_t		size;
};

static int record__aio_pushfn(struct mmap *map, void *to, void *buf, size_t size)
{
	struct record_aio *aio = to;

	/*
	 * map->core.base data pointed by buf is copied into free map->aio.data[] buffer
	 * to release space in the kernel buffer as fast as possible, calling
	 * perf_mmap__consume() from perf_mmap__push() function.
	 *
	 * That lets the kernel proceed with storing more profiling data into
	 * the kernel buffer earlier than other per-cpu kernel buffers are handled.
	 *
	 * Copying can be done in two steps in case the chunk of profiling data
	 * crosses the upper bound of the kernel buffer. In this case we first move
	 * part of data from map->start till the upper bound and then the remainder
	 * from the beginning of the kernel buffer till the end of the data chunk.
	 */

	if (record__comp_enabled(aio->rec)) {
		size = zstd_compress(aio->rec->session, aio->data + aio->size,
				     mmap__mmap_len(map) - aio->size,
				     buf, size);
	} else {
		memcpy(aio->data + aio->size, buf, size);
	}

	if (!aio->size) {
		/*
		 * Increment map->refcount to guard map->aio.data[] buffer
		 * from premature deallocation because map object can be
		 * released earlier than aio write request started on
		 * map->aio.data[] buffer is complete.
		 *
		 * perf_mmap__put() is done at record__aio_complete()
		 * after started aio request completion or at record__aio_push()
		 * if the request failed to start.
		 */
		perf_mmap__get(&map->core);
	}

	aio->size += size;

	return size;
}
static int record__aio_push(struct record *rec, struct mmap *map, off_t *off)
{
	int ret, idx;
	int trace_fd = rec->session->data->file.fd;
	struct record_aio aio = { .rec = rec, .size = 0 };

	/*
	 * Call record__aio_sync() to wait till map->aio.data[] buffer
	 * becomes available after previous aio write operation.
	 */

	idx = record__aio_sync(map, false);
	aio.data = map->aio.data[idx];
	ret = perf_mmap__push(map, &aio, record__aio_pushfn);
	if (ret != 0) /* ret > 0 - no data, ret < 0 - error */
		return ret;

	rec->samples++;
	ret = record__aio_write(&(map->aio.cblocks[idx]), trace_fd, aio.data, aio.size, *off);
	if (!ret) {
		*off += aio.size;
		rec->bytes_written += aio.size;
		if (switch_output_size(rec))
			trigger_hit(&switch_output_trigger);
	} else {
		/*
		 * Decrement map->refcount incremented in record__aio_pushfn()
		 * back if record__aio_write() operation failed to start, otherwise
		 * map->refcount is decremented in record__aio_complete() after
		 * aio write operation finishes successfully.
		 */
		perf_mmap__put(&map->core);
	}

	return ret;
}
static off_t record__aio_get_pos(int trace_fd)
{
	return lseek(trace_fd, 0, SEEK_CUR);
}

static void record__aio_set_pos(int trace_fd, off_t pos)
{
	lseek(trace_fd, pos, SEEK_SET);
}

static void record__aio_mmap_read_sync(struct record *rec)
{
	int i;
	struct evlist *evlist = rec->evlist;
	struct mmap *maps = evlist->mmap;

	if (!record__aio_enabled(rec))
		return;

	for (i = 0; i < evlist->core.nr_mmaps; i++) {
		struct mmap *map = &maps[i];

		if (map->core.base)
			record__aio_sync(map, true);
	}
}
static int nr_cblocks_default = 1;
static int nr_cblocks_max = 4;

static int record__aio_parse(const struct option *opt,
			     const char *str,
			     int unset)
{
	struct record_opts *opts = (struct record_opts *)opt->value;

	if (unset) {
		opts->nr_cblocks = 0;
	} else {
		if (str)
			opts->nr_cblocks = strtol(str, NULL, 0);
		if (!opts->nr_cblocks)
			opts->nr_cblocks = nr_cblocks_default;
	}

	return 0;
}
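
/*
 * Example usage (a sketch; assumes the build has HAVE_AIO_SUPPORT and that
 * the option is wired up as --aio in __record_options[]):
 *
 *	perf record --aio=2 -e cycles -- sleep 1
 *
 * nr_cblocks is the number of asynchronous write requests that may be in
 * flight per ring buffer before record__aio_sync() must wait for a free
 * control block; elsewhere it is capped at nr_cblocks_max (4).
 */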
#else /* HAVE_AIO_SUPPORT */
static int nr_cblocks_max = 0;

static int record__aio_push(struct record *rec __maybe_unused, struct mmap *map __maybe_unused,
			    off_t *off __maybe_unused)
{
	return -1;
}

static off_t record__aio_get_pos(int trace_fd __maybe_unused)
{
	return -1;
}

static void record__aio_set_pos(int trace_fd __maybe_unused, off_t pos __maybe_unused)
{
}

static void record__aio_mmap_read_sync(struct record *rec __maybe_unused)
{
}
#endif

static int record__aio_enabled(struct record *rec)
{
	return rec->opts.nr_cblocks > 0;
}
#define MMAP_FLUSH_DEFAULT 1
static int record__mmap_flush_parse(const struct option *opt,
				    const char *str,
				    int unset)
{
	int flush_max;
	struct record_opts *opts = (struct record_opts *)opt->value;
	static struct parse_tag tags[] = {
			{ .tag  = 'B', .mult = 1       },
			{ .tag  = 'K', .mult = 1 << 10 },
			{ .tag  = 'M', .mult = 1 << 20 },
			{ .tag  = 'G', .mult = 1 << 30 },
			{ .tag  = 0 },
	};

	if (unset)
		return 0;

	if (str) {
		opts->mmap_flush = parse_tag_value(str, tags);
		if (opts->mmap_flush == (int)-1)
			opts->mmap_flush = strtol(str, NULL, 0);
	}

	if (!opts->mmap_flush)
		opts->mmap_flush = MMAP_FLUSH_DEFAULT;

	flush_max = evlist__mmap_size(opts->mmap_pages);
	flush_max /= 4;
	if (opts->mmap_flush > flush_max)
		opts->mmap_flush = flush_max;

	return 0;
}
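
/*
 * Example usage (a sketch): the threshold accepts a plain byte count or a
 * B/K/M/G suffixed size, and is clamped above to a quarter of the mmap
 * buffer so the flush point can always be reached:
 *
 *	perf record --mmap-flush=48K -- sleep 1
 */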
#ifdef HAVE_ZSTD_SUPPORT
static unsigned int comp_level_default = 1;

static int record__parse_comp_level(const struct option *opt, const char *str, int unset)
{
	struct record_opts *opts = opt->value;

	if (unset) {
		opts->comp_level = 0;
	} else {
		if (str)
			opts->comp_level = strtol(str, NULL, 0);
		if (!opts->comp_level)
			opts->comp_level = comp_level_default;
	}

	return 0;
}
#endif
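
/*
 * Example usage (a sketch; assumes the build has HAVE_ZSTD_SUPPORT and the
 * option is wired up as -z/--compression-level in __record_options[]):
 *
 *	perf record -z 2 -- sleep 1
 *
 * An empty argument selects comp_level_default (1); zstd itself accepts
 * levels up to comp_level_max (22).
 */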
static unsigned int comp_level_max = 22;

static int record__comp_enabled(struct record *rec)
{
	return rec->opts.comp_level > 0;
}
static int process_synthesized_event(struct perf_tool *tool,
				     union perf_event *event,
				     struct perf_sample *sample __maybe_unused,
				     struct machine *machine __maybe_unused)
{
	struct record *rec = container_of(tool, struct record, tool);
	return record__write(rec, NULL, event, event->header.size);
}
static int process_locked_synthesized_event(struct perf_tool *tool,
					    union perf_event *event,
					    struct perf_sample *sample __maybe_unused,
					    struct machine *machine __maybe_unused)
{
	static pthread_mutex_t synth_lock = PTHREAD_MUTEX_INITIALIZER;
	int ret;

	pthread_mutex_lock(&synth_lock);
	ret = process_synthesized_event(tool, event, sample, machine);
	pthread_mutex_unlock(&synth_lock);
	return ret;
}
static int record__pushfn(struct mmap *map, void *to, void *bf, size_t size)
{
	struct record *rec = to;

	if (record__comp_enabled(rec)) {
		size = zstd_compress(rec->session, map->data, mmap__mmap_len(map), bf, size);
		bf   = map->data;
	}

	rec->samples++;
	return record__write(rec, map, bf, size);
}
static volatile int signr = -1;
static volatile int child_finished;
#ifdef HAVE_EVENTFD_SUPPORT
static int done_fd = -1;
#endif

static void sig_handler(int sig)
{
	if (sig == SIGCHLD)
		child_finished = 1;
	else
		signr = sig;

	done = 1;
#ifdef HAVE_EVENTFD_SUPPORT
{
	u64 tmp = 1;
	/*
	 * It is possible for this signal handler to run after done is checked
	 * in the main loop, but before the perf counter fds are polled. If this
	 * happens, the poll() will continue to wait even though done is set,
	 * and will only break out if either another signal is received, or the
	 * counters are ready for read. To ensure the poll() doesn't sleep when
	 * done is set, use an eventfd (done_fd) to wake up the poll().
	 */
	if (write(done_fd, &tmp, sizeof(tmp)) < 0)
		pr_err("failed to signal wakeup fd, error: %m\n");
}
#endif // HAVE_EVENTFD_SUPPORT
}
static void sigsegv_handler(int sig)
{
	perf_hooks__recover();
	sighandler_dump_stack(sig);
}

static void record__sig_exit(void)
{
	if (signr == -1)
		return;

	signal(signr, SIG_DFL);
	raise(signr);
}
#ifdef HAVE_AUXTRACE_SUPPORT

static int record__process_auxtrace(struct perf_tool *tool,
				    struct mmap *map,
				    union perf_event *event, void *data1,
				    size_t len1, void *data2, size_t len2)
{
	struct record *rec = container_of(tool, struct record, tool);
	struct perf_data *data = &rec->data;
	size_t padding;
	u8 pad[8] = {0};

	if (!perf_data__is_pipe(data) && perf_data__is_single_file(data)) {
		off_t file_offset;
		int fd = perf_data__fd(data);
		int err;

		file_offset = lseek(fd, 0, SEEK_CUR);
		if (file_offset == -1)
			return -1;
		err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
						     event, file_offset);
		if (err)
			return err;
	}

	/* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
	padding = (len1 + len2) & 7;
	if (padding)
		padding = 8 - padding;

	record__write(rec, map, event, event->header.size);
	record__write(rec, map, data1, len1);
	if (len2)
		record__write(rec, map, data2, len2);
	record__write(rec, map, &pad, padding);

	return 0;
}
static int record__auxtrace_mmap_read(struct record *rec,
				      struct mmap *map)
{
	int ret;

	ret = auxtrace_mmap__read(map, rec->itr, &rec->tool,
				  record__process_auxtrace);
	if (ret < 0)
		return ret;

	if (ret)
		rec->samples++;

	return 0;
}

static int record__auxtrace_mmap_read_snapshot(struct record *rec,
					       struct mmap *map)
{
	int ret;

	ret = auxtrace_mmap__read_snapshot(map, rec->itr, &rec->tool,
					   record__process_auxtrace,
					   rec->opts.auxtrace_snapshot_size);
	if (ret < 0)
		return ret;

	if (ret)
		rec->samples++;

	return 0;
}
static int record__auxtrace_read_snapshot_all(struct record *rec)
{
	int i;
	int rc = 0;

	for (i = 0; i < rec->evlist->core.nr_mmaps; i++) {
		struct mmap *map = &rec->evlist->mmap[i];

		if (!map->auxtrace_mmap.base)
			continue;

		if (record__auxtrace_mmap_read_snapshot(rec, map) != 0) {
			rc = -1;
			goto out;
		}
	}
out:
	return rc;
}

static void record__read_auxtrace_snapshot(struct record *rec, bool on_exit)
{
	pr_debug("Recording AUX area tracing snapshot\n");
	if (record__auxtrace_read_snapshot_all(rec) < 0) {
		trigger_error(&auxtrace_snapshot_trigger);
	} else {
		if (auxtrace_record__snapshot_finish(rec->itr, on_exit))
			trigger_error(&auxtrace_snapshot_trigger);
		else
			trigger_ready(&auxtrace_snapshot_trigger);
	}
}
static int record__auxtrace_snapshot_exit(struct record *rec)
{
	if (trigger_is_error(&auxtrace_snapshot_trigger))
		return 0;

	if (!auxtrace_record__snapshot_started &&
	    auxtrace_record__snapshot_start(rec->itr))
		return -1;

	record__read_auxtrace_snapshot(rec, true);
	if (trigger_is_error(&auxtrace_snapshot_trigger))
		return -1;

	return 0;
}

static int record__auxtrace_init(struct record *rec)
{
	int err;

	if (!rec->itr) {
		rec->itr = auxtrace_record__init(rec->evlist, &err);
		if (err)
			return err;
	}

	err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
					      rec->opts.auxtrace_snapshot_opts);
	if (err)
		return err;

	err = auxtrace_parse_sample_options(rec->itr, rec->evlist, &rec->opts,
					    rec->opts.auxtrace_sample_opts);
	if (err)
		return err;

	return auxtrace_parse_filters(rec->evlist);
}
#else

static inline
int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
			       struct mmap *map __maybe_unused)
{
	return 0;
}

static inline
void record__read_auxtrace_snapshot(struct record *rec __maybe_unused,
				    bool on_exit __maybe_unused)
{
}

static inline
int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
{
	return 0;
}

static inline
int record__auxtrace_snapshot_exit(struct record *rec __maybe_unused)
{
	return 0;
}

static int record__auxtrace_init(struct record *rec __maybe_unused)
{
	return 0;
}

#endif
static bool record__kcore_readable(struct machine *machine)
{
	char kcore[PATH_MAX];
	int fd;

	scnprintf(kcore, sizeof(kcore), "%s/proc/kcore", machine->root_dir);

	fd = open(kcore, O_RDONLY);
	if (fd < 0)
		return false;

	close(fd);

	return true;
}

static int record__kcore_copy(struct machine *machine, struct perf_data *data)
{
	char from_dir[PATH_MAX];
	char kcore_dir[PATH_MAX];
	int ret;

	snprintf(from_dir, sizeof(from_dir), "%s/proc", machine->root_dir);

	ret = perf_data__make_kcore_dir(data, kcore_dir, sizeof(kcore_dir));
	if (ret)
		return ret;

	return kcore_copy(from_dir, kcore_dir);
}
static int record__mmap_evlist(struct record *rec,
			       struct evlist *evlist)
{
	struct record_opts *opts = &rec->opts;
	bool auxtrace_overwrite = opts->auxtrace_snapshot_mode ||
				  opts->auxtrace_sample_mode;
	char msg[512];

	if (opts->affinity != PERF_AFFINITY_SYS)
		cpu__setup_cpunode_map();

	if (evlist__mmap_ex(evlist, opts->mmap_pages,
				 opts->auxtrace_mmap_pages,
				 auxtrace_overwrite,
				 opts->nr_cblocks, opts->affinity,
				 opts->mmap_flush, opts->comp_level) < 0) {
		if (errno == EPERM) {
			pr_err("Permission error mapping pages.\n"
			       "Consider increasing "
			       "/proc/sys/kernel/perf_event_mlock_kb,\n"
			       "or try again with a smaller value of -m/--mmap_pages.\n"
			       "(current value: %u,%u)\n",
			       opts->mmap_pages, opts->auxtrace_mmap_pages);
			return -errno;
		} else {
			pr_err("failed to mmap with %d (%s)\n", errno,
				str_error_r(errno, msg, sizeof(msg)));
			if (errno)
				return -errno;
			else
				return -EINVAL;
		}
	}
	return 0;
}

static int record__mmap(struct record *rec)
{
	return record__mmap_evlist(rec, rec->evlist);
}
static int record__open(struct record *rec)
{
	char msg[BUFSIZ];
	struct evsel *pos;
	struct evlist *evlist = rec->evlist;
	struct perf_session *session = rec->session;
	struct record_opts *opts = &rec->opts;
	int rc = 0;

	/*
	 * For initial_delay or system wide, we need to add a dummy event so
	 * that we can track PERF_RECORD_MMAP to cover the delay of waiting or
	 * event synthesis.
	 */
	if (opts->initial_delay || target__has_cpu(&opts->target)) {
		if (perf_evlist__add_dummy(evlist))
			return -ENOMEM;

		/* Disable tracking of mmaps on lead event. */
		pos = evlist__first(evlist);
		pos->tracking = 0;
		/* Set up dummy event. */
		pos = evlist__last(evlist);
		pos->tracking = 1;
		/*
		 * Enable the dummy event when the process is forked for
		 * initial_delay, immediately for system wide.
		 */
		if (opts->initial_delay)
			pos->core.attr.enable_on_exec = 1;
		else
			pos->immediate = 1;
	}

	perf_evlist__config(evlist, opts, &callchain_param);

	evlist__for_each_entry(evlist, pos) {
try_again:
		if (evsel__open(pos, pos->core.cpus, pos->core.threads) < 0) {
			if (evsel__fallback(pos, errno, msg, sizeof(msg))) {
				if (verbose > 0)
					ui__warning("%s\n", msg);
				goto try_again;
			}
			if ((errno == EINVAL || errno == EBADF) &&
			    pos->leader != pos &&
			    pos->weak_group) {
				pos = perf_evlist__reset_weak_group(evlist, pos, true);
				goto try_again;
			}
			rc = -errno;
			evsel__open_strerror(pos, &opts->target, errno, msg, sizeof(msg));
			ui__error("%s\n", msg);
			goto out;
		}

		pos->supported = true;
	}

	if (symbol_conf.kptr_restrict && !perf_evlist__exclude_kernel(evlist)) {
		pr_warning(
"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
"check /proc/sys/kernel/kptr_restrict and /proc/sys/kernel/perf_event_paranoid.\n\n"
"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
"file is not found in the buildid cache or in the vmlinux path.\n\n"
"Samples in kernel modules won't be resolved at all.\n\n"
"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
"even with a suitable vmlinux or kallsyms file.\n\n");
	}

	if (perf_evlist__apply_filters(evlist, &pos)) {
		pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
			pos->filter, evsel__name(pos), errno,
			str_error_r(errno, msg, sizeof(msg)));
		rc = -1;
		goto out;
	}

	rc = record__mmap(rec);
	if (rc)
		goto out;

	session->evlist = evlist;
	perf_session__set_id_hdr_size(session);
out:
	return rc;
}
static int process_sample_event(struct perf_tool *tool,
				union perf_event *event,
				struct perf_sample *sample,
				struct evsel *evsel,
				struct machine *machine)
{
	struct record *rec = container_of(tool, struct record, tool);

	if (rec->evlist->first_sample_time == 0)
		rec->evlist->first_sample_time = sample->time;

	rec->evlist->last_sample_time = sample->time;

	if (rec->buildid_all)
		return 0;

	rec->samples++;
	return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
}
static int process_buildids(struct record *rec)
{
	struct perf_session *session = rec->session;

	if (perf_data__size(&rec->data) == 0)
		return 0;

	/*
	 * During this process, it'll load kernel map and replace the
	 * dso->long_name to a real pathname it found. In this case
	 * we prefer the vmlinux path like
	 *   /lib/modules/3.16.4/build/vmlinux
	 *
	 * rather than build-id path (in debug directory).
	 *   $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
	 */
	symbol_conf.ignore_vmlinux_buildid = true;

	/*
	 * If --buildid-all is given, it marks all DSO regardless of hits,
	 * so no need to process samples. But if timestamp_boundary is enabled,
	 * it still needs to walk on all samples to get the timestamps of
	 * first/last samples.
	 */
	if (rec->buildid_all && !rec->timestamp_boundary)
		rec->tool.sample = NULL;

	return perf_session__process_events(session);
}
static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
{
	int err;
	struct perf_tool *tool = data;
	/*
	 * As for guest kernel when processing subcommand record&report,
	 * we arrange module mmap prior to guest kernel mmap and trigger
	 * a preload dso because default guest module symbols are loaded
	 * from guest kallsyms instead of /lib/modules/XXX/XXX. This
	 * method is used to avoid symbol missing when the first addr is
	 * in module instead of in guest kernel.
	 */
	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);

	/*
	 * We use _stext for guest kernel because guest kernel's /proc/kallsyms
	 * have no _text sometimes.
	 */
	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
						 machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);
}

static struct perf_event_header finished_round_event = {
	.size = sizeof(struct perf_event_header),
	.type = PERF_RECORD_FINISHED_ROUND,
};
static void record__adjust_affinity(struct record *rec, struct mmap *map)
{
	if (rec->opts.affinity != PERF_AFFINITY_SYS &&
	    !bitmap_equal(rec->affinity_mask.bits, map->affinity_mask.bits,
			  rec->affinity_mask.nbits)) {
		bitmap_zero(rec->affinity_mask.bits, rec->affinity_mask.nbits);
		bitmap_or(rec->affinity_mask.bits, rec->affinity_mask.bits,
			  map->affinity_mask.bits, rec->affinity_mask.nbits);
		sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&rec->affinity_mask),
				  (cpu_set_t *)rec->affinity_mask.bits);
		if (verbose == 2)
			mmap_cpu_mask__scnprintf(&rec->affinity_mask, "thread");
	}
}
static size_t process_comp_header(void *record, size_t increment)
{
	struct perf_record_compressed *event = record;
	size_t size = sizeof(*event);

	if (increment) {
		event->header.size += increment;
		return increment;
	}

	event->header.type = PERF_RECORD_COMPRESSED;
	event->header.size = size;

	return size;
}
static size_t zstd_compress(struct perf_session *session, void *dst, size_t dst_size,
			    void *src, size_t src_size)
{
	size_t compressed;
	size_t max_record_size = PERF_SAMPLE_MAX_SIZE - sizeof(struct perf_record_compressed) - 1;

	compressed = zstd_compress_stream_to_records(&session->zstd_data, dst, dst_size, src, src_size,
						     max_record_size, process_comp_header);

	session->bytes_transferred += src_size;
	session->bytes_compressed  += compressed;

	return compressed;
}
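
/*
 * Note: the compressed stream is framed into PERF_RECORD_COMPRESSED records
 * by process_comp_header(), each at most PERF_SAMPLE_MAX_SIZE bytes, so the
 * reading side can interleave them with ordinary events. bytes_transferred
 * and bytes_compressed feed the compression ratio printed at exit.
 */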
static int record__mmap_read_evlist(struct record *rec, struct evlist *evlist,
				    bool overwrite, bool synch)
{
	u64 bytes_written = rec->bytes_written;
	int i;
	int rc = 0;
	struct mmap *maps;
	int trace_fd = rec->data.file.fd;
	off_t off = 0;

	if (!evlist)
		return 0;

	maps = overwrite ? evlist->overwrite_mmap : evlist->mmap;
	if (!maps)
		return 0;

	if (overwrite && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
		return 0;

	if (record__aio_enabled(rec))
		off = record__aio_get_pos(trace_fd);

	for (i = 0; i < evlist->core.nr_mmaps; i++) {
		u64 flush = 0;
		struct mmap *map = &maps[i];

		if (map->core.base) {
			record__adjust_affinity(rec, map);
			if (synch) {
				flush = map->core.flush;
				map->core.flush = 1;
			}
			if (!record__aio_enabled(rec)) {
				if (perf_mmap__push(map, rec, record__pushfn) < 0) {
					if (synch)
						map->core.flush = flush;
					rc = -1;
					goto out;
				}
			} else {
				if (record__aio_push(rec, map, &off) < 0) {
					record__aio_set_pos(trace_fd, off);
					if (synch)
						map->core.flush = flush;
					rc = -1;
					goto out;
				}
			}
			if (synch)
				map->core.flush = flush;
		}

		if (map->auxtrace_mmap.base && !rec->opts.auxtrace_snapshot_mode &&
		    !rec->opts.auxtrace_sample_mode &&
		    record__auxtrace_mmap_read(rec, map) != 0) {
			rc = -1;
			goto out;
		}
	}

	if (record__aio_enabled(rec))
		record__aio_set_pos(trace_fd, off);

	/*
	 * Mark the round finished in case we wrote
	 * at least one event.
	 */
	if (bytes_written != rec->bytes_written)
		rc = record__write(rec, NULL, &finished_round_event, sizeof(finished_round_event));

	if (overwrite)
		perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
out:
	return rc;
}
static int record__mmap_read_all(struct record *rec, bool synch)
{
	int err;

	err = record__mmap_read_evlist(rec, rec->evlist, false, synch);
	if (err)
		return err;

	return record__mmap_read_evlist(rec, rec->evlist, true, synch);
}
static void record__init_features(struct record *rec)
{
	struct perf_session *session = rec->session;
	int feat;

	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
		perf_header__set_feat(&session->header, feat);

	if (rec->no_buildid)
		perf_header__clear_feat(&session->header, HEADER_BUILD_ID);

	if (!have_tracepoints(&rec->evlist->core.entries))
		perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);

	if (!rec->opts.branch_stack)
		perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);

	if (!rec->opts.full_auxtrace)
		perf_header__clear_feat(&session->header, HEADER_AUXTRACE);

	if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns))
		perf_header__clear_feat(&session->header, HEADER_CLOCKID);

	perf_header__clear_feat(&session->header, HEADER_DIR_FORMAT);
	if (!record__comp_enabled(rec))
		perf_header__clear_feat(&session->header, HEADER_COMPRESSED);

	perf_header__clear_feat(&session->header, HEADER_STAT);
}
static void
record__finish_output(struct record *rec)
{
	struct perf_data *data = &rec->data;
	int fd = perf_data__fd(data);

	if (data->is_pipe)
		return;

	rec->session->header.data_size += rec->bytes_written;
	data->file.size = lseek(perf_data__fd(data), 0, SEEK_CUR);

	if (!rec->no_buildid) {
		process_buildids(rec);

		if (rec->buildid_all)
			dsos__hit_all(rec->session);
	}
	perf_session__write_header(rec->session, rec->evlist, fd, true);
}
static int record__synthesize_workload(struct record *rec, bool tail)
{
	int err;
	struct perf_thread_map *thread_map;

	if (rec->opts.tail_synthesize != tail)
		return 0;

	thread_map = thread_map__new_by_tid(rec->evlist->workload.pid);
	if (thread_map == NULL)
		return -1;

	err = perf_event__synthesize_thread_map(&rec->tool, thread_map,
						 process_synthesized_event,
						 &rec->session->machines.host,
						 rec->opts.sample_address);
	perf_thread_map__put(thread_map);
	return err;
}

static int record__synthesize(struct record *rec, bool tail);
static int
record__switch_output(struct record *rec, bool at_exit)
{
	struct perf_data *data = &rec->data;
	int fd, err;
	char *new_filename;

	/* Same Size: "2015122520103046"*/
	char timestamp[] = "InvalidTimestamp";

	record__aio_mmap_read_sync(rec);

	record__synthesize(rec, true);
	if (target__none(&rec->opts.target))
		record__synthesize_workload(rec, true);

	rec->samples = 0;
	record__finish_output(rec);
	err = fetch_current_timestamp(timestamp, sizeof(timestamp));
	if (err) {
		pr_err("Failed to get current timestamp\n");
		return -EINVAL;
	}

	fd = perf_data__switch(data, timestamp,
				    rec->session->header.data_offset,
				    at_exit, &new_filename);
	if (fd >= 0 && !at_exit) {
		rec->bytes_written = 0;
		rec->session->header.data_size = 0;
	}

	if (!quiet)
		fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
			data->path, timestamp);

	if (rec->switch_output.num_files) {
		int n = rec->switch_output.cur_file + 1;

		if (n >= rec->switch_output.num_files)
			n = 0;
		rec->switch_output.cur_file = n;
		if (rec->switch_output.filenames[n]) {
			remove(rec->switch_output.filenames[n]);
			zfree(&rec->switch_output.filenames[n]);
		}
		rec->switch_output.filenames[n] = new_filename;
	} else {
		free(new_filename);
	}

	/* Output tracking events */
	if (!at_exit) {
		record__synthesize(rec, false);

		/*
		 * In 'perf record --switch-output' without -a,
		 * record__synthesize() in record__switch_output() won't
		 * generate tracking events because there's no thread_map
		 * in evlist, which means the newly created perf.data won't
		 * contain map and comm information.
		 * Create a fake thread_map and directly call
		 * perf_event__synthesize_thread_map() for those events.
		 */
		if (target__none(&rec->opts.target))
			record__synthesize_workload(rec, false);
	}
	return fd;
}
static volatile int workload_exec_errno;

/*
 * perf_evlist__prepare_workload will send a SIGUSR1
 * if the fork fails, since we asked by setting its
 * want_signal to true.
 */
static void workload_exec_failed_signal(int signo __maybe_unused,
					siginfo_t *info,
					void *ucontext __maybe_unused)
{
	workload_exec_errno = info->si_value.sival_int;
	done = 1;
	child_finished = 1;
}

static void snapshot_sig_handler(int sig);
static void alarm_sig_handler(int sig);
static const struct perf_event_mmap_page *
perf_evlist__pick_pc(struct evlist *evlist)
{
	if (evlist) {
		if (evlist->mmap && evlist->mmap[0].core.base)
			return evlist->mmap[0].core.base;
		if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].core.base)
			return evlist->overwrite_mmap[0].core.base;
	}
	return NULL;
}

static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
{
	const struct perf_event_mmap_page *pc;

	pc = perf_evlist__pick_pc(rec->evlist);
	if (pc)
		return pc;
	return NULL;
}
static int record__synthesize(struct record *rec, bool tail)
{
	struct perf_session *session = rec->session;
	struct machine *machine = &session->machines.host;
	struct perf_data *data = &rec->data;
	struct record_opts *opts = &rec->opts;
	struct perf_tool *tool = &rec->tool;
	int fd = perf_data__fd(data);
	int err = 0;
	event_op f = process_synthesized_event;

	if (rec->opts.tail_synthesize != tail)
		return 0;

	if (data->is_pipe) {
		/*
		 * We need to synthesize events first, because some
		 * features work on top of them (on report side).
		 */
		err = perf_event__synthesize_attrs(tool, rec->evlist,
						   process_synthesized_event);
		if (err < 0) {
			pr_err("Couldn't synthesize attrs.\n");
			goto out;
		}

		err = perf_event__synthesize_features(tool, session, rec->evlist,
						      process_synthesized_event);
		if (err < 0) {
			pr_err("Couldn't synthesize features.\n");
			return err;
		}

		if (have_tracepoints(&rec->evlist->core.entries)) {
			/*
			 * FIXME err <= 0 here actually means that
			 * there were no tracepoints so its not really
			 * an error, just that we don't need to
			 * synthesize anything.  We really have to
			 * return this more properly and also
			 * propagate errors that now are calling die()
			 */
			err = perf_event__synthesize_tracing_data(tool, fd, rec->evlist,
								  process_synthesized_event);
			if (err <= 0) {
				pr_err("Couldn't record tracing data.\n");
				goto out;
			}
			rec->bytes_written += err;
		}
	}

	err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
					  process_synthesized_event, machine);
	if (err)
		goto out;

	/* Synthesize id_index before auxtrace_info */
	if (rec->opts.auxtrace_sample_mode) {
		err = perf_event__synthesize_id_index(tool,
						      process_synthesized_event,
						      session->evlist, machine);
		if (err)
			goto out;
	}

	if (rec->opts.full_auxtrace) {
		err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
					session, process_synthesized_event);
		if (err)
			goto out;
	}

	if (!perf_evlist__exclude_kernel(rec->evlist)) {
		err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
							 machine);
		WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
				   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
				   "Check /proc/kallsyms permission or run as root.\n");

		err = perf_event__synthesize_modules(tool, process_synthesized_event,
						     machine);
		WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
				   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
				   "Check /proc/modules permission or run as root.\n");
	}

	if (perf_guest) {
		machines__process_guests(&session->machines,
					 perf_event__synthesize_guest_os, tool);
	}

	err = perf_event__synthesize_extra_attr(&rec->tool,
						rec->evlist,
						process_synthesized_event,
						data->is_pipe);
	if (err)
		goto out;

	err = perf_event__synthesize_thread_map2(&rec->tool, rec->evlist->core.threads,
						 process_synthesized_event,
						 NULL);
	if (err < 0) {
		pr_err("Couldn't synthesize thread map.\n");
		return err;
	}

	err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->core.cpus,
					     process_synthesized_event, NULL);
	if (err < 0) {
		pr_err("Couldn't synthesize cpu map.\n");
		return err;
	}

	err = perf_event__synthesize_bpf_events(session, process_synthesized_event,
						machine, opts);
	if (err < 0)
		pr_warning("Couldn't synthesize bpf events.\n");

	err = perf_event__synthesize_cgroups(tool, process_synthesized_event,
					     machine);
	if (err < 0)
		pr_warning("Couldn't synthesize cgroup events.\n");

	if (rec->opts.nr_threads_synthesize > 1) {
		perf_set_multithreaded();
		f = process_locked_synthesized_event;
	}

	err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->core.threads,
					    f, opts->sample_address,
					    rec->opts.nr_threads_synthesize);

	if (rec->opts.nr_threads_synthesize > 1)
		perf_set_singlethreaded();

out:
	return err;
}
static int record__process_signal_event(union perf_event *event __maybe_unused, void *data)
{
	struct record *rec = data;
	pthread_kill(rec->thread_id, SIGUSR2);
	return 0;
}

static int record__setup_sb_evlist(struct record *rec)
{
	struct record_opts *opts = &rec->opts;

	if (rec->sb_evlist != NULL) {
		/*
		 * We get here if --switch-output-event populated the
		 * sb_evlist, so associate a callback that will send a SIGUSR2
		 * to the main thread.
		 */
		evlist__set_cb(rec->sb_evlist, record__process_signal_event, rec);
		rec->thread_id = pthread_self();
	}

	if (!opts->no_bpf_event) {
		if (rec->sb_evlist == NULL) {
			rec->sb_evlist = evlist__new();

			if (rec->sb_evlist == NULL) {
				pr_err("Couldn't create side band evlist.\n");
				return -1;
			}
		}

		if (evlist__add_bpf_sb_event(rec->sb_evlist, &rec->session->header.env)) {
			pr_err("Couldn't ask for PERF_RECORD_BPF_EVENT side band events.\n");
			return -1;
		}
	}

	if (perf_evlist__start_sb_thread(rec->sb_evlist, &rec->opts.target)) {
		pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n");
		opts->no_bpf_event = true;
	}

	return 0;
}
static int __cmd_record(struct record *rec, int argc, const char **argv)
{
	int err;
	int status = 0;
	unsigned long waking = 0;
	const bool forks = argc > 0;
	struct perf_tool *tool = &rec->tool;
	struct record_opts *opts = &rec->opts;
	struct perf_data *data = &rec->data;
	struct perf_session *session;
	bool disabled = false, draining = false;
	int fd;
	float ratio = 0;

	atexit(record__sig_exit);
	signal(SIGCHLD, sig_handler);
	signal(SIGINT, sig_handler);
	signal(SIGTERM, sig_handler);
	signal(SIGSEGV, sigsegv_handler);

	if (rec->opts.record_namespaces)
		tool->namespace_events = true;

	if (rec->opts.record_cgroup) {
#ifdef HAVE_FILE_HANDLE
		tool->cgroup_events = true;
#else
		pr_err("cgroup tracking is not supported\n");
		return -1;
#endif
	}

	if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) {
		signal(SIGUSR2, snapshot_sig_handler);
		if (rec->opts.auxtrace_snapshot_mode)
			trigger_on(&auxtrace_snapshot_trigger);
		if (rec->switch_output.enabled)
			trigger_on(&switch_output_trigger);
	} else {
		signal(SIGUSR2, SIG_IGN);
	}

	session = perf_session__new(data, false, tool);
	if (IS_ERR(session)) {
		pr_err("Perf session creation failed.\n");
		return PTR_ERR(session);
	}

	fd = perf_data__fd(data);
	rec->session = session;

	if (zstd_init(&session->zstd_data, rec->opts.comp_level) < 0) {
		pr_err("Compression initialization failed.\n");
		return -1;
	}
#ifdef HAVE_EVENTFD_SUPPORT
	done_fd = eventfd(0, EFD_NONBLOCK);
	if (done_fd < 0) {
		pr_err("Failed to create wakeup eventfd, error: %m\n");
		status = -1;
		goto out_delete_session;
	}
	err = evlist__add_pollfd(rec->evlist, done_fd);
	if (err < 0) {
		pr_err("Failed to add wakeup eventfd to poll list\n");
		status = err;
		goto out_delete_session;
	}
#endif // HAVE_EVENTFD_SUPPORT

	session->header.env.comp_type  = PERF_COMP_ZSTD;
	session->header.env.comp_level = rec->opts.comp_level;

	if (rec->opts.kcore &&
	    !record__kcore_readable(&session->machines.host)) {
		pr_err("ERROR: kcore is not readable.\n");
		return -1;
	}

	record__init_features(rec);

	if (rec->opts.use_clockid && rec->opts.clockid_res_ns)
		session->header.env.clockid_res_ns = rec->opts.clockid_res_ns;

	if (forks) {
		err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
						    argv, data->is_pipe,
						    workload_exec_failed_signal);
		if (err < 0) {
			pr_err("Couldn't run the workload!\n");
			status = err;
			goto out_delete_session;
		}
	}

	/*
	 * If we have just single event and are sending data
	 * through pipe, we need to force the ids allocation,
	 * because we synthesize event name through the pipe
	 * and need the id for that.
	 */
	if (data->is_pipe && rec->evlist->core.nr_entries == 1)
		rec->opts.sample_id = true;

	if (record__open(rec) != 0) {
		err = -1;
		goto out_child;
	}
	session->header.env.comp_mmap_len = session->evlist->core.mmap_len;

	if (rec->opts.kcore) {
		err = record__kcore_copy(&session->machines.host, data);
		if (err) {
			pr_err("ERROR: Failed to copy kcore\n");
			goto out_child;
		}
	}

	err = bpf__apply_obj_config();
	if (err) {
		char errbuf[BUFSIZ];

		bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
		pr_err("ERROR: Apply config to BPF failed: %s\n",
			 errbuf);
		goto out_child;
	}

	/*
	 * Normally perf_session__new would do this, but it doesn't have the
	 * evlist.
	 */
	if (rec->tool.ordered_events && !perf_evlist__sample_id_all(rec->evlist)) {
		pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
		rec->tool.ordered_events = false;
	}

	if (!rec->evlist->nr_groups)
		perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);

	if (data->is_pipe) {
		err = perf_header__write_pipe(fd);
		if (err < 0)
			goto out_child;
	} else {
		err = perf_session__write_header(session, rec->evlist, fd, false);
		if (err < 0)
			goto out_child;
	}

	err = -1;
	if (!rec->no_buildid
	    && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
		pr_err("Couldn't generate buildids. "
		       "Use --no-buildid to profile anyway.\n");
		goto out_child;
	}

	err = record__setup_sb_evlist(rec);
	if (err)
		goto out_child;

	err = record__synthesize(rec, false);
	if (err < 0)
		goto out_child;

	if (rec->realtime_prio) {
		struct sched_param param;

		param.sched_priority = rec->realtime_prio;
		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
			pr_err("Could not set realtime priority.\n");
			err = -1;
			goto out_child;
		}
	}

	/*
	 * When perf is starting the traced process, all the events
	 * (apart from group members) have enable_on_exec=1 set,
	 * so don't spoil it by prematurely enabling them.
	 */
	if (!target__none(&opts->target) && !opts->initial_delay)
		evlist__enable(rec->evlist);

	/*
	 * Let the child rip
	 */
	if (forks) {
		struct machine *machine = &session->machines.host;
		union perf_event *event;
		pid_t tgid;

		event = malloc(sizeof(event->comm) + machine->id_hdr_size);
		if (event == NULL) {
			err = -ENOMEM;
			goto out_child;
		}

		/*
		 * Some H/W events are generated before COMM event
		 * which is emitted during exec(), so perf script
		 * cannot see a correct process name for those events.
		 * Synthesize COMM event to prevent it.
		 */
		tgid = perf_event__synthesize_comm(tool, event,
						   rec->evlist->workload.pid,
						   process_synthesized_event,
						   machine);
		free(event);

		if (tgid == -1)
			goto out_child;

		event = malloc(sizeof(event->namespaces) +
			       (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
			       machine->id_hdr_size);
		if (event == NULL) {
			err = -ENOMEM;
			goto out_child;
		}

		/*
		 * Synthesize NAMESPACES event for the command specified.
		 */
		perf_event__synthesize_namespaces(tool, event,
						  rec->evlist->workload.pid,
						  tgid, process_synthesized_event,
						  machine);
		free(event);

		perf_evlist__start_workload(rec->evlist);
	}

	if (opts->initial_delay) {
		usleep(opts->initial_delay * USEC_PER_MSEC);
		evlist__enable(rec->evlist);
	}

	trigger_ready(&auxtrace_snapshot_trigger);
	trigger_ready(&switch_output_trigger);
	perf_hooks__invoke_record_start();
	for (;;) {
		unsigned long long hits = rec->samples;

		/*
		 * rec->evlist->bkw_mmap_state is possible to be
		 * BKW_MMAP_EMPTY here: when done == true and
		 * hits != rec->samples in previous round.
		 *
		 * perf_evlist__toggle_bkw_mmap ensure we never
		 * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING.
		 */
		if (trigger_is_hit(&switch_output_trigger) || done || draining)
			perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING);

		if (record__mmap_read_all(rec, false) < 0) {
			trigger_error(&auxtrace_snapshot_trigger);
			trigger_error(&switch_output_trigger);
			err = -1;
			goto out_child;
		}

		if (auxtrace_record__snapshot_started) {
			auxtrace_record__snapshot_started = 0;
			if (!trigger_is_error(&auxtrace_snapshot_trigger))
				record__read_auxtrace_snapshot(rec, false);
			if (trigger_is_error(&auxtrace_snapshot_trigger)) {
				pr_err("AUX area tracing snapshot failed\n");
				err = -1;
				goto out_child;
			}
		}

		if (trigger_is_hit(&switch_output_trigger)) {
			/*
			 * If switch_output_trigger is hit, the data in
			 * overwritable ring buffer should have been collected,
			 * so bkw_mmap_state should be set to BKW_MMAP_EMPTY.
			 *
			 * If SIGUSR2 is raised after or during record__mmap_read_all(),
			 * record__mmap_read_all() didn't collect data from
			 * overwritable ring buffer. Read again.
			 */
			if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING)
				continue;
			trigger_ready(&switch_output_trigger);

			/*
			 * Reenable events in overwrite ring buffer after
			 * record__mmap_read_all(): we should have collected
			 * data from it.
			 */
			perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING);

			if (!quiet)
				fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
					waking);
			waking = 0;
			fd = record__switch_output(rec, false);
			if (fd < 0) {
				pr_err("Failed to switch to new file\n");
				trigger_error(&switch_output_trigger);
				err = fd;
				goto out_child;
			}

			/* re-arm the alarm */
			if (rec->switch_output.time)
				alarm(rec->switch_output.time);
		}

		if (hits == rec->samples) {
			if (done || draining)
				break;
			err = evlist__poll(rec->evlist, -1);
			/*
			 * Propagate error, only if there's any. Ignore positive
			 * number of returned events and interrupt error.
			 */
			if (err > 0 || (err < 0 && errno == EINTR))
				err = 0;
			waking++;

			if (evlist__filter_pollfd(rec->evlist, POLLERR | POLLHUP) == 0)
				draining = true;
		}

		/*
		 * When perf is starting the traced process, at the end events
		 * die with the process and we wait for that. Thus no need to
		 * disable events in this case.
		 */
		if (done && !disabled && !target__none(&opts->target)) {
			trigger_off(&auxtrace_snapshot_trigger);
			evlist__disable(rec->evlist);
			disabled = true;
		}
	}

	trigger_off(&auxtrace_snapshot_trigger);
	trigger_off(&switch_output_trigger);

	if (opts->auxtrace_snapshot_on_exit)
		record__auxtrace_snapshot_exit(rec);

	if (forks && workload_exec_errno) {
		char msg[STRERR_BUFSIZE];
		const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
		pr_err("Workload failed: %s\n", emsg);
		err = -1;
		goto out_child;
	}

	if (!quiet)
		fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);

	if (target__none(&rec->opts.target))
		record__synthesize_workload(rec, true);

out_child:
	record__mmap_read_all(rec, true);
	record__aio_mmap_read_sync(rec);

	if (rec->session->bytes_transferred && rec->session->bytes_compressed) {
		ratio = (float)rec->session->bytes_transferred/(float)rec->session->bytes_compressed;
		session->header.env.comp_ratio = ratio + 0.5;
	}

	if (forks) {
		int exit_status;

		if (!child_finished)
			kill(rec->evlist->workload.pid, SIGTERM);

		wait(&exit_status);

		if (err < 0)
			status = err;
		else if (WIFEXITED(exit_status))
			status = WEXITSTATUS(exit_status);
		else if (WIFSIGNALED(exit_status))
			signr = WTERMSIG(exit_status);
	} else
		status = err;

	record__synthesize(rec, true);
	/* this will be recalculated during process_buildids() */
	rec->samples = 0;

	if (!err) {
		if (!rec->timestamp_filename) {
			record__finish_output(rec);
		} else {
			fd = record__switch_output(rec, true);
			if (fd < 0) {
				status = fd;
				goto out_delete_session;
			}
		}
	}

	perf_hooks__invoke_record_end();

	if (!err && !quiet) {
		char samples[128];
		const char *postfix = rec->timestamp_filename ?
					".<timestamp>" : "";

		if (rec->samples && !rec->opts.full_auxtrace)
			scnprintf(samples, sizeof(samples),
				  " (%" PRIu64 " samples)", rec->samples);
		else
			samples[0] = '\0';

		fprintf(stderr,	"[ perf record: Captured and wrote %.3f MB %s%s%s",
			perf_data__size(data) / 1024.0 / 1024.0,
			data->path, postfix, samples);
		if (ratio) {
			fprintf(stderr,	", compressed (original %.3f MB, ratio is %.3f)",
					rec->session->bytes_transferred / 1024.0 / 1024.0,
					ratio);
		}
		fprintf(stderr, " ]\n");
	}

out_delete_session:
#ifdef HAVE_EVENTFD_SUPPORT
	if (done_fd >= 0)
		close(done_fd);
#endif
	zstd_fini(&session->zstd_data);
	perf_session__delete(session);

	if (!opts->no_bpf_event)
		perf_evlist__stop_sb_thread(rec->sb_evlist);
	return status;
}
static void callchain_debug(struct callchain_param *callchain)
{
	static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };

	pr_debug("callchain: type %s\n", str[callchain->record_mode]);

	if (callchain->record_mode == CALLCHAIN_DWARF)
		pr_debug("callchain: stack dump size %d\n",
			 callchain->dump_size);
}
int record_opts__parse_callchain(struct record_opts *record,
				 struct callchain_param *callchain,
				 const char *arg, bool unset)
{
	int ret;
	callchain->enabled = !unset;

	/* --no-call-graph */
	if (unset) {
		callchain->record_mode = CALLCHAIN_NONE;
		pr_debug("callchain: disabled\n");
		return 0;
	}

	ret = parse_callchain_record_opt(arg, callchain);
	if (!ret) {
		/* Enable data address sampling for DWARF unwind. */
		if (callchain->record_mode == CALLCHAIN_DWARF)
			record->sample_address = true;
		callchain_debug(callchain);
	}

	return ret;
}

int record_parse_callchain_opt(const struct option *opt,
			       const char *arg,
			       int unset)
{
	return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
}
int record_callchain_opt(const struct option *opt,
			 const char *arg __maybe_unused,
			 int unset __maybe_unused)
{
	struct callchain_param *callchain = opt->value;

	callchain->enabled = true;

	if (callchain->record_mode == CALLCHAIN_NONE)
		callchain->record_mode = CALLCHAIN_FP;

	callchain_debug(callchain);
	return 0;
}
static int perf_record_config(const char *var, const char *value, void *cb)
{
	struct record *rec = cb;

	if (!strcmp(var, "record.build-id")) {
		if (!strcmp(value, "cache"))
			rec->no_buildid_cache = false;
		else if (!strcmp(value, "no-cache"))
			rec->no_buildid_cache = true;
		else if (!strcmp(value, "skip"))
			rec->no_buildid = true;
		else
			return -1;
		return 0;
	}
	if (!strcmp(var, "record.call-graph")) {
		var = "call-graph.record-mode";
		return perf_default_config(var, value, cb);
	}
#ifdef HAVE_AIO_SUPPORT
	if (!strcmp(var, "record.aio")) {
		rec->opts.nr_cblocks = strtol(value, NULL, 0);
		if (!rec->opts.nr_cblocks)
			rec->opts.nr_cblocks = nr_cblocks_default;
	}
#endif

	return 0;
}
struct clockid_map {
	const char *name;
	int clockid;
};

#define CLOCKID_MAP(n, c)	\
	{ .name = n, .clockid = (c), }

#define CLOCKID_END	{ .name = NULL, }

/*
 * Add the missing ones, we need to build on many distros...
 */
#ifndef CLOCK_MONOTONIC_RAW
#define CLOCK_MONOTONIC_RAW 4
#endif
#ifndef CLOCK_BOOTTIME
#define CLOCK_BOOTTIME 7
#endif
#ifndef CLOCK_TAI
#define CLOCK_TAI 11
#endif
static const struct clockid_map clockids[] = {
	/* available for all events, NMI safe */
	CLOCKID_MAP("monotonic", CLOCK_MONOTONIC),
	CLOCKID_MAP("monotonic_raw", CLOCK_MONOTONIC_RAW),

	/* available for some events */
	CLOCKID_MAP("realtime", CLOCK_REALTIME),
	CLOCKID_MAP("boottime", CLOCK_BOOTTIME),
	CLOCKID_MAP("tai", CLOCK_TAI),

	/* available for the lazy */
	CLOCKID_MAP("mono", CLOCK_MONOTONIC),
	CLOCKID_MAP("raw", CLOCK_MONOTONIC_RAW),
	CLOCKID_MAP("real", CLOCK_REALTIME),
	CLOCKID_MAP("boot", CLOCK_BOOTTIME),

	CLOCKID_END,
};

static int get_clockid_res(clockid_t clk_id, u64 *res_ns)
{
	struct timespec res;

	*res_ns = 0;
	if (!clock_getres(clk_id, &res))
		*res_ns = res.tv_nsec + res.tv_sec * NSEC_PER_SEC;
	else
		pr_warning("WARNING: Failed to determine specified clock resolution.\n");

	return 0;
}
static int parse_clockid(const struct option *opt, const char *str, int unset)
{
	struct record_opts *opts = (struct record_opts *)opt->value;
	const struct clockid_map *cm;
	const char *ostr = str;

	if (unset) {
		opts->use_clockid = 0;
		return 0;
	}

	/* no arg passed */
	if (!str)
		return 0;

	/* no setting it twice */
	if (opts->use_clockid)
		return -1;

	opts->use_clockid = true;

	/* if its a number, we're done */
	if (sscanf(str, "%d", &opts->clockid) == 1)
		return get_clockid_res(opts->clockid, &opts->clockid_res_ns);

	/* allow a "CLOCK_" prefix to the name */
	if (!strncasecmp(str, "CLOCK_", 6))
		str += 6;

	for (cm = clockids; cm->name; cm++) {
		if (!strcasecmp(str, cm->name)) {
			opts->clockid = cm->clockid;
			return get_clockid_res(opts->clockid,
					       &opts->clockid_res_ns);
		}
	}

	opts->use_clockid = false;
	ui__warning("unknown clockid %s, check man page\n", ostr);
	return -1;
}
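
/*
 * Example usage (a sketch): a name from the clockids[] table, with or
 * without the CLOCK_ prefix, or a raw clockid number are all accepted:
 *
 *	perf record -k CLOCK_MONOTONIC_RAW -- sleep 1
 *	perf record -k raw -- sleep 1
 *	perf record -k 4 -- sleep 1
 */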
static int record__parse_affinity(const struct option *opt, const char *str, int unset)
{
	struct record_opts *opts = (struct record_opts *)opt->value;

	if (unset || !str)
		return 0;

	if (!strcasecmp(str, "node"))
		opts->affinity = PERF_AFFINITY_NODE;
	else if (!strcasecmp(str, "cpu"))
		opts->affinity = PERF_AFFINITY_CPU;

	return 0;
}
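
/*
 * Example usage (a sketch; assumes the option is wired up as --affinity in
 * __record_options[]): pin the tool's thread near the ring buffer it is
 * draining, per NUMA node or per CPU (see record__adjust_affinity()):
 *
 *	perf record --affinity=node -- sleep 1
 */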
static int parse_output_max_size(const struct option *opt,
				 const char *str, int unset)
{
	unsigned long *s = (unsigned long *)opt->value;
	static struct parse_tag tags_size[] = {
		{ .tag  = 'B', .mult = 1       },
		{ .tag  = 'K', .mult = 1 << 10 },
		{ .tag  = 'M', .mult = 1 << 20 },
		{ .tag  = 'G', .mult = 1 << 30 },
		{ .tag  = 0 },
	};
	unsigned long val;

	if (unset) {
		*s = 0;
		return 0;
	}

	val = parse_tag_value(str, tags_size);
	if (val != (unsigned long) -1) {
		*s = val;
		return 0;
	}

	return -1;
}
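
/*
 * Example usage (a sketch): stop the session once perf.data reaches a
 * B/K/M/G suffixed limit (record__output_max_size_exceeded() then sets
 * done from record__write()):
 *
 *	perf record --max-size=100M -- ./workload
 */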
static int record__parse_mmap_pages(const struct option *opt,
				    const char *str,
				    int unset __maybe_unused)
{
	struct record_opts *opts = opt->value;
	char *s, *p;
	unsigned int mmap_pages;
	int ret;

	if (!str)
		return -EINVAL;

	s = strdup(str);
	if (!s)
		return -ENOMEM;

	p = strchr(s, ',');
	if (p)
		*p = '\0';

	if (*s) {
		ret = __perf_evlist__parse_mmap_pages(&mmap_pages, s);
		if (ret)
			goto out_free;
		opts->mmap_pages = mmap_pages;
	}

	if (!p) {
		ret = 0;
		goto out_free;
	}

	ret = __perf_evlist__parse_mmap_pages(&mmap_pages, p + 1);
	if (ret)
		goto out_free;

	opts->auxtrace_mmap_pages = mmap_pages;

out_free:
	free(s);
	return ret;
}
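
/*
 * Example usage (a sketch): the first value sizes the data mmap, the
 * optional second value after the comma sizes the AUX area mmap used by
 * hardware tracing:
 *
 *	perf record -m 512,128 -e intel_pt// -- sleep 1
 */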
static void switch_output_size_warn(struct record *rec)
{
	u64 wakeup_size = evlist__mmap_size(rec->opts.mmap_pages);
	struct switch_output *s = &rec->switch_output;

	wakeup_size /= 2;

	if (s->size < wakeup_size) {
		char buf[100];

		unit_number__scnprintf(buf, sizeof(buf), wakeup_size);
		pr_warning("WARNING: switch-output data size lower than "
			   "wakeup kernel buffer size (%s) "
			   "expect bigger perf.data sizes\n", buf);
	}
}
static int switch_output_setup(struct record *rec)
{
	struct switch_output *s = &rec->switch_output;
	static struct parse_tag tags_size[] = {
		{ .tag  = 'B', .mult = 1       },
		{ .tag  = 'K', .mult = 1 << 10 },
		{ .tag  = 'M', .mult = 1 << 20 },
		{ .tag  = 'G', .mult = 1 << 30 },
		{ .tag  = 0 },
	};
	static struct parse_tag tags_time[] = {
		{ .tag  = 's', .mult = 1        },
		{ .tag  = 'm', .mult = 60       },
		{ .tag  = 'h', .mult = 60*60    },
		{ .tag  = 'd', .mult = 60*60*24 },
		{ .tag  = 0 },
	};
	unsigned long val;

	/*
	 * If we're using --switch-output-events, then we imply its
	 * --switch-output=signal, as we'll send a SIGUSR2 from the side band
	 * thread to its parent.
	 */
	if (rec->switch_output_event_set)
		goto do_signal;

	if (!s->set)
		return 0;

	if (!strcmp(s->str, "signal")) {
do_signal:
		s->signal = true;
		pr_debug("switch-output with SIGUSR2 signal\n");
		goto enabled;
	}

	val = parse_tag_value(s->str, tags_size);
	if (val != (unsigned long) -1) {
		s->size = val;
		pr_debug("switch-output with %s size threshold\n", s->str);
		goto enabled;
	}

	val = parse_tag_value(s->str, tags_time);
	if (val != (unsigned long) -1) {
		s->time = val;
		pr_debug("switch-output with %s time threshold (%lu seconds)\n",
			 s->str, s->time);
		goto enabled;
	}

	return -1;

enabled:
	rec->timestamp_filename = true;
	s->enabled              = true;

	if (s->size && !rec->opts.no_buffering)
		switch_output_size_warn(rec);

	return 0;
}
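
/*
 * Example usage (a sketch): rotate the output file on SIGUSR2, on a size
 * threshold, or on a time threshold; each value maps to the tag tables
 * above, and every rotation produces a timestamped perf.data file:
 *
 *	perf record --switch-output=signal -- ./workload
 *	perf record --switch-output=2G -- ./workload
 *	perf record --switch-output=10m -- ./workload
 */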
static const char * const __record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL
};
const char * const *record_usage = __record_usage;

static int build_id__process_mmap(struct perf_tool *tool, union perf_event *event,
				  struct perf_sample *sample, struct machine *machine)
{
	/*
	 * We already have the kernel maps, put in place via perf_session__create_kernel_maps()
	 * no need to add them twice.
	 */
	if (!(event->header.misc & PERF_RECORD_MISC_USER))
		return 0;

	return perf_event__process_mmap(tool, event, sample, machine);
}
static int build_id__process_mmap2(struct perf_tool *tool, union perf_event *event,
				   struct perf_sample *sample, struct machine *machine)
{
	/*
	 * We already have the kernel maps, put in place via perf_session__create_kernel_maps()
	 * no need to add them twice.
	 */
	if (!(event->header.misc & PERF_RECORD_MISC_USER))
		return 0;

	return perf_event__process_mmap2(tool, event, sample, machine);
}
/*
 * XXX Ideally would be local to cmd_record() and passed to a record__new
 * because we need to have access to it in record__exit, that is called
 * after cmd_record() exits, but since record_options need to be accessible to
 * builtin-script, leave it here.
 *
 * At least we don't touch it in all the other functions here directly.
 *
 * Just say no to tons of global variables, sigh.
 */
static struct record record = {
	.opts = {
		.sample_time	     = true,
		.mmap_pages	     = UINT_MAX,
		.user_freq	     = UINT_MAX,
		.user_interval	     = ULLONG_MAX,
		.freq		     = 4000,
		.target		     = {
			.uses_mmap   = true,
			.default_per_cpu = true,
		},
		.mmap_flush          = MMAP_FLUSH_DEFAULT,
		.nr_threads_synthesize = 1,
	},
	.tool = {
		.sample		= process_sample_event,
		.fork		= perf_event__process_fork,
		.exit		= perf_event__process_exit,
		.comm		= perf_event__process_comm,
		.namespaces	= perf_event__process_namespaces,
		.mmap		= build_id__process_mmap,
		.mmap2		= build_id__process_mmap2,
		.ordered_events	= true,
	},
};
const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
	"\n\t\t\t\tDefault: fp";

static bool dry_run;

/*
 * XXX Will stay a global variable till we fix builtin-script.c to stop messing
 * with it and switch to use the library functions in perf_evlist that came
 * from builtin-record.c, i.e. use record_opts,
 * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
 * using pipes, etc.
 */
2388 OPT_CALLBACK('e', "event", &record.evlist, "event",
2389 "event selector. use 'perf list' to list available events",
2390 parse_events_option),
2391 OPT_CALLBACK(0, "filter", &record.evlist, "filter",
2392 "event filter", parse_filter),
2393 OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
2394 NULL, "don't record events from perf itself",
2396 OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
2397 "record events on existing process id"),
2398 OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
2399 "record events on existing thread id"),
2400 OPT_INTEGER('r', "realtime", &record.realtime_prio,
2401 "collect data with this RT SCHED_FIFO priority"),
2402 OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
2403 "collect data without buffering"),
2404 OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
2405 "collect raw sample records from all opened counters"),
2406 OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
2407 "system-wide collection from all CPUs"),
2408 OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
2409 "list of cpus to monitor"),
2410 OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
2411 OPT_STRING('o', "output", &record.data.path, "file",
2412 "output file name"),
2413 OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
2414 &record.opts.no_inherit_set,
2415 "child tasks do not inherit counters"),
2416 OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize,
2417 "synthesize non-sample events at the end of output"),
2418 OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"),
2419 OPT_BOOLEAN(0, "no-bpf-event", &record.opts.no_bpf_event, "record bpf events"),
2420 OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq,
2421 "Fail if the specified frequency can't be used"),
2422 OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'",
2423 "profile at this frequency",
2424 record__parse_freq),
2425 OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
2426 "number of mmap data pages and AUX area tracing mmap pages",
2427 record__parse_mmap_pages),
2428 OPT_CALLBACK(0, "mmap-flush", &record.opts, "number",
2429 "Minimal number of bytes that is extracted from mmap data pages (default: 1)",
2430 record__mmap_flush_parse),
2431 OPT_BOOLEAN(0, "group", &record.opts.group,
2432 "put the counters into a counter group"),
2433 OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
2434 NULL, "enables call-graph recording" ,
2435 &record_callchain_opt),
2436 OPT_CALLBACK(0, "call-graph", &record.opts,
2437 "record_mode[,record_size]", record_callchain_help,
2438 &record_parse_callchain_opt),
2439 OPT_INCR('v', "verbose", &verbose,
2440 "be more verbose (show counter open errors, etc)"),
2441 OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
2442 OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
2443 "per thread counts"),
2444 OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
2445 OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr,
2446 "Record the sample physical addresses"),
2447 OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"),
2448 OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
2449 &record.opts.sample_time_set,
2450 "Record the sample timestamps"),
2451 OPT_BOOLEAN_SET('P', "period", &record.opts.period, &record.opts.period_set,
2452 "Record the sample period"),
2453 OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
	OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache,
			&record.no_buildid_cache_set,
			"do not update the buildid cache"),
	OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid,
			&record.no_buildid_set,
			"do not collect buildids in perf.data"),
	OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
		     "monitor event in cgroup name only",
		     parse_cgroups),
	OPT_UINTEGER('D', "delay", &record.opts.initial_delay,
		     "ms to wait before starting measurement after program start"),
	OPT_BOOLEAN(0, "kcore", &record.opts.kcore, "copy /proc/kcore"),
	OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
		   "user to profile"),
	OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
			   "branch any", "sample any taken branches",
			   parse_branch_stack),
	OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
		     "branch filter mask", "branch stack filter modes",
		     parse_branch_stack),
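	/*
	 * Filter modes accepted by parse_branch_stack() include "any",
	 * "any_call", "any_ret", "ind_call" and the privilege levels
	 * "u"/"k", combinable with commas, e.g.:
	 *
	 *   perf record -j any_call,u -- ./workload
	 */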
	OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
		    "sample by weight (on special events only)"),
	OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
		    "sample transaction flags (special events only)"),
	OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
		    "use per-thread mmaps"),
	OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
		    "sample selected machine registers on interrupt,"
		    " use '-I?' to list register names", parse_intr_regs),
	OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register",
		    "sample selected machine registers on interrupt,"
		    " use '--user-regs=?' to list register names", parse_user_regs),
	OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
		    "Record running/enabled time of read (:S) events"),
	OPT_CALLBACK('k', "clockid", &record.opts,
		     "clockid", "clockid to use for events, see clock_gettime()",
		     parse_clockid),
	OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
			  "opts", "AUX area tracing Snapshot Mode", ""),
	OPT_STRING_OPTARG(0, "aux-sample", &record.opts.auxtrace_sample_opts,
			  "opts", "sample AUX area", ""),
	OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout,
		     "per thread proc mmap processing timeout in ms"),
	OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces,
		    "Record namespaces events"),
	OPT_BOOLEAN(0, "all-cgroups", &record.opts.record_cgroup,
		    "Record cgroup events"),
	OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events,
		    "Record context switch events"),
	OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
			 "Configure all used events to run in kernel space.",
			 PARSE_OPT_EXCLUSIVE),
	OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user,
			 "Configure all used events to run in user space.",
			 PARSE_OPT_EXCLUSIVE),
	OPT_BOOLEAN(0, "kernel-callchains", &record.opts.kernel_callchains,
		    "collect kernel callchains"),
	OPT_BOOLEAN(0, "user-callchains", &record.opts.user_callchains,
		    "collect user callchains"),
	OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path",
		   "clang binary to use for compiling BPF scriptlets"),
	OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options",
		   "options passed to clang when compiling BPF scriptlets"),
	OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
		   "file", "vmlinux pathname"),
	OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
		    "Record build-id of all DSOs regardless of hits"),
	OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
		    "append timestamp to output filename"),
	OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary,
		    "Record timestamp boundary (time of first/last samples)"),
	OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str,
			  &record.switch_output.set, "signal or size[BKMG] or time[smhd]",
			  "Switch output when receiving SIGUSR2 (signal) or cross a size or time threshold",
			  "signal"),
	OPT_CALLBACK_SET(0, "switch-output-event", &record.sb_evlist, &record.switch_output_event_set, "switch output event",
			 "switch output event selector. use 'perf list' to list available events",
			 parse_events_option_new_evlist),
	OPT_INTEGER(0, "switch-max-files", &record.switch_output.num_files,
		    "Limit number of switch output generated files"),
	OPT_BOOLEAN(0, "dry-run", &dry_run,
		    "Parse options then exit"),
#ifdef HAVE_AIO_SUPPORT
	OPT_CALLBACK_OPTARG(0, "aio", &record.opts,
			    &nr_cblocks_default, "n", "Use <n> control blocks in asynchronous trace writing mode (default: 1, max: 4)",
			    record__aio_parse),
#endif
	OPT_CALLBACK(0, "affinity", &record.opts, "node|cpu",
		     "Set affinity mask of trace reading thread to NUMA node cpu mask or cpu of processed mmap buffer",
		     record__parse_affinity),
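	/*
	 * "node" pins the reading thread to the NUMA node of the mmap
	 * buffer currently being drained, "cpu" to its exact CPU; both
	 * keep buffer reads local (see record__adjust_affinity()).
	 */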
#ifdef HAVE_ZSTD_SUPPORT
	OPT_CALLBACK_OPTARG('z', "compression-level", &record.opts, &comp_level_default,
			    "n", "Compress records using specified level (default: 1 - fastest compression, 22 - greatest compression)",
			    record__parse_comp_level),
#endif
	OPT_CALLBACK(0, "max-size", &record.output_max_size,
		     "size", "Limit the maximum size of the output file", parse_output_max_size),
	OPT_UINTEGER(0, "num-thread-synthesize",
		     &record.opts.nr_threads_synthesize,
		     "number of threads to run for event synthesis"),
	OPT_END()
};
struct option *record_options = __record_options;
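/*
 * Entry point of 'perf record', invoked through perf's builtin command
 * table. Everything up to the __cmd_record() call below is option
 * parsing, validation and resource setup.
 */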
int cmd_record(int argc, const char **argv)
{
	int err;
	struct record *rec = &record;
	char errbuf[BUFSIZ];

	setlocale(LC_ALL, "");
#ifndef HAVE_LIBBPF_SUPPORT
# define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c)
	set_nobuild('\0', "clang-path", true);
	set_nobuild('\0', "clang-opt", true);
# undef set_nobuild
#endif
#ifndef HAVE_BPF_PROLOGUE
# if !defined (HAVE_DWARF_SUPPORT)
#  define REASON  "NO_DWARF=1"
# elif !defined (HAVE_LIBBPF_SUPPORT)
#  define REASON  "NO_LIBBPF=1"
# else
#  define REASON  "this architecture doesn't support BPF prologue"
# endif
# define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c)
	set_nobuild('\0', "vmlinux", true);
# undef set_nobuild
# undef REASON
#endif
	rec->opts.affinity = PERF_AFFINITY_SYS;

	rec->evlist = evlist__new();
	if (rec->evlist == NULL)
		return -ENOMEM;

	err = perf_config(perf_record_config, rec);
	if (err)
		return err;
	argc = parse_options(argc, argv, record_options, record_usage,
			    PARSE_OPT_STOP_AT_NON_OPTION);
	if (quiet)
		perf_quiet_option();
	/* Make system wide (-a) the default target. */
	if (!argc && target__none(&rec->opts.target))
		rec->opts.target.system_wide = true;
	if (nr_cgroups && !rec->opts.target.system_wide) {
		usage_with_options_msg(record_usage, record_options,
			"cgroup monitoring only available in system-wide mode");
	}
	if (rec->opts.kcore)
		rec->data.is_dir = true;
	if (rec->opts.comp_level != 0) {
		pr_debug("Compression enabled, disabling build id collection at the end of the session.\n");
		rec->no_buildid = true;
	}
	if (rec->opts.record_switch_events &&
	    !perf_can_record_switch_events()) {
		ui__error("kernel does not support recording context switch events\n");
		parse_options_usage(record_usage, record_options, "switch-events", 0);
		return -EINVAL;
	}
	if (switch_output_setup(rec)) {
		parse_options_usage(record_usage, record_options, "switch-output", 0);
		return -EINVAL;
	}
	if (rec->switch_output.time) {
		signal(SIGALRM, alarm_sig_handler);
		alarm(rec->switch_output.time);
	}
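	/*
	 * alarm() fires only once, so for time based switching the timer
	 * is re-armed after each output switch; otherwise only the first
	 * rotation would ever happen.
	 */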
	if (rec->switch_output.num_files) {
		rec->switch_output.filenames = calloc(rec->switch_output.num_files,
						      sizeof(char *));
		if (!rec->switch_output.filenames)
			return -EINVAL;
	}
	/*
	 * Allow aliases to facilitate the lookup of symbols for address
	 * filters. Refer to auxtrace_parse_filters().
	 */
	symbol_conf.allow_aliases = true;

	symbol__init(NULL);
	if (rec->opts.affinity != PERF_AFFINITY_SYS) {
		rec->affinity_mask.nbits = cpu__max_cpu();
		rec->affinity_mask.bits = bitmap_alloc(rec->affinity_mask.nbits);
		if (!rec->affinity_mask.bits) {
			pr_err("Failed to allocate thread mask for %zd cpus\n", rec->affinity_mask.nbits);
			return -ENOMEM;
		}
		pr_debug2("thread mask[%zd]: empty\n", rec->affinity_mask.nbits);
	}
	err = record__auxtrace_init(rec);
	if (err)
		goto out;

	if (dry_run)
		goto out;

	err = bpf__setup_stdout(rec->evlist);
	if (err) {
		bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf));
		pr_err("ERROR: Setup BPF stdout failed: %s\n",
		       errbuf);
		goto out;
	}

	err = -ENOMEM;
	if (rec->no_buildid_cache || rec->no_buildid) {
		disable_buildid_cache();
	} else if (rec->switch_output.enabled) {
		/*
		 * In 'perf record --switch-output', disable buildid
		 * generation by default to reduce data file switching
		 * overhead. Still generate buildids if they are explicitly
		 * required, using:
		 *
		 *  perf record --switch-output --no-no-buildid \
		 *              --no-no-buildid-cache
		 *
		 * The following code is equivalent to:
		 *
		 * if ((rec->no_buildid || !rec->no_buildid_set) &&
		 *     (rec->no_buildid_cache || !rec->no_buildid_cache_set))
		 *         disable_buildid_cache();
		 */
		bool disable = true;

		if (rec->no_buildid_set && !rec->no_buildid)
			disable = false;
		if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
			disable = false;
		if (disable) {
			rec->no_buildid = true;
			rec->no_buildid_cache = true;
			disable_buildid_cache();
		}
	}
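	/*
	 * Example: plain 'perf record --switch-output' leaves both *_set
	 * flags false, so the branch above disables build-id collection
	 * and the cache; adding --no-no-buildid --no-no-buildid-cache
	 * keeps both enabled at the cost of slower file switches.
	 */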
	if (record.opts.overwrite)
		record.opts.tail_synthesize = true;
	if (rec->evlist->core.nr_entries == 0 &&
	    __perf_evlist__add_default(rec->evlist, !record.opts.no_samples) < 0) {
		pr_err("Not enough memory for event selector list\n");
		goto out;
	}
	if (rec->opts.target.tid && !rec->opts.no_inherit_set)
		rec->opts.no_inherit = true;
	err = target__validate(&rec->opts.target);
	if (err) {
		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__warning("%s\n", errbuf);
	}
	err = target__parse_uid(&rec->opts.target);
	if (err) {
		int saved_errno = errno;

		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__error("%s", errbuf);

		err = -saved_errno;
		goto out;
	}
	/* Enable ignoring missing threads when -u/-p option is defined. */
	rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX || rec->opts.target.pid;
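	/*
	 * Rationale: with -u/-p the thread list is enumerated before the
	 * events are opened, so a thread can exit in between; a failed
	 * perf_event_open() on a vanished thread must not abort the
	 * whole session.
	 */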
	err = -ENOMEM;
	if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
		usage_with_options(record_usage, record_options);
	err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
	if (err)
		goto out;

	/*
	 * We take all buildids when the file contains AUX area tracing data
	 * because we do not decode the trace: decoding would take too long.
	 */
	if (rec->opts.full_auxtrace)
		rec->buildid_all = true;
	if (record_opts__config(&rec->opts)) {
		err = -EINVAL;
		goto out;
	}

	if (rec->opts.nr_cblocks > nr_cblocks_max)
		rec->opts.nr_cblocks = nr_cblocks_max;
	pr_debug("nr_cblocks: %d\n", rec->opts.nr_cblocks);
2771 pr_debug("affinity: %s\n", affinity_tags[rec->opts.affinity]);
2772 pr_debug("mmap flush: %d\n", rec->opts.mmap_flush);
2774 if (rec->opts.comp_level > comp_level_max)
2775 rec->opts.comp_level = comp_level_max;
2776 pr_debug("comp level: %d\n", rec->opts.comp_level);
	err = __cmd_record(&record, argc, argv);
out:
	bitmap_free(rec->affinity_mask.bits);
	evlist__delete(rec->evlist);
	symbol__exit();
	auxtrace_record__free(rec->itr);
	return err;
}
static void snapshot_sig_handler(int sig __maybe_unused)
{
	struct record *rec = &record;

	if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
		trigger_hit(&auxtrace_snapshot_trigger);
		auxtrace_record__snapshot_started = 1;
		if (auxtrace_record__snapshot_start(record.itr))
			trigger_error(&auxtrace_snapshot_trigger);
	}

	if (switch_output_signal(rec))
		trigger_hit(&switch_output_trigger);
}
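/*
 * snapshot_sig_handler() is installed for SIGUSR2 by __cmd_record()
 * when AUX area snapshot mode or --switch-output is in use, so a
 * single signal can drive either trigger.
 */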
static void alarm_sig_handler(int sig __maybe_unused)
{
	struct record *rec = &record;

	if (switch_output_time(rec))
		trigger_hit(&switch_output_trigger);
}