1 // SPDX-License-Identifier: GPL-2.0
5 * Builtin record command: Record the profile of a workload
6 * (or a CPU, or a PID) into the perf.data output file - for
7 * later analysis via perf report.
13 #include "util/build-id.h"
14 #include "util/util.h"
15 #include <subcmd/parse-options.h>
16 #include "util/parse-events.h"
17 #include "util/config.h"
19 #include "util/callchain.h"
20 #include "util/cgroup.h"
21 #include "util/header.h"
22 #include "util/event.h"
23 #include "util/evlist.h"
24 #include "util/evsel.h"
25 #include "util/debug.h"
26 #include "util/session.h"
27 #include "util/tool.h"
28 #include "util/symbol.h"
29 #include "util/cpumap.h"
30 #include "util/thread_map.h"
31 #include "util/data.h"
32 #include "util/perf_regs.h"
33 #include "util/auxtrace.h"
35 #include "util/parse-branch-options.h"
36 #include "util/parse-regs-options.h"
37 #include "util/llvm-utils.h"
38 #include "util/bpf-loader.h"
39 #include "util/trigger.h"
40 #include "util/perf-hooks.h"
41 #include "util/cpu-set-sched.h"
42 #include "util/time-utils.h"
43 #include "util/units.h"
44 #include "util/bpf-event.h"
56 #include <linux/time64.h>
/*
 * NOTE(review): this extract is line-sampled; the members of
 * 'struct switch_output' and the "struct record {" opener are not
 * visible here, so both definitions below are fragments.
 */
58 struct switch_output {
/* Fields of 'struct record' — per-invocation state of 'perf record'. */
71 struct perf_tool tool;
72 struct record_opts opts;
74 struct perf_data data;
75 struct auxtrace_record *itr;
76 struct perf_evlist *evlist;
77 struct perf_session *session;
81 bool no_buildid_cache;
82 bool no_buildid_cache_set;
/* When set, the output file name gets a timestamp suffix. */
84 bool timestamp_filename;
85 bool timestamp_boundary;
86 struct switch_output switch_output;
87 unsigned long long samples;
/* CPU mask last applied via sched_setaffinity() (see record__adjust_affinity). */
88 cpu_set_t affinity_mask;
/* Set from signal context to request an AUX-area snapshot read in the main loop. */
91 static volatile int auxtrace_record__snapshot_started;
92 static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
93 static DEFINE_TRIGGER(switch_output_trigger);
/* Printable names for the --affinity modes (initializer body elided in this extract). */
95 static const char *affinity_tags[PERF_AFFINITY_MAX] = {
/* True when --switch-output is signal-driven and the trigger is armed. */
99 static bool switch_output_signal(struct record *rec)
101 return rec->switch_output.signal &&
102 trigger_is_ready(&switch_output_trigger);
/* True when the --switch-output byte-size threshold has been crossed. */
105 static bool switch_output_size(struct record *rec)
107 return rec->switch_output.size &&
108 trigger_is_ready(&switch_output_trigger) &&
109 (rec->bytes_written >= rec->switch_output.size);
/* True when --switch-output is time-based and the trigger is armed. */
112 static bool switch_output_time(struct record *rec)
114 return rec->switch_output.time &&
115 trigger_is_ready(&switch_output_trigger);
/*
 * Write 'size' bytes at 'bf' to the perf.data output file, account them
 * in rec->bytes_written, and fire the switch-output trigger once the
 * size threshold is reached.  (Error-return lines elided in this extract.)
 */
118 static int record__write(struct record *rec, struct perf_mmap *map __maybe_unused,
119 void *bf, size_t size)
121 struct perf_data_file *file = &rec->session->data->file;
123 if (perf_data_file__write(file, bf, size) < 0) {
124 pr_err("failed to write perf data, error: %m\n");
128 rec->bytes_written += size;
130 if (switch_output_size(rec))
131 trigger_hit(&switch_output_trigger);
136 #ifdef HAVE_AIO_SUPPORT
/*
 * Queue one POSIX AIO write of 'size' bytes to 'trace_fd' at offset 'off'.
 * Completion is polled (SIGEV_NONE), not signalled; see record__aio_complete().
 * On hard failure aio_fildes is reset to -1 so the slot reads as free.
 */
137 static int record__aio_write(struct aiocb *cblock, int trace_fd,
138 void *buf, size_t size, off_t off)
142 cblock->aio_fildes = trace_fd;
143 cblock->aio_buf = buf;
144 cblock->aio_nbytes = size;
145 cblock->aio_offset = off;
146 cblock->aio_sigevent.sigev_notify = SIGEV_NONE;
149 rc = aio_write(cblock);
/* EAGAIN == AIO queue full: leave the cblock armed so the caller can retry. */
152 } else if (errno != EAGAIN) {
153 cblock->aio_fildes = -1;
154 pr_err("failed to queue perf data, error: %m\n");
/*
 * Poll one AIO control block for completion.  On a short write the
 * remainder is re-queued from where the kernel stopped; on full
 * completion the slot is released (aio_fildes = -1) and the mmap
 * refcount taken in perf_mmap__push() is dropped.
 * (Several lines of this function are elided in this extract.)
 */
162 static int record__aio_complete(struct perf_mmap *md, struct aiocb *cblock)
168 ssize_t aio_ret, written;
170 aio_errno = aio_error(cblock);
171 if (aio_errno == EINPROGRESS)
174 written = aio_ret = aio_return(cblock);
176 if (aio_errno != EINTR)
177 pr_err("failed to write perf data, error: %m\n");
/* Bytes that did not make it out in this request. */
181 rem_size = cblock->aio_nbytes - written;
184 cblock->aio_fildes = -1;
186 * md->refcount is incremented in perf_mmap__push() for
187 * every enqueued aio write request so decrement it because
188 * the request is now complete.
194 * aio write request may require restart with the
195 * reminder if the kernel didn't write whole
/* Restart the write for the remainder at the adjusted offset/buffer. */
198 rem_off = cblock->aio_offset + written;
199 rem_buf = (void *)(cblock->aio_buf + written);
200 record__aio_write(cblock, cblock->aio_fildes,
201 rem_buf, rem_size, rem_off);
/*
 * Wait for AIO writes on 'md' to finish.  With sync_all == false it
 * returns the index of the first free cblock; otherwise it keeps
 * aio_suspend()-ing until every in-flight request has completed.
 * (Loop-control lines are elided in this extract.)
 */
208 static int record__aio_sync(struct perf_mmap *md, bool sync_all)
210 struct aiocb **aiocb = md->aio.aiocb;
211 struct aiocb *cblocks = md->aio.cblocks;
212 struct timespec timeout = { 0, 1000 * 1000 * 1 }; /* 1ms */
217 for (i = 0; i < md->aio.nr_cblocks; ++i) {
/* aio_fildes == -1 marks a free slot; otherwise poll for completion. */
218 if (cblocks[i].aio_fildes == -1 || record__aio_complete(md, &cblocks[i])) {
225 * Started aio write is not complete yet
226 * so it has to be waited before the
229 aiocb[i] = &cblocks[i];
/* Block (1ms granularity) until at least one queued request finishes. */
236 while (aio_suspend((const struct aiocb **)aiocb, md->aio.nr_cblocks, &timeout)) {
237 if (!(errno == EAGAIN || errno == EINTR))
238 pr_err("failed to sync perf data, error: %m\n");
/*
 * perf_mmap__aio_push() callback: queue an async write of one mmap chunk
 * and do the same byte accounting / switch-output check as record__write().
 */
243 static int record__aio_pushfn(void *to, struct aiocb *cblock, void *bf, size_t size, off_t off)
245 struct record *rec = to;
246 int ret, trace_fd = rec->session->data->file.fd;
250 ret = record__aio_write(cblock, trace_fd, bf, size, off);
252 rec->bytes_written += size;
253 if (switch_output_size(rec))
254 trigger_hit(&switch_output_trigger);
/* Read the current output-file position (AIO writes bypass the file offset). */
260 static off_t record__aio_get_pos(int trace_fd)
262 return lseek(trace_fd, 0, SEEK_CUR);
/* Restore the output-file position after a batch of AIO pushes. */
265 static void record__aio_set_pos(int trace_fd, off_t pos)
267 lseek(trace_fd, pos, SEEK_SET);
/*
 * Drain all outstanding AIO writes on every mmap of the evlist.
 * No-op unless --aio (opts.nr_cblocks) is in effect.
 */
270 static void record__aio_mmap_read_sync(struct record *rec)
273 struct perf_evlist *evlist = rec->evlist;
274 struct perf_mmap *maps = evlist->mmap;
276 if (!rec->opts.nr_cblocks)
279 for (i = 0; i < evlist->nr_mmaps; i++) {
280 struct perf_mmap *map = &maps[i];
/* sync_all == true: wait for every in-flight request on this map. */
283 record__aio_sync(map, true);
/* Default / maximum number of AIO control blocks for --aio[=n]. */
287 static int nr_cblocks_default = 1;
288 static int nr_cblocks_max = 4;
/*
 * Option callback for --aio: parse the cblock count; bare "--aio" or an
 * unparsable value falls back to the default.  (Clamping to
 * nr_cblocks_max happens on lines elided from this extract.)
 */
290 static int record__aio_parse(const struct option *opt,
294 struct record_opts *opts = (struct record_opts *)opt->value;
297 opts->nr_cblocks = 0;
300 opts->nr_cblocks = strtol(str, NULL, 0);
301 if (!opts->nr_cblocks)
302 opts->nr_cblocks = nr_cblocks_default;
307 #else /* HAVE_AIO_SUPPORT */
/* Without AIO support, all AIO entry points compile to no-ops. */
308 static int nr_cblocks_max = 0;
310 static int record__aio_sync(struct perf_mmap *md __maybe_unused, bool sync_all __maybe_unused)
315 static int record__aio_pushfn(void *to __maybe_unused, struct aiocb *cblock __maybe_unused,
316 void *bf __maybe_unused, size_t size __maybe_unused, off_t off __maybe_unused)
321 static off_t record__aio_get_pos(int trace_fd __maybe_unused)
326 static void record__aio_set_pos(int trace_fd __maybe_unused, off_t pos __maybe_unused)
330 static void record__aio_mmap_read_sync(struct record *rec __maybe_unused)
/* True when the user enabled asynchronous trace writing (--aio). */
335 static int record__aio_enabled(struct record *rec)
337 return rec->opts.nr_cblocks > 0;
340 #define MMAP_FLUSH_DEFAULT 1
/*
 * Option callback for --mmap-flush: accept a size with an optional
 * B/K/M/G suffix (or a plain number), defaulting to 1 byte, and clamp
 * it to the mmap buffer size so a flush can always be satisfied.
 */
341 static int record__mmap_flush_parse(const struct option *opt,
346 struct record_opts *opts = (struct record_opts *)opt->value;
347 static struct parse_tag tags[] = {
348 { .tag = 'B', .mult = 1 },
349 { .tag = 'K', .mult = 1 << 10 },
350 { .tag = 'M', .mult = 1 << 20 },
351 { .tag = 'G', .mult = 1 << 30 },
/* parse_tag_value() returns (unsigned)-1 when no suffix matched. */
359 opts->mmap_flush = parse_tag_value(str, tags);
360 if (opts->mmap_flush == (int)-1)
361 opts->mmap_flush = strtol(str, NULL, 0);
364 if (!opts->mmap_flush)
365 opts->mmap_flush = MMAP_FLUSH_DEFAULT;
/* Never flush more than one mmap buffer's worth at a time. */
367 flush_max = perf_evlist__mmap_size(opts->mmap_pages);
369 if (opts->mmap_flush > flush_max)
370 opts->mmap_flush = flush_max;
/*
 * perf_tool callback: write a synthesized event straight to the output
 * file (no mmap involved, hence map == NULL).
 */
375 static int process_synthesized_event(struct perf_tool *tool,
376 union perf_event *event,
377 struct perf_sample *sample __maybe_unused,
378 struct machine *machine __maybe_unused)
380 struct record *rec = container_of(tool, struct record, tool);
381 return record__write(rec, NULL, event, event->header.size);
/* perf_mmap__push() callback for synchronous (non-AIO) trace writing. */
384 static int record__pushfn(struct perf_mmap *map, void *to, void *bf, size_t size)
386 struct record *rec = to;
389 return record__write(rec, map, bf, size);
/* Flags shared between signal handlers and the main record loop. */
392 static volatile int done;
393 static volatile int signr = -1;
394 static volatile int child_finished;
/* Generic handler for SIGCHLD/SIGINT/SIGTERM (body elided in this extract). */
396 static void sig_handler(int sig)
/* SIGSEGV handler: run perf-hook recovery, then dump a stack trace. */
406 static void sigsegv_handler(int sig)
408 perf_hooks__recover();
409 sighandler_dump_stack(sig);
/* atexit() hook: re-raise the recorded signal with default disposition. */
412 static void record__sig_exit(void)
417 signal(signr, SIG_DFL);
421 #ifdef HAVE_AUXTRACE_SUPPORT
/*
 * Write one AUX-area trace event: the header event, up to two data
 * fragments (the ring buffer may wrap, hence data1/data2), and pad to
 * an 8-byte boundary.  For seekable single-file output the file offset
 * is recorded in the auxtrace index first.
 */
423 static int record__process_auxtrace(struct perf_tool *tool,
424 struct perf_mmap *map,
425 union perf_event *event, void *data1,
426 size_t len1, void *data2, size_t len2)
428 struct record *rec = container_of(tool, struct record, tool);
429 struct perf_data *data = &rec->data;
433 if (!perf_data__is_pipe(data) && !perf_data__is_dir(data)) {
435 int fd = perf_data__fd(data);
/* Remember where this auxtrace event lands so report can seek to it. */
438 file_offset = lseek(fd, 0, SEEK_CUR);
439 if (file_offset == -1)
441 err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
447 /* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
448 padding = (len1 + len2) & 7;
450 padding = 8 - padding;
452 record__write(rec, map, event, event->header.size);
453 record__write(rec, map, data1, len1);
455 record__write(rec, map, data2, len2);
456 record__write(rec, map, &pad, padding);
/* Drain one AUX-area mmap in normal (continuous) mode. */
461 static int record__auxtrace_mmap_read(struct record *rec,
462 struct perf_mmap *map)
466 ret = auxtrace_mmap__read(map, rec->itr, &rec->tool,
467 record__process_auxtrace);
/* Drain one AUX-area mmap in snapshot mode, bounded by the snapshot size. */
477 static int record__auxtrace_mmap_read_snapshot(struct record *rec,
478 struct perf_mmap *map)
482 ret = auxtrace_mmap__read_snapshot(map, rec->itr, &rec->tool,
483 record__process_auxtrace,
484 rec->opts.auxtrace_snapshot_size);
/* Take a snapshot from every mmap that has an AUX area attached. */
494 static int record__auxtrace_read_snapshot_all(struct record *rec)
499 for (i = 0; i < rec->evlist->nr_mmaps; i++) {
500 struct perf_mmap *map = &rec->evlist->mmap[i];
502 if (!map->auxtrace_mmap.base)
505 if (record__auxtrace_mmap_read_snapshot(rec, map) != 0) {
/*
 * Handle a snapshot request (SIGUSR2): read all AUX areas, finish the
 * snapshot in the PMU driver, and re-arm the trigger on success.
 */
514 static void record__read_auxtrace_snapshot(struct record *rec)
516 pr_debug("Recording AUX area tracing snapshot\n");
517 if (record__auxtrace_read_snapshot_all(rec) < 0) {
518 trigger_error(&auxtrace_snapshot_trigger);
520 if (auxtrace_record__snapshot_finish(rec->itr))
521 trigger_error(&auxtrace_snapshot_trigger);
523 trigger_ready(&auxtrace_snapshot_trigger);
/*
 * Set up AUX-area tracing: detect a PMU-specific recorder, apply
 * snapshot options, then parse any auxtrace filters.
 */
527 static int record__auxtrace_init(struct record *rec)
532 rec->itr = auxtrace_record__init(rec->evlist, &err);
537 err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
538 rec->opts.auxtrace_snapshot_opts);
542 return auxtrace_parse_filters(rec->evlist);
/* Stubs used when perf is built without HAVE_AUXTRACE_SUPPORT. */
548 int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
549 struct perf_mmap *map __maybe_unused)
555 void record__read_auxtrace_snapshot(struct record *rec __maybe_unused)
560 int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
565 static int record__auxtrace_init(struct record *rec __maybe_unused)
/*
 * mmap the event ring buffers for the whole evlist, honoring the
 * --mmap-pages / AUX / AIO / affinity / flush options, with a
 * user-friendly diagnostic for the common EPERM (mlock limit) case.
 */
572 static int record__mmap_evlist(struct record *rec,
573 struct perf_evlist *evlist)
575 struct record_opts *opts = &rec->opts;
/* Non-default affinity modes need the cpu -> node map set up first. */
578 if (opts->affinity != PERF_AFFINITY_SYS)
579 cpu__setup_cpunode_map();
581 if (perf_evlist__mmap_ex(evlist, opts->mmap_pages,
582 opts->auxtrace_mmap_pages,
583 opts->auxtrace_snapshot_mode,
584 opts->nr_cblocks, opts->affinity,
585 opts->mmap_flush) < 0) {
586 if (errno == EPERM) {
587 pr_err("Permission error mapping pages.\n"
588 "Consider increasing "
589 "/proc/sys/kernel/perf_event_mlock_kb,\n"
590 "or try again with a smaller value of -m/--mmap_pages.\n"
591 "(current value: %u,%u)\n",
592 opts->mmap_pages, opts->auxtrace_mmap_pages);
595 pr_err("failed to mmap with %d (%s)\n", errno,
596 str_error_r(errno, msg, sizeof(msg)));
/* Convenience wrapper: mmap the record's own evlist. */
606 static int record__mmap(struct record *rec)
608 return record__mmap_evlist(rec, rec->evlist);
/*
 * Open all counters: optionally prepend a dummy tracking event for
 * --delay, configure the evlist, open each evsel (with fallback and
 * weak-group retry), apply filters, and mmap the buffers.
 * (Several error-path lines are elided in this extract.)
 */
611 static int record__open(struct record *rec)
614 struct perf_evsel *pos;
615 struct perf_evlist *evlist = rec->evlist;
616 struct perf_session *session = rec->session;
617 struct record_opts *opts = &rec->opts;
621 * For initial_delay we need to add a dummy event so that we can track
622 * PERF_RECORD_MMAP while we wait for the initial delay to enable the
623 * real events, the ones asked by the user.
625 if (opts->initial_delay) {
626 if (perf_evlist__add_dummy(evlist))
629 pos = perf_evlist__first(evlist);
631 pos = perf_evlist__last(evlist);
/* Real events start disabled and are enabled on exec / after the delay. */
633 pos->attr.enable_on_exec = 1;
636 perf_evlist__config(evlist, opts, &callchain_param);
638 evlist__for_each_entry(evlist, pos) {
640 if (perf_evsel__open(pos, pos->cpus, pos->threads) < 0) {
/* First try a softer event configuration before giving up. */
641 if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
643 ui__warning("%s\n", msg);
/* A failing group member may be retried standalone (weak group). */
646 if ((errno == EINVAL || errno == EBADF) &&
647 pos->leader != pos &&
649 pos = perf_evlist__reset_weak_group(evlist, pos);
653 perf_evsel__open_strerror(pos, &opts->target,
654 errno, msg, sizeof(msg));
655 ui__error("%s\n", msg);
659 pos->supported = true;
662 if (perf_evlist__apply_filters(evlist, &pos)) {
663 pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
664 pos->filter, perf_evsel__name(pos), errno,
665 str_error_r(errno, msg, sizeof(msg)));
670 rc = record__mmap(rec);
674 session->evlist = evlist;
675 perf_session__set_id_hdr_size(session);
/*
 * Sample callback used while post-processing build-ids: track the
 * first/last sample timestamps, and (unless --buildid-all) mark the
 * DSO hit by this sample.
 */
680 static int process_sample_event(struct perf_tool *tool,
681 union perf_event *event,
682 struct perf_sample *sample,
683 struct perf_evsel *evsel,
684 struct machine *machine)
686 struct record *rec = container_of(tool, struct record, tool);
688 if (rec->evlist->first_sample_time == 0)
689 rec->evlist->first_sample_time = sample->time;
691 rec->evlist->last_sample_time = sample->time;
/* With --buildid-all every DSO is kept, so per-sample DSO marking is skipped. */
693 if (rec->buildid_all)
697 return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
/*
 * Re-read the freshly written perf.data to collect build-ids of the
 * DSOs that were hit.  Skipped when no data was recorded.
 */
700 static int process_buildids(struct record *rec)
702 struct perf_session *session = rec->session;
704 if (perf_data__size(&rec->data) == 0)
708 * During this process, it'll load kernel map and replace the
709 * dso->long_name to a real pathname it found. In this case
710 * we prefer the vmlinux path like
711 * /lib/modules/3.16.4/build/vmlinux
713 * rather than build-id path (in debug directory).
714 * $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
716 symbol_conf.ignore_vmlinux_buildid = true;
719 * If --buildid-all is given, it marks all DSO regardless of hits,
720 * so no need to process samples. But if timestamp_boundary is enabled,
721 * it still needs to walk on all samples to get the timestamps of
722 * first/last samples.
724 if (rec->buildid_all && !rec->timestamp_boundary)
725 rec->tool.sample = NULL;
727 return perf_session__process_events(session);
/*
 * machines__process_guests() callback: synthesize module and kernel
 * mmap events for one guest machine so guest symbols resolve in report.
 */
730 static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
733 struct perf_tool *tool = data;
735 *As for guest kernel when processing subcommand record&report,
736 *we arrange module mmap prior to guest kernel mmap and trigger
737 *a preload dso because default guest module symbols are loaded
738 *from guest kallsyms instead of /lib/modules/XXX/XXX. This
739 *method is used to avoid symbol missing when the first addr is
740 *in module instead of in guest kernel.
742 err = perf_event__synthesize_modules(tool, process_synthesized_event,
745 pr_err("Couldn't record guest kernel [%d]'s reference"
746 " relocation symbol.\n", machine->pid);
749 * We use _stext for guest kernel because guest kernel's /proc/kallsyms
750 * have no _text sometimes.
752 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
755 pr_err("Couldn't record guest kernel [%d]'s reference"
756 " relocation symbol.\n", machine->pid);
/* Marker event flushed after each round of ring-buffer reads. */
759 static struct perf_event_header finished_round_event = {
760 .size = sizeof(struct perf_event_header),
761 .type = PERF_RECORD_FINISHED_ROUND,
/*
 * For --affinity=node/cpu: migrate the record process onto the CPU set
 * of the mmap being read, but only when it differs from the current mask.
 */
764 static void record__adjust_affinity(struct record *rec, struct perf_mmap *map)
766 if (rec->opts.affinity != PERF_AFFINITY_SYS &&
767 !CPU_EQUAL(&rec->affinity_mask, &map->affinity_mask)) {
/* Copy map->affinity_mask into rec->affinity_mask (CPU_ZERO + CPU_OR). */
768 CPU_ZERO(&rec->affinity_mask);
769 CPU_OR(&rec->affinity_mask, &rec->affinity_mask, &map->affinity_mask);
770 sched_setaffinity(0, sizeof(rec->affinity_mask), &rec->affinity_mask);
/*
 * Read every mmap of the evlist (normal or overwrite set) and push the
 * data to the output, via AIO when enabled.  Also drains AUX areas in
 * continuous mode and emits FINISHED_ROUND when anything was written.
 * (Several error/continue lines are elided in this extract.)
 */
774 static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evlist,
775 bool overwrite, bool synch)
777 u64 bytes_written = rec->bytes_written;
780 struct perf_mmap *maps;
781 int trace_fd = rec->data.file.fd;
787 maps = overwrite ? evlist->overwrite_mmap : evlist->mmap;
/* Overwrite buffers are only read once they are marked DATA_PENDING. */
791 if (overwrite && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
/* AIO writes at explicit offsets; snapshot the file position up front. */
794 if (record__aio_enabled(rec))
795 off = record__aio_get_pos(trace_fd);
797 for (i = 0; i < evlist->nr_mmaps; i++) {
799 struct perf_mmap *map = &maps[i];
802 record__adjust_affinity(rec, map);
807 if (!record__aio_enabled(rec)) {
808 if (perf_mmap__push(map, rec, record__pushfn) != 0) {
817 * Call record__aio_sync() to wait till map->data buffer
818 * becomes available after previous aio write request.
820 idx = record__aio_sync(map, false);
821 if (perf_mmap__aio_push(map, rec, idx, record__aio_pushfn, &off) != 0) {
822 record__aio_set_pos(trace_fd, off);
/* AUX areas are drained here only in continuous (non-snapshot) mode. */
833 if (map->auxtrace_mmap.base && !rec->opts.auxtrace_snapshot_mode &&
834 record__auxtrace_mmap_read(rec, map) != 0) {
840 if (record__aio_enabled(rec))
841 record__aio_set_pos(trace_fd, off);
844 * Mark the round finished in case we wrote
845 * at least one event.
847 if (bytes_written != rec->bytes_written)
848 rc = record__write(rec, NULL, &finished_round_event, sizeof(finished_round_event));
/* After draining, flip the overwrite buffers back to EMPTY. */
851 perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
/* Read the normal buffers first, then the overwritable ones. */
856 static int record__mmap_read_all(struct record *rec, bool synch)
860 err = record__mmap_read_evlist(rec, rec->evlist, false, synch);
864 return record__mmap_read_evlist(rec, rec->evlist, true, synch);
/*
 * Start with every header feature enabled, then clear the ones this
 * session cannot or should not provide.
 */
867 static void record__init_features(struct record *rec)
869 struct perf_session *session = rec->session;
872 for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
873 perf_header__set_feat(&session->header, feat);
876 perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
878 if (!have_tracepoints(&rec->evlist->entries))
879 perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
881 if (!rec->opts.branch_stack)
882 perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
884 if (!rec->opts.full_auxtrace)
885 perf_header__clear_feat(&session->header, HEADER_AUXTRACE);
887 if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns))
888 perf_header__clear_feat(&session->header, HEADER_CLOCKID);
890 perf_header__clear_feat(&session->header, HEADER_DIR_FORMAT);
/* STAT is a perf-stat-only feature; never set for record. */
892 perf_header__clear_feat(&session->header, HEADER_STAT);
/*
 * Finalize the output file: fix up header sizes, run build-id
 * post-processing (unless --no-buildid), and rewrite the header.
 * (The return type line is elided in this extract.)
 */
896 record__finish_output(struct record *rec)
898 struct perf_data *data = &rec->data;
899 int fd = perf_data__fd(data);
904 rec->session->header.data_size += rec->bytes_written;
905 data->file.size = lseek(perf_data__fd(data), 0, SEEK_CUR);
907 if (!rec->no_buildid) {
908 process_buildids(rec);
/* --buildid-all: record build-ids for every DSO, hit or not. */
910 if (rec->buildid_all)
911 dsos__hit_all(rec->session);
913 perf_session__write_header(rec->session, rec->evlist, fd, true);
/*
 * Synthesize COMM/MMAP events for the forked workload's thread, used
 * both at startup and (with --tail-synthesize) at teardown.
 */
918 static int record__synthesize_workload(struct record *rec, bool tail)
921 struct thread_map *thread_map;
/* Only run at the phase (head/tail) matching opts.tail_synthesize. */
923 if (rec->opts.tail_synthesize != tail)
926 thread_map = thread_map__new_by_tid(rec->evlist->workload.pid);
927 if (thread_map == NULL)
930 err = perf_event__synthesize_thread_map(&rec->tool, thread_map,
931 process_synthesized_event,
932 &rec->session->machines.host,
933 rec->opts.sample_address);
934 thread_map__put(thread_map);
938 static int record__synthesize(struct record *rec, bool tail);
/*
 * Rotate the output file (--switch-output): flush AIO, synthesize tail
 * events, finish the current file, switch perf_data to a new
 * timestamped file, and reset counters.  With --switch-max-files the
 * oldest file is removed, round-robin.  (Return-type and some error
 * lines are elided in this extract.)
 */
941 record__switch_output(struct record *rec, bool at_exit)
943 struct perf_data *data = &rec->data;
947 /* Same Size: "2015122520103046"*/
948 char timestamp[] = "InvalidTimestamp";
950 record__aio_mmap_read_sync(rec);
952 record__synthesize(rec, true);
953 if (target__none(&rec->opts.target))
954 record__synthesize_workload(rec, true);
957 record__finish_output(rec);
958 err = fetch_current_timestamp(timestamp, sizeof(timestamp));
960 pr_err("Failed to get current timestamp\n");
964 fd = perf_data__switch(data, timestamp,
965 rec->session->header.data_offset,
966 at_exit, &new_filename);
/* Start accounting afresh for the newly opened file. */
967 if (fd >= 0 && !at_exit) {
968 rec->bytes_written = 0;
969 rec->session->header.data_size = 0;
973 fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
974 data->path, timestamp);
/* --switch-max-files: keep at most num_files outputs, dropping the oldest. */
976 if (rec->switch_output.num_files) {
977 int n = rec->switch_output.cur_file + 1;
979 if (n >= rec->switch_output.num_files)
981 rec->switch_output.cur_file = n;
982 if (rec->switch_output.filenames[n]) {
983 remove(rec->switch_output.filenames[n]);
984 free(rec->switch_output.filenames[n]);
986 rec->switch_output.filenames[n] = new_filename;
991 /* Output tracking events */
993 record__synthesize(rec, false);
996 * In 'perf record --switch-output' without -a,
997 * record__synthesize() in record__switch_output() won't
998 * generate tracking events because there's no thread_map
999 * in evlist. Which causes newly created perf.data doesn't
1000 * contain map and comm information.
1001 * Create a fake thread_map and directly call
1002 * perf_event__synthesize_thread_map() for those events.
1004 if (target__none(&rec->opts.target))
1005 record__synthesize_workload(rec, false);
/* errno delivered by SIGUSR1 when exec of the workload failed. */
1010 static volatile int workload_exec_errno;
1013 * perf_evlist__prepare_workload will send a SIGUSR1
1014 * if the fork fails, since we asked by setting its
1015 * want_signal to true.
1017 static void workload_exec_failed_signal(int signo __maybe_unused,
1019 void *ucontext __maybe_unused)
/* The failing errno is passed in the signal's sival_int payload. */
1021 workload_exec_errno = info->si_value.sival_int;
1026 static void snapshot_sig_handler(int sig);
1027 static void alarm_sig_handler(int sig);
/* Weak default: no TIME_CONV event unless the arch overrides this. */
1030 perf_event__synth_time_conv(const struct perf_event_mmap_page *pc __maybe_unused,
1031 struct perf_tool *tool __maybe_unused,
1032 perf_event__handler_t process __maybe_unused,
1033 struct machine *machine __maybe_unused)
/* Pick any mapped perf_event_mmap_page control page from the evlist. */
1038 static const struct perf_event_mmap_page *
1039 perf_evlist__pick_pc(struct perf_evlist *evlist)
1042 if (evlist->mmap && evlist->mmap[0].base)
1043 return evlist->mmap[0].base;
1044 if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].base)
1045 return evlist->overwrite_mmap[0].base;
/* Wrapper around perf_evlist__pick_pc() for the record session. */
1050 static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
1052 const struct perf_event_mmap_page *pc;
1054 pc = perf_evlist__pick_pc(rec->evlist);
/*
 * Emit all the "synthetic" events that describe existing system state:
 * attrs/features/tracing data (pipe mode), time conversion, auxtrace
 * info, kernel and module mmaps, guest OSes, thread and cpu maps, BPF
 * events, and finally existing threads.  With --tail-synthesize this
 * runs at teardown instead of startup.
 */
1060 static int record__synthesize(struct record *rec, bool tail)
1062 struct perf_session *session = rec->session;
1063 struct machine *machine = &session->machines.host;
1064 struct perf_data *data = &rec->data;
1065 struct record_opts *opts = &rec->opts;
1066 struct perf_tool *tool = &rec->tool;
1067 int fd = perf_data__fd(data);
1070 if (rec->opts.tail_synthesize != tail)
1073 if (data->is_pipe) {
1075 * We need to synthesize events first, because some
1076 * features works on top of them (on report side).
1078 err = perf_event__synthesize_attrs(tool, rec->evlist,
1079 process_synthesized_event);
1081 pr_err("Couldn't synthesize attrs.\n");
1085 err = perf_event__synthesize_features(tool, session, rec->evlist,
1086 process_synthesized_event);
1088 pr_err("Couldn't synthesize features.\n");
1092 if (have_tracepoints(&rec->evlist->entries)) {
1094 * FIXME err <= 0 here actually means that
1095 * there were no tracepoints so its not really
1096 * an error, just that we don't need to
1097 * synthesize anything. We really have to
1098 * return this more properly and also
1099 * propagate errors that now are calling die()
1101 err = perf_event__synthesize_tracing_data(tool, fd, rec->evlist,
1102 process_synthesized_event);
1104 pr_err("Couldn't record tracing data.\n");
/* Tracing data is written directly to fd; account its size here. */
1107 rec->bytes_written += err;
1111 err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
1112 process_synthesized_event, machine);
1116 if (rec->opts.full_auxtrace) {
1117 err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
1118 session, process_synthesized_event);
/* Kernel/module mmaps are pointless when only user space is recorded. */
1123 if (!perf_evlist__exclude_kernel(rec->evlist)) {
1124 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
1126 WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
1127 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
1128 "Check /proc/kallsyms permission or run as root.\n");
1130 err = perf_event__synthesize_modules(tool, process_synthesized_event,
1132 WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
1133 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
1134 "Check /proc/modules permission or run as root.\n");
1138 machines__process_guests(&session->machines,
1139 perf_event__synthesize_guest_os, tool);
1142 err = perf_event__synthesize_extra_attr(&rec->tool,
1144 process_synthesized_event,
1149 err = perf_event__synthesize_thread_map2(&rec->tool, rec->evlist->threads,
1150 process_synthesized_event,
1153 pr_err("Couldn't synthesize thread map.\n");
1157 err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->cpus,
1158 process_synthesized_event, NULL);
1160 pr_err("Couldn't synthesize cpu map.\n");
/* BPF event synthesis failure is non-fatal: warn and continue. */
1164 err = perf_event__synthesize_bpf_events(session, process_synthesized_event,
1167 pr_warning("Couldn't synthesize bpf events.\n");
1169 err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
1170 process_synthesized_event, opts->sample_address,
/*
 * Main driver of 'perf record': set up signals and the session, fork
 * the workload, open/mmap counters, write the header, synthesize
 * startup events, then loop reading ring buffers until done, handling
 * auxtrace snapshots and output switching along the way; finally tear
 * everything down and print the summary.
 * (Many lines of this function are elided in this extract; comments
 * below only describe what is visible.)
 */
1176 static int __cmd_record(struct record *rec, int argc, const char **argv)
1180 unsigned long waking = 0;
1181 const bool forks = argc > 0;
1182 struct perf_tool *tool = &rec->tool;
1183 struct record_opts *opts = &rec->opts;
1184 struct perf_data *data = &rec->data;
1185 struct perf_session *session;
1186 bool disabled = false, draining = false;
1187 struct perf_evlist *sb_evlist = NULL;
1190 atexit(record__sig_exit);
1191 signal(SIGCHLD, sig_handler);
1192 signal(SIGINT, sig_handler);
1193 signal(SIGTERM, sig_handler);
1194 signal(SIGSEGV, sigsegv_handler);
1196 if (rec->opts.record_namespaces)
1197 tool->namespace_events = true;
/* SIGUSR2 drives both AUX snapshots and output switching, when enabled. */
1199 if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) {
1200 signal(SIGUSR2, snapshot_sig_handler);
1201 if (rec->opts.auxtrace_snapshot_mode)
1202 trigger_on(&auxtrace_snapshot_trigger);
1203 if (rec->switch_output.enabled)
1204 trigger_on(&switch_output_trigger);
1206 signal(SIGUSR2, SIG_IGN);
1209 session = perf_session__new(data, false, tool);
1210 if (session == NULL) {
1211 pr_err("Perf session creation failed.\n");
1215 fd = perf_data__fd(data);
1216 rec->session = session;
1218 record__init_features(rec);
1220 if (rec->opts.use_clockid && rec->opts.clockid_res_ns)
1221 session->header.env.clockid_res_ns = rec->opts.clockid_res_ns;
/* Fork the workload now; it waits for perf_evlist__start_workload(). */
1224 err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
1225 argv, data->is_pipe,
1226 workload_exec_failed_signal);
1228 pr_err("Couldn't run the workload!\n");
1230 goto out_delete_session;
1235 * If we have just single event and are sending data
1236 * through pipe, we need to force the ids allocation,
1237 * because we synthesize event name through the pipe
1238 * and need the id for that.
1240 if (data->is_pipe && rec->evlist->nr_entries == 1)
1241 rec->opts.sample_id = true;
1243 if (record__open(rec) != 0) {
1248 err = bpf__apply_obj_config();
1250 char errbuf[BUFSIZ];
1252 bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
1253 pr_err("ERROR: Apply config to BPF failed: %s\n",
1259 * Normally perf_session__new would do this, but it doesn't have the
1262 if (rec->tool.ordered_events && !perf_evlist__sample_id_all(rec->evlist)) {
1263 pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
1264 rec->tool.ordered_events = false;
1267 if (!rec->evlist->nr_groups)
1268 perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);
/* Pipe output gets a minimal pipe header; files get the full header. */
1270 if (data->is_pipe) {
1271 err = perf_header__write_pipe(fd);
1275 err = perf_session__write_header(session, rec->evlist, fd, false);
1280 if (!rec->no_buildid
1281 && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
1282 pr_err("Couldn't generate buildids. "
1283 "Use --no-buildid to profile anyway.\n");
/* Side-band thread watches BPF program load/unload events. */
1288 if (!opts->no_bpf_event)
1289 bpf_event__add_sb_event(&sb_evlist, &session->header.env);
1291 if (perf_evlist__start_sb_thread(sb_evlist, &rec->opts.target)) {
1292 pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n");
1293 opts->no_bpf_event = true;
1296 err = record__synthesize(rec, false);
1300 if (rec->realtime_prio) {
1301 struct sched_param param;
1303 param.sched_priority = rec->realtime_prio;
/* NOTE(review): "¶m" below is mojibake for "&param" — fix the file encoding. */
1304 if (sched_setscheduler(0, SCHED_FIFO, ¶m)) {
1305 pr_err("Could not set realtime priority.\n");
1312 * When perf is starting the traced process, all the events
1313 * (apart from group members) have enable_on_exec=1 set,
1314 * so don't spoil it by prematurely enabling them.
1316 if (!target__none(&opts->target) && !opts->initial_delay)
1317 perf_evlist__enable(rec->evlist);
1323 struct machine *machine = &session->machines.host;
1324 union perf_event *event;
1327 event = malloc(sizeof(event->comm) + machine->id_hdr_size);
1328 if (event == NULL) {
1334 * Some H/W events are generated before COMM event
1335 * which is emitted during exec(), so perf script
1336 * cannot see a correct process name for those events.
1337 * Synthesize COMM event to prevent it.
1339 tgid = perf_event__synthesize_comm(tool, event,
1340 rec->evlist->workload.pid,
1341 process_synthesized_event,
1348 event = malloc(sizeof(event->namespaces) +
1349 (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
1350 machine->id_hdr_size);
1351 if (event == NULL) {
1357 * Synthesize NAMESPACES event for the command specified.
1359 perf_event__synthesize_namespaces(tool, event,
1360 rec->evlist->workload.pid,
1361 tgid, process_synthesized_event,
/* Release the forked workload from its wait; it execs now. */
1365 perf_evlist__start_workload(rec->evlist);
1368 if (opts->initial_delay) {
1369 usleep(opts->initial_delay * USEC_PER_MSEC);
1370 perf_evlist__enable(rec->evlist);
1373 trigger_ready(&auxtrace_snapshot_trigger);
1374 trigger_ready(&switch_output_trigger);
1375 perf_hooks__invoke_record_start();
/* ---- main read loop (loop construct elided in this extract) ---- */
1377 unsigned long long hits = rec->samples;
1380 * rec->evlist->bkw_mmap_state is possible to be
1381 * BKW_MMAP_EMPTY here: when done == true and
1382 * hits != rec->samples in previous round.
1384 * perf_evlist__toggle_bkw_mmap ensure we never
1385 * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING.
1387 if (trigger_is_hit(&switch_output_trigger) || done || draining)
1388 perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING);
1390 if (record__mmap_read_all(rec, false) < 0) {
1391 trigger_error(&auxtrace_snapshot_trigger);
1392 trigger_error(&switch_output_trigger);
/* SIGUSR2 requested an AUX snapshot: read it outside signal context. */
1397 if (auxtrace_record__snapshot_started) {
1398 auxtrace_record__snapshot_started = 0;
1399 if (!trigger_is_error(&auxtrace_snapshot_trigger))
1400 record__read_auxtrace_snapshot(rec);
1401 if (trigger_is_error(&auxtrace_snapshot_trigger)) {
1402 pr_err("AUX area tracing snapshot failed\n");
1408 if (trigger_is_hit(&switch_output_trigger)) {
1410 * If switch_output_trigger is hit, the data in
1411 * overwritable ring buffer should have been collected,
1412 * so bkw_mmap_state should be set to BKW_MMAP_EMPTY.
1414 * If SIGUSR2 raise after or during record__mmap_read_all(),
1415 * record__mmap_read_all() didn't collect data from
1416 * overwritable ring buffer. Read again.
1418 if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING)
1420 trigger_ready(&switch_output_trigger);
1423 * Reenable events in overwrite ring buffer after
1424 * record__mmap_read_all(): we should have collected
1427 perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING);
1430 fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
1433 fd = record__switch_output(rec, false);
1435 pr_err("Failed to switch to new file\n");
1436 trigger_error(&switch_output_trigger);
1441 /* re-arm the alarm */
1442 if (rec->switch_output.time)
1443 alarm(rec->switch_output.time);
/* No new samples this round: poll until data arrives or we are done. */
1446 if (hits == rec->samples) {
1447 if (done || draining)
1449 err = perf_evlist__poll(rec->evlist, -1);
1451 * Propagate error, only if there's any. Ignore positive
1452 * number of returned events and interrupt error.
1454 if (err > 0 || (err < 0 && errno == EINTR))
1458 if (perf_evlist__filter_pollfd(rec->evlist, POLLERR | POLLHUP) == 0)
1463 * When perf is starting the traced process, at the end events
1464 * die with the process and we wait for that. Thus no need to
1465 * disable events in this case.
1467 if (done && !disabled && !target__none(&opts->target)) {
1468 trigger_off(&auxtrace_snapshot_trigger);
1469 perf_evlist__disable(rec->evlist);
/* ---- teardown ---- */
1473 trigger_off(&auxtrace_snapshot_trigger);
1474 trigger_off(&switch_output_trigger);
1476 if (forks && workload_exec_errno) {
1477 char msg[STRERR_BUFSIZE];
1478 const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
1479 pr_err("Workload failed: %s\n", emsg);
1485 fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);
1487 if (target__none(&rec->opts.target))
1488 record__synthesize_workload(rec, true);
/* Final synchronous drain of all buffers and outstanding AIO writes. */
1491 record__mmap_read_all(rec, true);
1492 record__aio_mmap_read_sync(rec);
1497 if (!child_finished)
1498 kill(rec->evlist->workload.pid, SIGTERM);
1504 else if (WIFEXITED(exit_status))
1505 status = WEXITSTATUS(exit_status);
1506 else if (WIFSIGNALED(exit_status))
1507 signr = WTERMSIG(exit_status);
1511 record__synthesize(rec, true);
1512 /* this will be recalculated during process_buildids() */
1516 if (!rec->timestamp_filename) {
1517 record__finish_output(rec);
1519 fd = record__switch_output(rec, true);
1522 goto out_delete_session;
1527 perf_hooks__invoke_record_end();
1529 if (!err && !quiet) {
1531 const char *postfix = rec->timestamp_filename ?
1532 ".<timestamp>" : "";
1534 if (rec->samples && !rec->opts.full_auxtrace)
1535 scnprintf(samples, sizeof(samples),
1536 " (%" PRIu64 " samples)", rec->samples);
1540 fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s ]\n",
1541 perf_data__size(data) / 1024.0 / 1024.0,
1542 data->path, postfix, samples);
1546 perf_session__delete(session);
1548 if (!opts->no_bpf_event)
1549 perf_evlist__stop_sb_thread(sb_evlist);
1553 static void callchain_debug(struct callchain_param *callchain)
1555 static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };
1557 pr_debug("callchain: type %s\n", str[callchain->record_mode]);
1559 if (callchain->record_mode == CALLCHAIN_DWARF)
1560 pr_debug("callchain: stack dump size %d\n",
1561 callchain->dump_size);
1564 int record_opts__parse_callchain(struct record_opts *record,
1565 struct callchain_param *callchain,
1566 const char *arg, bool unset)
1569 callchain->enabled = !unset;
1571 /* --no-call-graph */
1573 callchain->record_mode = CALLCHAIN_NONE;
1574 pr_debug("callchain: disabled\n");
1578 ret = parse_callchain_record_opt(arg, callchain);
1580 /* Enable data address sampling for DWARF unwind. */
1581 if (callchain->record_mode == CALLCHAIN_DWARF)
1582 record->sample_address = true;
1583 callchain_debug(callchain);
1589 int record_parse_callchain_opt(const struct option *opt,
1593 return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
1596 int record_callchain_opt(const struct option *opt,
1597 const char *arg __maybe_unused,
1598 int unset __maybe_unused)
1600 struct callchain_param *callchain = opt->value;
1602 callchain->enabled = true;
1604 if (callchain->record_mode == CALLCHAIN_NONE)
1605 callchain->record_mode = CALLCHAIN_FP;
1607 callchain_debug(callchain);
1611 static int perf_record_config(const char *var, const char *value, void *cb)
1613 struct record *rec = cb;
1615 if (!strcmp(var, "record.build-id")) {
1616 if (!strcmp(value, "cache"))
1617 rec->no_buildid_cache = false;
1618 else if (!strcmp(value, "no-cache"))
1619 rec->no_buildid_cache = true;
1620 else if (!strcmp(value, "skip"))
1621 rec->no_buildid = true;
1626 if (!strcmp(var, "record.call-graph")) {
1627 var = "call-graph.record-mode";
1628 return perf_default_config(var, value, cb);
1630 #ifdef HAVE_AIO_SUPPORT
1631 if (!strcmp(var, "record.aio")) {
1632 rec->opts.nr_cblocks = strtol(value, NULL, 0);
1633 if (!rec->opts.nr_cblocks)
1634 rec->opts.nr_cblocks = nr_cblocks_default;
1641 struct clockid_map {
1646 #define CLOCKID_MAP(n, c) \
1647 { .name = n, .clockid = (c), }
1649 #define CLOCKID_END { .name = NULL, }
1653 * Add the missing ones, we need to build on many distros...
1655 #ifndef CLOCK_MONOTONIC_RAW
1656 #define CLOCK_MONOTONIC_RAW 4
1658 #ifndef CLOCK_BOOTTIME
1659 #define CLOCK_BOOTTIME 7
1662 #define CLOCK_TAI 11
1665 static const struct clockid_map clockids[] = {
1666 /* available for all events, NMI safe */
1667 CLOCKID_MAP("monotonic", CLOCK_MONOTONIC),
1668 CLOCKID_MAP("monotonic_raw", CLOCK_MONOTONIC_RAW),
1670 /* available for some events */
1671 CLOCKID_MAP("realtime", CLOCK_REALTIME),
1672 CLOCKID_MAP("boottime", CLOCK_BOOTTIME),
1673 CLOCKID_MAP("tai", CLOCK_TAI),
1675 /* available for the lazy */
1676 CLOCKID_MAP("mono", CLOCK_MONOTONIC),
1677 CLOCKID_MAP("raw", CLOCK_MONOTONIC_RAW),
1678 CLOCKID_MAP("real", CLOCK_REALTIME),
1679 CLOCKID_MAP("boot", CLOCK_BOOTTIME),
1684 static int get_clockid_res(clockid_t clk_id, u64 *res_ns)
1686 struct timespec res;
1689 if (!clock_getres(clk_id, &res))
1690 *res_ns = res.tv_nsec + res.tv_sec * NSEC_PER_SEC;
1692 pr_warning("WARNING: Failed to determine specified clock resolution.\n");
1697 static int parse_clockid(const struct option *opt, const char *str, int unset)
1699 struct record_opts *opts = (struct record_opts *)opt->value;
1700 const struct clockid_map *cm;
1701 const char *ostr = str;
1704 opts->use_clockid = 0;
1712 /* no setting it twice */
1713 if (opts->use_clockid)
1716 opts->use_clockid = true;
1718 /* if its a number, we're done */
1719 if (sscanf(str, "%d", &opts->clockid) == 1)
1720 return get_clockid_res(opts->clockid, &opts->clockid_res_ns);
1722 /* allow a "CLOCK_" prefix to the name */
1723 if (!strncasecmp(str, "CLOCK_", 6))
1726 for (cm = clockids; cm->name; cm++) {
1727 if (!strcasecmp(str, cm->name)) {
1728 opts->clockid = cm->clockid;
1729 return get_clockid_res(opts->clockid,
1730 &opts->clockid_res_ns);
1734 opts->use_clockid = false;
1735 ui__warning("unknown clockid %s, check man page\n", ostr);
1739 static int record__parse_affinity(const struct option *opt, const char *str, int unset)
1741 struct record_opts *opts = (struct record_opts *)opt->value;
1746 if (!strcasecmp(str, "node"))
1747 opts->affinity = PERF_AFFINITY_NODE;
1748 else if (!strcasecmp(str, "cpu"))
1749 opts->affinity = PERF_AFFINITY_CPU;
1754 static int record__parse_mmap_pages(const struct option *opt,
1756 int unset __maybe_unused)
1758 struct record_opts *opts = opt->value;
1760 unsigned int mmap_pages;
1775 ret = __perf_evlist__parse_mmap_pages(&mmap_pages, s);
1778 opts->mmap_pages = mmap_pages;
1786 ret = __perf_evlist__parse_mmap_pages(&mmap_pages, p + 1);
1790 opts->auxtrace_mmap_pages = mmap_pages;
1797 static void switch_output_size_warn(struct record *rec)
1799 u64 wakeup_size = perf_evlist__mmap_size(rec->opts.mmap_pages);
1800 struct switch_output *s = &rec->switch_output;
1804 if (s->size < wakeup_size) {
1807 unit_number__scnprintf(buf, sizeof(buf), wakeup_size);
1808 pr_warning("WARNING: switch-output data size lower than "
1809 "wakeup kernel buffer size (%s) "
1810 "expect bigger perf.data sizes\n", buf);
1814 static int switch_output_setup(struct record *rec)
1816 struct switch_output *s = &rec->switch_output;
1817 static struct parse_tag tags_size[] = {
1818 { .tag = 'B', .mult = 1 },
1819 { .tag = 'K', .mult = 1 << 10 },
1820 { .tag = 'M', .mult = 1 << 20 },
1821 { .tag = 'G', .mult = 1 << 30 },
1824 static struct parse_tag tags_time[] = {
1825 { .tag = 's', .mult = 1 },
1826 { .tag = 'm', .mult = 60 },
1827 { .tag = 'h', .mult = 60*60 },
1828 { .tag = 'd', .mult = 60*60*24 },
1836 if (!strcmp(s->str, "signal")) {
1838 pr_debug("switch-output with SIGUSR2 signal\n");
1842 val = parse_tag_value(s->str, tags_size);
1843 if (val != (unsigned long) -1) {
1845 pr_debug("switch-output with %s size threshold\n", s->str);
1849 val = parse_tag_value(s->str, tags_time);
1850 if (val != (unsigned long) -1) {
1852 pr_debug("switch-output with %s time threshold (%lu seconds)\n",
1860 rec->timestamp_filename = true;
1863 if (s->size && !rec->opts.no_buffering)
1864 switch_output_size_warn(rec);
1869 static const char * const __record_usage[] = {
1870 "perf record [<options>] [<command>]",
1871 "perf record [<options>] -- <command> [<options>]",
1874 const char * const *record_usage = __record_usage;
1877 * XXX Ideally would be local to cmd_record() and passed to a record__new
1878 * because we need to have access to it in record__exit, that is called
1879 * after cmd_record() exits, but since record_options need to be accessible to
1880 * builtin-script, leave it here.
1882 * At least we don't ouch it in all the other functions here directly.
1884 * Just say no to tons of global variables, sigh.
1886 static struct record record = {
1888 .sample_time = true,
1889 .mmap_pages = UINT_MAX,
1890 .user_freq = UINT_MAX,
1891 .user_interval = ULLONG_MAX,
1895 .default_per_cpu = true,
1897 .mmap_flush = MMAP_FLUSH_DEFAULT,
1900 .sample = process_sample_event,
1901 .fork = perf_event__process_fork,
1902 .exit = perf_event__process_exit,
1903 .comm = perf_event__process_comm,
1904 .namespaces = perf_event__process_namespaces,
1905 .mmap = perf_event__process_mmap,
1906 .mmap2 = perf_event__process_mmap2,
1907 .ordered_events = true,
/* Help text for --call-graph; frame-pointer is the default method. */
const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
	"\n\t\t\t\tDefault: fp";

/* --dry-run: parse and validate options, then exit without recording. */
static bool dry_run;

/*
 * XXX Will stay a global variable till we fix builtin-script.c to stop messing
 * with it and switch to use the library functions in perf_evlist that came
 * from builtin-record.c, i.e. use record_opts,
 * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
 */
/* Option table for 'perf record'; shared with builtin-script via record_options. */
static struct option __record_options[] = {
	OPT_CALLBACK('e', "event", &record.evlist, "event",
		     "event selector. use 'perf list' to list available events",
		     parse_events_option),
	OPT_CALLBACK(0, "filter", &record.evlist, "filter",
		     "event filter", parse_filter),
	OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
			   NULL, "don't record events from perf itself",
	OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
		    "record events on existing process id"),
	OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
		    "record events on existing thread id"),
	OPT_INTEGER('r', "realtime", &record.realtime_prio,
		    "collect data with this RT SCHED_FIFO priority"),
	OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
		    "collect data without buffering"),
	OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
		    "collect raw sample records from all opened counters"),
	OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
			    "system-wide collection from all CPUs"),
	OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
		    "list of cpus to monitor"),
	OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
	OPT_STRING('o', "output", &record.data.path, "file",
		    "output file name"),
	OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
			&record.opts.no_inherit_set,
			"child tasks do not inherit counters"),
	OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize,
		    "synthesize non-sample events at the end of output"),
	OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"),
	/*
	 * NOTE(review): the help string "record bpf events" is misleading —
	 * setting --no-bpf-event *disables* BPF event recording; upstream
	 * later reworded this. Confirm before relying on the help text.
	 */
	OPT_BOOLEAN(0, "no-bpf-event", &record.opts.no_bpf_event, "record bpf events"),
	OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq,
		    "Fail if the specified frequency can't be used"),
	OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'",
		     "profile at this frequency",
		     record__parse_freq),
	OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
		     "number of mmap data pages and AUX area tracing mmap pages",
		     record__parse_mmap_pages),
	OPT_CALLBACK(0, "mmap-flush", &record.opts, "number",
		     "Minimal number of bytes that is extracted from mmap data pages (default: 1)",
		     record__mmap_flush_parse),
	OPT_BOOLEAN(0, "group", &record.opts.group,
		    "put the counters into a counter group"),
	OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
			   NULL, "enables call-graph recording" ,
			   &record_callchain_opt),
	OPT_CALLBACK(0, "call-graph", &record.opts,
		     "record_mode[,record_size]", record_callchain_help,
		     &record_parse_callchain_opt),
	OPT_INCR('v', "verbose", &verbose,
		    "be more verbose (show counter open errors, etc)"),
	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
	OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
		    "per thread counts"),
	OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
	OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr,
		    "Record the sample physical addresses"),
	OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"),
	OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
			&record.opts.sample_time_set,
			"Record the sample timestamps"),
	OPT_BOOLEAN_SET('P', "period", &record.opts.period, &record.opts.period_set,
			"Record the sample period"),
	OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
	OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache,
			&record.no_buildid_cache_set,
			"do not update the buildid cache"),
	OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid,
			&record.no_buildid_set,
			"do not collect buildids in perf.data"),
	OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
		     "monitor event in cgroup name only",
	OPT_UINTEGER('D', "delay", &record.opts.initial_delay,
		  "ms to wait before starting measurement after program start"),
	OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
	OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
		     "branch any", "sample any taken branches",
		     parse_branch_stack),
	OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
		     "branch filter mask", "branch stack filter modes",
		     parse_branch_stack),
	OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
		    "sample by weight (on special events only)"),
	OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
		    "sample transaction flags (special events only)"),
	OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
		    "use per-thread mmaps"),
	OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
		    "sample selected machine registers on interrupt,"
		    " use -I ? to list register names", parse_regs),
	OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register",
		    "sample selected machine registers on interrupt,"
		    " use -I ? to list register names", parse_regs),
	OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
		    "Record running/enabled time of read (:S) events"),
	OPT_CALLBACK('k', "clockid", &record.opts,
	"clockid", "clockid to use for events, see clock_gettime()",
	OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
			  "opts", "AUX area tracing Snapshot Mode", ""),
	OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout,
			"per thread proc mmap processing timeout in ms"),
	OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces,
		    "Record namespaces events"),
	OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events,
		    "Record context switch events"),
	OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
			 "Configure all used events to run in kernel space.",
			 PARSE_OPT_EXCLUSIVE),
	OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user,
			 "Configure all used events to run in user space.",
			 PARSE_OPT_EXCLUSIVE),
	OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path",
		   "clang binary to use for compiling BPF scriptlets"),
	OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options",
		   "options passed to clang when compiling BPF scriptlets"),
	OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
		   "file", "vmlinux pathname"),
	OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
		    "Record build-id of all DSOs regardless of hits"),
	OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
		    "append timestamp to output filename"),
	OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary,
		    "Record timestamp boundary (time of first/last samples)"),
	OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str,
			  &record.switch_output.set, "signal or size[BKMG] or time[smhd]",
			  "Switch output when receiving SIGUSR2 (signal) or cross a size or time threshold",
	OPT_INTEGER(0, "switch-max-files", &record.switch_output.num_files,
		   "Limit number of switch output generated files"),
	OPT_BOOLEAN(0, "dry-run", &dry_run,
		    "Parse options then exit"),
#ifdef HAVE_AIO_SUPPORT
	OPT_CALLBACK_OPTARG(0, "aio", &record.opts,
		     &nr_cblocks_default, "n", "Use <n> control blocks in asynchronous trace writing mode (default: 1, max: 4)",
	OPT_CALLBACK(0, "affinity", &record.opts, "node|cpu",
		     "Set affinity mask of trace reading thread to NUMA node cpu mask or cpu of processed mmap buffer",
		     record__parse_affinity),
/* Exported pointer consumed by builtin-script.c. */
struct option *record_options = __record_options;
/*
 * Entry point for 'perf record': parse and validate options, apply
 * config-file settings, set up the evlist, auxtrace, build-id and
 * switch-output machinery, then hand off to __cmd_record().
 */
int cmd_record(int argc, const char **argv)
{
	struct record *rec = &record;
	char errbuf[BUFSIZ];

	setlocale(LC_ALL, "");

#ifndef HAVE_LIBBPF_SUPPORT
# define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c)
	/* Without libbpf, grey out the BPF-scriptlet related options. */
	set_nobuild('\0', "clang-path", true);
	set_nobuild('\0', "clang-opt", true);
#ifndef HAVE_BPF_PROLOGUE
# if !defined (HAVE_DWARF_SUPPORT)
# define REASON "NO_DWARF=1"
# elif !defined (HAVE_LIBBPF_SUPPORT)
# define REASON "NO_LIBBPF=1"
# define REASON "this architecture doesn't support BPF prologue"
# define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c)
	set_nobuild('\0', "vmlinux", true);
	/* Default: no affinity pinning for the mmap reading thread. */
	CPU_ZERO(&rec->affinity_mask);
	rec->opts.affinity = PERF_AFFINITY_SYS;
	rec->evlist = perf_evlist__new();
	if (rec->evlist == NULL)
	/* Apply 'record.*' settings from the user's perf config file. */
	err = perf_config(perf_record_config, rec);
	argc = parse_options(argc, argv, record_options, record_usage,
			    PARSE_OPT_STOP_AT_NON_OPTION);
	perf_quiet_option();
	/* Make system wide (-a) the default target. */
	if (!argc && target__none(&rec->opts.target))
		rec->opts.target.system_wide = true;
	if (nr_cgroups && !rec->opts.target.system_wide) {
		usage_with_options_msg(record_usage, record_options,
			"cgroup monitoring only available in system-wide mode");
	if (rec->opts.record_switch_events &&
	    !perf_can_record_switch_events()) {
		ui__error("kernel does not support recording context switch events\n");
		parse_options_usage(record_usage, record_options, "switch-events", 0);
	if (switch_output_setup(rec)) {
		parse_options_usage(record_usage, record_options, "switch-output", 0);
	/* Time-based switch-output is driven by SIGALRM. */
	if (rec->switch_output.time) {
		signal(SIGALRM, alarm_sig_handler);
		alarm(rec->switch_output.time);
	if (rec->switch_output.num_files) {
		rec->switch_output.filenames = calloc(sizeof(char *),
						      rec->switch_output.num_files);
		if (!rec->switch_output.filenames)
	/*
	 * Allow aliases to facilitate the lookup of symbols for address
	 * filters. Refer to auxtrace_parse_filters().
	 */
	symbol_conf.allow_aliases = true;
	err = record__auxtrace_init(rec);
	err = bpf__setup_stdout(rec->evlist);
		bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf));
		pr_err("ERROR: Setup BPF stdout failed: %s\n",
	/* Warn when kptr_restrict will make kernel symbols unresolvable. */
	if (symbol_conf.kptr_restrict && !perf_evlist__exclude_kernel(rec->evlist))
		"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
		"check /proc/sys/kernel/kptr_restrict.\n\n"
		"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
		"file is not found in the buildid cache or in the vmlinux path.\n\n"
		"Samples in kernel modules won't be resolved at all.\n\n"
		"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
		"even with a suitable vmlinux or kallsyms file.\n\n");
	if (rec->no_buildid_cache || rec->no_buildid) {
		disable_buildid_cache();
	} else if (rec->switch_output.enabled) {
		/*
		 * In 'perf record --switch-output', disable buildid
		 * generation by default to reduce data file switching
		 * overhead. Still generate buildid if they are required
		 *
		 * perf record --switch-output --no-no-buildid \
		 *              --no-no-buildid-cache
		 *
		 * Following code equals to:
		 *
		 * if ((rec->no_buildid || !rec->no_buildid_set) &&
		 *     (rec->no_buildid_cache || !rec->no_buildid_cache_set))
		 *         disable_buildid_cache();
		 */
		bool disable = true;

		if (rec->no_buildid_set && !rec->no_buildid)
		if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
			rec->no_buildid = true;
			rec->no_buildid_cache = true;
			disable_buildid_cache();
	/* Overwrite mode implies synthesizing non-sample events at the end. */
	if (record.opts.overwrite)
		record.opts.tail_synthesize = true;
	if (rec->evlist->nr_entries == 0 &&
	    __perf_evlist__add_default(rec->evlist, !record.opts.no_samples) < 0) {
		pr_err("Not enough memory for event selector list\n");
	if (rec->opts.target.tid && !rec->opts.no_inherit_set)
		rec->opts.no_inherit = true;
	err = target__validate(&rec->opts.target);
		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__warning("%s\n", errbuf);
	err = target__parse_uid(&rec->opts.target);
		int saved_errno = errno;

		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__error("%s", errbuf);
	/* Enable ignoring missing threads when -u/-p option is defined. */
	rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX || rec->opts.target.pid;
	if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
		usage_with_options(record_usage, record_options);
	err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
	/*
	 * We take all buildids when the file contains
	 * AUX area tracing data because we do not decode the
	 * trace because it would take too long.
	 */
	if (rec->opts.full_auxtrace)
		rec->buildid_all = true;
	if (record_opts__config(&rec->opts)) {
	/* Clamp the AIO control block count to the supported maximum. */
	if (rec->opts.nr_cblocks > nr_cblocks_max)
		rec->opts.nr_cblocks = nr_cblocks_max;
	pr_info("nr_cblocks: %d\n", rec->opts.nr_cblocks);
	pr_debug("affinity: %s\n", affinity_tags[rec->opts.affinity]);
	pr_debug("mmap flush: %d\n", rec->opts.mmap_flush);
	err = __cmd_record(&record, argc, argv);
	perf_evlist__delete(rec->evlist);
	auxtrace_record__free(rec->itr);
2289 static void snapshot_sig_handler(int sig __maybe_unused)
2291 struct record *rec = &record;
2293 if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
2294 trigger_hit(&auxtrace_snapshot_trigger);
2295 auxtrace_record__snapshot_started = 1;
2296 if (auxtrace_record__snapshot_start(record.itr))
2297 trigger_error(&auxtrace_snapshot_trigger);
2300 if (switch_output_signal(rec))
2301 trigger_hit(&switch_output_trigger);
2304 static void alarm_sig_handler(int sig __maybe_unused)
2306 struct record *rec = &record;
2308 if (switch_output_time(rec))
2309 trigger_hit(&switch_output_trigger);