// SPDX-License-Identifier: GPL-2.0
/*
 * Builtin record command: Record the profile of a workload
 * (or a CPU, or a PID) into the perf.data output file - for
 * later analysis via perf report.
 */

#include "util/build-id.h"
#include <subcmd/parse-options.h>
#include "util/parse-events.h"
#include "util/config.h"

#include "util/callchain.h"
#include "util/cgroup.h"
#include "util/header.h"
#include "util/event.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/debug.h"
#include "util/mmap.h"
#include "util/target.h"
#include "util/session.h"
#include "util/tool.h"
#include "util/symbol.h"
#include "util/record.h"
#include "util/cpumap.h"
#include "util/thread_map.h"
#include "util/data.h"
#include "util/perf_regs.h"
#include "util/auxtrace.h"

#include "util/parse-branch-options.h"
#include "util/parse-regs-options.h"
#include "util/perf_api_probe.h"
#include "util/llvm-utils.h"
#include "util/bpf-loader.h"
#include "util/trigger.h"
#include "util/perf-hooks.h"
#include "util/cpu-set-sched.h"
#include "util/synthetic-events.h"
#include "util/time-utils.h"
#include "util/units.h"
#include "util/bpf-event.h"
#include "util/util.h"

#include "util/clockid.h"
#include "util/pmu-hybrid.h"
#include "util/evlist-hybrid.h"

#ifdef HAVE_EVENTFD_SUPPORT
#include <sys/eventfd.h>
#endif
#include <sys/types.h>

#include <linux/err.h>
#include <linux/string.h>
#include <linux/time64.h>
#include <linux/zalloc.h>
#include <linux/bitmap.h>

struct switch_output {

struct thread_mask {
	struct mmap_cpu_mask	maps;
	struct mmap_cpu_mask	affinity;
};

struct record_thread {
	struct thread_mask	*mask;

	struct fdarray		pollfd;

	struct mmap		**overwrite_maps;

	unsigned long long	samples;
	unsigned long		waking;

	u64			bytes_transferred;
	u64			bytes_compressed;
};

static __thread struct record_thread *thread;

enum thread_msg {
	THREAD_MSG__UNDEFINED = 0,
	THREAD_MSG__READY,
	THREAD_MSG__MAX,
};

static const char *thread_msg_tags[THREAD_MSG__MAX] = {
	"undefined", "ready"
};

enum thread_spec {
	THREAD_SPEC__UNDEFINED = 0,
	THREAD_SPEC__CPU,
	THREAD_SPEC__CORE,
	THREAD_SPEC__PACKAGE,
	THREAD_SPEC__NUMA,
	THREAD_SPEC__USER,
	THREAD_SPEC__MAX,
};

static const char *thread_spec_tags[THREAD_SPEC__MAX] = {
	"undefined", "cpu", "core", "package", "numa", "user"
};

struct record {
	struct perf_tool	tool;
	struct record_opts	opts;
	u64			bytes_written;
	struct perf_data	data;
	struct auxtrace_record	*itr;
	struct evlist		*evlist;
	struct perf_session	*session;
	struct evlist		*sb_evlist;
	pthread_t		thread_id;
	int			realtime_prio;
	bool			switch_output_event_set;
	bool			no_buildid;
	bool			no_buildid_cache;
	bool			no_buildid_cache_set;
	bool			buildid_all;
	bool			timestamp_filename;
	bool			timestamp_boundary;
	struct switch_output	switch_output;
	unsigned long long	samples;
	unsigned long		output_max_size;	/* = 0: unlimited */
	struct perf_debuginfod	debuginfod;
	int			nr_threads;
	struct thread_mask	*thread_masks;
	struct record_thread	*thread_data;
};

static volatile int done;

static volatile int auxtrace_record__snapshot_started;
static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
static DEFINE_TRIGGER(switch_output_trigger);

static const char *affinity_tags[PERF_AFFINITY_MAX] = {
	"SYS", "NODE", "CPU"
};

static inline pid_t gettid(void)
{
	return (pid_t)syscall(__NR_gettid);
}

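/*
 * Illustration (not part of perf): older glibc (before 2.30) ships no
 * gettid() wrapper, which is why the syscall-based fallback above exists.
 * A minimal standalone sketch of the same technique; it is guarded out of
 * this file with #if 0 and can be compiled separately.
 */
#if 0
#define _GNU_SOURCE
#include <stdio.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <unistd.h>

int main(void)
{
	/* Same idea as the wrapper above: a raw syscall, no caching. */
	pid_t tid = (pid_t)syscall(SYS_gettid);

	printf("pid=%d tid=%d\n", (int)getpid(), (int)tid);
	return 0;
}
#endif
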
static int record__threads_enabled(struct record *rec)
{
	return rec->opts.threads_spec;
}

static bool switch_output_signal(struct record *rec)
{
	return rec->switch_output.signal &&
	       trigger_is_ready(&switch_output_trigger);
}

static bool switch_output_size(struct record *rec)
{
	return rec->switch_output.size &&
	       trigger_is_ready(&switch_output_trigger) &&
	       (rec->bytes_written >= rec->switch_output.size);
}

static bool switch_output_time(struct record *rec)
{
	return rec->switch_output.time &&
	       trigger_is_ready(&switch_output_trigger);
}

static u64 record__bytes_written(struct record *rec)
{
	int t;
	u64 bytes_written = rec->bytes_written;
	struct record_thread *thread_data = rec->thread_data;

	for (t = 0; t < rec->nr_threads; t++)
		bytes_written += thread_data[t].bytes_written;

	return bytes_written;
}

static bool record__output_max_size_exceeded(struct record *rec)
{
	return rec->output_max_size &&
	       (record__bytes_written(rec) >= rec->output_max_size);
}

static int record__write(struct record *rec, struct mmap *map __maybe_unused,
			 void *bf, size_t size)
{
	struct perf_data_file *file = &rec->session->data->file;

	if (map && map->file)
		file = map->file;

	if (perf_data_file__write(file, bf, size) < 0) {
		pr_err("failed to write perf data, error: %m\n");
		return -1;
	}

	if (map && map->file)
		thread->bytes_written += size;
	else
		rec->bytes_written += size;

	if (record__output_max_size_exceeded(rec) && !done) {
		fprintf(stderr, "[ perf record: perf size limit reached (%" PRIu64 " KB),"
				" stopping session ]\n",
				record__bytes_written(rec) >> 10);
		done = 1;
	}

	if (switch_output_size(rec))
		trigger_hit(&switch_output_trigger);

	return 0;
}

static int record__aio_enabled(struct record *rec);
static int record__comp_enabled(struct record *rec);
static size_t zstd_compress(struct perf_session *session, struct mmap *map,
			    void *dst, size_t dst_size, void *src, size_t src_size);

#ifdef HAVE_AIO_SUPPORT
static int record__aio_write(struct aiocb *cblock, int trace_fd,
			     void *buf, size_t size, off_t off)
{
	int rc;

	cblock->aio_fildes = trace_fd;
	cblock->aio_buf = buf;
	cblock->aio_nbytes = size;
	cblock->aio_offset = off;
	cblock->aio_sigevent.sigev_notify = SIGEV_NONE;

	do {
		rc = aio_write(cblock);
		if (rc == 0) {
			break;
		} else if (errno != EAGAIN) {
			cblock->aio_fildes = -1;
			pr_err("failed to queue perf data, error: %m\n");
			break;
		}
	} while (1);

	return rc;
}

static int record__aio_complete(struct mmap *md, struct aiocb *cblock)
{
	void *rem_buf;
	off_t rem_off;
	size_t rem_size;
	int aio_errno;
	ssize_t aio_ret, written;

	aio_errno = aio_error(cblock);
	if (aio_errno == EINPROGRESS)
		return 0;

	written = aio_ret = aio_return(cblock);
	if (aio_ret < 0) {
		if (aio_errno != EINTR)
			pr_err("failed to write perf data, error: %m\n");
		written = 0;
	}

	rem_size = cblock->aio_nbytes - written;

	if (rem_size == 0) {
		cblock->aio_fildes = -1;
		/*
		 * md->refcount is incremented in record__aio_pushfn() for
		 * every aio write request started in record__aio_push(), so
		 * decrement it because the request is now complete.
		 */
		perf_mmap__put(&md->core);
	} else {
		/*
		 * An aio write request may require a restart with the
		 * remainder if the kernel didn't write the whole
		 * chunk at once.
		 */
		rem_off = cblock->aio_offset + written;
		rem_buf = (void *)(cblock->aio_buf + written);
		record__aio_write(cblock, cblock->aio_fildes,
				  rem_buf, rem_size, rem_off);
	}

static int record__aio_sync(struct mmap *md, bool sync_all)
{
	struct aiocb **aiocb = md->aio.aiocb;
	struct aiocb *cblocks = md->aio.cblocks;
	struct timespec timeout = { 0, 1000 * 1000 * 1 }; /* 1ms */
	int i;

	for (i = 0; i < md->aio.nr_cblocks; ++i) {
		if (cblocks[i].aio_fildes == -1 || record__aio_complete(md, &cblocks[i])) {
			if (sync_all)
				aiocb[i] = NULL;
			else
				return i;
		} else {
			/*
			 * A started aio write is not complete yet,
			 * so it has to be waited on before the
			 * next allocation.
			 */
			aiocb[i] = &cblocks[i];
		}
	}

	while (aio_suspend((const struct aiocb **)aiocb, md->aio.nr_cblocks, &timeout)) {
		if (!(errno == EAGAIN || errno == EINTR))
			pr_err("failed to sync perf data, error: %m\n");
	}

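/*
 * Illustration (not part of perf): the functions above drive POSIX AIO by
 * hand with aio_write()/aio_error()/aio_return()/aio_suspend(). A minimal
 * standalone sketch of that completion loop, assuming a single request and
 * SIGEV_NONE polling; guarded out with #if 0, link with -lrt on older glibc.
 */
#if 0
#include <aio.h>
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	char buf[] = "hello, aio\n";
	struct aiocb cb;
	const struct aiocb *list[1] = { &cb };
	int fd = open("/tmp/aio-demo", O_CREAT | O_WRONLY | O_TRUNC, 0644);

	if (fd < 0)
		return 1;

	memset(&cb, 0, sizeof(cb));
	cb.aio_fildes = fd;
	cb.aio_buf = buf;
	cb.aio_nbytes = sizeof(buf) - 1;
	cb.aio_offset = 0;
	cb.aio_sigevent.sigev_notify = SIGEV_NONE; /* poll, don't signal */

	if (aio_write(&cb))
		return 1;

	/* Like record__aio_sync(): wait until the request settles. */
	while (aio_error(&cb) == EINPROGRESS)
		aio_suspend(list, 1, NULL);

	printf("wrote %zd bytes\n", aio_return(&cb));
	close(fd);
	return 0;
}
#endif
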
static int record__aio_pushfn(struct mmap *map, void *to, void *buf, size_t size)
{
	struct record_aio *aio = to;

	/*
	 * map->core.base data pointed to by buf is copied into a free
	 * map->aio.data[] buffer to release space in the kernel buffer as
	 * fast as possible, calling perf_mmap__consume() from the
	 * perf_mmap__push() function.
	 *
	 * That lets the kernel proceed with storing more profiling data into
	 * the kernel buffer earlier than other per-cpu kernel buffers are handled.
	 *
	 * Copying can be done in two steps in case the chunk of profiling data
	 * crosses the upper bound of the kernel buffer. In this case we first move
	 * part of the data from map->start till the upper bound and then the
	 * remainder from the beginning of the kernel buffer till the end of
	 * the data chunk.
	 */

	if (record__comp_enabled(aio->rec)) {
		size = zstd_compress(aio->rec->session, NULL, aio->data + aio->size,
				     mmap__mmap_len(map) - aio->size,
				     buf, size);
	} else {
		memcpy(aio->data + aio->size, buf, size);
	}

	if (!aio->size) {
		/*
		 * Increment map->refcount to guard the map->aio.data[] buffer
		 * from premature deallocation, because the map object can be
		 * released earlier than the aio write request started on
		 * the map->aio.data[] buffer is complete.
		 *
		 * perf_mmap__put() is done at record__aio_complete()
		 * after the started aio request completes, or at
		 * record__aio_push() if the request failed to start.
		 */
		perf_mmap__get(&map->core);
	}

static int record__aio_push(struct record *rec, struct mmap *map, off_t *off)
{
	int ret, idx;
	int trace_fd = rec->session->data->file.fd;
	struct record_aio aio = { .rec = rec, .size = 0 };

	/*
	 * Call record__aio_sync() to wait until a map->aio.data[] buffer
	 * becomes available after the previous aio write operation.
	 */
	idx = record__aio_sync(map, false);
	aio.data = map->aio.data[idx];

	ret = perf_mmap__push(map, &aio, record__aio_pushfn);
	if (ret != 0) /* ret > 0 - no data, ret < 0 - error */
		return ret;

	ret = record__aio_write(&(map->aio.cblocks[idx]), trace_fd, aio.data, aio.size, *off);
	if (!ret) {
		*off += aio.size;
		rec->bytes_written += aio.size;
		if (switch_output_size(rec))
			trigger_hit(&switch_output_trigger);
	} else {
		/*
		 * Decrement map->refcount incremented in record__aio_pushfn()
		 * back if the record__aio_write() operation failed to start;
		 * otherwise map->refcount is decremented in record__aio_complete()
		 * after the aio write operation finishes successfully.
		 */
		perf_mmap__put(&map->core);
	}

	return ret;
}

static off_t record__aio_get_pos(int trace_fd)
{
	return lseek(trace_fd, 0, SEEK_CUR);
}

static void record__aio_set_pos(int trace_fd, off_t pos)
{
	lseek(trace_fd, pos, SEEK_SET);
}

static void record__aio_mmap_read_sync(struct record *rec)
{
	int i;
	struct evlist *evlist = rec->evlist;
	struct mmap *maps = evlist->mmap;

	if (!record__aio_enabled(rec))
		return;

	for (i = 0; i < evlist->core.nr_mmaps; i++) {
		struct mmap *map = &maps[i];

		if (map->core.base)
			record__aio_sync(map, true);
	}
}

static int nr_cblocks_default = 1;
static int nr_cblocks_max = 4;

static int record__aio_parse(const struct option *opt,
			     const char *str,
			     int unset)
{
	struct record_opts *opts = (struct record_opts *)opt->value;

	if (unset) {
		opts->nr_cblocks = 0;
	} else {
		if (str)
			opts->nr_cblocks = strtol(str, NULL, 0);
		if (!opts->nr_cblocks)
			opts->nr_cblocks = nr_cblocks_default;
	}

	return 0;
}

#else /* HAVE_AIO_SUPPORT */
static int nr_cblocks_max = 0;

static int record__aio_push(struct record *rec __maybe_unused, struct mmap *map __maybe_unused,
			    off_t *off __maybe_unused)
{
	return -1;
}

static off_t record__aio_get_pos(int trace_fd __maybe_unused)
{
	return -1;
}

static void record__aio_set_pos(int trace_fd __maybe_unused, off_t pos __maybe_unused)
{
}

static void record__aio_mmap_read_sync(struct record *rec __maybe_unused)
{
}
#endif

static int record__aio_enabled(struct record *rec)
{
	return rec->opts.nr_cblocks > 0;
}

#define MMAP_FLUSH_DEFAULT 1

static int record__mmap_flush_parse(const struct option *opt,
				    const char *str,
				    int unset)
{
	int flush_max;
	struct record_opts *opts = (struct record_opts *)opt->value;
	static struct parse_tag tags[] = {
		{ .tag  = 'B', .mult = 1       },
		{ .tag  = 'K', .mult = 1 << 10 },
		{ .tag  = 'M', .mult = 1 << 20 },
		{ .tag  = 'G', .mult = 1 << 30 },
		{ .tag  = 0 },
	};

	if (unset)
		return 0;

	if (str) {
		opts->mmap_flush = parse_tag_value(str, tags);
		if (opts->mmap_flush == (int)-1)
			opts->mmap_flush = strtol(str, NULL, 0);
	}

	if (!opts->mmap_flush)
		opts->mmap_flush = MMAP_FLUSH_DEFAULT;

	flush_max = evlist__mmap_size(opts->mmap_pages);
	flush_max /= 4;
	if (opts->mmap_flush > flush_max)
		opts->mmap_flush = flush_max;

	return 0;
}

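/*
 * Illustration (not part of perf): record__mmap_flush_parse() accepts
 * B/K/M/G suffixes via parse_tag_value() and falls back to strtol() for
 * bare numbers. A hypothetical standalone equivalent of that parsing,
 * guarded out with #if 0; parse_size_demo() is an invented name.
 */
#if 0
#include <stdio.h>
#include <stdlib.h>

static long long parse_size_demo(const char *str)
{
	char *end;
	long long v = strtoll(str, &end, 0);

	switch (*end) {
	case 'K': return v << 10;
	case 'M': return v << 20;
	case 'G': return v << 30;
	case 'B':
	default:  return v; /* bare number: the strtol() fallback above */
	}
}

int main(void)
{
	printf("%lld\n", parse_size_demo("16M")); /* 16777216 */
	return 0;
}
#endif
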
#ifdef HAVE_ZSTD_SUPPORT
static unsigned int comp_level_default = 1;

static int record__parse_comp_level(const struct option *opt, const char *str, int unset)
{
	struct record_opts *opts = opt->value;

	if (unset) {
		opts->comp_level = 0;
	} else {
		if (str)
			opts->comp_level = strtol(str, NULL, 0);
		if (!opts->comp_level)
			opts->comp_level = comp_level_default;
	}

	return 0;
}
#endif
static unsigned int comp_level_max = 22;

static int record__comp_enabled(struct record *rec)
{
	return rec->opts.comp_level > 0;
}

static int process_synthesized_event(struct perf_tool *tool,
				     union perf_event *event,
				     struct perf_sample *sample __maybe_unused,
				     struct machine *machine __maybe_unused)
{
	struct record *rec = container_of(tool, struct record, tool);

	return record__write(rec, NULL, event, event->header.size);
}

static int process_locked_synthesized_event(struct perf_tool *tool,
					    union perf_event *event,
					    struct perf_sample *sample __maybe_unused,
					    struct machine *machine __maybe_unused)
{
	static pthread_mutex_t synth_lock = PTHREAD_MUTEX_INITIALIZER;
	int ret;

	pthread_mutex_lock(&synth_lock);
	ret = process_synthesized_event(tool, event, sample, machine);
	pthread_mutex_unlock(&synth_lock);

	return ret;
}

static int record__pushfn(struct mmap *map, void *to, void *bf, size_t size)
{
	struct record *rec = to;

	if (record__comp_enabled(rec)) {
		size = zstd_compress(rec->session, map, map->data, mmap__mmap_len(map), bf, size);
		bf   = map->data;
	}

	thread->samples++;
	return record__write(rec, map, bf, size);
}

static volatile int signr = -1;
static volatile int child_finished;
#ifdef HAVE_EVENTFD_SUPPORT
static int done_fd = -1;
#endif

static void sig_handler(int sig)
{
	if (sig == SIGCHLD)
		child_finished = 1;
	else
		signr = sig;

	done = 1;
#ifdef HAVE_EVENTFD_SUPPORT
{
	u64 tmp = 1;
	/*
	 * It is possible for this signal handler to run after done is checked
	 * in the main loop, but before the perf counter fds are polled. If this
	 * happens, the poll() will continue to wait even though done is set,
	 * and will only break out if either another signal is received, or the
	 * counters are ready for read. To ensure the poll() doesn't sleep when
	 * done is set, use an eventfd (done_fd) to wake up the poll().
	 */
	if (write(done_fd, &tmp, sizeof(tmp)) < 0)
		pr_err("failed to signal wakeup fd, error: %m\n");
}
#endif // HAVE_EVENTFD_SUPPORT
}

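/*
 * Illustration (not part of perf): the comment above describes the classic
 * lost-wakeup race between a signal handler and poll(). A minimal
 * standalone sketch of the eventfd trick, with the handler side faked by
 * a direct write(); guarded out with #if 0.
 */
#if 0
#include <poll.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/eventfd.h>
#include <unistd.h>

int main(void)
{
	int efd = eventfd(0, EFD_NONBLOCK);
	uint64_t tmp = 1;
	struct pollfd pfd = { .fd = efd, .events = POLLIN };

	if (efd < 0)
		return 1;

	/* Handler side: a single 8-byte write wakes the poller up. */
	write(efd, &tmp, sizeof(tmp));

	/* Main-loop side: poll() returns immediately instead of sleeping. */
	if (poll(&pfd, 1, -1) == 1 && (pfd.revents & POLLIN)) {
		read(efd, &tmp, sizeof(tmp)); /* drain the counter */
		printf("woken, counter=%llu\n", (unsigned long long)tmp);
	}
	close(efd);
	return 0;
}
#endif
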
static void sigsegv_handler(int sig)
{
	perf_hooks__recover();
	sighandler_dump_stack(sig);
}

static void record__sig_exit(void)
{
	if (signr == -1)
		return;

	signal(signr, SIG_DFL);
	raise(signr);
}

#ifdef HAVE_AUXTRACE_SUPPORT

static int record__process_auxtrace(struct perf_tool *tool,
				    struct mmap *map,
				    union perf_event *event, void *data1,
				    size_t len1, void *data2, size_t len2)
{
	struct record *rec = container_of(tool, struct record, tool);
	struct perf_data *data = &rec->data;
	size_t padding;
	u8 pad[8] = {0};

	if (!perf_data__is_pipe(data) && perf_data__is_single_file(data)) {
		off_t file_offset;
		int fd = perf_data__fd(data);
		int err;

		file_offset = lseek(fd, 0, SEEK_CUR);
		if (file_offset == -1)
			return -1;
		err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
						     event, file_offset);
		if (err)
			return err;
	}

	/* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
	padding = (len1 + len2) & 7;
	if (padding)
		padding = 8 - padding;

	record__write(rec, map, event, event->header.size);
	record__write(rec, map, data1, len1);
	if (len2)
		record__write(rec, map, data2, len2);
	record__write(rec, map, &pad, padding);

	return 0;
}

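/*
 * Illustration (not part of perf): the padding computed above rounds
 * len1 + len2 up to the next 8-byte boundary. A standalone sketch of
 * that arithmetic, guarded out with #if 0.
 */
#if 0
#include <stdio.h>

int main(void)
{
	size_t len;

	for (len = 0; len < 10; len++) {
		size_t padding = len & 7;

		if (padding)
			padding = 8 - padding;
		printf("len=%zu pad=%zu total=%zu\n", len, padding, len + padding);
	}
	return 0;
}
#endif
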
static int record__auxtrace_mmap_read(struct record *rec,
				      struct mmap *map)
{
	int ret;

	ret = auxtrace_mmap__read(map, rec->itr, &rec->tool,
				  record__process_auxtrace);
	if (ret < 0)
		return ret;

	if (ret)
		rec->samples++;

	return 0;
}

static int record__auxtrace_mmap_read_snapshot(struct record *rec,
					       struct mmap *map)
{
	int ret;

	ret = auxtrace_mmap__read_snapshot(map, rec->itr, &rec->tool,
					   record__process_auxtrace,
					   rec->opts.auxtrace_snapshot_size);
	if (ret < 0)
		return ret;

	if (ret)
		rec->samples++;

	return 0;
}

static int record__auxtrace_read_snapshot_all(struct record *rec)
{
	int i;
	int rc = 0;

	for (i = 0; i < rec->evlist->core.nr_mmaps; i++) {
		struct mmap *map = &rec->evlist->mmap[i];

		if (!map->auxtrace_mmap.base)
			continue;

		if (record__auxtrace_mmap_read_snapshot(rec, map) != 0) {
			rc = -1;
			goto out;
		}
	}
out:
	return rc;
}

static void record__read_auxtrace_snapshot(struct record *rec, bool on_exit)
{
	pr_debug("Recording AUX area tracing snapshot\n");
	if (record__auxtrace_read_snapshot_all(rec) < 0) {
		trigger_error(&auxtrace_snapshot_trigger);
	} else {
		if (auxtrace_record__snapshot_finish(rec->itr, on_exit))
			trigger_error(&auxtrace_snapshot_trigger);
		else
			trigger_ready(&auxtrace_snapshot_trigger);
	}
}

static int record__auxtrace_snapshot_exit(struct record *rec)
{
	if (trigger_is_error(&auxtrace_snapshot_trigger))
		return 0;

	if (!auxtrace_record__snapshot_started &&
	    auxtrace_record__snapshot_start(rec->itr))
		return -1;

	record__read_auxtrace_snapshot(rec, true);
	if (trigger_is_error(&auxtrace_snapshot_trigger))
		return -1;

	return 0;
}

static int record__auxtrace_init(struct record *rec)
{
	int err;

	if ((rec->opts.auxtrace_snapshot_opts || rec->opts.auxtrace_sample_opts)
	    && record__threads_enabled(rec)) {
		pr_err("AUX area tracing options are not available in parallel streaming mode.\n");
		return -EINVAL;
	}

	rec->itr = auxtrace_record__init(rec->evlist, &err);
	if (err)
		return err;

	err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
					      rec->opts.auxtrace_snapshot_opts);
	if (err)
		return err;

	err = auxtrace_parse_sample_options(rec->itr, rec->evlist, &rec->opts,
					    rec->opts.auxtrace_sample_opts);
	if (err)
		return err;

	auxtrace_regroup_aux_output(rec->evlist);

	return auxtrace_parse_filters(rec->evlist);
}

#else

static inline
int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
			       struct mmap *map __maybe_unused)
{
	return 0;
}

static inline
void record__read_auxtrace_snapshot(struct record *rec __maybe_unused,
				    bool on_exit __maybe_unused)
{
}

static inline
int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
{
	return 0;
}

static inline
int record__auxtrace_snapshot_exit(struct record *rec __maybe_unused)
{
	return 0;
}

static int record__auxtrace_init(struct record *rec __maybe_unused)
{
	return 0;
}

#endif

static int record__config_text_poke(struct evlist *evlist)
{
	struct evsel *evsel;
	int err;

	/* Nothing to do if text poke is already configured */
	evlist__for_each_entry(evlist, evsel) {
		if (evsel->core.attr.text_poke)
			return 0;
	}

	err = parse_events(evlist, "dummy:u", NULL);
	if (err)
		return err;

	evsel = evlist__last(evlist);

	evsel->core.attr.freq = 0;
	evsel->core.attr.sample_period = 1;
	evsel->core.attr.text_poke = 1;
	evsel->core.attr.ksymbol = 1;

	evsel->core.system_wide = true;
	evsel->no_aux_samples = true;
	evsel->immediate = true;

	/* Text poke must be collected on all CPUs */
	perf_cpu_map__put(evsel->core.own_cpus);
	evsel->core.own_cpus = perf_cpu_map__new(NULL);
	perf_cpu_map__put(evsel->core.cpus);
	evsel->core.cpus = perf_cpu_map__get(evsel->core.own_cpus);

	evsel__set_sample_bit(evsel, TIME);

	return 0;
}

static bool record__kcore_readable(struct machine *machine)
{
	char kcore[PATH_MAX];
	int fd;

	scnprintf(kcore, sizeof(kcore), "%s/proc/kcore", machine->root_dir);

	fd = open(kcore, O_RDONLY);
	if (fd < 0)
		return false;

	close(fd);

	return true;
}

static int record__kcore_copy(struct machine *machine, struct perf_data *data)
{
	char from_dir[PATH_MAX];
	char kcore_dir[PATH_MAX];
	int ret;

	snprintf(from_dir, sizeof(from_dir), "%s/proc", machine->root_dir);

	ret = perf_data__make_kcore_dir(data, kcore_dir, sizeof(kcore_dir));
	if (ret)
		return ret;

	return kcore_copy(from_dir, kcore_dir);
}

static void record__thread_data_init_pipes(struct record_thread *thread_data)
{
	thread_data->pipes.msg[0] = -1;
	thread_data->pipes.msg[1] = -1;
	thread_data->pipes.ack[0] = -1;
	thread_data->pipes.ack[1] = -1;
}

static int record__thread_data_open_pipes(struct record_thread *thread_data)
{
	if (pipe(thread_data->pipes.msg))
		return -EINVAL;

	if (pipe(thread_data->pipes.ack)) {
		close(thread_data->pipes.msg[0]);
		thread_data->pipes.msg[0] = -1;
		close(thread_data->pipes.msg[1]);
		thread_data->pipes.msg[1] = -1;
		return -EINVAL;
	}

	pr_debug2("thread_data[%p]: msg=[%d,%d], ack=[%d,%d]\n", thread_data,
		 thread_data->pipes.msg[0], thread_data->pipes.msg[1],
		 thread_data->pipes.ack[0], thread_data->pipes.ack[1]);

	return 0;
}

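/*
 * Illustration (not part of perf): each worker thread gets a msg pipe and
 * an ack pipe, and start/termination is a small read()/write() handshake
 * over them (see record__thread() and record__terminate_thread() below).
 * A minimal standalone sketch with both ends faked in one process;
 * guarded out with #if 0, MSG_READY is an invented name.
 */
#if 0
#include <stdio.h>
#include <unistd.h>

enum demo_msg { MSG_READY = 1 };

int main(void)
{
	int msg[2], ack[2];
	enum demo_msg m = MSG_READY, got = 0;

	if (pipe(msg) || pipe(ack))
		return 1;

	/* Worker side would write() the ack after starting up. */
	write(ack[1], &m, sizeof(m));
	/* Main-thread side blocks until the notification arrives. */
	read(ack[0], &got, sizeof(got));
	printf("handshake: %s\n", got == MSG_READY ? "ready" : "?");

	close(msg[0]); close(msg[1]);
	close(ack[0]); close(ack[1]);
	return 0;
}
#endif
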
static void record__thread_data_close_pipes(struct record_thread *thread_data)
{
	if (thread_data->pipes.msg[0] != -1) {
		close(thread_data->pipes.msg[0]);
		thread_data->pipes.msg[0] = -1;
	}
	if (thread_data->pipes.msg[1] != -1) {
		close(thread_data->pipes.msg[1]);
		thread_data->pipes.msg[1] = -1;
	}
	if (thread_data->pipes.ack[0] != -1) {
		close(thread_data->pipes.ack[0]);
		thread_data->pipes.ack[0] = -1;
	}
	if (thread_data->pipes.ack[1] != -1) {
		close(thread_data->pipes.ack[1]);
		thread_data->pipes.ack[1] = -1;
	}
}

static int record__thread_data_init_maps(struct record_thread *thread_data, struct evlist *evlist)
{
	int m, tm, nr_mmaps = evlist->core.nr_mmaps;
	struct mmap *mmap = evlist->mmap;
	struct mmap *overwrite_mmap = evlist->overwrite_mmap;
	struct perf_cpu_map *cpus = evlist->core.user_requested_cpus;

	thread_data->nr_mmaps = bitmap_weight(thread_data->mask->maps.bits,
					      thread_data->mask->maps.nbits);

	thread_data->maps = zalloc(thread_data->nr_mmaps * sizeof(struct mmap *));
	if (!thread_data->maps)
		return -ENOMEM;

	if (overwrite_mmap) {
		thread_data->overwrite_maps = zalloc(thread_data->nr_mmaps * sizeof(struct mmap *));
		if (!thread_data->overwrite_maps) {
			zfree(&thread_data->maps);
			return -ENOMEM;
		}
	}

	pr_debug2("thread_data[%p]: nr_mmaps=%d, maps=%p, ow_maps=%p\n", thread_data,
		  thread_data->nr_mmaps, thread_data->maps, thread_data->overwrite_maps);

	for (m = 0, tm = 0; m < nr_mmaps && tm < thread_data->nr_mmaps; m++) {
		if (test_bit(cpus->map[m].cpu, thread_data->mask->maps.bits)) {
			if (thread_data->maps) {
				thread_data->maps[tm] = &mmap[m];
				pr_debug2("thread_data[%p]: cpu%d: maps[%d] -> mmap[%d]\n",
					  thread_data, cpus->map[m].cpu, tm, m);
			}
			if (thread_data->overwrite_maps) {
				thread_data->overwrite_maps[tm] = &overwrite_mmap[m];
				pr_debug2("thread_data[%p]: cpu%d: ow_maps[%d] -> ow_mmap[%d]\n",
					  thread_data, cpus->map[m].cpu, tm, m);
			}
			tm++;
		}
	}

	return 0;
}

static int record__thread_data_init_pollfd(struct record_thread *thread_data, struct evlist *evlist)
{
	int f, tm, pos;
	struct mmap *map, *overwrite_map;

	fdarray__init(&thread_data->pollfd, 64);

	for (tm = 0; tm < thread_data->nr_mmaps; tm++) {
		map = thread_data->maps ? thread_data->maps[tm] : NULL;
		overwrite_map = thread_data->overwrite_maps ?
				thread_data->overwrite_maps[tm] : NULL;

		for (f = 0; f < evlist->core.pollfd.nr; f++) {
			void *ptr = evlist->core.pollfd.priv[f].ptr;

			if ((map && ptr == map) || (overwrite_map && ptr == overwrite_map)) {
				pos = fdarray__dup_entry_from(&thread_data->pollfd, f,
							      &evlist->core.pollfd);
				if (pos < 0)
					return pos;
				pr_debug2("thread_data[%p]: pollfd[%d] <- event_fd=%d\n",
					  thread_data, pos, evlist->core.pollfd.entries[f].fd);
			}
		}
	}

	return 0;
}

static void record__free_thread_data(struct record *rec)
{
	int t;
	struct record_thread *thread_data = rec->thread_data;

	if (thread_data == NULL)
		return;

	for (t = 0; t < rec->nr_threads; t++) {
		record__thread_data_close_pipes(&thread_data[t]);
		zfree(&thread_data[t].maps);
		zfree(&thread_data[t].overwrite_maps);
		fdarray__exit(&thread_data[t].pollfd);
	}

	zfree(&rec->thread_data);
}

static int record__alloc_thread_data(struct record *rec, struct evlist *evlist)
{
	int t, ret;
	struct record_thread *thread_data;

	rec->thread_data = zalloc(rec->nr_threads * sizeof(*(rec->thread_data)));
	if (!rec->thread_data) {
		pr_err("Failed to allocate thread data\n");
		return -ENOMEM;
	}
	thread_data = rec->thread_data;

	for (t = 0; t < rec->nr_threads; t++)
		record__thread_data_init_pipes(&thread_data[t]);

	for (t = 0; t < rec->nr_threads; t++) {
		thread_data[t].rec = rec;
		thread_data[t].mask = &rec->thread_masks[t];
		ret = record__thread_data_init_maps(&thread_data[t], evlist);
		if (ret) {
			pr_err("Failed to initialize thread[%d] maps\n", t);
			goto out_free;
		}
		ret = record__thread_data_init_pollfd(&thread_data[t], evlist);
		if (ret) {
			pr_err("Failed to initialize thread[%d] pollfd\n", t);
			goto out_free;
		}
		if (t) {
			thread_data[t].tid = -1;
			ret = record__thread_data_open_pipes(&thread_data[t]);
			if (ret) {
				pr_err("Failed to open thread[%d] communication pipes\n", t);
				goto out_free;
			}
			ret = fdarray__add(&thread_data[t].pollfd, thread_data[t].pipes.msg[0],
					   POLLIN | POLLERR | POLLHUP, fdarray_flag__nonfilterable);
			if (ret < 0) {
				pr_err("Failed to add descriptor to thread[%d] pollfd\n", t);
				goto out_free;
			}
			thread_data[t].ctlfd_pos = ret;
			pr_debug2("thread_data[%p]: pollfd[%d] <- ctl_fd=%d\n",
				 thread_data, thread_data[t].ctlfd_pos,
				 thread_data[t].pipes.msg[0]);
		} else {
			thread_data[t].tid = gettid();
			if (evlist->ctl_fd.pos == -1)
				continue;
			ret = fdarray__dup_entry_from(&thread_data[t].pollfd, evlist->ctl_fd.pos,
						      &evlist->core.pollfd);
			if (ret < 0) {
				pr_err("Failed to duplicate descriptor in main thread pollfd\n");
				goto out_free;
			}
			thread_data[t].ctlfd_pos = ret;
			pr_debug2("thread_data[%p]: pollfd[%d] <- ctl_fd=%d\n",
				 thread_data, thread_data[t].ctlfd_pos,
				 evlist->core.pollfd.entries[evlist->ctl_fd.pos].fd);
		}
	}

	return 0;

out_free:
	record__free_thread_data(rec);

	return ret;
}

static int record__mmap_evlist(struct record *rec,
			       struct evlist *evlist)
{
	int i, ret;
	struct record_opts *opts = &rec->opts;
	bool auxtrace_overwrite = opts->auxtrace_snapshot_mode ||
				  opts->auxtrace_sample_mode;
	char msg[512];

	if (opts->affinity != PERF_AFFINITY_SYS)
		cpu__setup_cpunode_map();

	if (evlist__mmap_ex(evlist, opts->mmap_pages,
			    opts->auxtrace_mmap_pages,
			    auxtrace_overwrite,
			    opts->nr_cblocks, opts->affinity,
			    opts->mmap_flush, opts->comp_level) < 0) {
		if (errno == EPERM) {
			pr_err("Permission error mapping pages.\n"
			       "Consider increasing "
			       "/proc/sys/kernel/perf_event_mlock_kb,\n"
			       "or try again with a smaller value of -m/--mmap_pages.\n"
			       "(current value: %u,%u)\n",
			       opts->mmap_pages, opts->auxtrace_mmap_pages);
			return -errno;
		} else {
			pr_err("failed to mmap with %d (%s)\n", errno,
			       str_error_r(errno, msg, sizeof(msg)));
			return -errno;
		}
	}

	if (evlist__initialize_ctlfd(evlist, opts->ctl_fd, opts->ctl_fd_ack))
		return -1;

	ret = record__alloc_thread_data(rec, evlist);
	if (ret)
		return ret;

	if (record__threads_enabled(rec)) {
		ret = perf_data__create_dir(&rec->data, evlist->core.nr_mmaps);
		if (ret) {
			pr_err("Failed to create data directory: %s\n", strerror(-ret));
			return ret;
		}
		for (i = 0; i < evlist->core.nr_mmaps; i++) {
			if (evlist->mmap)
				evlist->mmap[i].file = &rec->data.dir.files[i];
			if (evlist->overwrite_mmap)
				evlist->overwrite_mmap[i].file = &rec->data.dir.files[i];
		}
	}

	return 0;
}

static int record__mmap(struct record *rec)
{
	return record__mmap_evlist(rec, rec->evlist);
}

static int record__open(struct record *rec)
{
	char msg[BUFSIZ];
	struct evsel *pos;
	struct evlist *evlist = rec->evlist;
	struct perf_session *session = rec->session;
	struct record_opts *opts = &rec->opts;
	int rc = 0;

	/*
	 * For initial_delay, system wide or a hybrid system, we need to add a
	 * dummy event so that we can track PERF_RECORD_MMAP to cover the delay
	 * of waiting or event synthesis.
	 */
	if (opts->initial_delay || target__has_cpu(&opts->target) ||
	    perf_pmu__has_hybrid()) {
		pos = evlist__get_tracking_event(evlist);
		if (!evsel__is_dummy_event(pos)) {
			/* Set up dummy event. */
			if (evlist__add_dummy(evlist))
				return -ENOMEM;
			pos = evlist__last(evlist);
			evlist__set_tracking_event(evlist, pos);
		}

		/*
		 * Enable the dummy event when the process is forked for
		 * initial_delay, immediately for system wide.
		 */
		if (opts->initial_delay && !pos->immediate &&
		    !target__has_cpu(&opts->target))
			pos->core.attr.enable_on_exec = 1;
		else
			pos->immediate = 1;
	}

	evlist__config(evlist, opts, &callchain_param);

	evlist__for_each_entry(evlist, pos) {
try_again:
		if (evsel__open(pos, pos->core.cpus, pos->core.threads) < 0) {
			if (evsel__fallback(pos, errno, msg, sizeof(msg))) {
				if (verbose > 0)
					ui__warning("%s\n", msg);
				goto try_again;
			}
			if ((errno == EINVAL || errno == EBADF) &&
			    pos->core.leader != &pos->core &&
			    pos->weak_group) {
				pos = evlist__reset_weak_group(evlist, pos, true);
				goto try_again;
			}
			rc = -errno;
			evsel__open_strerror(pos, &opts->target, errno, msg, sizeof(msg));
			ui__error("%s\n", msg);
			goto out;
		}

		pos->supported = true;
	}

	if (symbol_conf.kptr_restrict && !evlist__exclude_kernel(evlist)) {
		pr_warning(
"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
"check /proc/sys/kernel/kptr_restrict and /proc/sys/kernel/perf_event_paranoid.\n\n"
"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
"file is not found in the buildid cache or in the vmlinux path.\n\n"
"Samples in kernel modules won't be resolved at all.\n\n"
"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
"even with a suitable vmlinux or kallsyms file.\n\n");
	}

	if (evlist__apply_filters(evlist, &pos)) {
		pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
			pos->filter, evsel__name(pos), errno,
			str_error_r(errno, msg, sizeof(msg)));
		rc = -1;
		goto out;
	}

	rc = record__mmap(rec);
	if (rc)
		goto out;

	session->evlist = evlist;
	perf_session__set_id_hdr_size(session);
out:
	return rc;
}

static void set_timestamp_boundary(struct record *rec, u64 sample_time)
{
	if (rec->evlist->first_sample_time == 0)
		rec->evlist->first_sample_time = sample_time;

	rec->evlist->last_sample_time = sample_time;
}

static int process_sample_event(struct perf_tool *tool,
				union perf_event *event,
				struct perf_sample *sample,
				struct evsel *evsel,
				struct machine *machine)
{
	struct record *rec = container_of(tool, struct record, tool);

	set_timestamp_boundary(rec, sample->time);

	if (rec->buildid_all)
		return 0;

	rec->samples++;
	return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
}

static int process_buildids(struct record *rec)
{
	struct perf_session *session = rec->session;

	if (perf_data__size(&rec->data) == 0)
		return 0;

	/*
	 * During this process, it'll load the kernel map and replace the
	 * dso->long_name with a real pathname it found. In this case
	 * we prefer the vmlinux path like
	 *   /lib/modules/3.16.4/build/vmlinux
	 *
	 * rather than the build-id path (in the debug directory):
	 *   $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
	 */
	symbol_conf.ignore_vmlinux_buildid = true;

	/*
	 * If --buildid-all is given, it marks all DSOs regardless of hits,
	 * so there is no need to process samples. But if timestamp_boundary
	 * is enabled, it still needs to walk all samples to get the
	 * timestamps of the first/last samples.
	 */
	if (rec->buildid_all && !rec->timestamp_boundary)
		rec->tool.sample = NULL;

	return perf_session__process_events(session);
}

static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
{
	int err;
	struct perf_tool *tool = data;

	/*
	 * As for the guest kernel when processing subcommand record & report,
	 * we arrange module mmap prior to guest kernel mmap and trigger
	 * a preload dso, because default guest module symbols are loaded
	 * from guest kallsyms instead of /lib/modules/XXX/XXX. This
	 * method is used to avoid missing symbols when the first addr is
	 * in a module instead of in the guest kernel.
	 */
	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);

	/*
	 * We use _stext for the guest kernel because the guest kernel's
	 * /proc/kallsyms sometimes has no _text.
	 */
	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
						 machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);
}

static struct perf_event_header finished_round_event = {
	.size = sizeof(struct perf_event_header),
	.type = PERF_RECORD_FINISHED_ROUND,
};

static void record__adjust_affinity(struct record *rec, struct mmap *map)
{
	if (rec->opts.affinity != PERF_AFFINITY_SYS &&
	    !bitmap_equal(thread->mask->affinity.bits, map->affinity_mask.bits,
			  thread->mask->affinity.nbits)) {
		bitmap_zero(thread->mask->affinity.bits, thread->mask->affinity.nbits);
		bitmap_or(thread->mask->affinity.bits, thread->mask->affinity.bits,
			  map->affinity_mask.bits, thread->mask->affinity.nbits);
		sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&thread->mask->affinity),
				  (cpu_set_t *)thread->mask->affinity.bits);
		if (verbose == 2) {
			pr_debug("threads[%d]: running on cpu%d: ", thread->tid, sched_getcpu());
			mmap_cpu_mask__scnprintf(&thread->mask->affinity, "affinity");
		}
	}
}

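/*
 * Illustration (not part of perf): record__adjust_affinity() migrates the
 * calling thread onto the CPUs backing the mmap it is about to read, using
 * sched_setaffinity() on a raw bitmap. A minimal standalone sketch with
 * the portable cpu_set_t macros; guarded out with #if 0.
 */
#if 0
#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>

int main(void)
{
	cpu_set_t set;

	CPU_ZERO(&set);
	CPU_SET(0, &set); /* pin the calling thread to CPU 0 */

	if (sched_setaffinity(0, sizeof(set), &set))
		return 1;

	printf("now running on cpu%d\n", sched_getcpu());
	return 0;
}
#endif
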
static size_t process_comp_header(void *record, size_t increment)
{
	struct perf_record_compressed *event = record;
	size_t size = sizeof(*event);

	if (increment) {
		event->header.size += increment;
		return increment;
	}

	event->header.type = PERF_RECORD_COMPRESSED;
	event->header.size = size;

	return size;
}

static size_t zstd_compress(struct perf_session *session, struct mmap *map,
			    void *dst, size_t dst_size, void *src, size_t src_size)
{
	size_t compressed;
	size_t max_record_size = PERF_SAMPLE_MAX_SIZE - sizeof(struct perf_record_compressed) - 1;
	struct zstd_data *zstd_data = &session->zstd_data;

	if (map && map->file)
		zstd_data = &map->zstd_data;

	compressed = zstd_compress_stream_to_records(zstd_data, dst, dst_size, src, src_size,
						     max_record_size, process_comp_header);

	if (map && map->file) {
		thread->bytes_transferred += src_size;
		thread->bytes_compressed  += compressed;
	} else {
		session->bytes_transferred += src_size;
		session->bytes_compressed  += compressed;
	}

	return compressed;
}

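/*
 * Illustration (not part of perf): zstd_compress() above wraps the
 * compressed payload in PERF_RECORD_COMPRESSED records via perf's own
 * streaming helper. As a conceptual analogue, a minimal one-shot libzstd
 * compression with the public API; guarded out with #if 0, link with
 * -lzstd.
 */
#if 0
#include <stdio.h>
#include <string.h>
#include <zstd.h>

int main(void)
{
	char src[4096], dst[8192];
	size_t n;

	memset(src, 'A', sizeof(src)); /* highly compressible input */

	/* level 1 matches comp_level_default above */
	n = ZSTD_compress(dst, sizeof(dst), src, sizeof(src), 1);
	if (ZSTD_isError(n)) {
		fprintf(stderr, "%s\n", ZSTD_getErrorName(n));
		return 1;
	}
	printf("%zu -> %zu bytes\n", sizeof(src), n);
	return 0;
}
#endif
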
static int record__mmap_read_evlist(struct record *rec, struct evlist *evlist,
				    bool overwrite, bool synch)
{
	u64 bytes_written = rec->bytes_written;
	int i;
	int rc = 0;
	int nr_mmaps;
	struct mmap **maps;
	int trace_fd = rec->data.file.fd;
	off_t off = 0;

	if (!evlist)
		return 0;

	nr_mmaps = thread->nr_mmaps;
	maps = overwrite ? thread->overwrite_maps : thread->maps;

	if (!maps)
		return 0;

	if (overwrite && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
		return 0;

	if (record__aio_enabled(rec))
		off = record__aio_get_pos(trace_fd);

	for (i = 0; i < nr_mmaps; i++) {
		u64 flush = 0;
		struct mmap *map = maps[i];

		if (map->core.base) {
			record__adjust_affinity(rec, map);
			if (synch) {
				flush = map->core.flush;
				map->core.flush = 1;
			}
			if (!record__aio_enabled(rec)) {
				if (perf_mmap__push(map, rec, record__pushfn) < 0) {
					if (synch)
						map->core.flush = flush;
					rc = -1;
					goto out;
				}
			} else {
				if (record__aio_push(rec, map, &off) < 0) {
					record__aio_set_pos(trace_fd, off);
					if (synch)
						map->core.flush = flush;
					rc = -1;
					goto out;
				}
			}
			if (synch)
				map->core.flush = flush;
		}

		if (map->auxtrace_mmap.base && !rec->opts.auxtrace_snapshot_mode &&
		    !rec->opts.auxtrace_sample_mode &&
		    record__auxtrace_mmap_read(rec, map) != 0) {
			rc = -1;
			goto out;
		}
	}

	if (record__aio_enabled(rec))
		record__aio_set_pos(trace_fd, off);

	/*
	 * Mark the round finished in case we wrote
	 * at least one event.
	 *
	 * No need for round events in directory mode,
	 * because per-cpu maps and files have data
	 * sorted by kernel.
	 */
	if (!record__threads_enabled(rec) && bytes_written != rec->bytes_written)
		rc = record__write(rec, NULL, &finished_round_event, sizeof(finished_round_event));

	if (overwrite)
		evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
out:
	return rc;
}

static int record__mmap_read_all(struct record *rec, bool synch)
{
	int err;

	err = record__mmap_read_evlist(rec, rec->evlist, false, synch);
	if (err)
		return err;

	return record__mmap_read_evlist(rec, rec->evlist, true, synch);
}

static void record__thread_munmap_filtered(struct fdarray *fda, int fd,
					   void *arg __maybe_unused)
{
	struct perf_mmap *map = fda->priv[fd].ptr;

	if (map)
		perf_mmap__put(map);
}

static void *record__thread(void *arg)
{
	enum thread_msg msg = THREAD_MSG__READY;
	bool terminate = false;
	struct fdarray *pollfd;
	int err, ctlfd_pos;

	thread = arg;
	thread->tid = gettid();

	err = write(thread->pipes.ack[1], &msg, sizeof(msg));
	if (err == -1)
		pr_warning("threads[%d]: failed to notify on start: %s\n",
			   thread->tid, strerror(errno));

	pr_debug("threads[%d]: started on cpu%d\n", thread->tid, sched_getcpu());

	pollfd = &thread->pollfd;
	ctlfd_pos = thread->ctlfd_pos;

	for (;;) {
		unsigned long long hits = thread->samples;

		if (record__mmap_read_all(thread->rec, false) < 0 || terminate)
			break;

		if (hits == thread->samples) {
			err = fdarray__poll(pollfd, -1);
			/*
			 * Propagate the error only if there is one. Ignore a
			 * positive number of returned events and interrupt
			 * errors.
			 */
			if (err > 0 || (err < 0 && errno == EINTR))
				err = 0;
			thread->waking++;

			if (fdarray__filter(pollfd, POLLERR | POLLHUP,
					    record__thread_munmap_filtered, NULL) == 0)
				break;
		}

		if (pollfd->entries[ctlfd_pos].revents & POLLHUP) {
			terminate = true;
			close(thread->pipes.msg[0]);
			thread->pipes.msg[0] = -1;
			pollfd->entries[ctlfd_pos].fd = -1;
			pollfd->entries[ctlfd_pos].events = 0;
		}

		pollfd->entries[ctlfd_pos].revents = 0;
	}
	record__mmap_read_all(thread->rec, true);

	err = write(thread->pipes.ack[1], &msg, sizeof(msg));
	if (err == -1)
		pr_warning("threads[%d]: failed to notify on termination: %s\n",
			   thread->tid, strerror(errno));

	return NULL;
}

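/*
 * Illustration (not part of perf): record__thread() above polls only when
 * the last pass produced no samples, and drops descriptors that report
 * POLLERR/POLLHUP. A minimal standalone sketch of that poll-until-hangup
 * loop with plain poll(2) on stdin; guarded out with #if 0.
 */
#if 0
#include <poll.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	struct pollfd pfd = { .fd = STDIN_FILENO, .events = POLLIN };

	for (;;) {
		int n = poll(&pfd, 1, -1); /* block, like fdarray__poll(pollfd, -1) */

		if (n < 0)
			continue; /* e.g. EINTR; the loop above ignores it too */
		if (pfd.revents & (POLLERR | POLLHUP))
			break;    /* peer gone: analogous to fdarray__filter() */
		if (pfd.revents & POLLIN) {
			char buf[256];
			ssize_t r = read(STDIN_FILENO, buf, sizeof(buf));

			if (r <= 0)
				break;
			printf("read %zd bytes\n", r);
		}
	}
	return 0;
}
#endif
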
static void record__init_features(struct record *rec)
{
	struct perf_session *session = rec->session;
	int feat;

	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
		perf_header__set_feat(&session->header, feat);

	if (rec->no_buildid)
		perf_header__clear_feat(&session->header, HEADER_BUILD_ID);

	if (!have_tracepoints(&rec->evlist->core.entries))
		perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);

	if (!rec->opts.branch_stack)
		perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);

	if (!rec->opts.full_auxtrace)
		perf_header__clear_feat(&session->header, HEADER_AUXTRACE);

	if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns))
		perf_header__clear_feat(&session->header, HEADER_CLOCKID);

	if (!rec->opts.use_clockid)
		perf_header__clear_feat(&session->header, HEADER_CLOCK_DATA);

	if (!record__threads_enabled(rec))
		perf_header__clear_feat(&session->header, HEADER_DIR_FORMAT);

	if (!record__comp_enabled(rec))
		perf_header__clear_feat(&session->header, HEADER_COMPRESSED);

	perf_header__clear_feat(&session->header, HEADER_STAT);
}

static int
record__finish_output(struct record *rec)
{
	int i;
	struct perf_data *data = &rec->data;
	int fd = perf_data__fd(data);

	if (data->is_pipe)
		return 0;

	rec->session->header.data_size += rec->bytes_written;
	data->file.size = lseek(perf_data__fd(data), 0, SEEK_CUR);
	if (record__threads_enabled(rec)) {
		for (i = 0; i < data->dir.nr; i++)
			data->dir.files[i].size = lseek(data->dir.files[i].fd, 0, SEEK_CUR);
	}

	if (!rec->no_buildid) {
		process_buildids(rec);

		if (rec->buildid_all)
			dsos__hit_all(rec->session);
	}
	perf_session__write_header(rec->session, rec->evlist, fd, true);

	return 0;
}

static int record__synthesize_workload(struct record *rec, bool tail)
{
	int err;
	struct perf_thread_map *thread_map;
	bool needs_mmap = rec->opts.synth & PERF_SYNTH_MMAP;

	if (rec->opts.tail_synthesize != tail)
		return 0;

	thread_map = thread_map__new_by_tid(rec->evlist->workload.pid);
	if (thread_map == NULL)
		return -1;

	err = perf_event__synthesize_thread_map(&rec->tool, thread_map,
						process_synthesized_event,
						&rec->session->machines.host,
						needs_mmap,
						rec->opts.sample_address);
	perf_thread_map__put(thread_map);
	return err;
}

static int record__synthesize(struct record *rec, bool tail);

static int
record__switch_output(struct record *rec, bool at_exit)
{
	struct perf_data *data = &rec->data;
	int fd, err;
	char *new_filename;

	/* Same Size: "2015122520103046" */
	char timestamp[] = "InvalidTimestamp";

	record__aio_mmap_read_sync(rec);

	record__synthesize(rec, true);
	if (target__none(&rec->opts.target))
		record__synthesize_workload(rec, true);

	rec->samples = 0;
	record__finish_output(rec);
	err = fetch_current_timestamp(timestamp, sizeof(timestamp));
	if (err) {
		pr_err("Failed to get current timestamp\n");
		return -EINVAL;
	}

	fd = perf_data__switch(data, timestamp,
			       rec->session->header.data_offset,
			       at_exit, &new_filename);
	if (fd >= 0 && !at_exit) {
		rec->bytes_written = 0;
		rec->session->header.data_size = 0;
	}

	if (!quiet)
		fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
			data->path, timestamp);

	if (rec->switch_output.num_files) {
		int n = rec->switch_output.cur_file + 1;

		if (n >= rec->switch_output.num_files)
			n = 0;
		rec->switch_output.cur_file = n;
		if (rec->switch_output.filenames[n]) {
			remove(rec->switch_output.filenames[n]);
			zfree(&rec->switch_output.filenames[n]);
		}
		rec->switch_output.filenames[n] = new_filename;
	} else {
		free(new_filename);
	}

	/* Output tracking events */
	if (!at_exit) {
		record__synthesize(rec, false);

		/*
		 * In 'perf record --switch-output' without -a,
		 * record__synthesize() in record__switch_output() won't
		 * generate tracking events, because there's no thread_map
		 * in evlist. This causes the newly created perf.data to
		 * contain no map and comm information.
		 * Create a fake thread_map and directly call
		 * perf_event__synthesize_thread_map() for those events.
		 */
		if (target__none(&rec->opts.target))
			record__synthesize_workload(rec, false);
	}
	return fd;
}

static volatile int workload_exec_errno;

/*
 * evlist__prepare_workload will send a SIGUSR1
 * if the fork fails, since we asked by setting its
 * want_signal to true.
 */
static void workload_exec_failed_signal(int signo __maybe_unused,
					siginfo_t *info,
					void *ucontext __maybe_unused)
{
	workload_exec_errno = info->si_value.sival_int;
	done = 1;
	child_finished = 1;
}

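/*
 * Illustration (not part of perf): the handler above pulls the child's
 * errno out of si_value.sival_int, which only works when the signal is
 * queued with a payload (sigqueue()) and installed with SA_SIGINFO. A
 * minimal standalone sketch sending a payload to itself; guarded out
 * with #if 0.
 */
#if 0
#include <signal.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static volatile sig_atomic_t payload;

static void on_sigusr1(int signo, siginfo_t *info, void *ucontext)
{
	(void)signo;
	(void)ucontext;
	payload = info->si_value.sival_int; /* same field as above */
}

int main(void)
{
	struct sigaction act;
	union sigval val = { .sival_int = 42 };

	memset(&act, 0, sizeof(act));
	act.sa_sigaction = on_sigusr1;
	act.sa_flags = SA_SIGINFO;
	sigaction(SIGUSR1, &act, NULL);

	sigqueue(getpid(), SIGUSR1, val); /* carries the integer payload */
	printf("received payload %d\n", (int)payload);
	return 0;
}
#endif
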
static void snapshot_sig_handler(int sig);
static void alarm_sig_handler(int sig);

static const struct perf_event_mmap_page *evlist__pick_pc(struct evlist *evlist)
{
	if (evlist) {
		if (evlist->mmap && evlist->mmap[0].core.base)
			return evlist->mmap[0].core.base;
		if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].core.base)
			return evlist->overwrite_mmap[0].core.base;
	}
	return NULL;
}

static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
{
	const struct perf_event_mmap_page *pc = evlist__pick_pc(rec->evlist);

	if (pc)
		return pc;
	return NULL;
}

static int record__synthesize(struct record *rec, bool tail)
{
	struct perf_session *session = rec->session;
	struct machine *machine = &session->machines.host;
	struct perf_data *data = &rec->data;
	struct record_opts *opts = &rec->opts;
	struct perf_tool *tool = &rec->tool;
	int err = 0;
	event_op f = process_synthesized_event;

	if (rec->opts.tail_synthesize != tail)
		return 0;

	if (data->is_pipe) {
		err = perf_event__synthesize_for_pipe(tool, session, data,
						      process_synthesized_event);
		if (err < 0)
			goto out;

		rec->bytes_written += err;
	}

	err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
					  process_synthesized_event, machine);
	if (err)
		goto out;

	/* Synthesize id_index before auxtrace_info */
	if (rec->opts.auxtrace_sample_mode || rec->opts.full_auxtrace) {
		err = perf_event__synthesize_id_index(tool,
						      process_synthesized_event,
						      session->evlist, machine);
		if (err)
			goto out;
	}

	if (rec->opts.full_auxtrace) {
		err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
					session, process_synthesized_event);
		if (err)
			goto out;
	}

	if (!evlist__exclude_kernel(rec->evlist)) {
		err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
							 machine);
		WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
				   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
				   "Check /proc/kallsyms permission or run as root.\n");

		err = perf_event__synthesize_modules(tool, process_synthesized_event,
						     machine);
		WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
				   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
				   "Check /proc/modules permission or run as root.\n");
	}

	if (perf_guest)
		machines__process_guests(&session->machines,
					 perf_event__synthesize_guest_os, tool);

	err = perf_event__synthesize_extra_attr(&rec->tool,
						rec->evlist,
						process_synthesized_event,
						data->is_pipe);
	if (err)
		goto out;

	err = perf_event__synthesize_thread_map2(&rec->tool, rec->evlist->core.threads,
						 process_synthesized_event,
						 NULL);
	if (err < 0) {
		pr_err("Couldn't synthesize thread map.\n");
		return err;
	}

	err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->core.user_requested_cpus,
					     process_synthesized_event, NULL);
	if (err < 0) {
		pr_err("Couldn't synthesize cpu map.\n");
		return err;
	}

	err = perf_event__synthesize_bpf_events(session, process_synthesized_event,
						machine, opts);
	if (err < 0)
		pr_warning("Couldn't synthesize bpf events.\n");

	if (rec->opts.synth & PERF_SYNTH_CGROUP) {
		err = perf_event__synthesize_cgroups(tool, process_synthesized_event,
						     machine);
		if (err < 0)
			pr_warning("Couldn't synthesize cgroup events.\n");
	}

	if (rec->opts.nr_threads_synthesize > 1) {
		perf_set_multithreaded();
		f = process_locked_synthesized_event;
	}

	if (rec->opts.synth & PERF_SYNTH_TASK) {
		bool needs_mmap = rec->opts.synth & PERF_SYNTH_MMAP;

		err = __machine__synthesize_threads(machine, tool, &opts->target,
						    rec->evlist->core.threads,
						    f, needs_mmap, opts->sample_address,
						    rec->opts.nr_threads_synthesize);
	}

	if (rec->opts.nr_threads_synthesize > 1)
		perf_set_singlethreaded();

out:
	return err;
}

static int record__process_signal_event(union perf_event *event __maybe_unused, void *data)
{
	struct record *rec = data;

	pthread_kill(rec->thread_id, SIGUSR2);
	return 0;
}

static int record__setup_sb_evlist(struct record *rec)
{
	struct record_opts *opts = &rec->opts;

	if (rec->sb_evlist != NULL) {
		/*
		 * We get here if --switch-output-event populated the
		 * sb_evlist, so associate a callback that will send a SIGUSR2
		 * to the main thread.
		 */
		evlist__set_cb(rec->sb_evlist, record__process_signal_event, rec);
		rec->thread_id = pthread_self();
	}
#ifdef HAVE_LIBBPF_SUPPORT
	if (!opts->no_bpf_event) {
		if (rec->sb_evlist == NULL) {
			rec->sb_evlist = evlist__new();

			if (rec->sb_evlist == NULL) {
				pr_err("Couldn't create side band evlist.\n");
				return -1;
			}
		}

		if (evlist__add_bpf_sb_event(rec->sb_evlist, &rec->session->header.env)) {
			pr_err("Couldn't ask for PERF_RECORD_BPF_EVENT side band events.\n");
			return -1;
		}
	}
#endif
	if (evlist__start_sb_thread(rec->sb_evlist, &rec->opts.target)) {
		pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n");
		opts->no_bpf_event = true;
	}

	return 0;
}

static int record__init_clock(struct record *rec)
{
	struct perf_session *session = rec->session;
	struct timespec ref_clockid;
	struct timeval ref_tod;
	u64 ref;

	if (!rec->opts.use_clockid)
		return 0;

	if (rec->opts.use_clockid && rec->opts.clockid_res_ns)
		session->header.env.clock.clockid_res_ns = rec->opts.clockid_res_ns;

	session->header.env.clock.clockid = rec->opts.clockid;

	if (gettimeofday(&ref_tod, NULL) != 0) {
		pr_err("gettimeofday failed, cannot set reference time.\n");
		return -1;
	}

	if (clock_gettime(rec->opts.clockid, &ref_clockid)) {
		pr_err("clock_gettime failed, cannot set reference time.\n");
		return -1;
	}

	ref = (u64) ref_tod.tv_sec * NSEC_PER_SEC +
	      (u64) ref_tod.tv_usec * NSEC_PER_USEC;

	session->header.env.clock.tod_ns = ref;

	ref = (u64) ref_clockid.tv_sec * NSEC_PER_SEC +
	      (u64) ref_clockid.tv_nsec;

	session->header.env.clock.clockid_ns = ref;

	return 0;
}

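/*
 * Illustration (not part of perf): record__init_clock() samples the
 * session clockid and the time of day back to back, so that readers can
 * convert sample timestamps t to wall-clock time with
 * wall = tod_ns + (t - clockid_ns). A minimal standalone sketch, assuming
 * CLOCK_MONOTONIC as the session clock; guarded out with #if 0.
 */
#if 0
#include <stdint.h>
#include <stdio.h>
#include <sys/time.h>
#include <time.h>

#define DEMO_NSEC_PER_SEC  1000000000ULL
#define DEMO_NSEC_PER_USEC 1000ULL

int main(void)
{
	struct timespec ts;
	struct timeval tv;
	uint64_t tod_ns, clockid_ns;

	/* Sample both clocks back to back, as above. */
	gettimeofday(&tv, NULL);
	clock_gettime(CLOCK_MONOTONIC, &ts);

	tod_ns = (uint64_t)tv.tv_sec * DEMO_NSEC_PER_SEC +
		 (uint64_t)tv.tv_usec * DEMO_NSEC_PER_USEC;
	clockid_ns = (uint64_t)ts.tv_sec * DEMO_NSEC_PER_SEC +
		     (uint64_t)ts.tv_nsec;

	printf("tod=%llu mono=%llu\n",
	       (unsigned long long)tod_ns, (unsigned long long)clockid_ns);
	return 0;
}
#endif
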
static void hit_auxtrace_snapshot_trigger(struct record *rec)
{
	if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
		trigger_hit(&auxtrace_snapshot_trigger);
		auxtrace_record__snapshot_started = 1;
		if (auxtrace_record__snapshot_start(rec->itr))
			trigger_error(&auxtrace_snapshot_trigger);
	}
}

static void record__uniquify_name(struct record *rec)
{
	struct evsel *pos;
	struct evlist *evlist = rec->evlist;
	char *new_name;
	int ret;

	if (!perf_pmu__has_hybrid())
		return;

	evlist__for_each_entry(evlist, pos) {
		if (!evsel__is_hybrid(pos))
			continue;

		if (strchr(pos->name, '/'))
			continue;

		ret = asprintf(&new_name, "%s/%s/",
			       pos->pmu_name, pos->name);
		if (ret) {
			free(pos->name);
			pos->name = new_name;
		}
	}
}

static int record__terminate_thread(struct record_thread *thread_data)
{
	int err;
	enum thread_msg ack = THREAD_MSG__UNDEFINED;
	pid_t tid = thread_data->tid;

	close(thread_data->pipes.msg[1]);
	thread_data->pipes.msg[1] = -1;
	err = read(thread_data->pipes.ack[0], &ack, sizeof(ack));
	if (err > 0)
		pr_debug2("threads[%d]: sent %s\n", tid, thread_msg_tags[ack]);
	else
		pr_warning("threads[%d]: failed to receive termination notification from %d\n",
			   thread->tid, tid);

	return 0;
}

static int record__start_threads(struct record *rec)
{
	int t, tt, err, ret = 0, nr_threads = rec->nr_threads;
	struct record_thread *thread_data = rec->thread_data;
	sigset_t full, mask;
	pthread_t handle;
	pthread_attr_t attrs;

	thread = &thread_data[0];

	if (!record__threads_enabled(rec))
		return 0;

	sigfillset(&full);
	if (sigprocmask(SIG_SETMASK, &full, &mask)) {
		pr_err("Failed to block signals on threads start: %s\n", strerror(errno));
		return -1;
	}

	pthread_attr_init(&attrs);
	pthread_attr_setdetachstate(&attrs, PTHREAD_CREATE_DETACHED);

	for (t = 1; t < nr_threads; t++) {
		enum thread_msg msg = THREAD_MSG__UNDEFINED;

#ifdef HAVE_PTHREAD_ATTR_SETAFFINITY_NP
		pthread_attr_setaffinity_np(&attrs,
					    MMAP_CPU_MASK_BYTES(&(thread_data[t].mask->affinity)),
					    (cpu_set_t *)(thread_data[t].mask->affinity.bits));
#endif
		if (pthread_create(&handle, &attrs, record__thread, &thread_data[t])) {
			for (tt = 1; tt < t; tt++)
				record__terminate_thread(&thread_data[tt]);
			pr_err("Failed to start threads: %s\n", strerror(errno));
			ret = -1;
			goto out_err;
		}

		err = read(thread_data[t].pipes.ack[0], &msg, sizeof(msg));
		if (err > 0)
			pr_debug2("threads[%d]: sent %s\n", rec->thread_data[t].tid,
				  thread_msg_tags[msg]);
		else
			pr_warning("threads[%d]: failed to receive start notification from %d\n",
				   thread->tid, rec->thread_data[t].tid);
	}

	sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&thread->mask->affinity),
			  (cpu_set_t *)thread->mask->affinity.bits);

	pr_debug("threads[%d]: started on cpu%d\n", thread->tid, sched_getcpu());

out_err:
	pthread_attr_destroy(&attrs);

	if (sigprocmask(SIG_SETMASK, &mask, NULL)) {
		pr_err("Failed to unblock signals on threads start: %s\n", strerror(errno));
		ret = -1;
	}

	return ret;
}

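/*
 * Illustration (not part of perf): record__start_threads() above creates
 * detached workers whose affinity is fixed before they run, via
 * pthread_attr_setaffinity_np() (a GNU extension). A minimal standalone
 * sketch, joinable for simplicity; guarded out with #if 0, link with
 * -lpthread.
 */
#if 0
#define _GNU_SOURCE
#include <pthread.h>
#include <sched.h>
#include <stdio.h>

static void *worker(void *arg)
{
	(void)arg;
	printf("worker on cpu%d\n", sched_getcpu());
	return NULL;
}

int main(void)
{
	pthread_t handle;
	pthread_attr_t attrs;
	cpu_set_t set;

	CPU_ZERO(&set);
	CPU_SET(0, &set);

	pthread_attr_init(&attrs);
	/* Same call the code above guards with HAVE_PTHREAD_ATTR_SETAFFINITY_NP. */
	pthread_attr_setaffinity_np(&attrs, sizeof(set), &set);

	pthread_create(&handle, &attrs, worker, NULL);
	pthread_join(handle, NULL);
	pthread_attr_destroy(&attrs);
	return 0;
}
#endif
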
static int record__stop_threads(struct record *rec)
{
	int t;
	struct record_thread *thread_data = rec->thread_data;

	for (t = 1; t < rec->nr_threads; t++)
		record__terminate_thread(&thread_data[t]);

	for (t = 0; t < rec->nr_threads; t++) {
		rec->samples += thread_data[t].samples;
		if (!record__threads_enabled(rec))
			continue;
		rec->session->bytes_transferred += thread_data[t].bytes_transferred;
		rec->session->bytes_compressed += thread_data[t].bytes_compressed;
		pr_debug("threads[%d]: samples=%lld, wakes=%ld, ", thread_data[t].tid,
			 thread_data[t].samples, thread_data[t].waking);
		if (thread_data[t].bytes_transferred && thread_data[t].bytes_compressed)
			pr_debug("transferred=%" PRIu64 ", compressed=%" PRIu64 "\n",
				 thread_data[t].bytes_transferred, thread_data[t].bytes_compressed);
		else
			pr_debug("written=%" PRIu64 "\n", thread_data[t].bytes_written);
	}

	return 0;
}

static unsigned long record__waking(struct record *rec)
{
	int t;
	unsigned long waking = 0;
	struct record_thread *thread_data = rec->thread_data;

	for (t = 0; t < rec->nr_threads; t++)
		waking += thread_data[t].waking;

	return waking;
}

static int __cmd_record(struct record *rec, int argc, const char **argv)
{
	int err;
	int status = 0;
	const bool forks = argc > 0;
	struct perf_tool *tool = &rec->tool;
	struct record_opts *opts = &rec->opts;
	struct perf_data *data = &rec->data;
	struct perf_session *session;
	bool disabled = false, draining = false;
	int fd;
	float ratio = 0;
	enum evlist_ctl_cmd cmd = EVLIST_CTL_CMD_UNSUPPORTED;

	atexit(record__sig_exit);
	signal(SIGCHLD, sig_handler);
	signal(SIGINT, sig_handler);
	signal(SIGTERM, sig_handler);
	signal(SIGSEGV, sigsegv_handler);

	if (rec->opts.record_namespaces)
		tool->namespace_events = true;

	if (rec->opts.record_cgroup) {
#ifdef HAVE_FILE_HANDLE
		tool->cgroup_events = true;
#else
		pr_err("cgroup tracking is not supported\n");
		return -1;
#endif
	}

	if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) {
		signal(SIGUSR2, snapshot_sig_handler);
		if (rec->opts.auxtrace_snapshot_mode)
			trigger_on(&auxtrace_snapshot_trigger);
		if (rec->switch_output.enabled)
			trigger_on(&switch_output_trigger);
	} else {
		signal(SIGUSR2, SIG_IGN);
	}

	session = perf_session__new(data, tool);
	if (IS_ERR(session)) {
		pr_err("Perf session creation failed.\n");
		return PTR_ERR(session);
	}

	if (record__threads_enabled(rec)) {
		if (perf_data__is_pipe(&rec->data)) {
			pr_err("Parallel trace streaming is not available in pipe mode.\n");
			return -1;
		}
		if (rec->opts.full_auxtrace) {
			pr_err("Parallel trace streaming is not available in AUX area tracing mode.\n");
			return -1;
		}
	}

	fd = perf_data__fd(data);

	rec->session = session;

	if (zstd_init(&session->zstd_data, rec->opts.comp_level) < 0) {
		pr_err("Compression initialization failed.\n");
		return -1;
	}
#ifdef HAVE_EVENTFD_SUPPORT
	done_fd = eventfd(0, EFD_NONBLOCK);
	if (done_fd < 0) {
		pr_err("Failed to create wakeup eventfd, error: %m\n");
		status = -1;
		goto out_delete_session;
	}
	err = evlist__add_wakeup_eventfd(rec->evlist, done_fd);
	if (err < 0) {
		pr_err("Failed to add wakeup eventfd to poll list\n");
		status = err;
		goto out_delete_session;
	}
#endif // HAVE_EVENTFD_SUPPORT

	session->header.env.comp_type = PERF_COMP_ZSTD;
	session->header.env.comp_level = rec->opts.comp_level;

	if (rec->opts.kcore &&
	    !record__kcore_readable(&session->machines.host)) {
		pr_err("ERROR: kcore is not readable.\n");
		return -1;
	}

	if (record__init_clock(rec))
		return -1;

	record__init_features(rec);

	if (forks) {
		err = evlist__prepare_workload(rec->evlist, &opts->target, argv, data->is_pipe,
					       workload_exec_failed_signal);
		if (err < 0) {
			pr_err("Couldn't run the workload!\n");
			status = err;
			goto out_delete_session;
		}
	}

	/*
	 * If we have just a single event and are sending data
	 * through a pipe, we need to force the ids allocation,
	 * because we synthesize the event name through the pipe
	 * and need the id for that.
	 */
	if (data->is_pipe && rec->evlist->core.nr_entries == 1)
		rec->opts.sample_id = true;

	record__uniquify_name(rec);

	if (record__open(rec) != 0) {
		err = -1;
		goto out_free_threads;
	}
	session->header.env.comp_mmap_len = session->evlist->core.mmap_len;

	if (rec->opts.kcore) {
		err = record__kcore_copy(&session->machines.host, data);
		if (err) {
			pr_err("ERROR: Failed to copy kcore\n");
			goto out_free_threads;
		}
	}

	err = bpf__apply_obj_config();
	if (err) {
		char errbuf[BUFSIZ];

		bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
		pr_err("ERROR: Apply config to BPF failed: %s\n",
		       errbuf);
		goto out_free_threads;
	}

	/*
	 * Normally perf_session__new would do this, but it doesn't have the
	 * evlist.
	 */
	if (rec->tool.ordered_events && !evlist__sample_id_all(rec->evlist)) {
		pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
		rec->tool.ordered_events = false;
	}

	if (!rec->evlist->core.nr_groups)
		perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);

	if (data->is_pipe) {
		err = perf_header__write_pipe(fd);
		if (err < 0)
			goto out_free_threads;
	} else {
		err = perf_session__write_header(session, rec->evlist, fd, false);
		if (err < 0)
			goto out_free_threads;
	}

	err = -1;
	if (!rec->no_buildid
	    && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
		pr_err("Couldn't generate buildids. "
		       "Use --no-buildid to profile anyway.\n");
		goto out_free_threads;
	}

	err = record__setup_sb_evlist(rec);
	if (err)
		goto out_free_threads;

	err = record__synthesize(rec, false);
	if (err < 0)
		goto out_free_threads;

	if (rec->realtime_prio) {
		struct sched_param param;

		param.sched_priority = rec->realtime_prio;
		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
			pr_err("Could not set realtime priority.\n");
			err = -1;
			goto out_free_threads;
		}
	}

	if (record__start_threads(rec))
		goto out_free_threads;

	/*
	 * When perf is starting the traced process, all the events
	 * (apart from group members) have enable_on_exec=1 set,
	 * so don't spoil it by prematurely enabling them.
	 */
	if (!target__none(&opts->target) && !opts->initial_delay)
		evlist__enable(rec->evlist);

	/*
	 * Let the child rip
	 */
	if (forks) {
		struct machine *machine = &session->machines.host;
		union perf_event *event;
		pid_t tgid;

		event = malloc(sizeof(event->comm) + machine->id_hdr_size);
		if (event == NULL) {
			err = -ENOMEM;
			goto out_child;
		}

		/*
		 * Some H/W events are generated before the COMM event,
		 * which is emitted during exec(), so perf script
		 * cannot see a correct process name for those events.
		 * Synthesize a COMM event to prevent it.
		 */
		tgid = perf_event__synthesize_comm(tool, event,
						   rec->evlist->workload.pid,
						   process_synthesized_event,
						   machine);
		free(event);

		if (tgid == -1)
			goto out_child;

		event = malloc(sizeof(event->namespaces) +
			       (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
			       machine->id_hdr_size);
		if (event == NULL) {
			err = -ENOMEM;
			goto out_child;
		}

		/*
		 * Synthesize a NAMESPACES event for the command specified.
		 */
		perf_event__synthesize_namespaces(tool, event,
						  rec->evlist->workload.pid,
						  tgid, process_synthesized_event,
						  machine);
		free(event);

		evlist__start_workload(rec->evlist);
	}

	if (opts->initial_delay) {
		pr_info(EVLIST_DISABLED_MSG);
		if (opts->initial_delay > 0) {
			usleep(opts->initial_delay * USEC_PER_MSEC);
			evlist__enable(rec->evlist);
			pr_info(EVLIST_ENABLED_MSG);
		}
	}

	trigger_ready(&auxtrace_snapshot_trigger);
	trigger_ready(&switch_output_trigger);
	perf_hooks__invoke_record_start();

	for (;;) {
		unsigned long long hits = thread->samples;

		/*
		 * rec->evlist->bkw_mmap_state may be BKW_MMAP_EMPTY
		 * here: when done == true and hits != rec->samples in
		 * the previous round.
		 *
		 * evlist__toggle_bkw_mmap ensures we never
		 * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING.
		 */

2433 if (trigger_is_hit(&switch_output_trigger) || done || draining)
2434 evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING);
2436 if (record__mmap_read_all(rec, false) < 0) {
2437 trigger_error(&auxtrace_snapshot_trigger);
2438 trigger_error(&switch_output_trigger);
2443 if (auxtrace_record__snapshot_started) {
2444 auxtrace_record__snapshot_started = 0;
2445 if (!trigger_is_error(&auxtrace_snapshot_trigger))
2446 record__read_auxtrace_snapshot(rec, false);
2447 if (trigger_is_error(&auxtrace_snapshot_trigger)) {
2448 pr_err("AUX area tracing snapshot failed\n");
2454 if (trigger_is_hit(&switch_output_trigger)) {
2456 * If switch_output_trigger is hit, the data in the
2457 * overwritable ring buffer should have been collected,
2458 * so bkw_mmap_state should be set to BKW_MMAP_EMPTY.
2460 * If SIGUSR2 is raised after or during record__mmap_read_all(),
2461 * record__mmap_read_all() didn't collect data from the
2462 * overwritable ring buffer. Read again.
2464 if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING)
2466 trigger_ready(&switch_output_trigger);
2469 * Reenable events in overwrite ring buffer after
2470 * record__mmap_read_all(): we should have collected data from it.
2473 evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING);
2476 fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
2477 record__waking(rec));
2479 fd = record__switch_output(rec, false);
2481 pr_err("Failed to switch to new file\n");
2482 trigger_error(&switch_output_trigger);
2487 /* re-arm the alarm */
2488 if (rec->switch_output.time)
2489 alarm(rec->switch_output.time);
2492 if (hits == thread->samples) {
2493 if (done || draining)
2495 err = fdarray__poll(&thread->pollfd, -1);
2497 * Propagate the error only if there is one. Ignore a positive
2498 * number of returned events and interrupt errors (EINTR).
2500 if (err > 0 || (err < 0 && errno == EINTR))
2504 if (fdarray__filter(&thread->pollfd, POLLERR | POLLHUP,
2505 record__thread_munmap_filtered, NULL) == 0)
2508 evlist__ctlfd_update(rec->evlist,
2509 &thread->pollfd.entries[thread->ctlfd_pos]);
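/*
 * Commands arriving on the control descriptor (see the --control
 * option) are dispatched below: 'enable'/'disable' toggle the events,
 * 'snapshot' triggers an AUX area snapshot, 'stop' ends the session.
 */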
2512 if (evlist__ctlfd_process(rec->evlist, &cmd) > 0) {
2514 case EVLIST_CTL_CMD_SNAPSHOT:
2515 hit_auxtrace_snapshot_trigger(rec);
2516 evlist__ctlfd_ack(rec->evlist);
2518 case EVLIST_CTL_CMD_STOP:
2521 case EVLIST_CTL_CMD_ACK:
2522 case EVLIST_CTL_CMD_UNSUPPORTED:
2523 case EVLIST_CTL_CMD_ENABLE:
2524 case EVLIST_CTL_CMD_DISABLE:
2525 case EVLIST_CTL_CMD_EVLIST:
2526 case EVLIST_CTL_CMD_PING:
2533 * When perf started the traced process, the events die with
2534 * the process at the end and we wait for that. Thus there is
2535 * no need to disable events in this case.
2537 if (done && !disabled && !target__none(&opts->target)) {
2538 trigger_off(&auxtrace_snapshot_trigger);
2539 evlist__disable(rec->evlist);
2544 trigger_off(&auxtrace_snapshot_trigger);
2545 trigger_off(&switch_output_trigger);
2547 if (opts->auxtrace_snapshot_on_exit)
2548 record__auxtrace_snapshot_exit(rec);
2550 if (forks && workload_exec_errno) {
2551 char msg[STRERR_BUFSIZE], strevsels[2048];
2552 const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
2554 evlist__scnprintf_evsels(rec->evlist, sizeof(strevsels), strevsels);
2556 pr_err("Failed to collect '%s' for the '%s' workload: %s\n",
2557 strevsels, argv[0], emsg);
2563 fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n",
2564 record__waking(rec));
2566 if (target__none(&rec->opts.target))
2567 record__synthesize_workload(rec, true);
2570 record__stop_threads(rec);
2571 record__mmap_read_all(rec, true);
2573 record__free_thread_data(rec);
2574 evlist__finalize_ctlfd(rec->evlist);
2575 record__aio_mmap_read_sync(rec);
2577 if (rec->session->bytes_transferred && rec->session->bytes_compressed) {
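/* comp_ratio is an integral field, so adding 0.5 below rounds to the nearest value. */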
2578 ratio = (float)rec->session->bytes_transferred/(float)rec->session->bytes_compressed;
2579 session->header.env.comp_ratio = ratio + 0.5;
2585 if (!child_finished)
2586 kill(rec->evlist->workload.pid, SIGTERM);
2592 else if (WIFEXITED(exit_status))
2593 status = WEXITSTATUS(exit_status);
2594 else if (WIFSIGNALED(exit_status))
2595 signr = WTERMSIG(exit_status);
2599 record__synthesize(rec, true);
2600 /* this will be recalculated during process_buildids() */
2604 if (!rec->timestamp_filename) {
2605 record__finish_output(rec);
2607 fd = record__switch_output(rec, true);
2610 goto out_delete_session;
2615 perf_hooks__invoke_record_end();
2617 if (!err && !quiet) {
2619 const char *postfix = rec->timestamp_filename ?
2620 ".<timestamp>" : "";
2622 if (rec->samples && !rec->opts.full_auxtrace)
2623 scnprintf(samples, sizeof(samples),
2624 " (%" PRIu64 " samples)", rec->samples);
2628 fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s",
2629 perf_data__size(data) / 1024.0 / 1024.0,
2630 data->path, postfix, samples);
2632 fprintf(stderr, ", compressed (original %.3f MB, ratio is %.3f)",
2633 rec->session->bytes_transferred / 1024.0 / 1024.0,
2636 fprintf(stderr, " ]\n");
2640 #ifdef HAVE_EVENTFD_SUPPORT
2644 zstd_fini(&session->zstd_data);
2645 perf_session__delete(session);
2647 if (!opts->no_bpf_event)
2648 evlist__stop_sb_thread(rec->sb_evlist);
2652 static void callchain_debug(struct callchain_param *callchain)
2654 static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };
2656 pr_debug("callchain: type %s\n", str[callchain->record_mode]);
2658 if (callchain->record_mode == CALLCHAIN_DWARF)
2659 pr_debug("callchain: stack dump size %d\n",
2660 callchain->dump_size);
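/*
 * Illustrative --call-graph values accepted below via
 * parse_callchain_record_opt(): "fp", "dwarf,8192" (record_mode plus
 * stack dump size) or "lbr"; DWARF additionally turns on data address
 * sampling.
 */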
2663 int record_opts__parse_callchain(struct record_opts *record,
2664 struct callchain_param *callchain,
2665 const char *arg, bool unset)
2668 callchain->enabled = !unset;
2670 /* --no-call-graph */
2672 callchain->record_mode = CALLCHAIN_NONE;
2673 pr_debug("callchain: disabled\n");
2677 ret = parse_callchain_record_opt(arg, callchain);
2679 /* Enable data address sampling for DWARF unwind. */
2680 if (callchain->record_mode == CALLCHAIN_DWARF)
2681 record->sample_address = true;
2682 callchain_debug(callchain);
2688 int record_parse_callchain_opt(const struct option *opt,
2692 return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
2695 int record_callchain_opt(const struct option *opt,
2696 const char *arg __maybe_unused,
2697 int unset __maybe_unused)
2699 struct callchain_param *callchain = opt->value;
2701 callchain->enabled = true;
2703 if (callchain->record_mode == CALLCHAIN_NONE)
2704 callchain->record_mode = CALLCHAIN_FP;
2706 callchain_debug(callchain);
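/*
 * Config-file hook. Variables arrive as "record.<key>"; an illustrative
 * perfconfig snippet matching the keys handled below:
 *
 *   [record]
 *       build-id = cache        # or: no-cache, skip, mmap
 *       call-graph = dwarf
 *       debuginfod = https://debuginfod.example.com  (example URL)
 */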
2710 static int perf_record_config(const char *var, const char *value, void *cb)
2712 struct record *rec = cb;
2714 if (!strcmp(var, "record.build-id")) {
2715 if (!strcmp(value, "cache"))
2716 rec->no_buildid_cache = false;
2717 else if (!strcmp(value, "no-cache"))
2718 rec->no_buildid_cache = true;
2719 else if (!strcmp(value, "skip"))
2720 rec->no_buildid = true;
2721 else if (!strcmp(value, "mmap"))
2722 rec->buildid_mmap = true;
2727 if (!strcmp(var, "record.call-graph")) {
2728 var = "call-graph.record-mode";
2729 return perf_default_config(var, value, cb);
2731 #ifdef HAVE_AIO_SUPPORT
2732 if (!strcmp(var, "record.aio")) {
2733 rec->opts.nr_cblocks = strtol(value, NULL, 0);
2734 if (!rec->opts.nr_cblocks)
2735 rec->opts.nr_cblocks = nr_cblocks_default;
2738 if (!strcmp(var, "record.debuginfod")) {
2739 rec->debuginfod.urls = strdup(value);
2740 if (!rec->debuginfod.urls)
2742 rec->debuginfod.set = true;
2749 static int record__parse_affinity(const struct option *opt, const char *str, int unset)
2751 struct record_opts *opts = (struct record_opts *)opt->value;
2756 if (!strcasecmp(str, "node"))
2757 opts->affinity = PERF_AFFINITY_NODE;
2758 else if (!strcasecmp(str, "cpu"))
2759 opts->affinity = PERF_AFFINITY_CPU;
2764 static int record__mmap_cpu_mask_alloc(struct mmap_cpu_mask *mask, int nr_bits)
2766 mask->nbits = nr_bits;
2767 mask->bits = bitmap_zalloc(mask->nbits);
2774 static void record__mmap_cpu_mask_free(struct mmap_cpu_mask *mask)
2776 bitmap_free(mask->bits);
2780 static int record__thread_mask_alloc(struct thread_mask *mask, int nr_bits)
2784 ret = record__mmap_cpu_mask_alloc(&mask->maps, nr_bits);
2786 mask->affinity.bits = NULL;
2790 ret = record__mmap_cpu_mask_alloc(&mask->affinity, nr_bits);
2792 record__mmap_cpu_mask_free(&mask->maps);
2793 mask->maps.bits = NULL;
2799 static void record__thread_mask_free(struct thread_mask *mask)
2801 record__mmap_cpu_mask_free(&mask->maps);
2802 record__mmap_cpu_mask_free(&mask->affinity);
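/*
 * --threads argument parser: accepts one of the predefined specs
 * handled in record__init_thread_masks() below ("cpu" is the default
 * when no argument is given, also "core", "package", "numa"), or a
 * user-defined masks string that is stored verbatim and parsed later
 * by record__init_thread_user_masks().
 */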
2805 static int record__parse_threads(const struct option *opt, const char *str, int unset)
2808 struct record_opts *opts = opt->value;
2810 if (unset || !str || !strlen(str)) {
2811 opts->threads_spec = THREAD_SPEC__CPU;
2813 for (s = 1; s < THREAD_SPEC__MAX; s++) {
2814 if (s == THREAD_SPEC__USER) {
2815 opts->threads_user_spec = strdup(str);
2816 if (!opts->threads_user_spec)
2818 opts->threads_spec = THREAD_SPEC__USER;
2821 if (!strncasecmp(str, thread_spec_tags[s], strlen(thread_spec_tags[s]))) {
2822 opts->threads_spec = s;
2828 if (opts->threads_spec == THREAD_SPEC__USER)
2829 pr_debug("threads_spec: %s\n", opts->threads_user_spec);
2831 pr_debug("threads_spec: %s\n", thread_spec_tags[opts->threads_spec]);
2836 static int parse_output_max_size(const struct option *opt,
2837 const char *str, int unset)
2839 unsigned long *s = (unsigned long *)opt->value;
2840 static struct parse_tag tags_size[] = {
2841 { .tag = 'B', .mult = 1 },
2842 { .tag = 'K', .mult = 1 << 10 },
2843 { .tag = 'M', .mult = 1 << 20 },
2844 { .tag = 'G', .mult = 1 << 30 },
2854 val = parse_tag_value(str, tags_size);
2855 if (val != (unsigned long) -1) {
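/*
 * Illustrative --max-size arguments handled above: "200M", "1G", ...;
 * the suffixes follow the tags_size table (B/K/M/G).
 */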
2863 static int record__parse_mmap_pages(const struct option *opt,
2865 int unset __maybe_unused)
2867 struct record_opts *opts = opt->value;
2869 unsigned int mmap_pages;
2884 ret = __evlist__parse_mmap_pages(&mmap_pages, s);
2887 opts->mmap_pages = mmap_pages;
2895 ret = __evlist__parse_mmap_pages(&mmap_pages, p + 1);
2899 opts->auxtrace_mmap_pages = mmap_pages;
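/*
 * Illustrative -m/--mmap-pages arguments handled above: "-m 512" sets
 * only the data mmap size, "-m 512,64" also sets the AUX area tracing
 * mmap size from the part after the comma.
 */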
2906 void __weak arch__add_leaf_frame_record_opts(struct record_opts *opts __maybe_unused)
2910 static int parse_control_option(const struct option *opt,
2912 int unset __maybe_unused)
2914 struct record_opts *opts = opt->value;
2916 return evlist__parse_control(str, &opts->ctl_fd, &opts->ctl_fd_ack, &opts->ctl_fd_close);
2919 static void switch_output_size_warn(struct record *rec)
2921 u64 wakeup_size = evlist__mmap_size(rec->opts.mmap_pages);
2922 struct switch_output *s = &rec->switch_output;
2926 if (s->size < wakeup_size) {
2929 unit_number__scnprintf(buf, sizeof(buf), wakeup_size);
2930 pr_warning("WARNING: switch-output data size lower than "
2931 "wakeup kernel buffer size (%s), "
2932 "expect bigger perf.data sizes\n", buf);
2936 static int switch_output_setup(struct record *rec)
2938 struct switch_output *s = &rec->switch_output;
2939 static struct parse_tag tags_size[] = {
2940 { .tag = 'B', .mult = 1 },
2941 { .tag = 'K', .mult = 1 << 10 },
2942 { .tag = 'M', .mult = 1 << 20 },
2943 { .tag = 'G', .mult = 1 << 30 },
2946 static struct parse_tag tags_time[] = {
2947 { .tag = 's', .mult = 1 },
2948 { .tag = 'm', .mult = 60 },
2949 { .tag = 'h', .mult = 60*60 },
2950 { .tag = 'd', .mult = 60*60*24 },
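/*
 * Illustrative --switch-output arguments matched below: "signal"
 * (rotate on SIGUSR2), "100M" (size threshold, tags_size), "30s"
 * (time threshold, tags_time).
 */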
2956 * If we're using --switch-output-events, then we imply
2957 * --switch-output=signal, as we'll send a SIGUSR2 from the side band
2958 * thread to its parent.
2960 if (rec->switch_output_event_set) {
2961 if (record__threads_enabled(rec)) {
2962 pr_warning("WARNING: --switch-output-event option is not available in parallel streaming mode.\n");
2971 if (record__threads_enabled(rec)) {
2972 pr_warning("WARNING: --switch-output option is not available in parallel streaming mode.\n");
2976 if (!strcmp(s->str, "signal")) {
2979 pr_debug("switch-output with SIGUSR2 signal\n");
2983 val = parse_tag_value(s->str, tags_size);
2984 if (val != (unsigned long) -1) {
2986 pr_debug("switch-output with %s size threshold\n", s->str);
2990 val = parse_tag_value(s->str, tags_time);
2991 if (val != (unsigned long) -1) {
2993 pr_debug("switch-output with %s time threshold (%lu seconds)\n",
3001 rec->timestamp_filename = true;
3004 if (s->size && !rec->opts.no_buffering)
3005 switch_output_size_warn(rec);
3010 static const char * const __record_usage[] = {
3011 "perf record [<options>] [<command>]",
3012 "perf record [<options>] -- <command> [<options>]",
3015 const char * const *record_usage = __record_usage;
3017 static int build_id__process_mmap(struct perf_tool *tool, union perf_event *event,
3018 struct perf_sample *sample, struct machine *machine)
3021 * We already have the kernel maps, put in place via perf_session__create_kernel_maps(),
3022 * so there is no need to add them twice.
3024 if (!(event->header.misc & PERF_RECORD_MISC_USER))
3026 return perf_event__process_mmap(tool, event, sample, machine);
3029 static int build_id__process_mmap2(struct perf_tool *tool, union perf_event *event,
3030 struct perf_sample *sample, struct machine *machine)
3033 * We already have the kernel maps, put in place via perf_session__create_kernel_maps(),
3034 * so there is no need to add them twice.
3036 if (!(event->header.misc & PERF_RECORD_MISC_USER))
3039 return perf_event__process_mmap2(tool, event, sample, machine);
3042 static int process_timestamp_boundary(struct perf_tool *tool,
3043 union perf_event *event __maybe_unused,
3044 struct perf_sample *sample,
3045 struct machine *machine __maybe_unused)
3047 struct record *rec = container_of(tool, struct record, tool);
3049 set_timestamp_boundary(rec, sample->time);
3053 static int parse_record_synth_option(const struct option *opt,
3055 int unset __maybe_unused)
3057 struct record_opts *opts = opt->value;
3058 char *p = strdup(str);
3063 opts->synth = parse_synth_opt(p);
3066 if (opts->synth < 0) {
3067 pr_err("Invalid synth option: %s\n", str);
3074 * XXX Ideally this would be local to cmd_record() and passed to a record__new,
3075 * because we need to have access to it in record__exit, which is called
3076 * after cmd_record() exits, but since record_options need to be accessible to
3077 * builtin-script, leave it here.
3079 * At least we don't touch it in all the other functions here directly.
3081 * Just say no to tons of global variables, sigh.
3083 static struct record record = {
3085 .sample_time = true,
3086 .mmap_pages = UINT_MAX,
3087 .user_freq = UINT_MAX,
3088 .user_interval = ULLONG_MAX,
3092 .default_per_cpu = true,
3094 .mmap_flush = MMAP_FLUSH_DEFAULT,
3095 .nr_threads_synthesize = 1,
3098 .synth = PERF_SYNTH_ALL,
3101 .sample = process_sample_event,
3102 .fork = perf_event__process_fork,
3103 .exit = perf_event__process_exit,
3104 .comm = perf_event__process_comm,
3105 .namespaces = perf_event__process_namespaces,
3106 .mmap = build_id__process_mmap,
3107 .mmap2 = build_id__process_mmap2,
3108 .itrace_start = process_timestamp_boundary,
3109 .aux = process_timestamp_boundary,
3110 .ordered_events = true,
3114 const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
3115 "\n\t\t\t\tDefault: fp";
3117 static bool dry_run;
3120 * XXX Will stay a global variable till we fix builtin-script.c to stop messing
3121 * with it and switch to using the library functions in perf_evlist that came
3122 * from builtin-record.c, i.e. use record_opts,
3123 * evlist__prepare_workload, etc. instead of fork+exec'ing 'perf record'.
3126 static struct option __record_options[] = {
3127 OPT_CALLBACK('e', "event", &record.evlist, "event",
3128 "event selector. use 'perf list' to list available events",
3129 parse_events_option),
3130 OPT_CALLBACK(0, "filter", &record.evlist, "filter",
3131 "event filter", parse_filter),
3132 OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
3133 NULL, "don't record events from perf itself",
3135 OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
3136 "record events on existing process id"),
3137 OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
3138 "record events on existing thread id"),
3139 OPT_INTEGER('r', "realtime", &record.realtime_prio,
3140 "collect data with this RT SCHED_FIFO priority"),
3141 OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
3142 "collect data without buffering"),
3143 OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
3144 "collect raw sample records from all opened counters"),
3145 OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
3146 "system-wide collection from all CPUs"),
3147 OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
3148 "list of cpus to monitor"),
3149 OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
3150 OPT_STRING('o', "output", &record.data.path, "file",
3151 "output file name"),
3152 OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
3153 &record.opts.no_inherit_set,
3154 "child tasks do not inherit counters"),
3155 OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize,
3156 "synthesize non-sample events at the end of output"),
3157 OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"),
3158 OPT_BOOLEAN(0, "no-bpf-event", &record.opts.no_bpf_event, "do not record bpf events"),
3159 OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq,
3160 "Fail if the specified frequency can't be used"),
3161 OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'",
3162 "profile at this frequency",
3163 record__parse_freq),
3164 OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
3165 "number of mmap data pages and AUX area tracing mmap pages",
3166 record__parse_mmap_pages),
3167 OPT_CALLBACK(0, "mmap-flush", &record.opts, "number",
3168 "Minimal number of bytes that is extracted from mmap data pages (default: 1)",
3169 record__mmap_flush_parse),
3170 OPT_BOOLEAN(0, "group", &record.opts.group,
3171 "put the counters into a counter group"),
3172 OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
3173 NULL, "enables call-graph recording" ,
3174 &record_callchain_opt),
3175 OPT_CALLBACK(0, "call-graph", &record.opts,
3176 "record_mode[,record_size]", record_callchain_help,
3177 &record_parse_callchain_opt),
3178 OPT_INCR('v', "verbose", &verbose,
3179 "be more verbose (show counter open errors, etc)"),
3180 OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
3181 OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
3182 "per thread counts"),
3183 OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
3184 OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr,
3185 "Record the sample physical addresses"),
3186 OPT_BOOLEAN(0, "data-page-size", &record.opts.sample_data_page_size,
3187 "Record the sampled data address data page size"),
3188 OPT_BOOLEAN(0, "code-page-size", &record.opts.sample_code_page_size,
3189 "Record the sampled code address (ip) page size"),
3190 OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"),
3191 OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
3192 &record.opts.sample_time_set,
3193 "Record the sample timestamps"),
3194 OPT_BOOLEAN_SET('P', "period", &record.opts.period, &record.opts.period_set,
3195 "Record the sample period"),
3196 OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
3198 OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache,
3199 &record.no_buildid_cache_set,
3200 "do not update the buildid cache"),
3201 OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid,
3202 &record.no_buildid_set,
3203 "do not collect buildids in perf.data"),
3204 OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
3205 "monitor event in cgroup name only",
3207 OPT_INTEGER('D', "delay", &record.opts.initial_delay,
3208 "ms to wait before starting measurement after program start (-1: start with events disabled)"),
3209 OPT_BOOLEAN(0, "kcore", &record.opts.kcore, "copy /proc/kcore"),
3210 OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
3213 OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
3214 "branch any", "sample any taken branches",
3215 parse_branch_stack),
3217 OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
3218 "branch filter mask", "branch stack filter modes",
3219 parse_branch_stack),
3220 OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
3221 "sample by weight (on special events only)"),
3222 OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
3223 "sample transaction flags (special events only)"),
3224 OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
3225 "use per-thread mmaps"),
3226 OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
3227 "sample selected machine registers on interrupt,"
3228 " use '-I?' to list register names", parse_intr_regs),
3229 OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register",
3230 "sample selected machine registers on interrupt,"
3231 " use '--user-regs=?' to list register names", parse_user_regs),
3232 OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
3233 "Record running/enabled time of read (:S) events"),
3234 OPT_CALLBACK('k', "clockid", &record.opts,
3235 "clockid", "clockid to use for events, see clock_gettime()",
3237 OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
3238 "opts", "AUX area tracing Snapshot Mode", ""),
3239 OPT_STRING_OPTARG(0, "aux-sample", &record.opts.auxtrace_sample_opts,
3240 "opts", "sample AUX area", ""),
3241 OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout,
3242 "per thread proc mmap processing timeout in ms"),
3243 OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces,
3244 "Record namespaces events"),
3245 OPT_BOOLEAN(0, "all-cgroups", &record.opts.record_cgroup,
3246 "Record cgroup events"),
3247 OPT_BOOLEAN_SET(0, "switch-events", &record.opts.record_switch_events,
3248 &record.opts.record_switch_events_set,
3249 "Record context switch events"),
3250 OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
3251 "Configure all used events to run in kernel space.",
3252 PARSE_OPT_EXCLUSIVE),
3253 OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user,
3254 "Configure all used events to run in user space.",
3255 PARSE_OPT_EXCLUSIVE),
3256 OPT_BOOLEAN(0, "kernel-callchains", &record.opts.kernel_callchains,
3257 "collect kernel callchains"),
3258 OPT_BOOLEAN(0, "user-callchains", &record.opts.user_callchains,
3259 "collect user callchains"),
3260 OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path",
3261 "clang binary to use for compiling BPF scriptlets"),
3262 OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options",
3263 "options passed to clang when compiling BPF scriptlets"),
3264 OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
3265 "file", "vmlinux pathname"),
3266 OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
3267 "Record build-id of all DSOs regardless of hits"),
3268 OPT_BOOLEAN(0, "buildid-mmap", &record.buildid_mmap,
3269 "Record build-id in map events"),
3270 OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
3271 "append timestamp to output filename"),
3272 OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary,
3273 "Record timestamp boundary (time of first/last samples)"),
3274 OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str,
3275 &record.switch_output.set, "signal or size[BKMG] or time[smhd]",
3276 "Switch output when receiving SIGUSR2 (signal) or cross a size or time threshold",
3278 OPT_CALLBACK_SET(0, "switch-output-event", &record.sb_evlist, &record.switch_output_event_set, "switch output event",
3279 "switch output event selector. use 'perf list' to list available events",
3280 parse_events_option_new_evlist),
3281 OPT_INTEGER(0, "switch-max-files", &record.switch_output.num_files,
3282 "Limit number of switch output generated files"),
3283 OPT_BOOLEAN(0, "dry-run", &dry_run,
3284 "Parse options then exit"),
3285 #ifdef HAVE_AIO_SUPPORT
3286 OPT_CALLBACK_OPTARG(0, "aio", &record.opts,
3287 &nr_cblocks_default, "n", "Use <n> control blocks in asynchronous trace writing mode (default: 1, max: 4)",
3290 OPT_CALLBACK(0, "affinity", &record.opts, "node|cpu",
3291 "Set affinity mask of trace reading thread to NUMA node cpu mask or cpu of processed mmap buffer",
3292 record__parse_affinity),
3293 #ifdef HAVE_ZSTD_SUPPORT
3294 OPT_CALLBACK_OPTARG('z', "compression-level", &record.opts, &comp_level_default, "n",
3295 "Compress records using specified level (default: 1 - fastest compression, 22 - greatest compression)",
3296 record__parse_comp_level),
3298 OPT_CALLBACK(0, "max-size", &record.output_max_size,
3299 "size", "Limit the maximum size of the output file", parse_output_max_size),
3300 OPT_UINTEGER(0, "num-thread-synthesize",
3301 &record.opts.nr_threads_synthesize,
3302 "number of threads to run for event synthesis"),
3304 OPT_CALLBACK(0, "pfm-events", &record.evlist, "event",
3305 "libpfm4 event selector. use 'perf list' to list available events",
3306 parse_libpfm_events_option),
3308 OPT_CALLBACK(0, "control", &record.opts, "fd:ctl-fd[,ack-fd] or fifo:ctl-fifo[,ack-fifo]",
3309 "Listen on ctl-fd descriptor for command to control measurement ('enable': enable events, 'disable': disable events,\n"
3310 "\t\t\t 'snapshot': AUX area tracing snapshot).\n"
3311 "\t\t\t Optionally send control command completion ('ack\\n') to ack-fd descriptor.\n"
3312 "\t\t\t Alternatively, ctl-fifo / ack-fifo will be opened and used as ctl-fd / ack-fd.",
3313 parse_control_option),
3314 OPT_CALLBACK(0, "synth", &record.opts, "no|all|task|mmap|cgroup",
3315 "Fine-tune event synthesis: default=all", parse_record_synth_option),
3316 OPT_STRING_OPTARG_SET(0, "debuginfod", &record.debuginfod.urls,
3317 &record.debuginfod.set, "debuginfod urls",
3318 "Enable debuginfod data retrieval from DEBUGINFOD_URLS or specified urls",
3320 OPT_CALLBACK_OPTARG(0, "threads", &record.opts, NULL, "spec",
3321 "write collected trace data into several data files using parallel threads",
3322 record__parse_threads),
3326 struct option *record_options = __record_options;
3328 static void record__mmap_cpu_mask_init(struct mmap_cpu_mask *mask, struct perf_cpu_map *cpus)
3332 for (c = 0; c < cpus->nr; c++)
3333 set_bit(cpus->map[c].cpu, mask->bits);
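/*
 * Initialize a mask from a CPU list spec as understood by
 * perf_cpu_map__new(), e.g. "0-3" or "0,2,4" (illustrative values).
 */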
3336 static int record__mmap_cpu_mask_init_spec(struct mmap_cpu_mask *mask, const char *mask_spec)
3338 struct perf_cpu_map *cpus;
3340 cpus = perf_cpu_map__new(mask_spec);
3344 bitmap_zero(mask->bits, mask->nbits);
3345 record__mmap_cpu_mask_init(mask, cpus);
3346 perf_cpu_map__put(cpus);
3351 static void record__free_thread_masks(struct record *rec, int nr_threads)
3355 if (rec->thread_masks)
3356 for (t = 0; t < nr_threads; t++)
3357 record__thread_mask_free(&rec->thread_masks[t]);
3359 zfree(&rec->thread_masks);
3362 static int record__alloc_thread_masks(struct record *rec, int nr_threads, int nr_bits)
3366 rec->thread_masks = zalloc(nr_threads * sizeof(*(rec->thread_masks)));
3367 if (!rec->thread_masks) {
3368 pr_err("Failed to allocate thread masks\n");
3372 for (t = 0; t < nr_threads; t++) {
3373 ret = record__thread_mask_alloc(&rec->thread_masks[t], nr_bits);
3375 pr_err("Failed to allocate thread masks[%d]\n", t);
3383 record__free_thread_masks(rec, nr_threads);
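/*
 * Default "cpu" spec: one thread per CPU in the map; each thread's
 * maps and affinity masks get just that one CPU's bit set.
 */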
3388 static int record__init_thread_cpu_masks(struct record *rec, struct perf_cpu_map *cpus)
3390 int t, ret, nr_cpus = perf_cpu_map__nr(cpus);
3392 ret = record__alloc_thread_masks(rec, nr_cpus, cpu__max_cpu().cpu);
3396 rec->nr_threads = nr_cpus;
3397 pr_debug("nr_threads: %d\n", rec->nr_threads);
3399 for (t = 0; t < rec->nr_threads; t++) {
3400 set_bit(cpus->map[t].cpu, rec->thread_masks[t].maps.bits);
3401 set_bit(cpus->map[t].cpu, rec->thread_masks[t].affinity.bits);
3403 pr_debug("thread_masks[%d]: ", t);
3404 mmap_cpu_mask__scnprintf(&rec->thread_masks[t].maps, "maps");
3405 pr_debug("thread_masks[%d]: ", t);
3406 mmap_cpu_mask__scnprintf(&rec->thread_masks[t].affinity, "affinity");
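/*
 * Build per-thread masks from textual maps/affinity spec pairs. Each
 * spec must intersect the cpus map passed in (empty results are
 * rejected) and must not overlap specs accepted earlier (accumulated
 * in full_mask); accepted pairs are appended to rec->thread_masks.
 */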
3413 static int record__init_thread_masks_spec(struct record *rec, struct perf_cpu_map *cpus,
3414 const char **maps_spec, const char **affinity_spec,
3419 struct mmap_cpu_mask cpus_mask;
3420 struct thread_mask thread_mask, full_mask, *thread_masks;
3422 ret = record__mmap_cpu_mask_alloc(&cpus_mask, cpu__max_cpu().cpu);
3424 pr_err("Failed to allocate CPUs mask\n");
3427 record__mmap_cpu_mask_init(&cpus_mask, cpus);
3429 ret = record__thread_mask_alloc(&full_mask, cpu__max_cpu().cpu);
3431 pr_err("Failed to allocate full mask\n");
3432 goto out_free_cpu_mask;
3435 ret = record__thread_mask_alloc(&thread_mask, cpu__max_cpu().cpu);
3437 pr_err("Failed to allocate thread mask\n");
3438 goto out_free_full_and_cpu_masks;
3441 for (s = 0; s < nr_spec; s++) {
3442 ret = record__mmap_cpu_mask_init_spec(&thread_mask.maps, maps_spec[s]);
3444 pr_err("Failed to initialize maps thread mask\n");
3447 ret = record__mmap_cpu_mask_init_spec(&thread_mask.affinity, affinity_spec[s]);
3449 pr_err("Failed to initialize affinity thread mask\n");
3453 /* ignore invalid CPUs but do not allow empty masks */
3454 if (!bitmap_and(thread_mask.maps.bits, thread_mask.maps.bits,
3455 cpus_mask.bits, thread_mask.maps.nbits)) {
3456 pr_err("Empty maps mask: %s\n", maps_spec[s]);
3460 if (!bitmap_and(thread_mask.affinity.bits, thread_mask.affinity.bits,
3461 cpus_mask.bits, thread_mask.affinity.nbits)) {
3462 pr_err("Empty affinity mask: %s\n", affinity_spec[s]);
3467 /* do not allow intersection with other masks (full_mask) */
3468 if (bitmap_intersects(thread_mask.maps.bits, full_mask.maps.bits,
3469 thread_mask.maps.nbits)) {
3470 pr_err("Intersecting maps mask: %s\n", maps_spec[s]);
3474 if (bitmap_intersects(thread_mask.affinity.bits, full_mask.affinity.bits,
3475 thread_mask.affinity.nbits)) {
3476 pr_err("Intersecting affinity mask: %s\n", affinity_spec[s]);
3481 bitmap_or(full_mask.maps.bits, full_mask.maps.bits,
3482 thread_mask.maps.bits, full_mask.maps.nbits);
3483 bitmap_or(full_mask.affinity.bits, full_mask.affinity.bits,
3484 thread_mask.affinity.bits, full_mask.affinity.nbits);
3486 thread_masks = realloc(rec->thread_masks, (t + 1) * sizeof(struct thread_mask));
3487 if (!thread_masks) {
3488 pr_err("Failed to reallocate thread masks\n");
3492 rec->thread_masks = thread_masks;
3493 rec->thread_masks[t] = thread_mask;
3495 pr_debug("thread_masks[%d]: ", t);
3496 mmap_cpu_mask__scnprintf(&rec->thread_masks[t].maps, "maps");
3497 pr_debug("thread_masks[%d]: ", t);
3498 mmap_cpu_mask__scnprintf(&rec->thread_masks[t].affinity, "affinity");
3501 ret = record__thread_mask_alloc(&thread_mask, cpu__max_cpu().cpu);
3503 pr_err("Failed to allocate thread mask\n");
3504 goto out_free_full_and_cpu_masks;
3507 rec->nr_threads = t;
3508 pr_debug("nr_threads: %d\n", rec->nr_threads);
3509 if (!rec->nr_threads)
3513 record__thread_mask_free(&thread_mask);
3514 out_free_full_and_cpu_masks:
3515 record__thread_mask_free(&full_mask);
3517 record__mmap_cpu_mask_free(&cpus_mask);
3522 static int record__init_thread_core_masks(struct record *rec, struct perf_cpu_map *cpus)
3525 struct cpu_topology *topo;
3527 topo = cpu_topology__new();
3529 pr_err("Failed to allocate CPU topology\n");
3533 ret = record__init_thread_masks_spec(rec, cpus, topo->core_cpus_list,
3534 topo->core_cpus_list, topo->core_cpus_lists);
3535 cpu_topology__delete(topo);
3540 static int record__init_thread_package_masks(struct record *rec, struct perf_cpu_map *cpus)
3543 struct cpu_topology *topo;
3545 topo = cpu_topology__new();
3547 pr_err("Failed to allocate CPU topology\n");
3551 ret = record__init_thread_masks_spec(rec, cpus, topo->package_cpus_list,
3552 topo->package_cpus_list, topo->package_cpus_lists);
3553 cpu_topology__delete(topo);
3558 static int record__init_thread_numa_masks(struct record *rec, struct perf_cpu_map *cpus)
3563 struct numa_topology *topo;
3565 topo = numa_topology__new();
3567 pr_err("Failed to allocate NUMA topology\n");
3571 spec = zalloc(topo->nr * sizeof(char *));
3573 pr_err("Failed to allocate NUMA spec\n");
3575 goto out_delete_topo;
3577 for (s = 0; s < topo->nr; s++)
3578 spec[s] = topo->nodes[s].cpus;
3580 ret = record__init_thread_masks_spec(rec, cpus, spec, spec, topo->nr);
3585 numa_topology__delete(topo);
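/*
 * User spec format, as tokenized below: colon-separated sections of
 * "<maps cpu list>/<affinity cpu list>". Illustrative value:
 *
 *   --threads=0-3/0:4-7/4
 *
 * i.e. one thread reading the mmaps of CPUs 0-3 with affinity to CPU 0
 * and another for CPUs 4-7 with affinity to CPU 4.
 */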
3590 static int record__init_thread_user_masks(struct record *rec, struct perf_cpu_map *cpus)
3594 char **maps_spec = NULL, **affinity_spec = NULL, **tmp_spec;
3595 char *user_spec, *spec, *spec_ptr, *mask, *mask_ptr, *dup_mask = NULL;
3597 for (t = 0, user_spec = (char *)rec->opts.threads_user_spec; ; t++, user_spec = NULL) {
3598 spec = strtok_r(user_spec, ":", &spec_ptr);
3601 pr_debug2("threads_spec[%d]: %s\n", t, spec);
3602 mask = strtok_r(spec, "/", &mask_ptr);
3605 pr_debug2(" maps mask: %s\n", mask);
3606 tmp_spec = realloc(maps_spec, (nr_spec + 1) * sizeof(char *));
3608 pr_err("Failed to reallocate maps spec\n");
3612 maps_spec = tmp_spec;
3613 maps_spec[nr_spec] = dup_mask = strdup(mask);
3614 if (!maps_spec[nr_spec]) {
3615 pr_err("Failed to allocate maps spec[%d]\n", nr_spec);
3619 mask = strtok_r(NULL, "/", &mask_ptr);
3621 pr_err("Invalid thread maps or affinity specs\n");
3625 pr_debug2(" affinity mask: %s\n", mask);
3626 tmp_spec = realloc(affinity_spec, (nr_spec + 1) * sizeof(char *));
3628 pr_err("Failed to reallocate affinity spec\n");
3632 affinity_spec = tmp_spec;
3633 affinity_spec[nr_spec] = strdup(mask);
3634 if (!affinity_spec[nr_spec]) {
3635 pr_err("Failed to allocate affinity spec[%d]\n", nr_spec);
3643 ret = record__init_thread_masks_spec(rec, cpus, (const char **)maps_spec,
3644 (const char **)affinity_spec, nr_spec);
3648 for (s = 0; s < nr_spec; s++) {
3652 free(affinity_spec[s]);
3654 free(affinity_spec);
3660 static int record__init_thread_default_masks(struct record *rec, struct perf_cpu_map *cpus)
3664 ret = record__alloc_thread_masks(rec, 1, cpu__max_cpu().cpu);
3668 record__mmap_cpu_mask_init(&rec->thread_masks->maps, cpus);
3670 rec->nr_threads = 1;
3675 static int record__init_thread_masks(struct record *rec)
3678 struct perf_cpu_map *cpus = rec->evlist->core.user_requested_cpus;
3680 if (!record__threads_enabled(rec))
3681 return record__init_thread_default_masks(rec, cpus);
3683 switch (rec->opts.threads_spec) {
3684 case THREAD_SPEC__CPU:
3685 ret = record__init_thread_cpu_masks(rec, cpus);
3687 case THREAD_SPEC__CORE:
3688 ret = record__init_thread_core_masks(rec, cpus);
3690 case THREAD_SPEC__PACKAGE:
3691 ret = record__init_thread_package_masks(rec, cpus);
3693 case THREAD_SPEC__NUMA:
3694 ret = record__init_thread_numa_masks(rec, cpus);
3696 case THREAD_SPEC__USER:
3697 ret = record__init_thread_user_masks(rec, cpus);
3706 int cmd_record(int argc, const char **argv)
3709 struct record *rec = &record;
3710 char errbuf[BUFSIZ];
3712 setlocale(LC_ALL, "");
3714 #ifndef HAVE_LIBBPF_SUPPORT
3715 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c)
3716 set_nobuild('\0', "clang-path", true);
3717 set_nobuild('\0', "clang-opt", true);
3721 #ifndef HAVE_BPF_PROLOGUE
3722 # if !defined (HAVE_DWARF_SUPPORT)
3723 # define REASON "NO_DWARF=1"
3724 # elif !defined (HAVE_LIBBPF_SUPPORT)
3725 # define REASON "NO_LIBBPF=1"
3727 # define REASON "this architecture doesn't support BPF prologue"
3729 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c)
3730 set_nobuild('\0', "vmlinux", true);
3735 rec->opts.affinity = PERF_AFFINITY_SYS;
3737 rec->evlist = evlist__new();
3738 if (rec->evlist == NULL)
3741 err = perf_config(perf_record_config, rec);
3745 argc = parse_options(argc, argv, record_options, record_usage,
3746 PARSE_OPT_STOP_AT_NON_OPTION);
3748 perf_quiet_option();
3750 err = symbol__validate_sym_arguments();
3754 perf_debuginfod_setup(&record.debuginfod);
3756 /* Make system wide (-a) the default target. */
3757 if (!argc && target__none(&rec->opts.target))
3758 rec->opts.target.system_wide = true;
3760 if (nr_cgroups && !rec->opts.target.system_wide) {
3761 usage_with_options_msg(record_usage, record_options,
3762 "cgroup monitoring only available in system-wide mode");
3766 if (rec->buildid_mmap) {
3767 if (!perf_can_record_build_id()) {
3768 pr_err("Failed: no support to record build id in mmap events, update your kernel.\n");
3772 pr_debug("Enabling build id in mmap2 events.\n");
3773 /* Enable mmap build id synthesizing. */
3774 symbol_conf.buildid_mmap2 = true;
3775 /* Enable perf_event_attr::build_id bit. */
3776 rec->opts.build_id = true;
3777 /* Disable build id cache. */
3778 rec->no_buildid = true;
3781 if (rec->opts.record_cgroup && !perf_can_record_cgroup()) {
3782 pr_err("Kernel has no cgroup sampling support.\n");
3787 if (rec->opts.kcore || record__threads_enabled(rec))
3788 rec->data.is_dir = true;
3790 if (record__threads_enabled(rec)) {
3791 if (rec->opts.affinity != PERF_AFFINITY_SYS) {
3792 pr_err("--affinity option is mutually exclusive to parallel streaming mode.\n");
3795 if (record__aio_enabled(rec)) {
3796 pr_err("Asynchronous streaming mode (--aio) is mutually exclusive to parallel streaming mode.\n");
3801 if (rec->opts.comp_level != 0) {
3802 pr_debug("Compression enabled, disabling build id collection at the end of the session.\n");
3803 rec->no_buildid = true;
3806 if (rec->opts.record_switch_events &&
3807 !perf_can_record_switch_events()) {
3808 ui__error("kernel does not support recording context switch events\n");
3809 parse_options_usage(record_usage, record_options, "switch-events", 0);
3814 if (switch_output_setup(rec)) {
3815 parse_options_usage(record_usage, record_options, "switch-output", 0);
3820 if (rec->switch_output.time) {
3821 signal(SIGALRM, alarm_sig_handler);
3822 alarm(rec->switch_output.time);
3825 if (rec->switch_output.num_files) {
3826 rec->switch_output.filenames = calloc(rec->switch_output.num_files,
3827 sizeof(char *));
3828 if (!rec->switch_output.filenames) {
3834 if (rec->timestamp_filename && record__threads_enabled(rec)) {
3835 rec->timestamp_filename = false;
3836 pr_warning("WARNING: --timestamp-filename option is not available in parallel streaming mode.\n");
3840 * Allow aliases to facilitate the lookup of symbols for address
3841 * filters. Refer to auxtrace_parse_filters().
3843 symbol_conf.allow_aliases = true;
3847 err = record__auxtrace_init(rec);
3854 err = bpf__setup_stdout(rec->evlist);
3856 bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf));
3857 pr_err("ERROR: Setup BPF stdout failed: %s\n",
3864 if (rec->no_buildid_cache || rec->no_buildid) {
3865 disable_buildid_cache();
3866 } else if (rec->switch_output.enabled) {
3868 * In 'perf record --switch-output', disable buildid
3869 * generation by default to reduce data file switching
3870 * overhead. Still generate buildids if they are required, e.g.:
3873 * perf record --switch-output --no-no-buildid \
3874 * --no-no-buildid-cache
3876 * The following code is equivalent to:
3878 * if ((rec->no_buildid || !rec->no_buildid_set) &&
3879 * (rec->no_buildid_cache || !rec->no_buildid_cache_set))
3880 * disable_buildid_cache();
3882 bool disable = true;
3884 if (rec->no_buildid_set && !rec->no_buildid)
3886 if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
3889 rec->no_buildid = true;
3890 rec->no_buildid_cache = true;
3891 disable_buildid_cache();
3895 if (record.opts.overwrite)
3896 record.opts.tail_synthesize = true;
3898 if (rec->evlist->core.nr_entries == 0) {
3899 if (perf_pmu__has_hybrid()) {
3900 err = evlist__add_default_hybrid(rec->evlist,
3901 !record.opts.no_samples);
3903 err = __evlist__add_default(rec->evlist,
3904 !record.opts.no_samples);
3908 pr_err("Not enough memory for event selector list\n");
3913 if (rec->opts.target.tid && !rec->opts.no_inherit_set)
3914 rec->opts.no_inherit = true;
3916 err = target__validate(&rec->opts.target);
3918 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
3919 ui__warning("%s\n", errbuf);
3922 err = target__parse_uid(&rec->opts.target);
3924 int saved_errno = errno;
3926 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
3927 ui__error("%s", errbuf);
3933 /* Enable ignoring missing threads when the -u/-p options are defined. */
3934 rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX || rec->opts.target.pid;
3936 if (evlist__fix_hybrid_cpus(rec->evlist, rec->opts.target.cpu_list)) {
3937 pr_err("failed to use cpu list %s\n",
3938 rec->opts.target.cpu_list);
3942 rec->opts.target.hybrid = perf_pmu__has_hybrid();
3944 if (callchain_param.enabled && callchain_param.record_mode == CALLCHAIN_FP)
3945 arch__add_leaf_frame_record_opts(&rec->opts);
3948 if (evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
3949 usage_with_options(record_usage, record_options);
3951 err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
3956 * We take all buildids when the file contains
3957 * AUX area tracing data because we do not decode the
3958 * trace, as decoding it would take too long.
3960 if (rec->opts.full_auxtrace)
3961 rec->buildid_all = true;
3963 if (rec->opts.text_poke) {
3964 err = record__config_text_poke(rec->evlist);
3966 pr_err("record__config_text_poke failed, error %d\n", err);
3971 if (record_opts__config(&rec->opts)) {
3976 err = record__init_thread_masks(rec);
3978 pr_err("Failed to initialize parallel data streaming masks\n");
3982 if (rec->opts.nr_cblocks > nr_cblocks_max)
3983 rec->opts.nr_cblocks = nr_cblocks_max;
3984 pr_debug("nr_cblocks: %d\n", rec->opts.nr_cblocks);
3986 pr_debug("affinity: %s\n", affinity_tags[rec->opts.affinity]);
3987 pr_debug("mmap flush: %d\n", rec->opts.mmap_flush);
3989 if (rec->opts.comp_level > comp_level_max)
3990 rec->opts.comp_level = comp_level_max;
3991 pr_debug("comp level: %d\n", rec->opts.comp_level);
3993 err = __cmd_record(&record, argc, argv);
3995 evlist__delete(rec->evlist);
3997 auxtrace_record__free(rec->itr);
3999 record__free_thread_masks(rec, rec->nr_threads);
4000 rec->nr_threads = 0;
4001 evlist__close_control(rec->opts.ctl_fd, rec->opts.ctl_fd_ack, &rec->opts.ctl_fd_close);
4005 static void snapshot_sig_handler(int sig __maybe_unused)
4007 struct record *rec = &record;
4009 hit_auxtrace_snapshot_trigger(rec);
4011 if (switch_output_signal(rec))
4012 trigger_hit(&switch_output_trigger);
4015 static void alarm_sig_handler(int sig __maybe_unused)
4017 struct record *rec = &record;
4019 if (switch_output_time(rec))
4020 trigger_hit(&switch_output_trigger);