/*
 * builtin-trace.c
 *
 * Builtin 'trace' command:
 *
 * Display a continuously updated trace of any workload, CPU, specific PID,
 * system wide, etc.  Default format is loosely strace like, but any other
 * event may be specified using --event.
 *
 * Copyright (C) 2012, 2013, 2014, 2015 Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
 *
 * Initially based on the 'trace' prototype by Thomas Gleixner:
 *
 * http://lwn.net/Articles/415728/ ("Announcing a new utility: 'trace'")
 *
 * Released under the GPL v2. (and only v2, not any later version)
 */

#include <traceevent/event-parse.h>
#include <api/fs/tracing_path.h>
#include "util/cgroup.h"
#include "util/color.h"
#include "util/debug.h"
#include "util/event.h"
#include "util/evlist.h"
#include <subcmd/exec-cmd.h>
#include "util/machine.h"
#include "util/path.h"
#include "util/session.h"
#include "util/thread.h"
#include <subcmd/parse-options.h>
#include "util/strlist.h"
#include "util/intlist.h"
#include "util/thread_map.h"
#include "util/stat.h"
#include "trace/beauty/beauty.h"
#include "trace-event.h"
#include "util/parse-events.h"
#include "util/bpf-loader.h"
#include "callchain.h"
#include "print_binary.h"
#include "syscalltbl.h"
#include "rb_resort.h"

#include <linux/err.h>
#include <linux/filter.h>
#include <linux/kernel.h>
#include <linux/random.h>
#include <linux/stringify.h>
#include <linux/time64.h>

#include "sane_ctype.h"

#ifndef O_CLOEXEC
# define O_CLOEXEC		02000000
#endif

#ifndef F_LINUX_SPECIFIC_BASE
# define F_LINUX_SPECIFIC_BASE	1024
#endif
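
/*
 * fcntl(2) commands in the Linux-specific range start at 1024 (F_SETLEASE,
 * F_NOTIFY, etc.), which is why the fcntl_linux_specific_cmds strarray
 * further below is defined with this value as its offset.
 */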

struct trace {
	struct perf_tool	tool;
	struct syscalltbl	*sctbl;
	struct {
		int		max;
		struct syscall	*table;
		struct {
			struct perf_evsel *sys_enter,
					  *sys_exit;
		}		events;
	} syscalls;
	struct record_opts	opts;
	struct perf_evlist	*evlist;
	struct machine		*host;
	struct thread		*current;
	struct cgroup		*cgroup;
	u64			base_time;
	FILE			*output;
	unsigned long		nr_events;
	struct strlist		*ev_qualifier;
	struct {
		size_t		nr;
		int		*entries;
	}			ev_qualifier_ids;
	double			duration_filter;
	double			runtime_ms;
	struct {
		u64		vfs_getname,
				proc_getname;
	} stats;
	unsigned int		max_stack;
	unsigned int		min_stack;
	bool			not_ev_qualifier;
	bool			live;
	bool			full_time;
	bool			multiple_threads;
	bool			summary;
	bool			summary_only;
	bool			failure_only;
	bool			show_comm;
	bool			print_sample;
	bool			show_tool_stats;
	bool			trace_syscalls;
	bool			kernel_syscallchains;
	bool			vfs_getname;
	int			trace_pgfaults;
	int			open_id;
};

struct tp_field {
	int offset;
	union {
		u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
		void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
	};
};

#define TP_UINT_FIELD(bits) \
static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
{ \
	u##bits value; \
	memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
	return value;  \
}

TP_UINT_FIELD(8);
TP_UINT_FIELD(16);
TP_UINT_FIELD(32);
TP_UINT_FIELD(64);

#define TP_UINT_FIELD__SWAPPED(bits) \
static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
{ \
	u##bits value; \
	memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
	return bswap_##bits(value);\
}

TP_UINT_FIELD__SWAPPED(16);
TP_UINT_FIELD__SWAPPED(32);
TP_UINT_FIELD__SWAPPED(64);
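
/*
 * The TP_UINT_FIELD* macros above generate one accessor per integer width
 * (tp_field__u8() ... tp_field__u64(), plus byte-swapping variants), each
 * reading its value with memcpy() because sample->raw_data + offset may not
 * be suitably aligned for a direct load.  tp_field__init_uint() below picks
 * the right accessor based on the field size reported in the tracepoint
 * format file.
 */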

static int tp_field__init_uint(struct tp_field *field,
			       struct format_field *format_field,
			       bool needs_swap)
{
	field->offset = format_field->offset;

	switch (format_field->size) {
	case 1:
		field->integer = tp_field__u8;
		break;
	case 2:
		field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
		break;
	case 4:
		field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
		break;
	case 8:
		field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
		break;
	default:
		return -1;
	}

	return 0;
}

static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
{
	return sample->raw_data + field->offset;
}

static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
{
	field->offset  = format_field->offset;
	field->pointer = tp_field__ptr;
	return 0;
}

struct syscall_tp {
	struct tp_field id;
	struct tp_field args, ret;
};

static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
					  struct tp_field *field,
					  const char *name)
{
	struct format_field *format_field = perf_evsel__field(evsel, name);

	if (format_field == NULL)
		return -1;

	return tp_field__init_uint(field, format_field, evsel->needs_swap);
}

#define perf_evsel__init_sc_tp_uint_field(evsel, name) \
	({ struct syscall_tp *sc = evsel->priv;\
	   perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); })

static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
					 struct tp_field *field,
					 const char *name)
{
	struct format_field *format_field = perf_evsel__field(evsel, name);

	if (format_field == NULL)
		return -1;

	return tp_field__init_ptr(field, format_field);
}

#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
	({ struct syscall_tp *sc = evsel->priv;\
	   perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name); })

static void perf_evsel__delete_priv(struct perf_evsel *evsel)
{
	zfree(&evsel->priv);
	perf_evsel__delete(evsel);
}

static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
{
	evsel->priv = malloc(sizeof(struct syscall_tp));
	if (evsel->priv != NULL) {
		if (perf_evsel__init_sc_tp_uint_field(evsel, id))
			goto out_delete;

		evsel->handler = handler;
		return 0;
	}

	return -ENOMEM;

out_delete:
	zfree(&evsel->priv);
	return -ENOENT;
}

static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
{
	struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);

	/* older kernels (e.g., RHEL6) use syscalls:{enter,exit} */
	if (IS_ERR(evsel))
		evsel = perf_evsel__newtp("syscalls", direction);

	if (IS_ERR(evsel))
		return NULL;

	if (perf_evsel__init_syscall_tp(evsel, handler))
		goto out_delete;

	return evsel;

out_delete:
	perf_evsel__delete_priv(evsel);
	return NULL;
}

#define perf_evsel__sc_tp_uint(evsel, name, sample) \
	({ struct syscall_tp *fields = evsel->priv; \
	   fields->name.integer(&fields->name, sample); })

#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
	({ struct syscall_tp *fields = evsel->priv; \
	   fields->name.pointer(&fields->name, sample); })
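
/*
 * Example: perf_evsel__sc_tp_uint(evsel, id, sample) expands to a statement
 * expression that fetches the syscall_tp from evsel->priv and calls
 * fields->id.integer(&fields->id, sample), i.e. whichever fixed-width
 * accessor tp_field__init_uint() installed for the 'id' field.
 */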

size_t strarray__scnprintf(struct strarray *sa, char *bf, size_t size, const char *intfmt, int val)
{
	int idx = val - sa->offset;

	if (idx < 0 || idx >= sa->nr_entries || sa->entries[idx] == NULL)
		return scnprintf(bf, size, intfmt, val);

	return scnprintf(bf, size, "%s", sa->entries[idx]);
}
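
/*
 * Example: for the "whences" strarray defined below (offset 0),
 * strarray__scnprintf(&strarray__whences, bf, size, "%d", 1) prints "CUR";
 * an out-of-range or NULL entry falls back to the integer format, here "%d".
 */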

static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
						const char *intfmt,
						struct syscall_arg *arg)
{
	return strarray__scnprintf(arg->parm, bf, size, intfmt, arg->val);
}

static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
					      struct syscall_arg *arg)
{
	return __syscall_arg__scnprintf_strarray(bf, size, "%d", arg);
}

#define SCA_STRARRAY syscall_arg__scnprintf_strarray

struct strarrays {
	int		nr_entries;
	struct strarray **entries;
};

#define DEFINE_STRARRAYS(array) struct strarrays strarrays__##array = { \
	.nr_entries = ARRAY_SIZE(array), \
	.entries = array, \
}

size_t syscall_arg__scnprintf_strarrays(char *bf, size_t size,
					struct syscall_arg *arg)
{
	struct strarrays *sas = arg->parm;
	int i;

	for (i = 0; i < sas->nr_entries; ++i) {
		struct strarray *sa = sas->entries[i];
		int idx = arg->val - sa->offset;

		if (idx >= 0 && idx < sa->nr_entries) {
			if (sa->entries[idx] == NULL)
				break;
			return scnprintf(bf, size, "%s", sa->entries[idx]);
		}
	}

	return scnprintf(bf, size, "%d", arg->val);
}

#define SCA_STRARRAYS syscall_arg__scnprintf_strarrays

#ifndef AT_FDCWD
#define AT_FDCWD	-100
#endif

static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
					   struct syscall_arg *arg)
{
	int fd = arg->val;

	if (fd == AT_FDCWD)
		return scnprintf(bf, size, "CWD");

	return syscall_arg__scnprintf_fd(bf, size, arg);
}

#define SCA_FDAT syscall_arg__scnprintf_fd_at

static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
					      struct syscall_arg *arg);

#define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd

size_t syscall_arg__scnprintf_hex(char *bf, size_t size, struct syscall_arg *arg)
{
	return scnprintf(bf, size, "%#lx", arg->val);
}

size_t syscall_arg__scnprintf_int(char *bf, size_t size, struct syscall_arg *arg)
{
	return scnprintf(bf, size, "%d", arg->val);
}

size_t syscall_arg__scnprintf_long(char *bf, size_t size, struct syscall_arg *arg)
{
	return scnprintf(bf, size, "%ld", arg->val);
}

static const char *bpf_cmd[] = {
	"MAP_CREATE", "MAP_LOOKUP_ELEM", "MAP_UPDATE_ELEM", "MAP_DELETE_ELEM",
	"MAP_GET_NEXT_KEY", "PROG_LOAD",
};
static DEFINE_STRARRAY(bpf_cmd);

static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);

static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
static DEFINE_STRARRAY(itimers);

static const char *keyctl_options[] = {
	"GET_KEYRING_ID", "JOIN_SESSION_KEYRING", "UPDATE", "REVOKE", "CHOWN",
	"SETPERM", "DESCRIBE", "CLEAR", "LINK", "UNLINK", "SEARCH", "READ",
	"INSTANTIATE", "NEGATE", "SET_REQKEY_KEYRING", "SET_TIMEOUT",
	"ASSUME_AUTHORITY", "GET_SECURITY", "SESSION_TO_PARENT", "REJECT",
	"INSTANTIATE_IOV", "INVALIDATE", "GET_PERSISTENT",
};
static DEFINE_STRARRAY(keyctl_options);

static const char *whences[] = { "SET", "CUR", "END",
#ifdef SEEK_DATA
"DATA",
#endif
#ifdef SEEK_HOLE
"HOLE",
#endif
};
static DEFINE_STRARRAY(whences);

static const char *fcntl_cmds[] = {
	"DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
	"SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "GETLK64",
	"SETLK64", "SETLKW64", "SETOWN_EX", "GETOWN_EX",
	"GETOWNER_UIDS",
};
static DEFINE_STRARRAY(fcntl_cmds);

static const char *fcntl_linux_specific_cmds[] = {
	"SETLEASE", "GETLEASE", "NOTIFY", [5] = "CANCELLK", "DUPFD_CLOEXEC",
	"SETPIPE_SZ", "GETPIPE_SZ", "ADD_SEALS", "GET_SEALS",
	"GET_RW_HINT", "SET_RW_HINT", "GET_FILE_RW_HINT", "SET_FILE_RW_HINT",
};

static DEFINE_STRARRAY_OFFSET(fcntl_linux_specific_cmds, F_LINUX_SPECIFIC_BASE);

static struct strarray *fcntl_cmds_arrays[] = {
	&strarray__fcntl_cmds,
	&strarray__fcntl_linux_specific_cmds,
};

static DEFINE_STRARRAYS(fcntl_cmds_arrays);
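
/*
 * With the two arrays combined above, an fcntl cmd of 0 resolves to "DUPFD"
 * via strarray__fcntl_cmds (offset 0) and a cmd of 1024 resolves to
 * "SETLEASE" via strarray__fcntl_linux_specific_cmds (offset
 * F_LINUX_SPECIFIC_BASE), as walked in syscall_arg__scnprintf_strarrays().
 */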

static const char *rlimit_resources[] = {
	"CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
	"MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
	"RTTIME",
};
static DEFINE_STRARRAY(rlimit_resources);

static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
static DEFINE_STRARRAY(sighow);

static const char *clockid[] = {
	"REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
	"MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE", "BOOTTIME",
	"REALTIME_ALARM", "BOOTTIME_ALARM", "SGI_CYCLE", "TAI"
};
static DEFINE_STRARRAY(clockid);

static const char *socket_families[] = {
	"UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
	"BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
	"SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
	"RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
	"BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
	"ALG", "NFC", "VSOCK",
};
static DEFINE_STRARRAY(socket_families);

static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
						 struct syscall_arg *arg)
{
	size_t printed = 0;
	int mode = arg->val;

	if (mode == F_OK) /* 0 */
		return scnprintf(bf, size, "F");

#define	P_MODE(n) \
	if (mode & n##_OK) { \
		printed += scnprintf(bf + printed, size - printed, "%s", #n); \
		mode &= ~n##_OK; \
	}

	P_MODE(R);
	P_MODE(W);
	P_MODE(X);
#undef P_MODE

	if (mode)
		printed += scnprintf(bf + printed, size - printed, "|%#x", mode);

	return printed;
}

#define SCA_ACCMODE syscall_arg__scnprintf_access_mode

static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
					      struct syscall_arg *arg);

#define SCA_FILENAME syscall_arg__scnprintf_filename

static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
						struct syscall_arg *arg)
{
	int printed = 0, flags = arg->val;

#define	P_FLAG(n) \
	if (flags & O_##n) { \
		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
		flags &= ~O_##n; \
	}

	P_FLAG(CLOEXEC);
	P_FLAG(NONBLOCK);
#undef P_FLAG

	if (flags)
		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);

	return printed;
}

#define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags

#ifndef GRND_NONBLOCK
#define GRND_NONBLOCK	0x0001
#endif
#ifndef GRND_RANDOM
#define GRND_RANDOM	0x0002
#endif

static size_t syscall_arg__scnprintf_getrandom_flags(char *bf, size_t size,
						     struct syscall_arg *arg)
{
	int printed = 0, flags = arg->val;

#define	P_FLAG(n) \
	if (flags & GRND_##n) { \
		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
		flags &= ~GRND_##n; \
	}

	P_FLAG(RANDOM);
	P_FLAG(NONBLOCK);
#undef P_FLAG

	if (flags)
		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);

	return printed;
}

#define SCA_GETRANDOM_FLAGS syscall_arg__scnprintf_getrandom_flags

#define STRARRAY(name, array) \
	  { .scnprintf	= SCA_STRARRAY, \
	    .parm	= &strarray__##array, }
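
/*
 * STRARRAY(name, array) is used in the syscall_fmts table below to associate
 * an argument with a string table, e.g. STRARRAY(cmd, bpf_cmd) makes bpf(2)'s
 * first argument print as "MAP_CREATE" instead of 0.
 */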

#include "trace/beauty/arch_errno_names.c"
#include "trace/beauty/eventfd.c"
#include "trace/beauty/futex_op.c"
#include "trace/beauty/futex_val3.c"
#include "trace/beauty/mmap.c"
#include "trace/beauty/mode_t.c"
#include "trace/beauty/msg_flags.c"
#include "trace/beauty/open_flags.c"
#include "trace/beauty/perf_event_open.c"
#include "trace/beauty/pid.c"
#include "trace/beauty/sched_policy.c"
#include "trace/beauty/seccomp.c"
#include "trace/beauty/signum.c"
#include "trace/beauty/socket_type.c"
#include "trace/beauty/waitid_options.c"
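
/*
 * The 'beauty' files above are included as .c on purpose: they are small
 * scnprintf() based pretty-printers for specific syscall arguments, compiled
 * directly into this file, most of them defining SCA_* formatters used in
 * the syscall_fmts table below.
 */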

struct syscall_arg_fmt {
	size_t	   (*scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
	void	   *parm;
	const char *name;
	bool	   show_zero;
};

static struct syscall_fmt {
	const char *name;
	const char *alias;
	struct syscall_arg_fmt arg[6];
	u8	   nr_args;
	bool	   errpid;
	bool	   timeout;
	bool	   hexret;
} syscall_fmts[] = {
	{ .name	= "access",
	  .arg = { [1] = { .scnprintf = SCA_ACCMODE,	/* mode */ }, }, },
	{ .name	= "bpf",
	  .arg = { [0] = STRARRAY(cmd, bpf_cmd), }, },
	{ .name	= "brk",	    .hexret = true,
	  .arg = { [0] = { .scnprintf = SCA_HEX, /* brk */ }, }, },
	{ .name	= "clock_gettime",
	  .arg = { [0] = STRARRAY(clk_id, clockid), }, },
	{ .name	= "clone",	    .errpid = true, .nr_args = 5,
	  .arg = { [0] = { .name = "flags",	    .scnprintf = SCA_CLONE_FLAGS, },
		   [1] = { .name = "child_stack",   .scnprintf = SCA_HEX, },
		   [2] = { .name = "parent_tidptr", .scnprintf = SCA_HEX, },
		   [3] = { .name = "child_tidptr",  .scnprintf = SCA_HEX, },
		   [4] = { .name = "tls",	    .scnprintf = SCA_HEX, }, }, },
	{ .name	= "close",
	  .arg = { [0] = { .scnprintf = SCA_CLOSE_FD, /* fd */ }, }, },
	{ .name	= "epoll_ctl",
	  .arg = { [1] = STRARRAY(op, epoll_ctl_ops), }, },
	{ .name	= "eventfd2",
	  .arg = { [1] = { .scnprintf = SCA_EFD_FLAGS, /* flags */ }, }, },
	{ .name	= "fchmodat",
	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
	{ .name	= "fchownat",
	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
	{ .name	= "fcntl",
	  .arg = { [1] = { .scnprintf = SCA_FCNTL_CMD,	/* cmd */
			   .parm      = &strarrays__fcntl_cmds_arrays,
			   .show_zero = true, },
		   [2] = { .scnprintf = SCA_FCNTL_ARG,	/* arg */ }, }, },
	{ .name	= "flock",
	  .arg = { [1] = { .scnprintf = SCA_FLOCK, /* cmd */ }, }, },
	{ .name	= "fstat", .alias = "newfstat", },
	{ .name	= "fstatat", .alias = "newfstatat", },
	{ .name	= "futex",
	  .arg = { [1] = { .scnprintf = SCA_FUTEX_OP,	/* op */ },
		   [5] = { .scnprintf = SCA_FUTEX_VAL3, /* val3 */ }, }, },
	{ .name	= "futimesat",
	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
	{ .name	= "getitimer",
	  .arg = { [0] = STRARRAY(which, itimers), }, },
	{ .name	= "getpid",	    .errpid = true, },
	{ .name	= "getpgid",	    .errpid = true, },
	{ .name	= "getppid",	    .errpid = true, },
	{ .name	= "getrandom",
	  .arg = { [2] = { .scnprintf = SCA_GETRANDOM_FLAGS, /* flags */ }, }, },
	{ .name	= "getrlimit",
	  .arg = { [0] = STRARRAY(resource, rlimit_resources), }, },
	{ .name	= "gettid",	    .errpid = true, },
	{ .name	= "ioctl",
	  .arg = {
#if defined(__i386__) || defined(__x86_64__)
/*
 * FIXME: Make this available to all arches.
 */
		   [1] = { .scnprintf = SCA_IOCTL_CMD,	/* cmd */ },
		   [2] = { .scnprintf = SCA_HEX,	/* arg */ }, }, },
#else
		   [2] = { .scnprintf = SCA_HEX,	/* arg */ }, }, },
#endif
	{ .name	= "kcmp",	    .nr_args = 5,
	  .arg = { [0] = { .name = "pid1",	.scnprintf = SCA_PID, },
		   [1] = { .name = "pid2",	.scnprintf = SCA_PID, },
		   [2] = { .name = "type",	.scnprintf = SCA_KCMP_TYPE, },
		   [3] = { .name = "idx1",	.scnprintf = SCA_KCMP_IDX, },
		   [4] = { .name = "idx2",	.scnprintf = SCA_KCMP_IDX, }, }, },
	{ .name	= "keyctl",
	  .arg = { [0] = STRARRAY(option, keyctl_options), }, },
	{ .name	= "kill",
	  .arg = { [1] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
	{ .name	= "linkat",
	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
	{ .name	= "lseek",
	  .arg = { [2] = STRARRAY(whence, whences), }, },
	{ .name	= "lstat", .alias = "newlstat", },
	{ .name	= "madvise",
	  .arg = { [0] = { .scnprintf = SCA_HEX,      /* start */ },
		   [2] = { .scnprintf = SCA_MADV_BHV, /* behavior */ }, }, },
	{ .name	= "mkdirat",
	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
	{ .name	= "mknodat",
	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
	{ .name	= "mlock",
	  .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ }, }, },
	{ .name	= "mlockall",
	  .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ }, }, },
	{ .name	= "mmap",	    .hexret = true,
/* The standard mmap maps to old_mmap on s390x */
#if defined(__s390x__)
	.alias = "old_mmap",
#endif
	  .arg = { [0] = { .scnprintf = SCA_HEX,	/* addr */ },
		   [2] = { .scnprintf = SCA_MMAP_PROT,	/* prot */ },
		   [3] = { .scnprintf = SCA_MMAP_FLAGS,	/* flags */ }, }, },
	{ .name	= "mprotect",
	  .arg = { [0] = { .scnprintf = SCA_HEX,	/* start */ },
		   [2] = { .scnprintf = SCA_MMAP_PROT,	/* prot */ }, }, },
	{ .name	= "mq_unlink",
	  .arg = { [0] = { .scnprintf = SCA_FILENAME, /* u_name */ }, }, },
	{ .name	= "mremap",	    .hexret = true,
	  .arg = { [0] = { .scnprintf = SCA_HEX,	  /* addr */ },
		   [3] = { .scnprintf = SCA_MREMAP_FLAGS, /* flags */ },
		   [4] = { .scnprintf = SCA_HEX,	  /* new_addr */ }, }, },
	{ .name	= "munlock",
	  .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ }, }, },
	{ .name	= "munmap",
	  .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ }, }, },
	{ .name	= "name_to_handle_at",
	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
	{ .name	= "newfstatat",
	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
	{ .name	= "open",
	  .arg = { [1] = { .scnprintf = SCA_OPEN_FLAGS, /* flags */ }, }, },
	{ .name	= "open_by_handle_at",
	  .arg = { [0] = { .scnprintf = SCA_FDAT,	/* dfd */ },
		   [2] = { .scnprintf = SCA_OPEN_FLAGS, /* flags */ }, }, },
	{ .name	= "openat",
	  .arg = { [0] = { .scnprintf = SCA_FDAT,	/* dfd */ },
		   [2] = { .scnprintf = SCA_OPEN_FLAGS, /* flags */ }, }, },
	{ .name	= "perf_event_open",
	  .arg = { [2] = { .scnprintf = SCA_INT,	/* cpu */ },
		   [3] = { .scnprintf = SCA_FD,		/* group_fd */ },
		   [4] = { .scnprintf = SCA_PERF_FLAGS, /* flags */ }, }, },
	{ .name	= "pipe2",
	  .arg = { [1] = { .scnprintf = SCA_PIPE_FLAGS, /* flags */ }, }, },
	{ .name	= "pkey_alloc",
	  .arg = { [1] = { .scnprintf = SCA_PKEY_ALLOC_ACCESS_RIGHTS,	/* access_rights */ }, }, },
	{ .name	= "pkey_free",
	  .arg = { [0] = { .scnprintf = SCA_INT,	/* key */ }, }, },
	{ .name	= "pkey_mprotect",
	  .arg = { [0] = { .scnprintf = SCA_HEX,	/* start */ },
		   [2] = { .scnprintf = SCA_MMAP_PROT,	/* prot */ },
		   [3] = { .scnprintf = SCA_INT,	/* pkey */ }, }, },
	{ .name	= "poll", .timeout = true, },
	{ .name	= "ppoll", .timeout = true, },
	{ .name	= "prctl", .alias = "arch_prctl",
	  .arg = { [0] = { .scnprintf = SCA_PRCTL_OPTION, /* option */ },
		   [1] = { .scnprintf = SCA_PRCTL_ARG2,	  /* arg2 */ },
		   [2] = { .scnprintf = SCA_PRCTL_ARG3,	  /* arg3 */ }, }, },
	{ .name	= "pread", .alias = "pread64", },
	{ .name	= "preadv", .alias = "pread", },
	{ .name	= "prlimit64",
	  .arg = { [1] = STRARRAY(resource, rlimit_resources), }, },
	{ .name	= "pwrite", .alias = "pwrite64", },
	{ .name	= "readlinkat",
	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
	{ .name	= "recvfrom",
	  .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
	{ .name	= "recvmmsg",
	  .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
	{ .name	= "recvmsg",
	  .arg = { [2] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
	{ .name	= "renameat",
	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
	{ .name	= "rt_sigaction",
	  .arg = { [0] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
	{ .name	= "rt_sigprocmask",
	  .arg = { [0] = STRARRAY(how, sighow), }, },
	{ .name	= "rt_sigqueueinfo",
	  .arg = { [1] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
	{ .name	= "rt_tgsigqueueinfo",
	  .arg = { [2] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
	{ .name	= "sched_setscheduler",
	  .arg = { [1] = { .scnprintf = SCA_SCHED_POLICY, /* policy */ }, }, },
	{ .name	= "seccomp",
	  .arg = { [0] = { .scnprintf = SCA_SECCOMP_OP,	   /* op */ },
		   [1] = { .scnprintf = SCA_SECCOMP_FLAGS, /* flags */ }, }, },
	{ .name	= "select", .timeout = true, },
	{ .name	= "sendmmsg",
	  .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
	{ .name	= "sendmsg",
	  .arg = { [2] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
	{ .name	= "sendto",
	  .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
	{ .name	= "set_tid_address", .errpid = true, },
	{ .name	= "setitimer",
	  .arg = { [0] = STRARRAY(which, itimers), }, },
	{ .name	= "setrlimit",
	  .arg = { [0] = STRARRAY(resource, rlimit_resources), }, },
	{ .name	= "socket",
	  .arg = { [0] = STRARRAY(family, socket_families),
		   [1] = { .scnprintf = SCA_SK_TYPE,  /* type */ },
		   [2] = { .scnprintf = SCA_SK_PROTO, /* protocol */ }, }, },
	{ .name	= "socketpair",
	  .arg = { [0] = STRARRAY(family, socket_families),
		   [1] = { .scnprintf = SCA_SK_TYPE,  /* type */ },
		   [2] = { .scnprintf = SCA_SK_PROTO, /* protocol */ }, }, },
	{ .name	= "stat", .alias = "newstat", },
	{ .name	= "statx",
	  .arg = { [0] = { .scnprintf = SCA_FDAT,	 /* fdat */ },
		   [2] = { .scnprintf = SCA_STATX_FLAGS, /* flags */ },
		   [3] = { .scnprintf = SCA_STATX_MASK,	 /* mask */ }, }, },
	{ .name	= "swapoff",
	  .arg = { [0] = { .scnprintf = SCA_FILENAME, /* specialfile */ }, }, },
	{ .name	= "swapon",
	  .arg = { [0] = { .scnprintf = SCA_FILENAME, /* specialfile */ }, }, },
	{ .name	= "symlinkat",
	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
	{ .name	= "tgkill",
	  .arg = { [2] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
	{ .name	= "tkill",
	  .arg = { [1] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
	{ .name	= "uname", .alias = "newuname", },
	{ .name	= "unlinkat",
	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
	{ .name	= "utimensat",
	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* dirfd */ }, }, },
	{ .name	= "wait4",	    .errpid = true,
	  .arg = { [2] = { .scnprintf = SCA_WAITID_OPTIONS, /* options */ }, }, },
	{ .name	= "waitid",	    .errpid = true,
	  .arg = { [3] = { .scnprintf = SCA_WAITID_OPTIONS, /* options */ }, }, },
};

static int syscall_fmt__cmp(const void *name, const void *fmtp)
{
	const struct syscall_fmt *fmt = fmtp;
	return strcmp(name, fmt->name);
}

static struct syscall_fmt *syscall_fmt__find(const char *name)
{
	const int nmemb = ARRAY_SIZE(syscall_fmts);
	return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
}
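
/*
 * Note: because syscall_fmt__find() uses bsearch(3), the syscall_fmts table
 * above must be kept sorted by ->name.
 */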

struct syscall {
	struct event_format *tp_format;
	int		    nr_args;
	struct format_field *args;
	const char	    *name;
	bool		    is_exit;
	struct syscall_fmt  *fmt;
	struct syscall_arg_fmt *arg_fmt;
};

/*
 * We need to have this 'calculated' boolean because in some cases we really
 * don't know what the duration of a syscall is, for instance, when we start
 * a session and some threads are waiting for a syscall to finish, say 'poll',
 * in which case all we can do is to print "( ? )" for the duration and for
 * the start timestamp.
 */
static size_t fprintf_duration(unsigned long t, bool calculated, FILE *fp)
{
	double duration = (double)t / NSEC_PER_MSEC;
	size_t printed = fprintf(fp, "(");

	if (!calculated)
		printed += fprintf(fp, "         ");
	else if (duration >= 1.0)
		printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
	else if (duration >= 0.01)
		printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
	else
		printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
	return printed + fprintf(fp, "): ");
}

/**
 * filename.ptr: The filename char pointer that will be vfs_getname'd
 * filename.entry_str_pos: Where to insert the string translated from
 *                         filename.ptr by the vfs_getname tracepoint/kprobe.
 * ret_scnprintf: syscall args may set this to a different syscall return
 *                formatter, for instance, fcntl may return fds, file flags, etc.
 */
struct thread_trace {
	u64		  entry_time;
	bool		  entry_pending;
	unsigned long	  nr_events;
	unsigned long	  pfmaj, pfmin;
	char		  *entry_str;
	double		  runtime_ms;
	size_t		  (*ret_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
	struct {
		unsigned long ptr;
		short int     entry_str_pos;
		bool	      pending_open;
		unsigned int  namelen;
		char	      *name;
	} filename;
	struct {
		int	  max;
		char	  **table;
	} paths;

	struct intlist *syscall_stats;
};

static struct thread_trace *thread_trace__new(void)
{
	struct thread_trace *ttrace = zalloc(sizeof(struct thread_trace));

	if (ttrace) {
		ttrace->paths.max = -1;
		ttrace->syscall_stats = intlist__new(NULL);
	}

	return ttrace;
}

static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
{
	struct thread_trace *ttrace;

	if (thread == NULL)
		goto fail;

	if (thread__priv(thread) == NULL)
		thread__set_priv(thread, thread_trace__new());

	if (thread__priv(thread) == NULL)
		goto fail;

	ttrace = thread__priv(thread);
	++ttrace->nr_events;

	return ttrace;
fail:
	color_fprintf(fp, PERF_COLOR_RED,
		      "WARNING: not enough memory, dropping samples!\n");
	return NULL;
}

void syscall_arg__set_ret_scnprintf(struct syscall_arg *arg,
				    size_t (*ret_scnprintf)(char *bf, size_t size, struct syscall_arg *arg))
{
	struct thread_trace *ttrace = thread__priv(arg->thread);

	ttrace->ret_scnprintf = ret_scnprintf;
}

#define TRACE_PFMAJ		(1 << 0)
#define TRACE_PFMIN		(1 << 1)

static const size_t trace__entry_str_size = 2048;

static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
{
	struct thread_trace *ttrace = thread__priv(thread);

	if (fd > ttrace->paths.max) {
		char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));

		if (npath == NULL)
			return -1;

		if (ttrace->paths.max != -1) {
			memset(npath + ttrace->paths.max + 1, 0,
			       (fd - ttrace->paths.max) * sizeof(char *));
		} else {
			memset(npath, 0, (fd + 1) * sizeof(char *));
		}

		ttrace->paths.table = npath;
		ttrace->paths.max   = fd;
	}

	ttrace->paths.table[fd] = strdup(pathname);

	return ttrace->paths.table[fd] != NULL ? 0 : -1;
}
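
/*
 * The per-thread fd -> pathname table grows on demand: realloc() to fit the
 * new fd, zero the fresh slots, then strdup() the path.  Slots are
 * invalidated again in syscall_arg__scnprintf_close_fd().
 */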

static int thread__read_fd_path(struct thread *thread, int fd)
{
	char linkname[PATH_MAX], pathname[PATH_MAX];
	struct stat st;
	int ret;

	if (thread->pid_ == thread->tid) {
		scnprintf(linkname, sizeof(linkname),
			  "/proc/%d/fd/%d", thread->pid_, fd);
	} else {
		scnprintf(linkname, sizeof(linkname),
			  "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
	}

	if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
		return -1;

	ret = readlink(linkname, pathname, sizeof(pathname));

	if (ret < 0 || ret > st.st_size)
		return -1;

	pathname[ret] = '\0';
	return trace__set_fd_pathname(thread, fd, pathname);
}

static const char *thread__fd_path(struct thread *thread, int fd,
				   struct trace *trace)
{
	struct thread_trace *ttrace = thread__priv(thread);

	if (ttrace == NULL)
		return NULL;

	if (fd < 0)
		return NULL;

	if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) {
		if (!trace->live)
			return NULL;
		++trace->stats.proc_getname;
		if (thread__read_fd_path(thread, fd))
			return NULL;
	}

	return ttrace->paths.table[fd];
}

size_t syscall_arg__scnprintf_fd(char *bf, size_t size, struct syscall_arg *arg)
{
	int fd = arg->val;
	size_t printed = scnprintf(bf, size, "%d", fd);
	const char *path = thread__fd_path(arg->thread, fd, arg->trace);

	if (path)
		printed += scnprintf(bf + printed, size - printed, "<%s>", path);

	return printed;
}

size_t pid__scnprintf_fd(struct trace *trace, pid_t pid, int fd, char *bf, size_t size)
{
	size_t printed = scnprintf(bf, size, "%d", fd);
	struct thread *thread = machine__find_thread(trace->host, pid, pid);

	if (thread) {
		const char *path = thread__fd_path(thread, fd, trace);

		if (path)
			printed += scnprintf(bf + printed, size - printed, "<%s>", path);

		thread__put(thread);
	}

	return printed;
}

static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
					      struct syscall_arg *arg)
{
	int fd = arg->val;
	size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
	struct thread_trace *ttrace = thread__priv(arg->thread);

	if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
		zfree(&ttrace->paths.table[fd]);

	return printed;
}

static void thread__set_filename_pos(struct thread *thread, const char *bf,
				     unsigned long ptr)
{
	struct thread_trace *ttrace = thread__priv(thread);

	ttrace->filename.ptr = ptr;
	ttrace->filename.entry_str_pos = bf - ttrace->entry_str;
}

static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
					      struct syscall_arg *arg)
{
	unsigned long ptr = arg->val;

	if (!arg->trace->vfs_getname)
		return scnprintf(bf, size, "%#x", ptr);

	thread__set_filename_pos(arg->thread, bf, ptr);
	return 0;
}

static bool trace__filter_duration(struct trace *trace, double t)
{
	return t < (trace->duration_filter * NSEC_PER_MSEC);
}

static size_t __trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
{
	double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;

	return fprintf(fp, "%10.3f ", ts);
}

/*
 * We're handling tstamp=0 as an undefined tstamp, i.e. like when we are
 * using ttrace->entry_time for a thread that receives a sys_exit without
 * first having received a sys_enter ("poll" issued before the tracing
 * session starts, or a sys_enter lost due to ring buffer overflow).
 */
static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
{
	if (tstamp > 0)
		return __trace__fprintf_tstamp(trace, tstamp, fp);

	return fprintf(fp, "         ? ");
}

static bool done = false;
static bool interrupted = false;

static void sig_handler(int sig)
{
	done = true;
	interrupted = sig == SIGINT;
}

static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
					u64 duration, bool duration_calculated, u64 tstamp, FILE *fp)
{
	size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
	printed += fprintf_duration(duration, duration_calculated, fp);

	if (trace->multiple_threads) {
		if (trace->show_comm)
			printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
		printed += fprintf(fp, "%d ", thread->tid);
	}

	return printed;
}

static int trace__process_event(struct trace *trace, struct machine *machine,
				union perf_event *event, struct perf_sample *sample)
{
	int ret = 0;

	switch (event->header.type) {
	case PERF_RECORD_LOST:
		color_fprintf(trace->output, PERF_COLOR_RED,
			      "LOST %" PRIu64 " events!\n", event->lost.lost);
		ret = machine__process_lost_event(machine, event, sample);
		break;
	default:
		ret = machine__process_event(machine, event, sample);
		break;
	}

	return ret;
}

static int trace__tool_process(struct perf_tool *tool,
			       union perf_event *event,
			       struct perf_sample *sample,
			       struct machine *machine)
{
	struct trace *trace = container_of(tool, struct trace, tool);
	return trace__process_event(trace, machine, event, sample);
}

static char *trace__machine__resolve_kernel_addr(void *vmachine, unsigned long long *addrp, char **modp)
{
	struct machine *machine = vmachine;

	if (machine->kptr_restrict_warned)
		return NULL;

	if (symbol_conf.kptr_restrict) {
		pr_warning("Kernel address maps (/proc/{kallsyms,modules}) are restricted.\n\n"
			   "Check /proc/sys/kernel/kptr_restrict.\n\n"
			   "Kernel samples will not be resolved.\n");
		machine->kptr_restrict_warned = true;
		return NULL;
	}

	return machine__resolve_kernel_addr(vmachine, addrp, modp);
}

static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
{
	int err = symbol__init(NULL);

	if (err)
		return err;

	trace->host = machine__new_host();
	if (trace->host == NULL)
		return -ENOMEM;

	err = trace_event__register_resolver(trace->host, trace__machine__resolve_kernel_addr);
	if (err < 0)
		goto out;

	err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
					    evlist->threads, trace__tool_process, false,
					    trace->opts.proc_map_timeout, 1);
out:
	if (err)
		symbol__exit();

	return err;
}

static void trace__symbols__exit(struct trace *trace)
{
	machine__exit(trace->host);
	trace->host = NULL;

	symbol__exit();
}

static int syscall__alloc_arg_fmts(struct syscall *sc, int nr_args)
{
	int idx;

	if (nr_args == 6 && sc->fmt && sc->fmt->nr_args != 0)
		nr_args = sc->fmt->nr_args;

	sc->arg_fmt = calloc(nr_args, sizeof(*sc->arg_fmt));
	if (sc->arg_fmt == NULL)
		return -1;

	for (idx = 0; idx < nr_args; ++idx) {
		if (sc->fmt)
			sc->arg_fmt[idx] = sc->fmt->arg[idx];
	}

	sc->nr_args = nr_args;
	return 0;
}

static int syscall__set_arg_fmts(struct syscall *sc)
{
	struct format_field *field;
	int idx = 0, len;

	for (field = sc->args; field; field = field->next, ++idx) {
		if (sc->fmt && sc->fmt->arg[idx].scnprintf)
			continue;

		if (strcmp(field->type, "const char *") == 0 &&
			 (strcmp(field->name, "filename") == 0 ||
			  strcmp(field->name, "path") == 0 ||
			  strcmp(field->name, "pathname") == 0))
			sc->arg_fmt[idx].scnprintf = SCA_FILENAME;
		else if (field->flags & FIELD_IS_POINTER)
			sc->arg_fmt[idx].scnprintf = syscall_arg__scnprintf_hex;
		else if (strcmp(field->type, "pid_t") == 0)
			sc->arg_fmt[idx].scnprintf = SCA_PID;
		else if (strcmp(field->type, "umode_t") == 0)
			sc->arg_fmt[idx].scnprintf = SCA_MODE_T;
		else if ((strcmp(field->type, "int") == 0 ||
			  strcmp(field->type, "unsigned int") == 0 ||
			  strcmp(field->type, "long") == 0) &&
			 (len = strlen(field->name)) >= 2 &&
			 strcmp(field->name + len - 2, "fd") == 0) {
			/*
			 * /sys/kernel/tracing/events/syscalls/sys_enter*
			 * egrep 'field:.*fd;' .../format|sed -r 's/.*field:([a-z ]+) [a-z_]*fd.+/\1/g'|sort|uniq -c
			 */
			sc->arg_fmt[idx].scnprintf = SCA_FD;
		}
	}

	return 0;
}

static int trace__read_syscall_info(struct trace *trace, int id)
{
	char tp_name[128];
	struct syscall *sc;
	const char *name = syscalltbl__name(trace->sctbl, id);

	if (name == NULL)
		return -1;

	if (id > trace->syscalls.max) {
		struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));

		if (nsyscalls == NULL)
			return -1;

		if (trace->syscalls.max != -1) {
			memset(nsyscalls + trace->syscalls.max + 1, 0,
			       (id - trace->syscalls.max) * sizeof(*sc));
		} else {
			memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
		}

		trace->syscalls.table = nsyscalls;
		trace->syscalls.max   = id;
	}

	sc = trace->syscalls.table + id;
	sc->name = name;

	sc->fmt  = syscall_fmt__find(sc->name);

	snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
	sc->tp_format = trace_event__tp_format("syscalls", tp_name);

	if (IS_ERR(sc->tp_format) && sc->fmt && sc->fmt->alias) {
		snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
		sc->tp_format = trace_event__tp_format("syscalls", tp_name);
	}

	if (syscall__alloc_arg_fmts(sc, IS_ERR(sc->tp_format) ? 6 : sc->tp_format->format.nr_fields))
		return -1;

	if (IS_ERR(sc->tp_format))
		return -1;

	sc->args = sc->tp_format->format.fields;
	/*
	 * The first field, '__syscall_nr' ('nr' on older kernels), carries
	 * the syscall number, which is redundant here, so skip it.
	 */
	if (sc->args && (!strcmp(sc->args->name, "__syscall_nr") || !strcmp(sc->args->name, "nr"))) {
		sc->args = sc->args->next;
		--sc->nr_args;
	}

	sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");

	return syscall__set_arg_fmts(sc);
}

static int trace__validate_ev_qualifier(struct trace *trace)
{
	int err = 0, i;
	size_t nr_allocated;
	struct str_node *pos;

	trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier);
	trace->ev_qualifier_ids.entries = malloc(trace->ev_qualifier_ids.nr *
						 sizeof(trace->ev_qualifier_ids.entries[0]));

	if (trace->ev_qualifier_ids.entries == NULL) {
		fputs("Error:\tNot enough memory for allocating events qualifier ids\n",
		       trace->output);
		err = -EINVAL;
		goto out;
	}

	nr_allocated = trace->ev_qualifier_ids.nr;
	i = 0;

	strlist__for_each_entry(pos, trace->ev_qualifier) {
		const char *sc = pos->s;
		int id = syscalltbl__id(trace->sctbl, sc), match_next = -1;

		if (id < 0) {
			id = syscalltbl__strglobmatch_first(trace->sctbl, sc, &match_next);
			if (id >= 0)
				goto matches;

			if (err == 0) {
				fputs("Error:\tInvalid syscall ", trace->output);
				err = -EINVAL;
			} else {
				fputs(", ", trace->output);
			}

			fputs(sc, trace->output);
		}
matches:
		trace->ev_qualifier_ids.entries[i++] = id;
		if (match_next == -1)
			continue;

		while (1) {
			id = syscalltbl__strglobmatch_next(trace->sctbl, sc, &match_next);
			if (id < 0)
				break;
			if (nr_allocated == trace->ev_qualifier_ids.nr) {
				void *entries;

				nr_allocated += 8;
				entries = realloc(trace->ev_qualifier_ids.entries,
						  nr_allocated * sizeof(trace->ev_qualifier_ids.entries[0]));
				if (entries == NULL) {
					err = -ENOMEM;
					fputs("\nError:\t Not enough memory for parsing\n", trace->output);
					goto out_free;
				}
				trace->ev_qualifier_ids.entries = entries;
			}
			trace->ev_qualifier_ids.nr++;
			trace->ev_qualifier_ids.entries[i++] = id;
		}
	}

	if (err < 0) {
		fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'"
		      "\nHint:\tand: 'man syscalls'\n", trace->output);
out_free:
		zfree(&trace->ev_qualifier_ids.entries);
		trace->ev_qualifier_ids.nr = 0;
	}
out:
	return err;
}

/*
 * args is to be interpreted as a series of longs but we need to handle
 * 8-byte unaligned accesses. args points to raw_data within the event
 * and raw_data is guaranteed to be 8-byte unaligned because it is
 * preceded by raw_size, which is a u32. So we need to copy args to a temp
 * variable to read it. Most notably this avoids extended load instructions
 * on unaligned addresses.
 */
unsigned long syscall_arg__val(struct syscall_arg *arg, u8 idx)
{
	unsigned long val;
	unsigned char *p = arg->args + sizeof(unsigned long) * idx;

	memcpy(&val, p, sizeof(val));
	return val;
}

static size_t syscall__scnprintf_name(struct syscall *sc, char *bf, size_t size,
				      struct syscall_arg *arg)
{
	if (sc->arg_fmt && sc->arg_fmt[arg->idx].name)
		return scnprintf(bf, size, "%s: ", sc->arg_fmt[arg->idx].name);

	return scnprintf(bf, size, "arg%d: ", arg->idx);
}

static size_t syscall__scnprintf_val(struct syscall *sc, char *bf, size_t size,
				     struct syscall_arg *arg, unsigned long val)
{
	if (sc->arg_fmt && sc->arg_fmt[arg->idx].scnprintf) {
		arg->val = val;
		if (sc->arg_fmt[arg->idx].parm)
			arg->parm = sc->arg_fmt[arg->idx].parm;
		return sc->arg_fmt[arg->idx].scnprintf(bf, size, arg);
	}
	return scnprintf(bf, size, "%ld", val);
}

static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
				      unsigned char *args, struct trace *trace,
				      struct thread *thread)
{
	size_t printed = 0;
	unsigned long val;
	u8 bit = 1;
	struct syscall_arg arg = {
		.args	= args,
		.idx	= 0,
		.mask	= 0,
		.trace  = trace,
		.thread = thread,
	};
	struct thread_trace *ttrace = thread__priv(thread);

	/*
	 * Things like fcntl will set this in its 'cmd' formatter to pick the
	 * right formatter for the return value (an fd? file flags?), which is
	 * not needed for syscalls that always return a given type, say an fd.
	 */
	ttrace->ret_scnprintf = NULL;

	if (sc->args != NULL) {
		struct format_field *field;

		for (field = sc->args; field;
		     field = field->next, ++arg.idx, bit <<= 1) {
			if (arg.mask & bit)
				continue;

			val = syscall_arg__val(&arg, arg.idx);

			/*
			 * Suppress this argument if its value is zero and we
			 * don't have a string associated in a strarray for it.
			 */
			if (val == 0 &&
			    !(sc->arg_fmt &&
			      (sc->arg_fmt[arg.idx].show_zero ||
			       sc->arg_fmt[arg.idx].scnprintf == SCA_STRARRAY ||
			       sc->arg_fmt[arg.idx].scnprintf == SCA_STRARRAYS) &&
			      sc->arg_fmt[arg.idx].parm))
				continue;

			printed += scnprintf(bf + printed, size - printed,
					     "%s%s: ", printed ? ", " : "", field->name);
			printed += syscall__scnprintf_val(sc, bf + printed, size - printed, &arg, val);
		}
	} else if (IS_ERR(sc->tp_format)) {
		/*
		 * If we managed to read the tracepoint /format file, then we
		 * may end up not having any args, like with gettid(), so only
		 * print the raw args when we didn't manage to read it.
		 */
		while (arg.idx < sc->nr_args) {
			if (arg.mask & bit)
				goto next_arg;
			val = syscall_arg__val(&arg, arg.idx);
			if (printed)
				printed += scnprintf(bf + printed, size - printed, ", ");
			printed += syscall__scnprintf_name(sc, bf + printed, size - printed, &arg);
			printed += syscall__scnprintf_val(sc, bf + printed, size - printed, &arg, val);
next_arg:
			++arg.idx;
			bit <<= 1;
		}
	}

	return printed;
}

typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
				  union perf_event *event,
				  struct perf_sample *sample);

static struct syscall *trace__syscall_info(struct trace *trace,
					   struct perf_evsel *evsel, int id)
{

	if (id < 0) {

		/*
		 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
		 * before that, leaving at a higher verbosity level till that is
		 * explained. Reproduced with plain ftrace with:
		 *
		 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
		 * grep "NR -1 " /t/trace_pipe
		 *
		 * After generating some load on the machine.
		 */
		if (verbose > 1) {
			static u64 n;
			fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
				id, perf_evsel__name(evsel), ++n);
		}
		return NULL;
	}

	if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
	    trace__read_syscall_info(trace, id))
		goto out_cant_read;

	if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
		goto out_cant_read;

	return &trace->syscalls.table[id];

out_cant_read:
	if (verbose > 0) {
		fprintf(trace->output, "Problems reading syscall %d", id);
		if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
			fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
		fputs(" information\n", trace->output);
	}
	return NULL;
}

static void thread__update_stats(struct thread_trace *ttrace,
				 int id, struct perf_sample *sample)
{
	struct int_node *inode;
	struct stats *stats;
	u64 duration = 0;

	inode = intlist__findnew(ttrace->syscall_stats, id);
	if (inode == NULL)
		return;

	stats = inode->priv;
	if (stats == NULL) {
		stats = malloc(sizeof(struct stats));
		if (stats == NULL)
			return;
		init_stats(stats);
		inode->priv = stats;
	}

	if (ttrace->entry_time && sample->time > ttrace->entry_time)
		duration = sample->time - ttrace->entry_time;

	update_stats(stats, duration);
}

static int trace__printf_interrupted_entry(struct trace *trace)
{
	struct thread_trace *ttrace;
	size_t printed;

	if (trace->failure_only || trace->current == NULL)
		return 0;

	ttrace = thread__priv(trace->current);

	if (!ttrace->entry_pending)
		return 0;

	printed  = trace__fprintf_entry_head(trace, trace->current, 0, false, ttrace->entry_time, trace->output);
	printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str);
	ttrace->entry_pending = false;

	return printed;
}

static int trace__fprintf_sample(struct trace *trace, struct perf_evsel *evsel,
				 struct perf_sample *sample, struct thread *thread)
{
	int printed = 0;

	if (trace->print_sample) {
		double ts = (double)sample->time / NSEC_PER_MSEC;

		printed += fprintf(trace->output, "%22s %10.3f %s %d/%d [%d]\n",
				   perf_evsel__name(evsel), ts,
				   thread__comm_str(thread),
				   sample->pid, sample->tid, sample->cpu);
	}

	return printed;
}

static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
			    union perf_event *event __maybe_unused,
			    struct perf_sample *sample)
{
	char *msg;
	void *args;
	size_t printed = 0;
	struct thread *thread;
	int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
	struct syscall *sc = trace__syscall_info(trace, evsel, id);
	struct thread_trace *ttrace;

	if (sc == NULL)
		return -1;

	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
	ttrace = thread__trace(thread, trace->output);
	if (ttrace == NULL)
		goto out_put;

	trace__fprintf_sample(trace, evsel, sample, thread);

	args = perf_evsel__sc_tp_ptr(evsel, args, sample);

	if (ttrace->entry_str == NULL) {
		ttrace->entry_str = malloc(trace__entry_str_size);
		if (!ttrace->entry_str)
			goto out_put;
	}

	if (!(trace->duration_filter || trace->summary_only || trace->min_stack))
		trace__printf_interrupted_entry(trace);

	ttrace->entry_time = sample->time;
	msg = ttrace->entry_str;
	printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name);

	printed += syscall__scnprintf_args(sc, msg + printed, trace__entry_str_size - printed,
					   args, trace, thread);

	if (sc->is_exit) {
		if (!(trace->duration_filter || trace->summary_only || trace->failure_only || trace->min_stack)) {
			trace__fprintf_entry_head(trace, thread, 0, false, ttrace->entry_time, trace->output);
			fprintf(trace->output, "%-70s)\n", ttrace->entry_str);
		}
	} else {
		ttrace->entry_pending = true;
		/* See trace__vfs_getname & trace__sys_exit */
		ttrace->filename.pending_open = false;
	}

	if (trace->current != thread) {
		thread__put(trace->current);
		trace->current = thread__get(thread);
	}
	err = 0;
out_put:
	thread__put(thread);
	return err;
}

static int trace__resolve_callchain(struct trace *trace, struct perf_evsel *evsel,
				    struct perf_sample *sample,
				    struct callchain_cursor *cursor)
{
	struct addr_location al;
	int max_stack = evsel->attr.sample_max_stack ?
			evsel->attr.sample_max_stack :
			trace->max_stack;

	if (machine__resolve(trace->host, &al, sample) < 0 ||
	    thread__resolve_callchain(al.thread, cursor, evsel, sample, NULL, NULL, max_stack))
		return -1;

	return 0;
}

static int trace__fprintf_callchain(struct trace *trace, struct perf_sample *sample)
{
	/* TODO: user-configurable print_opts */
	const unsigned int print_opts = EVSEL__PRINT_SYM |
					EVSEL__PRINT_DSO |
					EVSEL__PRINT_UNKNOWN_AS_ADDR;

	return sample__fprintf_callchain(sample, 38, print_opts, &callchain_cursor, trace->output);
}

static const char *errno_to_name(struct perf_evsel *evsel, int err)
{
	struct perf_env *env = perf_evsel__env(evsel);
	const char *arch_name = perf_env__arch(env);

	return arch_syscalls__strerrno(arch_name, err);
}

static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
			   union perf_event *event __maybe_unused,
			   struct perf_sample *sample)
{
	long ret;
	u64 duration = 0;
	bool duration_calculated = false;
	struct thread *thread;
	int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1, callchain_ret = 0;
	struct syscall *sc = trace__syscall_info(trace, evsel, id);
	struct thread_trace *ttrace;

	if (sc == NULL)
		return -1;

	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
	ttrace = thread__trace(thread, trace->output);
	if (ttrace == NULL)
		goto out_put;

	trace__fprintf_sample(trace, evsel, sample, thread);

	if (trace->summary)
		thread__update_stats(ttrace, id, sample);

	ret = perf_evsel__sc_tp_uint(evsel, ret, sample);

	if (id == trace->open_id && ret >= 0 && ttrace->filename.pending_open) {
		trace__set_fd_pathname(thread, ret, ttrace->filename.name);
		ttrace->filename.pending_open = false;
		++trace->stats.vfs_getname;
	}

	if (ttrace->entry_time) {
		duration = sample->time - ttrace->entry_time;
		if (trace__filter_duration(trace, duration))
			goto out;
		duration_calculated = true;
	} else if (trace->duration_filter)
		goto out;

	if (sample->callchain) {
		callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
		if (callchain_ret == 0) {
			if (callchain_cursor.nr < trace->min_stack)
				goto out;
			callchain_ret = 1;
		}
	}

	if (trace->summary_only || (ret >= 0 && trace->failure_only))
		goto out;

	trace__fprintf_entry_head(trace, thread, duration, duration_calculated, ttrace->entry_time, trace->output);

	if (ttrace->entry_pending) {
		fprintf(trace->output, "%-70s", ttrace->entry_str);
	} else {
		fprintf(trace->output, " ... [");
		color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
		fprintf(trace->output, "]: %s()", sc->name);
	}

	if (sc->fmt == NULL) {
		if (ret < 0)
			goto errno_print;
signed_print:
		fprintf(trace->output, ") = %ld", ret);
	} else if (ret < 0) {
errno_print: {
		char bf[STRERR_BUFSIZE];
		const char *emsg = str_error_r(-ret, bf, sizeof(bf)),
			   *e = errno_to_name(evsel, -ret);

		fprintf(trace->output, ") = -1 %s %s", e, emsg);
	}
	} else if (ret == 0 && sc->fmt->timeout)
		fprintf(trace->output, ") = 0 Timeout");
	else if (ttrace->ret_scnprintf) {
		char bf[1024];
		struct syscall_arg arg = {
			.val	= ret,
			.thread	= thread,
			.trace	= trace,
		};
		ttrace->ret_scnprintf(bf, sizeof(bf), &arg);
		ttrace->ret_scnprintf = NULL;
		fprintf(trace->output, ") = %s", bf);
	} else if (sc->fmt->hexret)
		fprintf(trace->output, ") = %#lx", ret);
	else if (sc->fmt->errpid) {
		struct thread *child = machine__find_thread(trace->host, ret, ret);

		if (child != NULL) {
			fprintf(trace->output, ") = %ld", ret);
			if (child->comm_set)
				fprintf(trace->output, " (%s)", thread__comm_str(child));
			thread__put(child);
		}
	} else
		goto signed_print;

	fputc('\n', trace->output);

	if (callchain_ret > 0)
		trace__fprintf_callchain(trace, sample);
	else if (callchain_ret < 0)
		pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
out:
	ttrace->entry_pending = false;
	err = 0;
out_put:
	thread__put(thread);
	return err;
}

static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
			      union perf_event *event __maybe_unused,
			      struct perf_sample *sample)
{
	struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
	struct thread_trace *ttrace;
	size_t filename_len, entry_str_len, to_move;
	ssize_t remaining_space;
	char *pos;
	const char *filename = perf_evsel__rawptr(evsel, sample, "pathname");

	if (!thread)
		goto out;

	ttrace = thread__priv(thread);
	if (!ttrace)
		goto out_put;

	filename_len = strlen(filename);
	if (filename_len == 0)
		goto out_put;

	if (ttrace->filename.namelen < filename_len) {
		char *f = realloc(ttrace->filename.name, filename_len + 1);

		if (f == NULL)
			goto out_put;

		ttrace->filename.namelen = filename_len;
		ttrace->filename.name = f;
	}

	strcpy(ttrace->filename.name, filename);
	ttrace->filename.pending_open = true;

	if (!ttrace->filename.ptr)
		goto out_put;

	entry_str_len = strlen(ttrace->entry_str);
	remaining_space = trace__entry_str_size - entry_str_len - 1; /* \0 */
	if (remaining_space <= 0)
		goto out_put;

	if (filename_len > (size_t)remaining_space) {
		filename += filename_len - remaining_space;
		filename_len = remaining_space;
	}

	to_move = entry_str_len - ttrace->filename.entry_str_pos + 1; /* \0 */
	pos = ttrace->entry_str + ttrace->filename.entry_str_pos;
	memmove(pos + filename_len, pos, to_move);
	memcpy(pos, filename, filename_len);

	ttrace->filename.ptr = 0;
	ttrace->filename.entry_str_pos = 0;
out_put:
	thread__put(thread);
out:
	return 0;
}
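
/*
 * The vfs_getname handler above completes a two-stage dance: at sys_enter
 * time SCA_FILENAME only records where in entry_str the name should go
 * (filename.entry_str_pos); when the probe:vfs_getname event arrives with
 * the resolved pathname, it is spliced into the pending entry string at
 * that position.
 */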

static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
				     union perf_event *event __maybe_unused,
				     struct perf_sample *sample)
{
	u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
	double runtime_ms = (double)runtime / NSEC_PER_MSEC;
	struct thread *thread = machine__findnew_thread(trace->host,
							sample->pid,
							sample->tid);
	struct thread_trace *ttrace = thread__trace(thread, trace->output);

	if (ttrace == NULL)
		goto out_dump;

	ttrace->runtime_ms += runtime_ms;
	trace->runtime_ms += runtime_ms;
out_put:
	thread__put(thread);
	return 0;

out_dump:
	fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
	       evsel->name,
	       perf_evsel__strval(evsel, sample, "comm"),
	       (pid_t)perf_evsel__intval(evsel, sample, "pid"),
	       runtime,
	       perf_evsel__intval(evsel, sample, "vruntime"));
	goto out_put;
}
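
/*
 * sched:sched_stat_runtime samples accumulate into both the per-thread and
 * the global runtime_ms counters, which feed the per-thread summary output.
 */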

static int bpf_output__printer(enum binary_printer_ops op,
			       unsigned int val, void *extra __maybe_unused, FILE *fp)
{
	unsigned char ch = (unsigned char)val;

	switch (op) {
	case BINARY_PRINT_CHAR_DATA:
		return fprintf(fp, "%c", isprint(ch) ? ch : '.');
	case BINARY_PRINT_DATA_BEGIN:
	case BINARY_PRINT_LINE_BEGIN:
	case BINARY_PRINT_ADDR:
	case BINARY_PRINT_NUM_DATA:
	case BINARY_PRINT_NUM_PAD:
	case BINARY_PRINT_SEP:
	case BINARY_PRINT_CHAR_PAD:
	case BINARY_PRINT_LINE_END:
	case BINARY_PRINT_DATA_END:
	default:
		break;
	}

	return 0;
}

static void bpf_output__fprintf(struct trace *trace,
				struct perf_sample *sample)
{
	binary__fprintf(sample->raw_data, sample->raw_size, 8,
			bpf_output__printer, NULL, trace->output);
}

static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
				union perf_event *event __maybe_unused,
				struct perf_sample *sample)
{
	int callchain_ret = 0;

	if (sample->callchain) {
		callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
		if (callchain_ret == 0) {
			if (callchain_cursor.nr < trace->min_stack)
				goto out;
			callchain_ret = 1;
		}
	}

	trace__printf_interrupted_entry(trace);
	trace__fprintf_tstamp(trace, sample->time, trace->output);

	if (trace->trace_syscalls)
		fprintf(trace->output, "(         ): ");

	fprintf(trace->output, "%s:", evsel->name);

	if (perf_evsel__is_bpf_output(evsel)) {
		bpf_output__fprintf(trace, sample);
	} else if (evsel->tp_format) {
		event_format__fprintf(evsel->tp_format, sample->cpu,
				      sample->raw_data, sample->raw_size,
				      trace->output);
	}

	fprintf(trace->output, "\n");

	if (callchain_ret > 0)
		trace__fprintf_callchain(trace, sample);
	else if (callchain_ret < 0)
		pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
out:
	return 0;
}

static void print_location(FILE *f, struct perf_sample *sample,
			   struct addr_location *al,
			   bool print_dso, bool print_sym)
{

	if ((verbose > 0 || print_dso) && al->map)
		fprintf(f, "%s@", al->map->dso->long_name);

	if ((verbose > 0 || print_sym) && al->sym)
		fprintf(f, "%s+0x%" PRIx64, al->sym->name,
			al->addr - al->sym->start);
	else if (al->map)
		fprintf(f, "0x%" PRIx64, al->addr);
	else
		fprintf(f, "0x%" PRIx64, sample->addr);
}

static int trace__pgfault(struct trace *trace,
			  struct perf_evsel *evsel,
			  union perf_event *event __maybe_unused,
			  struct perf_sample *sample)
{
	struct thread *thread;
	struct addr_location al;
	char map_type = 'd';
	struct thread_trace *ttrace;
	int err = -1;
	int callchain_ret = 0;

	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);

	if (sample->callchain) {
		callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
		if (callchain_ret == 0) {
			if (callchain_cursor.nr < trace->min_stack)
				goto out_put;
			callchain_ret = 1;
		}
	}

	ttrace = thread__trace(thread, trace->output);
	if (ttrace == NULL)
		goto out_put;

	if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
		ttrace->pfmaj++;
	else
		ttrace->pfmin++;

	if (trace->summary_only)
		goto out;

	thread__find_symbol(thread, sample->cpumode, sample->ip, &al);

	trace__fprintf_entry_head(trace, thread, 0, true, sample->time, trace->output);

	fprintf(trace->output, "%sfault [",
		evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
		"maj" : "min");

	print_location(trace->output, sample, &al, false, true);

	fprintf(trace->output, "] => ");

	thread__find_map(thread, sample->cpumode, sample->addr, &al);

	if (!al.map) {
		thread__find_symbol(thread, sample->cpumode, sample->addr, &al);

		if (al.map)
			map_type = 'x';
		else
			map_type = '?';
	}

	print_location(trace->output, sample, &al, true, false);

	fprintf(trace->output, " (%c%c)\n", map_type, al.level);

	if (callchain_ret > 0)
		trace__fprintf_callchain(trace, sample);
	else if (callchain_ret < 0)
		pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
out:
	err = 0;
out_put:
	thread__put(thread);
	return err;
}

static void trace__set_base_time(struct trace *trace,
				 struct perf_evsel *evsel,
				 struct perf_sample *sample)
{
	/*
	 * BPF events were not setting PERF_SAMPLE_TIME, so be more robust
	 * and don't use sample->time unconditionally, we may end up having
	 * some other event in the future without PERF_SAMPLE_TIME for good
	 * reason, i.e. we may not be interested in its timestamps, just in
	 * it taking place, picking some piece of information when it
	 * appears in our event stream (vfs_getname comes to mind).
	 */
	if (trace->base_time == 0 && !trace->full_time &&
	    (evsel->attr.sample_type & PERF_SAMPLE_TIME))
		trace->base_time = sample->time;
}

static int trace__process_sample(struct perf_tool *tool,
				 union perf_event *event,
				 struct perf_sample *sample,
				 struct perf_evsel *evsel,
				 struct machine *machine __maybe_unused)
{
	struct trace *trace = container_of(tool, struct trace, tool);
	struct thread *thread;
	int err = 0;

	tracepoint_handler handler = evsel->handler;

	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
	if (thread && thread__is_filtered(thread))
		goto out;

	trace__set_base_time(trace, evsel, sample);

	if (handler) {
		++trace->nr_events;
		handler(trace, evsel, event, sample);
	}
out:
	thread__put(thread);
	return err;
}

static int trace__record(struct trace *trace, int argc, const char **argv)
{
	unsigned int rec_argc, i, j;
	const char **rec_argv;
	const char * const record_args[] = {
		"record",
		"-R",
		"-m", "1024",
		"-c", "1",
	};

	const char * const sc_args[] = { "-e", };
	unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
	const char * const majpf_args[] = { "-e", "major-faults" };
	unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
	const char * const minpf_args[] = { "-e", "minor-faults" };
	unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);

	/* +1 is for the event string below */
	rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
		majpf_args_nr + minpf_args_nr + argc;
	rec_argv = calloc(rec_argc + 1, sizeof(char *));

	if (rec_argv == NULL)
		return -ENOMEM;

	j = 0;
	for (i = 0; i < ARRAY_SIZE(record_args); i++)
		rec_argv[j++] = record_args[i];

	if (trace->trace_syscalls) {
		for (i = 0; i < sc_args_nr; i++)
			rec_argv[j++] = sc_args[i];

		/* event string may be different for older kernels - e.g., RHEL6 */
		if (is_valid_tracepoint("raw_syscalls:sys_enter"))
			rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
		else if (is_valid_tracepoint("syscalls:sys_enter"))
			rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
		else {
			pr_err("Neither raw_syscalls nor syscalls events exist.\n");
			free(rec_argv);
			return -1;
		}
	}

	if (trace->trace_pgfaults & TRACE_PFMAJ)
		for (i = 0; i < majpf_args_nr; i++)
			rec_argv[j++] = majpf_args[i];

	if (trace->trace_pgfaults & TRACE_PFMIN)
		for (i = 0; i < minpf_args_nr; i++)
			rec_argv[j++] = minpf_args[i];

	for (i = 0; i < (unsigned int)argc; i++)
		rec_argv[j++] = argv[i];

	return cmd_record(j, rec_argv);
}
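
/*
 * 'perf trace record' is thus a thin wrapper: it builds an argv for
 * 'perf record' with the raw_syscalls (or legacy syscalls) enter/exit
 * tracepoints and the requested page fault events, then hands off to
 * cmd_record().
 */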

static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);

static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
{
	struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");

	if (IS_ERR(evsel))
		return false;

	if (perf_evsel__field(evsel, "pathname") == NULL) {
		perf_evsel__delete(evsel);
		return false;
	}

	evsel->handler = trace__vfs_getname;
	perf_evlist__add(evlist, evsel);
	return true;
}

static struct perf_evsel *perf_evsel__new_pgfault(u64 config)
{
	struct perf_evsel *evsel;
	struct perf_event_attr attr = {
		.type = PERF_TYPE_SOFTWARE,
		.mmap_data = 1,
	};

	attr.config = config;
	attr.sample_period = 1;

	event_attr_init(&attr);

	evsel = perf_evsel__new(&attr);
	if (evsel)
		evsel->handler = trace__pgfault;

	return evsel;
}
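
/*
 * sample_period is set to 1 so that every single page fault is sampled,
 * not one in every N, since we want to trace each occurrence.
 */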
static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
{
	const u32 type = event->header.type;
	struct perf_evsel *evsel;

	if (type != PERF_RECORD_SAMPLE) {
		trace__process_event(trace, trace->host, event, sample);
		return;
	}

	evsel = perf_evlist__id2evsel(trace->evlist, sample->id);
	if (evsel == NULL) {
		fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id);
		return;
	}

	trace__set_base_time(trace, evsel, sample);

	if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
	    sample->raw_data == NULL) {
		fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
		       perf_evsel__name(evsel), sample->tid,
		       sample->cpu, sample->raw_size);
	} else {
		tracepoint_handler handler = evsel->handler;
		handler(trace, evsel, event, sample);
	}
}
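
/*
 * Create the raw_syscalls:sys_enter/sys_exit tracepoint evsels, hook up
 * their handlers and the args/ret payload fields, and add them to the
 * evlist.
 */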
static int trace__add_syscall_newtp(struct trace *trace)
{
	int ret = -1;
	struct perf_evlist *evlist = trace->evlist;
	struct perf_evsel *sys_enter, *sys_exit;

	sys_enter = perf_evsel__syscall_newtp("sys_enter", trace__sys_enter);
	if (sys_enter == NULL)
		goto out;

	if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
		goto out_delete_sys_enter;

	sys_exit = perf_evsel__syscall_newtp("sys_exit", trace__sys_exit);
	if (sys_exit == NULL)
		goto out_delete_sys_enter;

	if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
		goto out_delete_sys_exit;

	perf_evsel__config_callchain(sys_enter, &trace->opts, &callchain_param);
	perf_evsel__config_callchain(sys_exit, &trace->opts, &callchain_param);

	perf_evlist__add(evlist, sys_enter);
	perf_evlist__add(evlist, sys_exit);

	if (callchain_param.enabled && !trace->kernel_syscallchains) {
		/*
		 * We're interested only in the user space callchain
		 * leading to the syscall, allow overriding that for
		 * debugging reasons using --kernel-syscall-graph.
		 */
		sys_exit->attr.exclude_callchain_kernel = 1;
	}

	trace->syscalls.events.sys_enter = sys_enter;
	trace->syscalls.events.sys_exit = sys_exit;

	ret = 0;
out:
	return ret;

out_delete_sys_exit:
	perf_evsel__delete_priv(sys_exit);
out_delete_sys_enter:
	perf_evsel__delete_priv(sys_enter);
	goto out;
}
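
/*
 * Build a tracepoint filter expression from the syscall id qualifier list
 * and attach it to both sys_enter and sys_exit. E.g., assuming ids 2 and 3
 * purely for illustration, "-e open,close" would produce a filter like:
 *
 *   id == 2 || id == 3
 *
 * while the negated form ("-e \!open,close") uses "id != 2 && id != 3".
 */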
static int trace__set_ev_qualifier_filter(struct trace *trace)
{
	int err = -1;
	struct perf_evsel *sys_exit;
	char *filter = asprintf_expr_inout_ints("id", !trace->not_ev_qualifier,
						trace->ev_qualifier_ids.nr,
						trace->ev_qualifier_ids.entries);

	if (filter == NULL)
		goto out_enomem;

	if (!perf_evsel__append_tp_filter(trace->syscalls.events.sys_enter,
					  filter)) {
		sys_exit = trace->syscalls.events.sys_exit;
		err = perf_evsel__append_tp_filter(sys_exit, filter);
	}

	free(filter);
out:
	return err;
out_enomem:
	errno = ENOMEM;
	goto out;
}
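
/*
 * When no pid filter was given, filter out at least perf trace itself and,
 * if the output is going over a ssh session, the sshd ancestor carrying it:
 * otherwise writing the trace output would itself generate events, feeding
 * back into the trace.
 */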
static int trace__set_filter_loop_pids(struct trace *trace)
{
	unsigned int nr = 1;
	pid_t pids[32] = {
		getpid(),
	};
	struct thread *thread = machine__find_thread(trace->host, pids[0], pids[0]);

	while (thread && nr < ARRAY_SIZE(pids)) {
		struct thread *parent = machine__find_thread(trace->host, thread->ppid, thread->ppid);

		if (parent == NULL)
			break;

		if (!strcmp(thread__comm_str(parent), "sshd")) {
			pids[nr++] = parent->tid;
			break;
		}
		thread = parent;
	}

	return perf_evlist__set_filter_pids(trace->evlist, nr, pids);
}
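
/*
 * Live mode: set up the evlist (syscall tracepoints, page fault events,
 * optional sched_stat_runtime), create the maps, open, apply filters and
 * mmap the events, optionally fork the workload, then consume the ring
 * buffers until interrupted or until the workload finishes.
 */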
static int trace__run(struct trace *trace, int argc, const char **argv)
{
	struct perf_evlist *evlist = trace->evlist;
	struct perf_evsel *evsel, *pgfault_maj = NULL, *pgfault_min = NULL;
	int err = -1, i;
	unsigned long before;
	const bool forks = argc > 0;
	bool draining = false;

	trace->live = true;

	if (trace->trace_syscalls && trace__add_syscall_newtp(trace))
		goto out_error_raw_syscalls;

	if (trace->trace_syscalls)
		trace->vfs_getname = perf_evlist__add_vfs_getname(evlist);

	if ((trace->trace_pgfaults & TRACE_PFMAJ)) {
		pgfault_maj = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MAJ);
		if (pgfault_maj == NULL)
			goto out_error_mem;
		perf_evsel__config_callchain(pgfault_maj, &trace->opts, &callchain_param);
		perf_evlist__add(evlist, pgfault_maj);
	}

	if ((trace->trace_pgfaults & TRACE_PFMIN)) {
		pgfault_min = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MIN);
		if (pgfault_min == NULL)
			goto out_error_mem;
		perf_evsel__config_callchain(pgfault_min, &trace->opts, &callchain_param);
		perf_evlist__add(evlist, pgfault_min);
	}

	if (trace->sched &&
	    perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
				   trace__sched_stat_runtime))
		goto out_error_sched_stat_runtime;

	/*
	 * If a global cgroup was set, apply it to all the events without an
	 * explicit cgroup. I.e.:
	 *
	 *	trace -G A -e sched:*switch
	 *
	 * will set all raw_syscalls:sys_{enter,exit}, pgfault, vfs_getname, etc.
	 * _and_ sched:sched_switch to the 'A' cgroup, while:
	 *
	 *	trace -e sched:*switch -G A
	 *
	 * will only set the sched:sched_switch event to the 'A' cgroup, all the
	 * other events (raw_syscalls:sys_{enter,exit}, etc.) are left "without"
	 * a cgroup (on the root cgroup, sys wide, etc).
	 *
	 * Multiple cgroups:
	 *
	 *	trace -G A -e sched:*switch -G B
	 *
	 * here the syscall events go to the 'A' cgroup, while sched:sched_switch
	 * goes to the 'B' cgroup.
	 *
	 * evlist__set_default_cgroup() grabs a reference of the passed cgroup
	 * only for the evsels still without a cgroup, i.e. evsel->cgroup == NULL.
	 */
	if (trace->cgroup)
		evlist__set_default_cgroup(trace->evlist, trace->cgroup);

	err = perf_evlist__create_maps(evlist, &trace->opts.target);
	if (err < 0) {
		fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
		goto out_delete_evlist;
	}

	err = trace__symbols_init(trace, evlist);
	if (err < 0) {
		fprintf(trace->output, "Problems initializing symbol libraries!\n");
		goto out_delete_evlist;
	}

	perf_evlist__config(evlist, &trace->opts, &callchain_param);

	signal(SIGCHLD, sig_handler);
	signal(SIGINT, sig_handler);

	if (forks) {
		err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
						    argv, false, NULL);
		if (err < 0) {
			fprintf(trace->output, "Couldn't run the workload!\n");
			goto out_delete_evlist;
		}
	}

	err = perf_evlist__open(evlist);
	if (err < 0)
		goto out_error_open;

	err = bpf__apply_obj_config();
	if (err) {
		char errbuf[BUFSIZ];

		bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
		pr_err("ERROR: Apply config to BPF failed: %s\n",
		       errbuf);
		goto out_error_open;
	}

	/*
	 * Better not use !target__has_task() here because we need to cover the
	 * case where no threads were specified in the command line, but a
	 * workload was, and in that case we will fill in the thread_map when
	 * we fork the workload in perf_evlist__prepare_workload.
	 */
	if (trace->filter_pids.nr > 0)
		err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries);
	else if (thread_map__pid(evlist->threads, 0) == -1)
		err = trace__set_filter_loop_pids(trace);

	if (err < 0)
		goto out_error_mem;

	if (trace->ev_qualifier_ids.nr > 0) {
		err = trace__set_ev_qualifier_filter(trace);
		if (err < 0)
			goto out_errno;

		pr_debug("event qualifier tracepoint filter: %s\n",
			 trace->syscalls.events.sys_exit->filter);
	}

	err = perf_evlist__apply_filters(evlist, &evsel);
	if (err < 0)
		goto out_error_apply_filters;

	err = perf_evlist__mmap(evlist, trace->opts.mmap_pages);
	if (err < 0)
		goto out_error_mmap;

	if (!target__none(&trace->opts.target) && !trace->opts.initial_delay)
		perf_evlist__enable(evlist);

	if (forks)
		perf_evlist__start_workload(evlist);

	if (trace->opts.initial_delay) {
		usleep(trace->opts.initial_delay * 1000);
		perf_evlist__enable(evlist);
	}

	trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 ||
				  evlist->threads->nr > 1 ||
				  perf_evlist__first(evlist)->attr.inherit;

	/*
	 * Now that we already used evsel->attr to ask the kernel to setup the
	 * events, lets reuse evsel->attr.sample_max_stack as the limit in
	 * trace__resolve_callchain(), allowing per-event max-stack settings
	 * to override an explicitly set --max-stack global setting.
	 */
	evlist__for_each_entry(evlist, evsel) {
		if (evsel__has_callchain(evsel) &&
		    evsel->attr.sample_max_stack == 0)
			evsel->attr.sample_max_stack = trace->max_stack;
	}
again:
	before = trace->nr_events;

	for (i = 0; i < evlist->nr_mmaps; i++) {
		union perf_event *event;
		struct perf_mmap *md;

		md = &evlist->mmap[i];
		if (perf_mmap__read_init(md) < 0)
			continue;

		while ((event = perf_mmap__read_event(md)) != NULL) {
			struct perf_sample sample;

			++trace->nr_events;

			err = perf_evlist__parse_sample(evlist, event, &sample);
			if (err) {
				fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
				goto next_event;
			}

			trace__handle_event(trace, event, &sample);
next_event:
			perf_mmap__consume(md);

			if (interrupted)
				goto out_disable;

			if (done && !draining) {
				perf_evlist__disable(evlist);
				draining = true;
			}
		}
		perf_mmap__read_done(md);
	}

	if (trace->nr_events == before) {
		int timeout = done ? 100 : -1;

		if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
			if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
				draining = true;

			goto again;
		}
	} else {
		goto again;
	}

out_disable:
	thread__zput(trace->current);

	perf_evlist__disable(evlist);

	if (!err) {
		if (trace->summary)
			trace__fprintf_thread_summary(trace, trace->output);

		if (trace->show_tool_stats) {
			fprintf(trace->output, "Stats:\n "
					       " vfs_getname : %" PRIu64 "\n"
					       " proc_getname: %" PRIu64 "\n",
				trace->stats.vfs_getname,
				trace->stats.proc_getname);
		}
	}

out_delete_evlist:
	trace__symbols__exit(trace);

	perf_evlist__delete(evlist);
	cgroup__put(trace->cgroup);
	trace->evlist = NULL;
	trace->live = false;
	return err;

{
	char errbuf[BUFSIZ];

out_error_sched_stat_runtime:
	tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime");
	goto out_error;

out_error_raw_syscalls:
	tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
	goto out_error;

out_error_mmap:
	perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
	goto out_error;

out_error_open:
	perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));

out_error:
	fprintf(trace->output, "%s\n", errbuf);
	goto out_delete_evlist;

out_error_apply_filters:
	fprintf(trace->output,
		"Failed to set filter \"%s\" on event %s with %d (%s)\n",
		evsel->filter, perf_evsel__name(evsel), errno,
		str_error_r(errno, errbuf, sizeof(errbuf)));
	goto out_delete_evlist;
}
out_error_mem:
	fprintf(trace->output, "Not enough memory to run!\n");
	goto out_delete_evlist;

out_errno:
	fprintf(trace->output, "errno=%d,%s\n", errno, strerror(errno));
	goto out_delete_evlist;
}
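
/*
 * Replay mode (perf trace -i perf.data): wire up the perf_tool callbacks,
 * open a session for the input file and feed the recorded events through
 * the same syscall/pgfault handlers used in live mode.
 */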
static int trace__replay(struct trace *trace)
{
	const struct perf_evsel_str_handler handlers[] = {
		{ "probe:vfs_getname", trace__vfs_getname, },
	};
	struct perf_data data = {
		.file  = {
			.path = input_name,
		},
		.mode  = PERF_DATA_MODE_READ,
		.force = trace->force,
	};
	struct perf_session *session;
	struct perf_evsel *evsel;
	int err = -1;

	trace->tool.sample	  = trace__process_sample;
	trace->tool.mmap	  = perf_event__process_mmap;
	trace->tool.mmap2	  = perf_event__process_mmap2;
	trace->tool.comm	  = perf_event__process_comm;
	trace->tool.exit	  = perf_event__process_exit;
	trace->tool.fork	  = perf_event__process_fork;
	trace->tool.attr	  = perf_event__process_attr;
	trace->tool.tracing_data  = perf_event__process_tracing_data;
	trace->tool.build_id	  = perf_event__process_build_id;
	trace->tool.namespaces	  = perf_event__process_namespaces;

	trace->tool.ordered_events = true;
	trace->tool.ordering_requires_timestamps = true;

	/* add tid to output */
	trace->multiple_threads = true;

	session = perf_session__new(&data, false, &trace->tool);
	if (session == NULL)
		return -1;

	if (trace->opts.target.pid)
		symbol_conf.pid_list_str = strdup(trace->opts.target.pid);

	if (trace->opts.target.tid)
		symbol_conf.tid_list_str = strdup(trace->opts.target.tid);

	if (symbol__init(&session->header.env) < 0)
		goto out;

	trace->host = &session->machines.host;

	err = perf_session__set_tracepoints_handlers(session, handlers);
	if (err)
		goto out;

	evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
						     "raw_syscalls:sys_enter");
	/* older kernels have syscalls tp versus raw_syscalls */
	if (evsel == NULL)
		evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
							     "syscalls:sys_enter");

	if (evsel &&
	    (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
	    perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
		pr_err("Error initializing raw_syscalls:sys_enter event\n");
		goto out;
	}

	evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
						     "raw_syscalls:sys_exit");
	if (evsel == NULL)
		evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
							     "syscalls:sys_exit");
	if (evsel &&
	    (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
	    perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
		pr_err("Error initializing raw_syscalls:sys_exit event\n");
		goto out;
	}

	evlist__for_each_entry(session->evlist, evsel) {
		if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
		    (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
		     evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
		     evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
			evsel->handler = trace__pgfault;
	}

	setup_pager();

	err = perf_session__process_events(session);
	if (err)
		pr_err("Failed to process events, error %d\n", err);
	else if (trace->summary)
		trace__fprintf_thread_summary(trace, trace->output);

out:
	perf_session__delete(session);

	return err;
}
static size_t trace__fprintf_threads_header(FILE *fp)
{
	size_t printed;

	printed = fprintf(fp, "\n Summary of events:\n\n");

	return printed;
}
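
/*
 * Resort the per-syscall stats by total time spent (msecs): each entry is
 * built from an int_node of the ttrace->syscall_stats intlist, keyed by
 * syscall id with a struct stats as payload.
 */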
DEFINE_RESORT_RB(syscall_stats, a->msecs > b->msecs,
	struct stats	*stats;
	double		msecs;
	int		syscall;
)
{
	struct int_node *source = rb_entry(nd, struct int_node, rb_node);
	struct stats *stats = source->priv;

	entry->syscall = source->i;
	entry->stats   = stats;
	entry->msecs   = stats ? (u64)stats->n * (avg_stats(stats) / NSEC_PER_MSEC) : 0;
}
static size_t thread__dump_stats(struct thread_trace *ttrace,
				 struct trace *trace, FILE *fp)
{
	size_t printed = 0;
	struct syscall *sc;
	struct rb_node *nd;
	DECLARE_RESORT_RB_INTLIST(syscall_stats, ttrace->syscall_stats);

	if (syscall_stats == NULL)
		return 0;

	printed += fprintf(fp, "\n");

	printed += fprintf(fp, "   syscall            calls    total       min       avg       max      stddev\n");
	printed += fprintf(fp, "                               (msec)    (msec)    (msec)    (msec)        (%%)\n");
	printed += fprintf(fp, "   --------------- -------- --------- --------- --------- ---------     ------\n");

	resort_rb__for_each_entry(nd, syscall_stats) {
		struct stats *stats = syscall_stats_entry->stats;
		if (stats) {
			double min = (double)(stats->min) / NSEC_PER_MSEC;
			double max = (double)(stats->max) / NSEC_PER_MSEC;
			double avg = avg_stats(stats);
			double pct;
			u64 n = (u64) stats->n;

			pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
			avg /= NSEC_PER_MSEC;

			sc = &trace->syscalls.table[syscall_stats_entry->syscall];
			printed += fprintf(fp, "   %-15s", sc->name);
			printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f %9.3f",
					   n, syscall_stats_entry->msecs, min, avg);
			printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
		}
	}

	resort_rb__delete(syscall_stats);
	printed += fprintf(fp, "\n\n");

	return printed;
}
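
/*
 * Example of the table printed above (the numbers are purely illustrative):
 *
 *   syscall            calls    total       min       avg       max      stddev
 *                               (msec)    (msec)    (msec)    (msec)        (%)
 *   --------------- -------- --------- --------- --------- ---------     ------
 *   nanosleep              1  1000.217  1000.217  1000.217  1000.217      0.00
 */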
static size_t trace__fprintf_thread(FILE *fp, struct thread *thread, struct trace *trace)
{
	size_t printed = 0;
	double ratio;
	struct thread_trace *ttrace = thread__priv(thread);

	if (ttrace == NULL)
		return 0;

	ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;

	printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
	printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
	printed += fprintf(fp, "%.1f%%", ratio);
	if (ttrace->pfmaj)
		printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
	if (ttrace->pfmin)
		printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
	if (trace->sched)
		printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
	else if (fputc('\n', fp) != EOF)
		++printed;

	printed += thread__dump_stats(ttrace, trace, fp);

	return printed;
}
static unsigned long thread__nr_events(struct thread_trace *ttrace)
{
	return ttrace ? ttrace->nr_events : 0;
}

DEFINE_RESORT_RB(threads, (thread__nr_events(a->thread->priv) < thread__nr_events(b->thread->priv)),
	struct thread *thread;
)
{
	entry->thread = rb_entry(nd, struct thread, rb_node);
}
static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
{
	size_t printed = trace__fprintf_threads_header(fp);
	struct rb_node *nd;
	int i;

	for (i = 0; i < THREADS__TABLE_SIZE; i++) {
		DECLARE_RESORT_RB_MACHINE_THREADS(threads, trace->host, i);

		if (threads == NULL) {
			fprintf(fp, "%s", "Error sorting output by nr_events!\n");
			return 0;
		}

		resort_rb__for_each_entry(nd, threads)
			printed += trace__fprintf_thread(fp, threads_entry->thread, trace);

		resort_rb__delete(threads);
	}
	return printed;
}
static int trace__set_duration(const struct option *opt, const char *str,
			       int unset __maybe_unused)
{
	struct trace *trace = opt->value;

	trace->duration_filter = atof(str);
	return 0;
}
static int trace__set_filter_pids(const struct option *opt, const char *str,
				  int unset __maybe_unused)
{
	int ret = -1;
	size_t i;
	struct trace *trace = opt->value;
	/*
	 * FIXME: introduce an intarray class, plain parse csv and create a
	 * { int nr, int entries[] } struct...
	 */
	struct intlist *list = intlist__new(str);

	if (list == NULL)
		return -1;

	i = trace->filter_pids.nr = intlist__nr_entries(list) + 1;
	trace->filter_pids.entries = calloc(i, sizeof(pid_t));

	if (trace->filter_pids.entries == NULL)
		goto out;

	trace->filter_pids.entries[0] = getpid();

	for (i = 1; i < trace->filter_pids.nr; ++i)
		trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i;

	intlist__delete(list);
	ret = 0;
out:
	return ret;
}
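
/*
 * E.g. "perf trace --filter-pids 1234,5678": note that the tool's own pid is
 * always put first in the list, so that perf trace never traces itself.
 */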
static int trace__open_output(struct trace *trace, const char *filename)
{
	struct stat st;

	if (!stat(filename, &st) && st.st_size) {
		char oldname[PATH_MAX];

		scnprintf(oldname, sizeof(oldname), "%s.old", filename);
		unlink(oldname);
		rename(filename, oldname);
	}

	trace->output = fopen(filename, "w");

	return trace->output == NULL ? -errno : 0;
}
static int parse_pagefaults(const struct option *opt, const char *str,
			    int unset __maybe_unused)
{
	int *trace_pgfaults = opt->value;

	if (strcmp(str, "all") == 0)
		*trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
	else if (strcmp(str, "maj") == 0)
		*trace_pgfaults |= TRACE_PFMAJ;
	else if (strcmp(str, "min") == 0)
		*trace_pgfaults |= TRACE_PFMIN;
	else
		return -1;

	return 0;
}
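
/*
 * E.g. "perf trace -F min" traces only minor faults and "-F all" both kinds;
 * plain "-F" defaults to "maj" via OPT_CALLBACK_DEFAULT() in cmd_trace().
 */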
static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler)
{
	struct perf_evsel *evsel;

	evlist__for_each_entry(evlist, evsel)
		evsel->handler = handler;
}
/*
 * XXX: Hackish, just splitting the combined -e+--event (syscalls
 * (raw_syscalls:{sys_{enter,exit}}) + events (tracepoints, HW, SW, etc.)) to
 * use the existing facilities unchanged (trace->ev_qualifier + parse_options()).
 *
 * It'd be better to introduce a parse_options() variant that would return a
 * list with the terms it didn't match to an event...
 */
static int trace__parse_events_option(const struct option *opt, const char *str,
				      int unset __maybe_unused)
{
	struct trace *trace = (struct trace *)opt->value;
	const char *s = str;
	char *sep = NULL, *lists[2] = { NULL, NULL, };
	int len = strlen(str) + 1, err = -1, list, idx;
	char *strace_groups_dir = system_path(STRACE_GROUPS_DIR);
	char group_name[PATH_MAX];

	if (strace_groups_dir == NULL)
		return -1;

	if (*s == '!') {
		++s;
		trace->not_ev_qualifier = true;
	}

	while (1) {
		if ((sep = strchr(s, ',')) != NULL)
			*sep = '\0';

		list = 0;
		if (syscalltbl__id(trace->sctbl, s) >= 0 ||
		    syscalltbl__strglobmatch_first(trace->sctbl, s, &idx) >= 0) {
			list = 1;
		} else {
			path__join(group_name, sizeof(group_name), strace_groups_dir, s);
			if (access(group_name, R_OK) == 0)
				list = 1;
		}

		if (lists[list]) {
			sprintf(lists[list] + strlen(lists[list]), ",%s", s);
		} else {
			lists[list] = malloc(len);
			if (lists[list] == NULL)
				goto out;
			strcpy(lists[list], s);
		}

		if (!sep)
			break;

		*sep = ',';
		s = sep + 1;
	}

	if (lists[1] != NULL) {
		struct strlist_config slist_config = {
			.dirname = strace_groups_dir,
		};

		trace->ev_qualifier = strlist__new(lists[1], &slist_config);
		if (trace->ev_qualifier == NULL) {
			fputs("Not enough memory to parse event qualifier", trace->output);
			goto out;
		}

		if (trace__validate_ev_qualifier(trace))
			goto out;
		trace->trace_syscalls = true;
	}

	err = 0;

	if (lists[0]) {
		struct option o = OPT_CALLBACK('e', "event", &trace->evlist, "event",
					       "event selector. use 'perf list' to list available events",
					       parse_events_option);
		err = parse_events_option(&o, lists[0], 0);
	}
out:
	if (sep)
		*sep = ',';

	return err;
}
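
/*
 * E.g. "-e open*,close,sched:sched_switch" is split above into the syscall
 * qualifier "open*,close" (kept in trace->ev_qualifier) and the regular
 * event list "sched:sched_switch", handed to parse_events_option().
 */

/*
 * A -G/--cgroup given before any -e event is remembered as the default
 * cgroup to apply to all subsequently added events; given after events, it
 * falls back to the regular per-event parse_cgroups() handling (see the
 * comment in trace__run()).
 */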
static int trace__parse_cgroups(const struct option *opt, const char *str, int unset)
{
	struct trace *trace = opt->value;

	if (!list_empty(&trace->evlist->entries))
		return parse_cgroups(opt, str, unset);

	trace->cgroup = evlist__findnew_cgroup(trace->evlist, str);

	return 0;
}
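
/*
 * Entry point for 'perf trace': parse the options, then either assemble a
 * 'perf record' command line (the "record" subcommand), replay a perf.data
 * file (-i) or enter the live tracing loop in trace__run().
 */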
int cmd_trace(int argc, const char **argv)
{
	const char *trace_usage[] = {
		"perf trace [<options>] [<command>]",
		"perf trace [<options>] -- <command> [<options>]",
		"perf trace record [<options>] [<command>]",
		"perf trace record [<options>] -- <command> [<options>]",
		NULL
	};
	struct trace trace = {
		.syscalls = {
			. max = -1,
		},
		.opts = {
			.target = {
				.uid	   = UINT_MAX,
				.uses_mmap = true,
			},
			.user_freq     = UINT_MAX,
			.user_interval = ULLONG_MAX,
			.no_buffering  = true,
			.mmap_pages    = UINT_MAX,
			.proc_map_timeout = 500,
		},
		.output = stderr,
		.show_comm = true,
		.trace_syscalls = false,
		.kernel_syscallchains = false,
		.max_stack = UINT_MAX,
	};
	const char *output_name = NULL;
	const struct option trace_options[] = {
	OPT_CALLBACK('e', "event", &trace, "event",
		     "event/syscall selector. use 'perf list' to list available events",
		     trace__parse_events_option),
	OPT_BOOLEAN(0, "comm", &trace.show_comm,
		    "show the thread COMM next to its id"),
	OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
	OPT_CALLBACK(0, "expr", &trace, "expr", "list of syscalls/events to trace",
		     trace__parse_events_option),
	OPT_STRING('o', "output", &output_name, "file", "output file name"),
	OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
	OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
		    "trace events on existing process id"),
	OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
		    "trace events on existing thread id"),
	OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids",
		     "pids to filter (by the kernel)", trace__set_filter_pids),
	OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
		    "system-wide collection from all CPUs"),
	OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
		    "list of cpus to monitor"),
	OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
		    "child tasks do not inherit counters"),
	OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
		     "number of mmap data pages",
		     perf_evlist__parse_mmap_pages),
	OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
		   "user to profile"),
	OPT_CALLBACK(0, "duration", &trace, "float",
		     "show only events with duration > N.M ms",
		     trace__set_duration),
	OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
	OPT_INCR('v', "verbose", &verbose, "be more verbose"),
	OPT_BOOLEAN('T', "time", &trace.full_time,
		    "Show full timestamp, not time relative to first start"),
	OPT_BOOLEAN(0, "failure", &trace.failure_only,
		    "Show only syscalls that failed"),
	OPT_BOOLEAN('s', "summary", &trace.summary_only,
		    "Show only syscall summary with statistics"),
	OPT_BOOLEAN('S', "with-summary", &trace.summary,
		    "Show all syscalls and summary with statistics"),
	OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
		     "Trace pagefaults", parse_pagefaults, "maj"),
	OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
	OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
	OPT_CALLBACK(0, "call-graph", &trace.opts,
		     "record_mode[,record_size]", record_callchain_help,
		     &record_parse_callchain_opt),
	OPT_BOOLEAN(0, "kernel-syscall-graph", &trace.kernel_syscallchains,
		    "Show the kernel callchains on the syscall exit path"),
	OPT_UINTEGER(0, "min-stack", &trace.min_stack,
		     "Set the minimum stack depth when parsing the callchain, "
		     "anything below the specified depth will be ignored."),
	OPT_UINTEGER(0, "max-stack", &trace.max_stack,
		     "Set the maximum stack depth when parsing the callchain, "
		     "anything beyond the specified depth will be ignored. "
		     "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)),
	OPT_BOOLEAN(0, "print-sample", &trace.print_sample,
		    "print the PERF_RECORD_SAMPLE PERF_SAMPLE_ info, for debugging"),
	OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
		     "per thread proc mmap processing timeout in ms"),
	OPT_CALLBACK('G', "cgroup", &trace, "name", "monitor event in cgroup name only",
		     trace__parse_cgroups),
	OPT_UINTEGER('D', "delay", &trace.opts.initial_delay,
		     "ms to wait before starting measurement after program "
		     "start"),
	OPT_END()
	};
	bool __maybe_unused max_stack_user_set = true;
	bool mmap_pages_user_set = true;
	const char * const trace_subcommands[] = { "record", NULL };
	int err;
	char bf[BUFSIZ];

	signal(SIGSEGV, sighandler_dump_stack);
	signal(SIGFPE, sighandler_dump_stack);

	trace.evlist = perf_evlist__new();
	trace.sctbl = syscalltbl__new();

	if (trace.evlist == NULL || trace.sctbl == NULL) {
		pr_err("Not enough memory to run!\n");
		err = -ENOMEM;
		goto out;
	}

	argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
					trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);

	if ((nr_cgroups || trace.cgroup) && !trace.opts.target.system_wide) {
		usage_with_options_msg(trace_usage, trace_options,
				       "cgroup monitoring only available in system-wide mode");
	}

	err = bpf__setup_stdout(trace.evlist);
	if (err) {
		bpf__strerror_setup_stdout(trace.evlist, err, bf, sizeof(bf));
		pr_err("ERROR: Setup BPF stdout failed: %s\n", bf);
		goto out;
	}

	err = -1;

	if (trace.trace_pgfaults) {
		trace.opts.sample_address = true;
		trace.opts.sample_time = true;
	}

	if (trace.opts.mmap_pages == UINT_MAX)
		mmap_pages_user_set = false;

	if (trace.max_stack == UINT_MAX) {
		trace.max_stack = input_name ? PERF_MAX_STACK_DEPTH : sysctl__max_stack();
		max_stack_user_set = false;
	}

#ifdef HAVE_DWARF_UNWIND_SUPPORT
	if ((trace.min_stack || max_stack_user_set) && !callchain_param.enabled) {
		record_opts__parse_callchain(&trace.opts, &callchain_param, "dwarf", false);
	}
#endif

	if (callchain_param.enabled) {
		if (!mmap_pages_user_set && geteuid() == 0)
			trace.opts.mmap_pages = perf_event_mlock_kb_in_pages() * 4;

		symbol_conf.use_callchain = true;
	}

	if (trace.evlist->nr_entries > 0)
		evlist__set_evsel_handler(trace.evlist, trace__event_handler);

	if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
		return trace__record(&trace, argc-1, &argv[1]);

	/* summary_only implies summary option, but don't overwrite summary if set */
	if (trace.summary_only)
		trace.summary = trace.summary_only;

	if (!trace.trace_syscalls && !trace.trace_pgfaults &&
	    trace.evlist->nr_entries == 0 /* Was --events used? */) {
		trace.trace_syscalls = true;
	}

	if (output_name != NULL) {
		err = trace__open_output(&trace, output_name);
		if (err < 0) {
			perror("failed to create output file");
			goto out;
		}
	}

	trace.open_id = syscalltbl__id(trace.sctbl, "open");

	err = target__validate(&trace.opts.target);
	if (err) {
		target__strerror(&trace.opts.target, err, bf, sizeof(bf));
		fprintf(trace.output, "%s", bf);
		goto out_close;
	}

	err = target__parse_uid(&trace.opts.target);
	if (err) {
		target__strerror(&trace.opts.target, err, bf, sizeof(bf));
		fprintf(trace.output, "%s", bf);
		goto out_close;
	}

	if (!argc && target__none(&trace.opts.target))
		trace.opts.target.system_wide = true;

	if (input_name)
		err = trace__replay(&trace);
	else
		err = trace__run(&trace, argc, argv);

out_close:
	if (output_name != NULL)
		fclose(trace.output);
out:
	return err;
}