1 #include <traceevent/event-parse.h>
3 #include "util/color.h"
4 #include "util/debug.h"
5 #include "util/evlist.h"
6 #include "util/machine.h"
7 #include "util/session.h"
8 #include "util/thread.h"
9 #include "util/parse-options.h"
10 #include "util/strlist.h"
11 #include "util/intlist.h"
12 #include "util/thread_map.h"
13 #include "util/stat.h"
14 #include "trace-event.h"
15 #include "util/parse-events.h"
19 #include <sys/eventfd.h>
21 #include <linux/futex.h>
23 /* For older distros: */
25 # define MAP_STACK 0x20000
29 # define MADV_HWPOISON 100
32 #ifndef MADV_MERGEABLE
33 # define MADV_MERGEABLE 12
36 #ifndef MADV_UNMERGEABLE
37 # define MADV_UNMERGEABLE 13
41 # define EFD_SEMAPHORE 1
47 u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
48 void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
52 #define TP_UINT_FIELD(bits) \
53 static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
55 return *(u##bits *)(sample->raw_data + field->offset); \
63 #define TP_UINT_FIELD__SWAPPED(bits) \
64 static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
66 u##bits value = *(u##bits *)(sample->raw_data + field->offset); \
67 return bswap_##bits(value);\
70 TP_UINT_FIELD__SWAPPED(16);
71 TP_UINT_FIELD__SWAPPED(32);
72 TP_UINT_FIELD__SWAPPED(64);
74 static int tp_field__init_uint(struct tp_field *field,
75 struct format_field *format_field,
78 field->offset = format_field->offset;
80 switch (format_field->size) {
82 field->integer = tp_field__u8;
85 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
88 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
91 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
100 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
102 return sample->raw_data + field->offset;
105 static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
107 field->offset = format_field->offset;
108 field->pointer = tp_field__ptr;
115 struct tp_field args, ret;
119 static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
120 struct tp_field *field,
123 struct format_field *format_field = perf_evsel__field(evsel, name);
125 if (format_field == NULL)
128 return tp_field__init_uint(field, format_field, evsel->needs_swap);
131 #define perf_evsel__init_sc_tp_uint_field(evsel, name) \
132 ({ struct syscall_tp *sc = evsel->priv;\
133 perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); })
135 static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
136 struct tp_field *field,
139 struct format_field *format_field = perf_evsel__field(evsel, name);
141 if (format_field == NULL)
144 return tp_field__init_ptr(field, format_field);
147 #define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
148 ({ struct syscall_tp *sc = evsel->priv;\
149 perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name); })
151 static void perf_evsel__delete_priv(struct perf_evsel *evsel)
154 perf_evsel__delete(evsel);
157 static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
159 evsel->priv = malloc(sizeof(struct syscall_tp));
160 if (evsel->priv != NULL) {
161 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
164 evsel->handler = handler;
175 static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
177 struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);
179 /* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */
181 evsel = perf_evsel__newtp("syscalls", direction);
184 if (perf_evsel__init_syscall_tp(evsel, handler))
191 perf_evsel__delete_priv(evsel);
195 #define perf_evsel__sc_tp_uint(evsel, name, sample) \
196 ({ struct syscall_tp *fields = evsel->priv; \
197 fields->name.integer(&fields->name, sample); })
199 #define perf_evsel__sc_tp_ptr(evsel, name, sample) \
200 ({ struct syscall_tp *fields = evsel->priv; \
201 fields->name.pointer(&fields->name, sample); })
203 static int perf_evlist__add_syscall_newtp(struct perf_evlist *evlist,
204 void *sys_enter_handler,
205 void *sys_exit_handler)
208 struct perf_evsel *sys_enter, *sys_exit;
210 sys_enter = perf_evsel__syscall_newtp("sys_enter", sys_enter_handler);
211 if (sys_enter == NULL)
214 if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
215 goto out_delete_sys_enter;
217 sys_exit = perf_evsel__syscall_newtp("sys_exit", sys_exit_handler);
218 if (sys_exit == NULL)
219 goto out_delete_sys_enter;
221 if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
222 goto out_delete_sys_exit;
224 perf_evlist__add(evlist, sys_enter);
225 perf_evlist__add(evlist, sys_exit);
232 perf_evsel__delete_priv(sys_exit);
233 out_delete_sys_enter:
234 perf_evsel__delete_priv(sys_enter);
241 struct thread *thread;
251 const char **entries;
254 #define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
255 .nr_entries = ARRAY_SIZE(array), \
259 #define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
261 .nr_entries = ARRAY_SIZE(array), \
265 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
267 struct syscall_arg *arg)
269 struct strarray *sa = arg->parm;
270 int idx = arg->val - sa->offset;
272 if (idx < 0 || idx >= sa->nr_entries)
273 return scnprintf(bf, size, intfmt, arg->val);
275 return scnprintf(bf, size, "%s", sa->entries[idx]);
/*
 * String-table formatter whose fallback, used when the table lookup does
 * not produce a name, prints the raw value with "%d".
 */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
					      struct syscall_arg *arg)
{
	const char *fallback_fmt = "%d";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}
284 #define SCA_STRARRAY syscall_arg__scnprintf_strarray
/*
 * String-table formatter whose fallback, used when the table lookup does
 * not produce a name, prints the raw value with "%#x".
 */
static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
						 struct syscall_arg *arg)
{
	const char *fallback_fmt = "%#x";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}
292 #define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
294 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
295 struct syscall_arg *arg);
297 #define SCA_FD syscall_arg__scnprintf_fd
299 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
300 struct syscall_arg *arg)
305 return scnprintf(bf, size, "CWD");
307 return syscall_arg__scnprintf_fd(bf, size, arg);
310 #define SCA_FDAT syscall_arg__scnprintf_fd_at
312 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
313 struct syscall_arg *arg);
315 #define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
317 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
318 struct syscall_arg *arg)
320 return scnprintf(bf, size, "%#lx", arg->val);
323 #define SCA_HEX syscall_arg__scnprintf_hex
325 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
326 struct syscall_arg *arg)
328 int printed = 0, prot = arg->val;
330 if (prot == PROT_NONE)
331 return scnprintf(bf, size, "NONE");
332 #define P_MMAP_PROT(n) \
333 if (prot & PROT_##n) { \
334 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
344 P_MMAP_PROT(GROWSDOWN);
345 P_MMAP_PROT(GROWSUP);
349 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
354 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
356 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
357 struct syscall_arg *arg)
359 int printed = 0, flags = arg->val;
361 #define P_MMAP_FLAG(n) \
362 if (flags & MAP_##n) { \
363 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
368 P_MMAP_FLAG(PRIVATE);
372 P_MMAP_FLAG(ANONYMOUS);
373 P_MMAP_FLAG(DENYWRITE);
374 P_MMAP_FLAG(EXECUTABLE);
377 P_MMAP_FLAG(GROWSDOWN);
379 P_MMAP_FLAG(HUGETLB);
382 P_MMAP_FLAG(NONBLOCK);
383 P_MMAP_FLAG(NORESERVE);
384 P_MMAP_FLAG(POPULATE);
386 #ifdef MAP_UNINITIALIZED
387 P_MMAP_FLAG(UNINITIALIZED);
392 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
397 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
399 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
400 struct syscall_arg *arg)
402 int behavior = arg->val;
405 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
408 P_MADV_BHV(SEQUENTIAL);
409 P_MADV_BHV(WILLNEED);
410 P_MADV_BHV(DONTNEED);
412 P_MADV_BHV(DONTFORK);
414 P_MADV_BHV(HWPOISON);
415 #ifdef MADV_SOFT_OFFLINE
416 P_MADV_BHV(SOFT_OFFLINE);
418 P_MADV_BHV(MERGEABLE);
419 P_MADV_BHV(UNMERGEABLE);
421 P_MADV_BHV(HUGEPAGE);
423 #ifdef MADV_NOHUGEPAGE
424 P_MADV_BHV(NOHUGEPAGE);
427 P_MADV_BHV(DONTDUMP);
436 return scnprintf(bf, size, "%#x", behavior);
439 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
441 static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
442 struct syscall_arg *arg)
444 int printed = 0, op = arg->val;
447 return scnprintf(bf, size, "NONE");
449 if ((op & LOCK_##cmd) == LOCK_##cmd) { \
450 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
465 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
470 #define SCA_FLOCK syscall_arg__scnprintf_flock
472 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
474 enum syscall_futex_args {
475 SCF_UADDR = (1 << 0),
478 SCF_TIMEOUT = (1 << 3),
479 SCF_UADDR2 = (1 << 4),
483 int cmd = op & FUTEX_CMD_MASK;
487 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
488 P_FUTEX_OP(WAIT); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
489 P_FUTEX_OP(WAKE); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
490 P_FUTEX_OP(FD); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
491 P_FUTEX_OP(REQUEUE); arg->mask |= SCF_VAL3|SCF_TIMEOUT; break;
492 P_FUTEX_OP(CMP_REQUEUE); arg->mask |= SCF_TIMEOUT; break;
493 P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT; break;
494 P_FUTEX_OP(WAKE_OP); break;
495 P_FUTEX_OP(LOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
496 P_FUTEX_OP(UNLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
497 P_FUTEX_OP(TRYLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
498 P_FUTEX_OP(WAIT_BITSET); arg->mask |= SCF_UADDR2; break;
499 P_FUTEX_OP(WAKE_BITSET); arg->mask |= SCF_UADDR2; break;
500 P_FUTEX_OP(WAIT_REQUEUE_PI); break;
501 default: printed = scnprintf(bf, size, "%#x", cmd); break;
504 if (op & FUTEX_PRIVATE_FLAG)
505 printed += scnprintf(bf + printed, size - printed, "|PRIV");
507 if (op & FUTEX_CLOCK_REALTIME)
508 printed += scnprintf(bf + printed, size - printed, "|CLKRT");
513 #define SCA_FUTEX_OP syscall_arg__scnprintf_futex_op
515 static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
516 static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
518 static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
519 static DEFINE_STRARRAY(itimers);
521 static const char *whences[] = { "SET", "CUR", "END",
529 static DEFINE_STRARRAY(whences);
531 static const char *fcntl_cmds[] = {
532 "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
533 "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
534 "F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
537 static DEFINE_STRARRAY(fcntl_cmds);
539 static const char *rlimit_resources[] = {
540 "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
541 "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
544 static DEFINE_STRARRAY(rlimit_resources);
546 static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
547 static DEFINE_STRARRAY(sighow);
549 static const char *clockid[] = {
550 "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
551 "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE",
553 static DEFINE_STRARRAY(clockid);
555 static const char *socket_families[] = {
556 "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
557 "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
558 "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
559 "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
560 "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
561 "ALG", "NFC", "VSOCK",
563 static DEFINE_STRARRAY(socket_families);
565 #ifndef SOCK_TYPE_MASK
566 #define SOCK_TYPE_MASK 0xf
569 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
570 struct syscall_arg *arg)
574 flags = type & ~SOCK_TYPE_MASK;
576 type &= SOCK_TYPE_MASK;
578 * Can't use a strarray, MIPS may override for ABI reasons.
581 #define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
586 P_SK_TYPE(SEQPACKET);
591 printed = scnprintf(bf, size, "%#x", type);
594 #define P_SK_FLAG(n) \
595 if (flags & SOCK_##n) { \
596 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
597 flags &= ~SOCK_##n; \
605 printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
610 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
613 #define MSG_PROBE 0x10
615 #ifndef MSG_WAITFORONE
616 #define MSG_WAITFORONE 0x10000
618 #ifndef MSG_SENDPAGE_NOTLAST
619 #define MSG_SENDPAGE_NOTLAST 0x20000
622 #define MSG_FASTOPEN 0x20000000
625 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
626 struct syscall_arg *arg)
628 int printed = 0, flags = arg->val;
631 return scnprintf(bf, size, "NONE");
632 #define P_MSG_FLAG(n) \
633 if (flags & MSG_##n) { \
634 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
640 P_MSG_FLAG(DONTROUTE);
645 P_MSG_FLAG(DONTWAIT);
652 P_MSG_FLAG(ERRQUEUE);
653 P_MSG_FLAG(NOSIGNAL);
655 P_MSG_FLAG(WAITFORONE);
656 P_MSG_FLAG(SENDPAGE_NOTLAST);
657 P_MSG_FLAG(FASTOPEN);
658 P_MSG_FLAG(CMSG_CLOEXEC);
662 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
667 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
669 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
670 struct syscall_arg *arg)
675 if (mode == F_OK) /* 0 */
676 return scnprintf(bf, size, "F");
678 if (mode & n##_OK) { \
679 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
689 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
694 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
696 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
697 struct syscall_arg *arg)
699 int printed = 0, flags = arg->val;
701 if (!(flags & O_CREAT))
702 arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
705 return scnprintf(bf, size, "RDONLY");
707 if (flags & O_##n) { \
708 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
732 if ((flags & O_SYNC) == O_SYNC)
733 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
745 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
750 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
752 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
753 struct syscall_arg *arg)
755 int printed = 0, flags = arg->val;
758 return scnprintf(bf, size, "NONE");
760 if (flags & EFD_##n) { \
761 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
771 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
776 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
778 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
779 struct syscall_arg *arg)
781 int printed = 0, flags = arg->val;
784 if (flags & O_##n) { \
785 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
794 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
799 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
801 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
806 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
841 return scnprintf(bf, size, "%#x", sig);
844 #define SCA_SIGNUM syscall_arg__scnprintf_signum
846 #define TCGETS 0x5401
848 static const char *tioctls[] = {
849 "TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
850 "TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
851 "TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
852 "TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
853 "TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
854 "TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
855 "TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK",
856 "TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
857 "TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
858 "TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
859 "TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
860 [0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
861 "TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
862 "TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
863 "TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE",
866 static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
868 #define STRARRAY(arg, name, array) \
869 .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
870 .arg_parm = { [arg] = &strarray__##array, }
872 static struct syscall_fmt {
875 size_t (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
881 { .name = "access", .errmsg = true,
882 .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, },
883 { .name = "arch_prctl", .errmsg = true, .alias = "prctl", },
884 { .name = "brk", .hexret = true,
885 .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
886 { .name = "clock_gettime", .errmsg = true, STRARRAY(0, clk_id, clockid), },
887 { .name = "close", .errmsg = true,
888 .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
889 { .name = "connect", .errmsg = true, },
890 { .name = "dup", .errmsg = true,
891 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
892 { .name = "dup2", .errmsg = true,
893 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
894 { .name = "dup3", .errmsg = true,
895 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
896 { .name = "epoll_ctl", .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
897 { .name = "eventfd2", .errmsg = true,
898 .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
899 { .name = "faccessat", .errmsg = true,
900 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
901 { .name = "fadvise64", .errmsg = true,
902 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
903 { .name = "fallocate", .errmsg = true,
904 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
905 { .name = "fchdir", .errmsg = true,
906 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
907 { .name = "fchmod", .errmsg = true,
908 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
909 { .name = "fchmodat", .errmsg = true,
910 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
911 { .name = "fchown", .errmsg = true,
912 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
913 { .name = "fchownat", .errmsg = true,
914 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
915 { .name = "fcntl", .errmsg = true,
916 .arg_scnprintf = { [0] = SCA_FD, /* fd */
917 [1] = SCA_STRARRAY, /* cmd */ },
918 .arg_parm = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
919 { .name = "fdatasync", .errmsg = true,
920 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
921 { .name = "flock", .errmsg = true,
922 .arg_scnprintf = { [0] = SCA_FD, /* fd */
923 [1] = SCA_FLOCK, /* cmd */ }, },
924 { .name = "fsetxattr", .errmsg = true,
925 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
926 { .name = "fstat", .errmsg = true, .alias = "newfstat",
927 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
928 { .name = "fstatat", .errmsg = true, .alias = "newfstatat",
929 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
930 { .name = "fstatfs", .errmsg = true,
931 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
932 { .name = "fsync", .errmsg = true,
933 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
934 { .name = "ftruncate", .errmsg = true,
935 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
936 { .name = "futex", .errmsg = true,
937 .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
938 { .name = "futimesat", .errmsg = true,
939 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
940 { .name = "getdents", .errmsg = true,
941 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
942 { .name = "getdents64", .errmsg = true,
943 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
944 { .name = "getitimer", .errmsg = true, STRARRAY(0, which, itimers), },
945 { .name = "getrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
946 { .name = "ioctl", .errmsg = true,
947 .arg_scnprintf = { [0] = SCA_FD, /* fd */
948 [1] = SCA_STRHEXARRAY, /* cmd */
949 [2] = SCA_HEX, /* arg */ },
950 .arg_parm = { [1] = &strarray__tioctls, /* cmd */ }, },
951 { .name = "kill", .errmsg = true,
952 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
953 { .name = "linkat", .errmsg = true,
954 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
955 { .name = "lseek", .errmsg = true,
956 .arg_scnprintf = { [0] = SCA_FD, /* fd */
957 [2] = SCA_STRARRAY, /* whence */ },
958 .arg_parm = { [2] = &strarray__whences, /* whence */ }, },
959 { .name = "lstat", .errmsg = true, .alias = "newlstat", },
960 { .name = "madvise", .errmsg = true,
961 .arg_scnprintf = { [0] = SCA_HEX, /* start */
962 [2] = SCA_MADV_BHV, /* behavior */ }, },
963 { .name = "mkdirat", .errmsg = true,
964 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
965 { .name = "mknodat", .errmsg = true,
966 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
967 { .name = "mlock", .errmsg = true,
968 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
969 { .name = "mlockall", .errmsg = true,
970 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
971 { .name = "mmap", .hexret = true,
972 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
973 [2] = SCA_MMAP_PROT, /* prot */
974 [3] = SCA_MMAP_FLAGS, /* flags */
975 [4] = SCA_FD, /* fd */ }, },
976 { .name = "mprotect", .errmsg = true,
977 .arg_scnprintf = { [0] = SCA_HEX, /* start */
978 [2] = SCA_MMAP_PROT, /* prot */ }, },
979 { .name = "mremap", .hexret = true,
980 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
981 [4] = SCA_HEX, /* new_addr */ }, },
982 { .name = "munlock", .errmsg = true,
983 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
984 { .name = "munmap", .errmsg = true,
985 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
986 { .name = "name_to_handle_at", .errmsg = true,
987 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
988 { .name = "newfstatat", .errmsg = true,
989 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
990 { .name = "open", .errmsg = true,
991 .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
992 { .name = "open_by_handle_at", .errmsg = true,
993 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
994 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
995 { .name = "openat", .errmsg = true,
996 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
997 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
998 { .name = "pipe2", .errmsg = true,
999 .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
1000 { .name = "poll", .errmsg = true, .timeout = true, },
1001 { .name = "ppoll", .errmsg = true, .timeout = true, },
1002 { .name = "pread", .errmsg = true, .alias = "pread64",
1003 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1004 { .name = "preadv", .errmsg = true, .alias = "pread",
1005 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1006 { .name = "prlimit64", .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
1007 { .name = "pwrite", .errmsg = true, .alias = "pwrite64",
1008 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1009 { .name = "pwritev", .errmsg = true,
1010 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1011 { .name = "read", .errmsg = true,
1012 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1013 { .name = "readlinkat", .errmsg = true,
1014 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1015 { .name = "readv", .errmsg = true,
1016 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1017 { .name = "recvfrom", .errmsg = true,
1018 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1019 { .name = "recvmmsg", .errmsg = true,
1020 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1021 { .name = "recvmsg", .errmsg = true,
1022 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1023 { .name = "renameat", .errmsg = true,
1024 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1025 { .name = "rt_sigaction", .errmsg = true,
1026 .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
1027 { .name = "rt_sigprocmask", .errmsg = true, STRARRAY(0, how, sighow), },
1028 { .name = "rt_sigqueueinfo", .errmsg = true,
1029 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1030 { .name = "rt_tgsigqueueinfo", .errmsg = true,
1031 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1032 { .name = "select", .errmsg = true, .timeout = true, },
1033 { .name = "sendmmsg", .errmsg = true,
1034 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1035 { .name = "sendmsg", .errmsg = true,
1036 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1037 { .name = "sendto", .errmsg = true,
1038 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1039 { .name = "setitimer", .errmsg = true, STRARRAY(0, which, itimers), },
1040 { .name = "setrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1041 { .name = "shutdown", .errmsg = true,
1042 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1043 { .name = "socket", .errmsg = true,
1044 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1045 [1] = SCA_SK_TYPE, /* type */ },
1046 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
1047 { .name = "socketpair", .errmsg = true,
1048 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1049 [1] = SCA_SK_TYPE, /* type */ },
1050 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
1051 { .name = "stat", .errmsg = true, .alias = "newstat", },
1052 { .name = "symlinkat", .errmsg = true,
1053 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1054 { .name = "tgkill", .errmsg = true,
1055 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1056 { .name = "tkill", .errmsg = true,
1057 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1058 { .name = "uname", .errmsg = true, .alias = "newuname", },
1059 { .name = "unlinkat", .errmsg = true,
1060 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1061 { .name = "utimensat", .errmsg = true,
1062 .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, },
1063 { .name = "write", .errmsg = true,
1064 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1065 { .name = "writev", .errmsg = true,
1066 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1069 static int syscall_fmt__cmp(const void *name, const void *fmtp)
1071 const struct syscall_fmt *fmt = fmtp;
1072 return strcmp(name, fmt->name);
1075 static struct syscall_fmt *syscall_fmt__find(const char *name)
1077 const int nmemb = ARRAY_SIZE(syscall_fmts);
1078 return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
1082 struct event_format *tp_format;
1085 struct syscall_fmt *fmt;
1086 size_t (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
1090 static size_t fprintf_duration(unsigned long t, FILE *fp)
1092 double duration = (double)t / NSEC_PER_MSEC;
1093 size_t printed = fprintf(fp, "(");
1095 if (duration >= 1.0)
1096 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
1097 else if (duration >= 0.01)
1098 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
1100 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
1101 return printed + fprintf(fp, "): ");
1104 struct thread_trace {
1108 unsigned long nr_events;
1116 struct intlist *syscall_stats;
1119 static struct thread_trace *thread_trace__new(void)
1121 struct thread_trace *ttrace = zalloc(sizeof(struct thread_trace));
1124 ttrace->paths.max = -1;
1126 ttrace->syscall_stats = intlist__new(NULL);
1131 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
1133 struct thread_trace *ttrace;
1138 if (thread->priv == NULL)
1139 thread->priv = thread_trace__new();
1141 if (thread->priv == NULL)
1144 ttrace = thread->priv;
1145 ++ttrace->nr_events;
1149 color_fprintf(fp, PERF_COLOR_RED,
1150 "WARNING: not enough memory, dropping samples!\n");
1155 struct perf_tool tool;
1162 struct syscall *table;
1164 struct record_opts opts;
1165 struct machine *host;
1168 unsigned long nr_events;
1169 struct strlist *ev_qualifier;
1170 const char *last_vfs_getname;
1171 struct intlist *tid_list;
1172 struct intlist *pid_list;
1173 double duration_filter;
1179 bool not_ev_qualifier;
1183 bool multiple_threads;
1187 bool show_tool_stats;
1190 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
1192 struct thread_trace *ttrace = thread->priv;
1194 if (fd > ttrace->paths.max) {
1195 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
1200 if (ttrace->paths.max != -1) {
1201 memset(npath + ttrace->paths.max + 1, 0,
1202 (fd - ttrace->paths.max) * sizeof(char *));
1204 memset(npath, 0, (fd + 1) * sizeof(char *));
1207 ttrace->paths.table = npath;
1208 ttrace->paths.max = fd;
1211 ttrace->paths.table[fd] = strdup(pathname);
1213 return ttrace->paths.table[fd] != NULL ? 0 : -1;
1216 static int thread__read_fd_path(struct thread *thread, int fd)
1218 char linkname[PATH_MAX], pathname[PATH_MAX];
1222 if (thread->pid_ == thread->tid) {
1223 scnprintf(linkname, sizeof(linkname),
1224 "/proc/%d/fd/%d", thread->pid_, fd);
1226 scnprintf(linkname, sizeof(linkname),
1227 "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
1230 if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1233 ret = readlink(linkname, pathname, sizeof(pathname));
1235 if (ret < 0 || ret > st.st_size)
1238 pathname[ret] = '\0';
1239 return trace__set_fd_pathname(thread, fd, pathname);
1242 static const char *thread__fd_path(struct thread *thread, int fd,
1243 struct trace *trace)
1245 struct thread_trace *ttrace = thread->priv;
1253 if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL))
1256 ++trace->stats.proc_getname;
1257 if (thread__read_fd_path(thread, fd)) {
1261 return ttrace->paths.table[fd];
1264 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
1265 struct syscall_arg *arg)
1268 size_t printed = scnprintf(bf, size, "%d", fd);
1269 const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1272 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1277 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1278 struct syscall_arg *arg)
1281 size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1282 struct thread_trace *ttrace = arg->thread->priv;
1284 if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
1285 zfree(&ttrace->paths.table[fd]);
1290 static bool trace__filter_duration(struct trace *trace, double t)
1292 return t < (trace->duration_filter * NSEC_PER_MSEC);
1295 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1297 double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1299 return fprintf(fp, "%10.3f ", ts);
/*
 * Flags set asynchronously by sig_handler().  They are volatile sig_atomic_t
 * because that is the object type a signal handler may portably write to,
 * and volatile keeps the compiler from caching them across reads.
 */
static volatile sig_atomic_t done = false;
static volatile sig_atomic_t interrupted = false;

/*
 * Signal handler: marks the run as finished and records whether the signal
 * that arrived was SIGINT.
 */
static void sig_handler(int sig)
{
	done = true;
	interrupted = sig == SIGINT;
}
1311 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1312 u64 duration, u64 tstamp, FILE *fp)
1314 size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1315 printed += fprintf_duration(duration, fp);
1317 if (trace->multiple_threads) {
1318 if (trace->show_comm)
1319 printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1320 printed += fprintf(fp, "%d ", thread->tid);
/*
 * Handle a non-sample perf event.  PERF_RECORD_LOST is reported in red on
 * trace->output ("LOST <n> events!") and then passed to
 * machine__process_lost_event(); the other visible path hands the event to
 * machine__process_event().  The result of either call is stored in `ret`.
 *
 * NOTE(review): the declaration of `ret`, the remaining case labels and
 * the return statement are not shown below.
 */
1326 static int trace__process_event(struct trace *trace, struct machine *machine,
1327 union perf_event *event, struct perf_sample *sample)
1331 switch (event->header.type) {
1332 case PERF_RECORD_LOST:
1333 color_fprintf(trace->output, PERF_COLOR_RED,
1334 "LOST %" PRIu64 " events!\n", event->lost.lost);
1335 ret = machine__process_lost_event(machine, event, sample);
1337 ret = machine__process_event(machine, event, sample);
/*
 * perf_tool-style callback adapter: recover the struct trace that embeds
 * `tool` via container_of() and forward to trace__process_event(), with
 * the argument order that function expects.
 */
1344 static int trace__tool_process(struct perf_tool *tool,
1345 union perf_event *event,
1346 struct perf_sample *sample,
1347 struct machine *machine)
1349 struct trace *trace = container_of(tool, struct trace, tool);
1350 return trace__process_event(trace, machine, event, sample);
/*
 * Prepare symbol/machine state for a live trace: call symbol__init(),
 * create the host machine in trace->host, then synthesize the threads of
 * evlist->threads through __machine__synthesize_threads(), using
 * trace__tool_process as the event callback.
 *
 * NOTE(review): the checks on `err` after each step, the action taken when
 * trace->host is NULL, and the return statement are not shown below.
 */
1353 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1355 int err = symbol__init();
1360 trace->host = machine__new_host();
1361 if (trace->host == NULL)
1364 err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1365 evlist->threads, trace__tool_process, false);
/*
 * Build sc->arg_scnprintf, the per-argument formatter table for a syscall.
 *
 * The table has (nr_fields - 1) zeroed slots, and the walk starts at
 * fields->next, so the first tracepoint field gets no slot.  For each
 * remaining field the formatter comes from sc->fmt when one is defined for
 * that index; otherwise pointer-typed fields (FIELD_IS_POINTER) get
 * syscall_arg__scnprintf_hex.  Other slots stay NULL from calloc().
 *
 * NOTE(review): the declaration/increment of `idx`, the guard around the
 * sc->fmt->arg_parm assignment, the allocation-failure action and the
 * return value are not shown below.
 */
1372 static int syscall__set_arg_fmts(struct syscall *sc)
1374 struct format_field *field;
1377 sc->arg_scnprintf = calloc(sc->tp_format->format.nr_fields - 1, sizeof(void *));
1378 if (sc->arg_scnprintf == NULL)
1382 sc->arg_parm = sc->fmt->arg_parm;
1384 for (field = sc->tp_format->format.fields->next; field; field = field->next) {
1385 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1386 sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1387 else if (field->flags & FIELD_IS_POINTER)
1388 sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
/*
 * Populate trace->syscalls.table[id] on first use.
 *
 * Steps visible below:
 *  - resolve the syscall name with audit_syscall_to_name();
 *  - if id is beyond syscalls.max, realloc() the table to id + 1 entries
 *    and zero the newly added tail (or the whole table when max is -1,
 *    i.e. there was no table yet), then publish the new table and max;
 *  - if an event qualifier list is set, mark the entry filtered when
 *    "name is in the list" equals not_ev_qualifier (in the list while the
 *    list is negated, or absent while it is not);
 *  - look up the optional formatting description (syscall_fmt__find) and
 *    the "syscalls:sys_enter_<name>" tracepoint format, retrying with
 *    sc->fmt->alias when the first lookup fails;
 *  - finish with syscall__set_arg_fmts().
 *
 * NOTE(review): the declarations of `sc`/`tp_name`, the assignment of
 * sc->name, and the actions taken on the failure branches (NULL name,
 * realloc failure, missing tp_format) are not shown below.
 */
1395 static int trace__read_syscall_info(struct trace *trace, int id)
1399 const char *name = audit_syscall_to_name(id, trace->audit.machine);
1404 if (id > trace->syscalls.max) {
1405 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1407 if (nsyscalls == NULL)
1410 if (trace->syscalls.max != -1) {
1411 memset(nsyscalls + trace->syscalls.max + 1, 0,
1412 (id - trace->syscalls.max) * sizeof(*sc));
1414 memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1417 trace->syscalls.table = nsyscalls;
1418 trace->syscalls.max = id;
1421 sc = trace->syscalls.table + id;
1424 if (trace->ev_qualifier) {
1425 bool in = strlist__find(trace->ev_qualifier, name) != NULL;
1427 if (!(in ^ trace->not_ev_qualifier)) {
1428 sc->filtered = true;
1430 * No need to do read tracepoint information since this will be
1437 sc->fmt = syscall_fmt__find(sc->name);
1439 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1440 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1442 if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
1443 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1444 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1447 if (sc->tp_format == NULL)
1450 return syscall__set_arg_fmts(sc);
/*
 * Format the argument list of one syscall invocation into bf.
 *
 * When the tracepoint format is known (sc->tp_format != NULL) the fields
 * are walked starting at fields->next, with arg.idx and `bit` advancing in
 * step.  An argument whose value is 0 is handled specially unless its
 * formatter is SCA_STRARRAY and it has an arg_parm entry.  Each printed
 * argument is emitted as "name: value", separated by ", ", where the value
 * comes from the per-argument formatter if there is one and from "%ld"
 * otherwise.
 *
 * The final scnprintf() belongs to the path without a tracepoint format
 * and prints each raw args[i] together with its index i.
 *
 * NOTE(review): the initialisers of `arg`, `printed`, `bit` and `i`, the
 * statement executed for a suppressed zero argument, the format string of
 * the fallback path and the return are not shown below.
 */
1453 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
1454 unsigned long *args, struct trace *trace,
1455 struct thread *thread)
1459 if (sc->tp_format != NULL) {
1460 struct format_field *field;
1462 struct syscall_arg arg = {
1469 for (field = sc->tp_format->format.fields->next; field;
1470 field = field->next, ++arg.idx, bit <<= 1) {
1474 * Suppress this argument if its value is zero and
1475 * and we don't have a string associated in an
1478 if (args[arg.idx] == 0 &&
1479 !(sc->arg_scnprintf &&
1480 sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1481 sc->arg_parm[arg.idx]))
1484 printed += scnprintf(bf + printed, size - printed,
1485 "%s%s: ", printed ? ", " : "", field->name);
1486 if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
1487 arg.val = args[arg.idx];
1489 arg.parm = sc->arg_parm[arg.idx];
1490 printed += sc->arg_scnprintf[arg.idx](bf + printed,
1491 size - printed, &arg);
1493 printed += scnprintf(bf + printed, size - printed,
1494 "%ld", args[arg.idx]);
1501 printed += scnprintf(bf + printed, size - printed,
1503 printed ? ", " : "", i, args[i]);
/*
 * Signature of the per-tracepoint callbacks in this file (for example
 * trace__vfs_getname).  A pointer of this type is kept in evsel->handler
 * and invoked from trace__process_sample() and the trace__run() loop.
 */
1511 typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
1512 struct perf_sample *sample);
/*
 * Return the table entry describing syscall `id`, loading it on demand.
 *
 * If id lies beyond syscalls.max or its entry has no name yet,
 * trace__read_syscall_info() is called; the same condition is re-checked
 * afterwards before &trace->syscalls.table[id] is returned.
 *
 * Two diagnostics go to trace->output: "Invalid syscall <id> id,
 * skipping ..." with a running counter `n`, and "Problems reading syscall
 * <id>[(name)] information" on the failure path.
 *
 * NOTE(review): the conditions guarding the "Invalid syscall" message
 * (including any verbosity check), the declaration of `n`, the jumps to
 * the failure path and its return value are not shown below.
 */
1514 static struct syscall *trace__syscall_info(struct trace *trace,
1515 struct perf_evsel *evsel, int id)
1521 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1522 * before that, leaving at a higher verbosity level till that is
1523 * explained. Reproduced with plain ftrace with:
1525 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1526 * grep "NR -1 " /t/trace_pipe
1528 * After generating some load on the machine.
1532 fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1533 id, perf_evsel__name(evsel), ++n);
1538 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1539 trace__read_syscall_info(trace, id))
1542 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1545 return &trace->syscalls.table[id];
1549 fprintf(trace->output, "Problems reading syscall %d", id);
1550 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1551 fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1552 fputs(" information\n", trace->output);
/*
 * Record one completed call of syscall `id` in the per-thread statistics.
 *
 * The intlist ttrace->syscall_stats is keyed by syscall id; the node's
 * priv pointer holds a lazily malloc()ed struct stats.  The duration fed
 * to update_stats() is sample->time - ttrace->entry_time, computed only
 * when an entry time was recorded and the sample is strictly later.
 *
 * NOTE(review): the declaration and initial value of `duration`, the NULL
 * checks after intlist__findnew()/malloc() and the initialisation of a
 * freshly allocated struct stats are not shown below.
 */
1557 static void thread__update_stats(struct thread_trace *ttrace,
1558 int id, struct perf_sample *sample)
1560 struct int_node *inode;
1561 struct stats *stats;
1564 inode = intlist__findnew(ttrace->syscall_stats, id);
1568 stats = inode->priv;
1569 if (stats == NULL) {
1570 stats = malloc(sizeof(struct stats));
1574 inode->priv = stats;
1577 if (ttrace->entry_time && sample->time > ttrace->entry_time)
1578 duration = sample->time - ttrace->entry_time;
1580 update_stats(stats, duration);
/*
 * Tracepoint handler for syscall entry.
 *
 * Looks up the syscall description and the thread that issued it, then
 * formats "name(args" into the per-thread buffer ttrace->entry_str, which
 * is allocated on first use, and stamps ttrace->entry_time with the sample
 * time.  Nothing is normally printed here: entry_pending is set so that
 * trace__sys_exit() can emit the buffered text with the result.
 * exit_group/exit are printed immediately with a duration of 1, but only
 * when neither a duration filter nor summary-only mode is active.
 *
 * NOTE(review): the buffer size 1024 is repeated as a literal in the
 * malloc() and both scnprintf() bounds.  The early-exit checks (sc NULL,
 * sc->filtered, ttrace NULL), the declarations of `args`, `msg` and
 * `printed`, the else-branch structure around entry_pending and the
 * return value are not shown below.
 */
1583 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1584 struct perf_sample *sample)
1589 struct thread *thread;
1590 int id = perf_evsel__sc_tp_uint(evsel, id, sample);
1591 struct syscall *sc = trace__syscall_info(trace, evsel, id);
1592 struct thread_trace *ttrace;
1600 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1601 ttrace = thread__trace(thread, trace->output);
1605 args = perf_evsel__sc_tp_ptr(evsel, args, sample);
1606 ttrace = thread->priv;
1608 if (ttrace->entry_str == NULL) {
1609 ttrace->entry_str = malloc(1024);
1610 if (!ttrace->entry_str)
1614 ttrace->entry_time = sample->time;
1615 msg = ttrace->entry_str;
1616 printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name);
1618 printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed,
1619 args, trace, thread);
1621 if (!strcmp(sc->name, "exit_group") || !strcmp(sc->name, "exit")) {
1622 if (!trace->duration_filter && !trace->summary_only) {
1623 trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
1624 fprintf(trace->output, "%-70s\n", ttrace->entry_str);
1627 ttrace->entry_pending = true;
/*
 * Tracepoint handler for syscall exit: prints the completed syscall line.
 *
 * Visible steps:
 *  - resolve the syscall description and the thread, update the
 *    per-syscall statistics and read the return value from the sample;
 *  - for a successful open (id == audit.open_id, ret >= 0) with a pending
 *    vfs_getname pathname, associate that path with the returned fd via
 *    trace__set_fd_pathname(), clear the pending pointer and count it;
 *  - compute the duration from ttrace->entry_time and consult
 *    trace__filter_duration() / duration_filter / summary_only;
 *  - print the line head, then either the buffered entry text (padded to
 *    70 columns) or " ... [continued]: name()" when no entry is pending;
 *  - print the result: plain "%d" when the syscall has no fmt entry,
 *    "-1 ERRNAME message" for negative results of syscalls flagged errmsg,
 *    "0 Timeout" for zero results of syscalls flagged timeout, "%#x" for
 *    syscalls flagged hexret;
 *  - terminate the line and clear entry_pending.
 *
 * NOTE(review): the declarations of `ret`, `duration` and `bf`, the
 * early-exit jumps (filtered syscall, duration filter hit, summary-only),
 * any condition around thread__update_stats(), the final else of the
 * result chain and the return value are not shown below.
 */
1632 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
1633 struct perf_sample *sample)
1637 struct thread *thread;
1638 int id = perf_evsel__sc_tp_uint(evsel, id, sample);
1639 struct syscall *sc = trace__syscall_info(trace, evsel, id);
1640 struct thread_trace *ttrace;
1648 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1649 ttrace = thread__trace(thread, trace->output);
1654 thread__update_stats(ttrace, id, sample);
1656 ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
1658 if (id == trace->audit.open_id && ret >= 0 && trace->last_vfs_getname) {
1659 trace__set_fd_pathname(thread, ret, trace->last_vfs_getname);
1660 trace->last_vfs_getname = NULL;
1661 ++trace->stats.vfs_getname;
1664 ttrace = thread->priv;
1666 ttrace->exit_time = sample->time;
1668 if (ttrace->entry_time) {
1669 duration = sample->time - ttrace->entry_time;
1670 if (trace__filter_duration(trace, duration))
1672 } else if (trace->duration_filter)
1675 if (trace->summary_only)
1678 trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
1680 if (ttrace->entry_pending) {
1681 fprintf(trace->output, "%-70s", ttrace->entry_str);
1683 fprintf(trace->output, " ... [");
1684 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1685 fprintf(trace->output, "]: %s()", sc->name);
1688 if (sc->fmt == NULL) {
1690 fprintf(trace->output, ") = %d", ret);
1691 } else if (ret < 0 && sc->fmt->errmsg) {
1693 const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
1694 *e = audit_errno_to_name(-ret);
1696 fprintf(trace->output, ") = -1 %s %s", e, emsg);
1697 } else if (ret == 0 && sc->fmt->timeout)
1698 fprintf(trace->output, ") = 0 Timeout");
1699 else if (sc->fmt->hexret)
1700 fprintf(trace->output, ") = %#x", ret);
1704 fputc('\n', trace->output);
1706 ttrace->entry_pending = false;
/*
 * Handler for the probe:vfs_getname tracepoint: remember the sample's
 * "pathname" field in trace->last_vfs_getname.  trace__sys_exit() consumes
 * and clears it when a subsequent open succeeds.
 *
 * NOTE(review): the stored pointer comes from perf_evsel__rawptr(); how
 * long it stays valid depends on the sample buffer's lifetime -- confirm.
 */
1711 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
1712 struct perf_sample *sample)
1714 trace->last_vfs_getname = perf_evsel__rawptr(evsel, sample, "pathname");
/*
 * Handler for sched:sched_stat_runtime: read the "runtime" field, divide
 * it by NSEC_PER_MSEC and add the result to both the per-thread
 * (ttrace->runtime_ms) and the global (trace->runtime_ms) totals.
 *
 * The trailing fprintf() dumps the event's comm, pid, runtime and vruntime
 * fields to trace->output.
 *
 * NOTE(review): the condition under which that dump runs (presumably when
 * no thread_trace could be obtained), the remaining arguments of the
 * truncated calls and the return value are not shown below.
 */
1718 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
1719 struct perf_sample *sample)
1721 u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1722 double runtime_ms = (double)runtime / NSEC_PER_MSEC;
1723 struct thread *thread = machine__findnew_thread(trace->host,
1726 struct thread_trace *ttrace = thread__trace(thread, trace->output);
1731 ttrace->runtime_ms += runtime_ms;
1732 trace->runtime_ms += runtime_ms;
1736 fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1738 perf_evsel__strval(evsel, sample, "comm"),
1739 (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1741 perf_evsel__intval(evsel, sample, "vruntime"));
/*
 * Decide whether a sample should be ignored given the optional pid/tid
 * lists.  The first test matches when the sample's pid is in pid_list or
 * its tid is in tid_list; the second matches when at least one list
 * exists (reached only if the first test did not match).
 *
 * NOTE(review): the three return statements are not shown below;
 * presumably false (keep) for a list hit, true (skip) when lists exist but
 * did not match, and false when no list is configured.
 */
1745 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
1747 if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
1748 (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
1751 if (trace->pid_list || trace->tid_list)
/*
 * perf_tool sample callback, installed as trace->tool.sample by
 * trace__replay().  Recovers the struct trace embedding `tool`, applies
 * skip_sample(), latches the first sample time into trace->base_time
 * (unless full timestamps were requested) and dispatches to the handler
 * stored in evsel->handler.
 *
 * NOTE(review): the action taken when skip_sample() is true, any NULL
 * check on `handler`, and the return value are not shown below.
 */
1757 static int trace__process_sample(struct perf_tool *tool,
1758 union perf_event *event __maybe_unused,
1759 struct perf_sample *sample,
1760 struct perf_evsel *evsel,
1761 struct machine *machine __maybe_unused)
1763 struct trace *trace = container_of(tool, struct trace, tool);
1766 tracepoint_handler handler = evsel->handler;
1768 if (skip_sample(trace, sample))
1771 if (!trace->full_time && trace->base_time == 0)
1772 trace->base_time = sample->time;
1776 handler(trace, evsel, sample);
/*
 * Turn the pid and tid target strings (trace->opts.target.pid / .tid)
 * into the intlists trace->pid_list and trace->tid_list that skip_sample()
 * consults.  A NULL result from intlist__new() is reported with pr_err().
 *
 * NOTE(review): the return values for the success and failure paths are
 * not shown below.
 */
1782 static int parse_target_str(struct trace *trace)
1784 if (trace->opts.target.pid) {
1785 trace->pid_list = intlist__new(trace->opts.target.pid);
1786 if (trace->pid_list == NULL) {
1787 pr_err("Error parsing process id string\n");
1792 if (trace->opts.target.tid) {
1793 trace->tid_list = intlist__new(trace->opts.target.tid);
1794 if (trace->tid_list == NULL) {
1795 pr_err("Error parsing thread id string\n");
/*
 * Implement "perf trace record": build an argument vector for cmd_record()
 * made of the fixed record_args, one event-selection string, and the
 * caller's remaining argv, then run cmd_record() with it.
 *
 * The event string uses the raw_syscalls tracepoints when they exist and
 * falls back to the older syscalls:sys_enter/sys_exit pair; if neither is
 * valid an error is logged.  rec_argv is calloc()ed with one extra slot
 * beyond rec_argc, which stays NULL.
 *
 * NOTE(review): no free() of rec_argv appears in the lines shown, on
 * either the error path or after cmd_record() returns -- check for a leak.
 * The contents of record_args, the increment of `i` after the event string
 * and the error-path returns are not shown below.
 */
1803 static int trace__record(int argc, const char **argv)
1805 unsigned int rec_argc, i, j;
1806 const char **rec_argv;
1807 const char * const record_args[] = {
1815 /* +1 is for the event string below */
1816 rec_argc = ARRAY_SIZE(record_args) + 1 + argc;
1817 rec_argv = calloc(rec_argc + 1, sizeof(char *));
1819 if (rec_argv == NULL)
1822 for (i = 0; i < ARRAY_SIZE(record_args); i++)
1823 rec_argv[i] = record_args[i];
1825 /* event string may be different for older kernels - e.g., RHEL6 */
1826 if (is_valid_tracepoint("raw_syscalls:sys_enter"))
1827 rec_argv[i] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
1828 else if (is_valid_tracepoint("syscalls:sys_enter"))
1829 rec_argv[i] = "syscalls:sys_enter,syscalls:sys_exit";
1831 pr_err("Neither raw_syscalls nor syscalls events exist.\n");
1836 for (j = 0; j < (unsigned int)argc; j++, i++)
1837 rec_argv[i] = argv[j];
1839 return cmd_record(i, rec_argv, NULL);
/* Forward declaration: trace__run() calls this before its definition. */
1842 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
/*
 * Best-effort registration of the probe:vfs_getname tracepoint.  The evsel
 * is only added to evlist, with trace__vfs_getname as handler, if it
 * exposes a "pathname" field; otherwise it is deleted again.
 *
 * NOTE(review): the handling of a NULL evsel and the return after the
 * delete are not shown below.
 */
1844 static void perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
1846 struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
1850 if (perf_evsel__field(evsel, "pathname") == NULL) {
1851 perf_evsel__delete(evsel);
1855 evsel->handler = trace__vfs_getname;
1856 perf_evlist__add(evlist, evsel);
/*
 * Live tracing main loop.
 *
 * Visible sequence: create the evlist; register the syscall enter/exit
 * tracepoints, the optional vfs_getname probe and sched_stat_runtime;
 * create the cpu/thread maps for the target; initialise symbols; configure
 * the evlist; install signal handlers; prepare the workload; open and
 * mmap the events; enable them and start the workload.  Then every mmap
 * ring is drained repeatedly, dispatching samples to evsel->handler and
 * everything else to trace__process_event(), with poll() used to wait when
 * a pass produced no new events.  Afterwards events are disabled, the
 * thread summary and optional tool statistics are printed and the evlist
 * is deleted.
 *
 * NOTE(review): many control-flow lines (error checks, labels, the outer
 * loop construct, conditions around the workload, the summary and the
 * sched tracepoint, and the return value) are not shown below, so the
 * exact ordering of the cleanup labels cannot be confirmed from here.
 */
1859 static int trace__run(struct trace *trace, int argc, const char **argv)
1861 struct perf_evlist *evlist = perf_evlist__new();
1862 struct perf_evsel *evsel;
1864 unsigned long before;
1865 const bool forks = argc > 0;
1869 if (evlist == NULL) {
1870 fprintf(trace->output, "Not enough memory to run!\n");
1874 if (perf_evlist__add_syscall_newtp(evlist, trace__sys_enter, trace__sys_exit))
1877 perf_evlist__add_vfs_getname(evlist);
1880 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
1881 trace__sched_stat_runtime))
1884 err = perf_evlist__create_maps(evlist, &trace->opts.target);
1886 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
1887 goto out_delete_evlist;
1890 err = trace__symbols_init(trace, evlist);
1892 fprintf(trace->output, "Problems initializing symbol libraries!\n");
1893 goto out_delete_evlist;
1896 perf_evlist__config(evlist, &trace->opts);
/* Both signals are routed to sig_handler. */
1898 signal(SIGCHLD, sig_handler);
1899 signal(SIGINT, sig_handler);
1902 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
1905 fprintf(trace->output, "Couldn't run the workload!\n");
1906 goto out_delete_evlist;
1910 err = perf_evlist__open(evlist);
1912 goto out_error_open;
1914 err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
1916 fprintf(trace->output, "Couldn't mmap the events: %s\n", strerror(errno));
1917 goto out_delete_evlist;
1920 perf_evlist__enable(evlist);
1923 perf_evlist__start_workload(evlist);
/* Show thread ids when the first map entry is -1 or several threads are traced. */
1925 trace->multiple_threads = evlist->threads->map[0] == -1 || evlist->threads->nr > 1;
/* Snapshot the event count to detect a pass that consumed nothing. */
1927 before = trace->nr_events;
1929 for (i = 0; i < evlist->nr_mmaps; i++) {
1930 union perf_event *event;
1932 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
1933 const u32 type = event->header.type;
1934 tracepoint_handler handler;
1935 struct perf_sample sample;
1939 err = perf_evlist__parse_sample(evlist, event, &sample);
1941 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
1945 if (!trace->full_time && trace->base_time == 0)
1946 trace->base_time = sample.time;
1948 if (type != PERF_RECORD_SAMPLE) {
1949 trace__process_event(trace, trace->host, event, &sample);
1953 evsel = perf_evlist__id2evsel(evlist, sample.id);
1954 if (evsel == NULL) {
1955 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample.id);
1959 if (sample.raw_data == NULL) {
1960 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
1961 perf_evsel__name(evsel), sample.tid,
1962 sample.cpu, sample.raw_size);
1966 handler = evsel->handler;
1967 handler(trace, evsel, &sample);
1969 perf_evlist__mmap_consume(evlist, i);
/*
 * Nothing new in this pass: wait in poll().  Once `done` is set the wait
 * is bounded to 100 ms; otherwise it blocks indefinitely (-1).
 */
1976 if (trace->nr_events == before) {
1977 int timeout = done ? 100 : -1;
1979 if (poll(evlist->pollfd, evlist->nr_fds, timeout) > 0)
1986 perf_evlist__disable(evlist);
1990 trace__fprintf_thread_summary(trace, trace->output);
1992 if (trace->show_tool_stats) {
1993 fprintf(trace->output, "Stats:\n "
1994 " vfs_getname : %" PRIu64 "\n"
1995 " proc_getname: %" PRIu64 "\n",
1996 trace->stats.vfs_getname,
1997 trace->stats.proc_getname);
2002 perf_evlist__delete(evlist);
2004 trace->live = false;
/* Error reporting: translate errno into text, print it, then clean up. */
2007 char errbuf[BUFSIZ];
2010 perf_evlist__strerror_tp(evlist, errno, errbuf, sizeof(errbuf));
2014 perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2017 fprintf(trace->output, "%s\n", errbuf);
2018 goto out_delete_evlist;
/*
 * Replay mode: process a previously recorded data file instead of tracing
 * live.
 *
 * Wires the perf_tool callbacks (samples go to trace__process_sample, the
 * rest to the generic perf_event__process_* helpers), requests timestamp
 * ordered samples, forces tids into the output, opens the session and uses
 * its host machine.  The vfs_getname handler is attached by name; the
 * sys_enter and sys_exit tracepoints are looked up under raw_syscalls
 * first and under the older syscalls name second, then initialised with
 * trace__sys_enter / trace__sys_exit.  After parse_target_str(), the
 * events are processed and, if requested, the thread summary is printed.
 * The session is deleted at the end.
 *
 * NOTE(review): the "Failed to process events" message has no trailing
 * newline, unlike the other pr_err() calls in this function.  The `.path`
 * initialiser, the declaration of `err`, the error checks/jumps after each
 * step and the return value are not shown below.
 */
2022 static int trace__replay(struct trace *trace)
2024 const struct perf_evsel_str_handler handlers[] = {
2025 { "probe:vfs_getname", trace__vfs_getname, },
2027 struct perf_data_file file = {
2029 .mode = PERF_DATA_MODE_READ,
2031 struct perf_session *session;
2032 struct perf_evsel *evsel;
2035 trace->tool.sample = trace__process_sample;
2036 trace->tool.mmap = perf_event__process_mmap;
2037 trace->tool.mmap2 = perf_event__process_mmap2;
2038 trace->tool.comm = perf_event__process_comm;
2039 trace->tool.exit = perf_event__process_exit;
2040 trace->tool.fork = perf_event__process_fork;
2041 trace->tool.attr = perf_event__process_attr;
2042 trace->tool.tracing_data = perf_event__process_tracing_data;
2043 trace->tool.build_id = perf_event__process_build_id;
2045 trace->tool.ordered_samples = true;
2046 trace->tool.ordering_requires_timestamps = true;
2048 /* add tid to output */
2049 trace->multiple_threads = true;
2051 if (symbol__init() < 0)
2054 session = perf_session__new(&file, false, &trace->tool);
2055 if (session == NULL)
2058 trace->host = &session->machines.host;
2060 err = perf_session__set_tracepoints_handlers(session, handlers);
2064 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2065 "raw_syscalls:sys_enter");
2066 /* older kernels have syscalls tp versus raw_syscalls */
2068 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2069 "syscalls:sys_enter");
2070 if (evsel == NULL) {
2071 pr_err("Data file does not have raw_syscalls:sys_enter event\n");
2075 if (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
2076 perf_evsel__init_sc_tp_ptr_field(evsel, args)) {
2077 pr_err("Error during initialize raw_syscalls:sys_enter event\n");
2081 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2082 "raw_syscalls:sys_exit");
2084 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2085 "syscalls:sys_exit");
2086 if (evsel == NULL) {
2087 pr_err("Data file does not have raw_syscalls:sys_exit event\n");
2091 if (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
2092 perf_evsel__init_sc_tp_uint_field(evsel, ret)) {
2093 pr_err("Error during initialize raw_syscalls:sys_exit event\n");
2097 err = parse_target_str(trace);
2103 err = perf_session__process_events(session, &trace->tool);
2105 pr_err("Failed to process events, error %d", err);
2107 else if (trace->summary)
2108 trace__fprintf_thread_summary(trace, trace->output);
2111 perf_session__delete(session);
/*
 * Print the heading of the per-thread summary section to fp and keep
 * fprintf()'s result in `printed`.
 * NOTE(review): the declaration of `printed` and the return are not shown.
 */
2116 static size_t trace__fprintf_threads_header(FILE *fp)
2120 printed = fprintf(fp, "\n Summary of events:\n\n");
/*
 * Print the per-syscall statistics table of one thread to fp.
 *
 * After the column headers, the nodes of ttrace->syscall_stats are visited
 * with intlist__first()/intlist__next(); each node's id (inode->i) indexes
 * trace->syscalls.table for the name and its priv pointer holds the
 * struct stats.  min, max and avg are divided by NSEC_PER_MSEC for
 * display; the last column is stddev as a percentage of the average
 * (0 when the average is 0), computed before avg is rescaled.
 *
 * NOTE(review): the loop construct, the NULL checks on `inode`/`stats`,
 * the declarations of `printed`, `sc` and `pct`, the arguments of the
 * truncated fprintf() and the return are not shown below.
 */
2125 static size_t thread__dump_stats(struct thread_trace *ttrace,
2126 struct trace *trace, FILE *fp)
2128 struct stats *stats;
2131 struct int_node *inode = intlist__first(ttrace->syscall_stats);
2136 printed += fprintf(fp, "\n");
2138 printed += fprintf(fp, " syscall calls min avg max stddev\n");
2139 printed += fprintf(fp, " (msec) (msec) (msec) (%%)\n");
2140 printed += fprintf(fp, " --------------- -------- --------- --------- --------- ------\n");
2142 /* each int_node is a syscall */
2144 stats = inode->priv;
2146 double min = (double)(stats->min) / NSEC_PER_MSEC;
2147 double max = (double)(stats->max) / NSEC_PER_MSEC;
2148 double avg = avg_stats(stats);
2150 u64 n = (u64) stats->n;
2152 pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2153 avg /= NSEC_PER_MSEC;
2155 sc = &trace->syscalls.table[inode->i];
2156 printed += fprintf(fp, " %-15s", sc->name);
2157 printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f",
2159 printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
2162 inode = intlist__next(inode);
2165 printed += fprintf(fp, "\n\n");
/*
 * Context handed to trace__fprintf_one_thread() through the void *priv
 * argument of machine__for_each_thread().  That callback reads the fields
 * fp, trace and printed from it.
 * NOTE(review): only the `trace` member is shown below.
 */
2170 /* struct used to pass data to per-thread function */
2171 struct summary_data {
2173 struct trace *trace;
/*
 * machine__for_each_thread() callback: print one thread's summary line
 * (comm, tid, event count, share of all events in percent, runtime in
 * msec) followed by its per-syscall table from thread__dump_stats().
 *
 * NOTE(review): `printed` starts from data->printed and is then added back
 * with `data->printed += printed`, so the previous total is counted twice
 * on every call; `data->printed = printed` (or starting `printed` at 0)
 * looks like the intent -- confirm.  The ratio also divides by
 * trace->nr_events without a visible zero check.  The declaration of
 * `ratio`, any NULL check on `ttrace` and the return are not shown below.
 */
2177 static int trace__fprintf_one_thread(struct thread *thread, void *priv)
2179 struct summary_data *data = priv;
2180 FILE *fp = data->fp;
2181 size_t printed = data->printed;
2182 struct trace *trace = data->trace;
2183 struct thread_trace *ttrace = thread->priv;
2189 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2191 printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
2192 printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
2193 printed += fprintf(fp, "%.1f%%", ratio);
2194 printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2195 printed += thread__dump_stats(ttrace, trace, fp);
2197 data->printed += printed;
/*
 * Print the summary for every thread known to trace->host: the section
 * header first, then trace__fprintf_one_thread() for each thread via
 * machine__for_each_thread().  Returns the count accumulated in
 * data.printed.
 * NOTE(review): the initialisers of `data` are not shown below.
 */
2202 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2204 struct summary_data data = {
2208 data.printed = trace__fprintf_threads_header(fp);
2210 machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data);
2212 return data.printed;
/*
 * Option callback: opt->value is taken as the struct trace, and str is
 * converted with atof() into trace->duration_filter.
 *
 * NOTE(review): atof() cannot report malformed input; a non-numeric string
 * silently yields 0.  The return value is not shown below.
 */
2215 static int trace__set_duration(const struct option *opt, const char *str,
2216 int unset __maybe_unused)
2218 struct trace *trace = opt->value;
2220 trace->duration_filter = atof(str);
/*
 * Open `filename` for writing as trace->output.  If a non-empty file of
 * that name already exists it is first renamed to "<filename>.old"; the
 * result of rename() is not checked.  Returns 0 on success or -errno when
 * fopen() fails.
 *
 * NOTE(review): the declaration of `st` and any removal of a previous
 * ".old" file are not shown below.
 */
2224 static int trace__open_output(struct trace *trace, const char *filename)
2228 if (!stat(filename, &st) && st.st_size) {
2229 char oldname[PATH_MAX];
2231 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2233 rename(filename, oldname);
2236 trace->output = fopen(filename, "w");
2238 return trace->output == NULL ? -errno : 0;
2241 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
2243 const char * const trace_usage[] = {
2244 "perf trace [<options>] [<command>]",
2245 "perf trace [<options>] -- <command> [<options>]",
2246 "perf trace record [<options>] [<command>]",
2247 "perf trace record [<options>] -- <command> [<options>]",
2250 struct trace trace = {
2252 .machine = audit_detect_machine(),
2253 .open_id = audit_name_to_syscall("open", trace.audit.machine),
2263 .user_freq = UINT_MAX,
2264 .user_interval = ULLONG_MAX,
2265 .no_buffering = true,
2271 const char *output_name = NULL;
2272 const char *ev_qualifier_str = NULL;
2273 const struct option trace_options[] = {
2274 OPT_BOOLEAN(0, "comm", &trace.show_comm,
2275 "show the thread COMM next to its id"),
2276 OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
2277 OPT_STRING('e', "expr", &ev_qualifier_str, "expr",
2278 "list of events to trace"),
2279 OPT_STRING('o', "output", &output_name, "file", "output file name"),
2280 OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
2281 OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
2282 "trace events on existing process id"),
2283 OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
2284 "trace events on existing thread id"),
2285 OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
2286 "system-wide collection from all CPUs"),
2287 OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
2288 "list of cpus to monitor"),
2289 OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
2290 "child tasks do not inherit counters"),
2291 OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
2292 "number of mmap data pages",
2293 perf_evlist__parse_mmap_pages),
2294 OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
2296 OPT_CALLBACK(0, "duration", &trace, "float",
2297 "show only events with duration > N.M ms",
2298 trace__set_duration),
2299 OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
2300 OPT_INCR('v', "verbose", &verbose, "be more verbose"),
2301 OPT_BOOLEAN('T', "time", &trace.full_time,
2302 "Show full timestamp, not time relative to first start"),
2303 OPT_BOOLEAN('s', "summary", &trace.summary_only,
2304 "Show only syscall summary with statistics"),
2305 OPT_BOOLEAN('S', "with-summary", &trace.summary,
2306 "Show all syscalls and summary with statistics"),
2312 if ((argc > 1) && (strcmp(argv[1], "record") == 0))
2313 return trace__record(argc-2, &argv[2]);
2315 argc = parse_options(argc, argv, trace_options, trace_usage, 0);
2317 /* summary_only implies summary option, but don't overwrite summary if set */
2318 if (trace.summary_only)
2319 trace.summary = trace.summary_only;
2321 if (output_name != NULL) {
2322 err = trace__open_output(&trace, output_name);
2324 perror("failed to create output file");
2329 if (ev_qualifier_str != NULL) {
2330 const char *s = ev_qualifier_str;
2332 trace.not_ev_qualifier = *s == '!';
2333 if (trace.not_ev_qualifier)
2335 trace.ev_qualifier = strlist__new(true, s);
2336 if (trace.ev_qualifier == NULL) {
2337 fputs("Not enough memory to parse event qualifier",
2344 err = target__validate(&trace.opts.target);
2346 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2347 fprintf(trace.output, "%s", bf);
2351 err = target__parse_uid(&trace.opts.target);
2353 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2354 fprintf(trace.output, "%s", bf);
2358 if (!argc && target__none(&trace.opts.target))
2359 trace.opts.target.system_wide = true;
2362 err = trace__replay(&trace);
2364 err = trace__run(&trace, argc, argv);
2367 if (output_name != NULL)
2368 fclose(trace.output);