/*
 * Builtin 'trace' command:
 *
 * Display a continuously updated trace of any workload, CPU, specific PID,
 * system wide, etc.  Default format is loosely strace like, but any other
 * event may be specified using --event.
 *
 * Copyright (C) 2012, 2013, 2014, 2015 Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
 *
 * Initially based on the 'trace' prototype by Thomas Gleixner:
 *
 * http://lwn.net/Articles/415728/ ("Announcing a new utility: 'trace'")
 *
 * Released under the GPL v2. (and only v2, not any later version)
 */
19 #include <traceevent/event-parse.h>
20 #include <api/fs/tracing_path.h>
22 #include "util/color.h"
23 #include "util/debug.h"
24 #include "util/event.h"
25 #include "util/evlist.h"
26 #include <subcmd/exec-cmd.h>
27 #include "util/machine.h"
28 #include "util/path.h"
29 #include "util/session.h"
30 #include "util/thread.h"
31 #include <subcmd/parse-options.h>
32 #include "util/strlist.h"
33 #include "util/intlist.h"
34 #include "util/thread_map.h"
35 #include "util/stat.h"
36 #include "trace/beauty/beauty.h"
37 #include "trace-event.h"
38 #include "util/parse-events.h"
39 #include "util/bpf-loader.h"
40 #include "callchain.h"
41 #include "print_binary.h"
43 #include "syscalltbl.h"
44 #include "rb_resort.h"
48 #include <libaudit.h> /* FIXME: Still needed for audit_errno_to_name */
53 #include <linux/err.h>
54 #include <linux/filter.h>
55 #include <linux/audit.h>
56 #include <linux/kernel.h>
57 #include <linux/random.h>
58 #include <linux/stringify.h>
59 #include <linux/time64.h>
61 #include "sane_ctype.h"
64 # define O_CLOEXEC 02000000
68 struct perf_tool tool;
69 struct syscalltbl *sctbl;
72 struct syscall *table;
74 struct perf_evsel *sys_enter,
78 struct record_opts opts;
79 struct perf_evlist *evlist;
81 struct thread *current;
84 unsigned long nr_events;
85 struct strlist *ev_qualifier;
94 double duration_filter;
100 unsigned int max_stack;
101 unsigned int min_stack;
102 bool not_ev_qualifier;
106 bool multiple_threads;
110 bool show_tool_stats;
112 bool kernel_syscallchains;
122 u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
123 void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
127 #define TP_UINT_FIELD(bits) \
128 static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
131 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
140 #define TP_UINT_FIELD__SWAPPED(bits) \
141 static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
144 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
145 return bswap_##bits(value);\
148 TP_UINT_FIELD__SWAPPED(16);
149 TP_UINT_FIELD__SWAPPED(32);
150 TP_UINT_FIELD__SWAPPED(64);
152 static int tp_field__init_uint(struct tp_field *field,
153 struct format_field *format_field,
156 field->offset = format_field->offset;
158 switch (format_field->size) {
160 field->integer = tp_field__u8;
163 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
166 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
169 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
178 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
180 return sample->raw_data + field->offset;
183 static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
185 field->offset = format_field->offset;
186 field->pointer = tp_field__ptr;
193 struct tp_field args, ret;
197 static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
198 struct tp_field *field,
201 struct format_field *format_field = perf_evsel__field(evsel, name);
203 if (format_field == NULL)
206 return tp_field__init_uint(field, format_field, evsel->needs_swap);
209 #define perf_evsel__init_sc_tp_uint_field(evsel, name) \
210 ({ struct syscall_tp *sc = evsel->priv;\
211 perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); })
213 static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
214 struct tp_field *field,
217 struct format_field *format_field = perf_evsel__field(evsel, name);
219 if (format_field == NULL)
222 return tp_field__init_ptr(field, format_field);
225 #define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
226 ({ struct syscall_tp *sc = evsel->priv;\
227 perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name); })
229 static void perf_evsel__delete_priv(struct perf_evsel *evsel)
232 perf_evsel__delete(evsel);
235 static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
237 evsel->priv = malloc(sizeof(struct syscall_tp));
238 if (evsel->priv != NULL) {
239 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
242 evsel->handler = handler;
253 static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
255 struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);
257 /* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */
259 evsel = perf_evsel__newtp("syscalls", direction);
264 if (perf_evsel__init_syscall_tp(evsel, handler))
270 perf_evsel__delete_priv(evsel);
/* Read a decoded integer/pointer syscall_tp field from a sample. */
#define perf_evsel__sc_tp_uint(evsel, name, sample) \
	({ struct syscall_tp *fields = evsel->priv; \
	   fields->name.integer(&fields->name, sample); })

#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
	({ struct syscall_tp *fields = evsel->priv; \
	   fields->name.pointer(&fields->name, sample); })
/*
 * A value -> string table for pretty-printing syscall arguments; 'offset'
 * is subtracted from the value before indexing (for 1-based enums etc).
 */
struct strarray {
	int	    offset;
	int	    nr_entries;
	const char **entries;
};

#define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
	.nr_entries = ARRAY_SIZE(array), \
	.entries = array, \
}

#define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
	.offset	    = off, \
	.nr_entries = ARRAY_SIZE(array), \
	.entries = array, \
}
299 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
301 struct syscall_arg *arg)
303 struct strarray *sa = arg->parm;
304 int idx = arg->val - sa->offset;
306 if (idx < 0 || idx >= sa->nr_entries)
307 return scnprintf(bf, size, intfmt, arg->val);
309 return scnprintf(bf, size, "%s", sa->entries[idx]);
312 static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
313 struct syscall_arg *arg)
315 return __syscall_arg__scnprintf_strarray(bf, size, "%d", arg);
318 #define SCA_STRARRAY syscall_arg__scnprintf_strarray
320 #if defined(__i386__) || defined(__x86_64__)
322 * FIXME: Make this available to all arches as soon as the ioctl beautifier
323 * gets rewritten to support all arches.
325 static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
326 struct syscall_arg *arg)
328 return __syscall_arg__scnprintf_strarray(bf, size, "%#x", arg);
331 #define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
332 #endif /* defined(__i386__) || defined(__x86_64__) */
334 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
335 struct syscall_arg *arg);
337 #define SCA_FD syscall_arg__scnprintf_fd
340 #define AT_FDCWD -100
343 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
344 struct syscall_arg *arg)
349 return scnprintf(bf, size, "CWD");
351 return syscall_arg__scnprintf_fd(bf, size, arg);
354 #define SCA_FDAT syscall_arg__scnprintf_fd_at
356 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
357 struct syscall_arg *arg);
359 #define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
361 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
362 struct syscall_arg *arg)
364 return scnprintf(bf, size, "%#lx", arg->val);
367 #define SCA_HEX syscall_arg__scnprintf_hex
369 static size_t syscall_arg__scnprintf_int(char *bf, size_t size,
370 struct syscall_arg *arg)
372 return scnprintf(bf, size, "%d", arg->val);
375 #define SCA_INT syscall_arg__scnprintf_int
377 static const char *bpf_cmd[] = {
378 "MAP_CREATE", "MAP_LOOKUP_ELEM", "MAP_UPDATE_ELEM", "MAP_DELETE_ELEM",
379 "MAP_GET_NEXT_KEY", "PROG_LOAD",
381 static DEFINE_STRARRAY(bpf_cmd);
383 static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
384 static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
386 static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
387 static DEFINE_STRARRAY(itimers);
389 static const char *keyctl_options[] = {
390 "GET_KEYRING_ID", "JOIN_SESSION_KEYRING", "UPDATE", "REVOKE", "CHOWN",
391 "SETPERM", "DESCRIBE", "CLEAR", "LINK", "UNLINK", "SEARCH", "READ",
392 "INSTANTIATE", "NEGATE", "SET_REQKEY_KEYRING", "SET_TIMEOUT",
393 "ASSUME_AUTHORITY", "GET_SECURITY", "SESSION_TO_PARENT", "REJECT",
394 "INSTANTIATE_IOV", "INVALIDATE", "GET_PERSISTENT",
396 static DEFINE_STRARRAY(keyctl_options);
398 static const char *whences[] = { "SET", "CUR", "END",
406 static DEFINE_STRARRAY(whences);
408 static const char *fcntl_cmds[] = {
409 "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
410 "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
411 "F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
414 static DEFINE_STRARRAY(fcntl_cmds);
416 static const char *rlimit_resources[] = {
417 "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
418 "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
421 static DEFINE_STRARRAY(rlimit_resources);
423 static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
424 static DEFINE_STRARRAY(sighow);
426 static const char *clockid[] = {
427 "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
428 "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE", "BOOTTIME",
429 "REALTIME_ALARM", "BOOTTIME_ALARM", "SGI_CYCLE", "TAI"
431 static DEFINE_STRARRAY(clockid);
433 static const char *socket_families[] = {
434 "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
435 "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
436 "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
437 "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
438 "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
439 "ALG", "NFC", "VSOCK",
441 static DEFINE_STRARRAY(socket_families);
443 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
444 struct syscall_arg *arg)
449 if (mode == F_OK) /* 0 */
450 return scnprintf(bf, size, "F");
452 if (mode & n##_OK) { \
453 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
463 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
468 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
470 static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
471 struct syscall_arg *arg);
473 #define SCA_FILENAME syscall_arg__scnprintf_filename
475 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
476 struct syscall_arg *arg)
478 int printed = 0, flags = arg->val;
481 if (flags & O_##n) { \
482 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
491 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
496 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
498 #if defined(__i386__) || defined(__x86_64__)
500 * FIXME: Make this available to all arches.
502 #define TCGETS 0x5401
504 static const char *tioctls[] = {
505 "TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
506 "TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
507 "TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
508 "TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
509 "TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
510 "TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
511 "TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK",
512 "TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
513 "TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
514 "TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
515 "TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
516 [0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
517 "TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
518 "TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
519 "TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE",
522 static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
523 #endif /* defined(__i386__) || defined(__x86_64__) */
/* getrandom(2) flags, for libcs whose headers predate the syscall. */
#ifndef GRND_NONBLOCK
#define GRND_NONBLOCK	0x0001
#endif
#ifndef GRND_RANDOM
#define GRND_RANDOM	0x0002
#endif
532 static size_t syscall_arg__scnprintf_getrandom_flags(char *bf, size_t size,
533 struct syscall_arg *arg)
535 int printed = 0, flags = arg->val;
538 if (flags & GRND_##n) { \
539 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
540 flags &= ~GRND_##n; \
548 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
553 #define SCA_GETRANDOM_FLAGS syscall_arg__scnprintf_getrandom_flags
/* Shorthand for a syscall_fmts entry that maps one arg via a strarray. */
#define STRARRAY(arg, name, array) \
	  .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
	  .arg_parm	 = { [arg] = &strarray__##array, }
559 #include "trace/beauty/eventfd.c"
560 #include "trace/beauty/flock.c"
561 #include "trace/beauty/futex_op.c"
562 #include "trace/beauty/mmap.c"
563 #include "trace/beauty/mode_t.c"
564 #include "trace/beauty/msg_flags.c"
565 #include "trace/beauty/open_flags.c"
566 #include "trace/beauty/perf_event_open.c"
567 #include "trace/beauty/pid.c"
568 #include "trace/beauty/sched_policy.c"
569 #include "trace/beauty/seccomp.c"
570 #include "trace/beauty/signum.c"
571 #include "trace/beauty/socket_type.c"
572 #include "trace/beauty/waitid_options.c"
574 static struct syscall_fmt {
577 size_t (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
584 { .name = "access", .errmsg = true,
585 .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, },
586 { .name = "arch_prctl", .errmsg = true, .alias = "prctl", },
587 { .name = "bpf", .errmsg = true, STRARRAY(0, cmd, bpf_cmd), },
588 { .name = "brk", .hexret = true,
589 .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
590 { .name = "chdir", .errmsg = true, },
591 { .name = "chmod", .errmsg = true, },
592 { .name = "chroot", .errmsg = true, },
593 { .name = "clock_gettime", .errmsg = true, STRARRAY(0, clk_id, clockid), },
594 { .name = "clone", .errpid = true, },
595 { .name = "close", .errmsg = true,
596 .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
597 { .name = "connect", .errmsg = true, },
598 { .name = "creat", .errmsg = true, },
599 { .name = "dup", .errmsg = true, },
600 { .name = "dup2", .errmsg = true, },
601 { .name = "dup3", .errmsg = true, },
602 { .name = "epoll_ctl", .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
603 { .name = "eventfd2", .errmsg = true,
604 .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
605 { .name = "faccessat", .errmsg = true, },
606 { .name = "fadvise64", .errmsg = true, },
607 { .name = "fallocate", .errmsg = true, },
608 { .name = "fchdir", .errmsg = true, },
609 { .name = "fchmod", .errmsg = true, },
610 { .name = "fchmodat", .errmsg = true,
611 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
612 { .name = "fchown", .errmsg = true, },
613 { .name = "fchownat", .errmsg = true,
614 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
615 { .name = "fcntl", .errmsg = true,
616 .arg_scnprintf = { [1] = SCA_STRARRAY, /* cmd */ },
617 .arg_parm = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
618 { .name = "fdatasync", .errmsg = true, },
619 { .name = "flock", .errmsg = true,
620 .arg_scnprintf = { [1] = SCA_FLOCK, /* cmd */ }, },
621 { .name = "fsetxattr", .errmsg = true, },
622 { .name = "fstat", .errmsg = true, .alias = "newfstat", },
623 { .name = "fstatat", .errmsg = true, .alias = "newfstatat", },
624 { .name = "fstatfs", .errmsg = true, },
625 { .name = "fsync", .errmsg = true, },
626 { .name = "ftruncate", .errmsg = true, },
627 { .name = "futex", .errmsg = true,
628 .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
629 { .name = "futimesat", .errmsg = true,
630 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
631 { .name = "getdents", .errmsg = true, },
632 { .name = "getdents64", .errmsg = true, },
633 { .name = "getitimer", .errmsg = true, STRARRAY(0, which, itimers), },
634 { .name = "getpid", .errpid = true, },
635 { .name = "getpgid", .errpid = true, },
636 { .name = "getppid", .errpid = true, },
637 { .name = "getrandom", .errmsg = true,
638 .arg_scnprintf = { [2] = SCA_GETRANDOM_FLAGS, /* flags */ }, },
639 { .name = "getrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
640 { .name = "getxattr", .errmsg = true, },
641 { .name = "inotify_add_watch", .errmsg = true, },
642 { .name = "ioctl", .errmsg = true,
644 #if defined(__i386__) || defined(__x86_64__)
646 * FIXME: Make this available to all arches.
648 [1] = SCA_STRHEXARRAY, /* cmd */
649 [2] = SCA_HEX, /* arg */ },
650 .arg_parm = { [1] = &strarray__tioctls, /* cmd */ }, },
652 [2] = SCA_HEX, /* arg */ }, },
654 { .name = "keyctl", .errmsg = true, STRARRAY(0, option, keyctl_options), },
655 { .name = "kill", .errmsg = true,
656 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
657 { .name = "lchown", .errmsg = true, },
658 { .name = "lgetxattr", .errmsg = true, },
659 { .name = "linkat", .errmsg = true,
660 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
661 { .name = "listxattr", .errmsg = true, },
662 { .name = "llistxattr", .errmsg = true, },
663 { .name = "lremovexattr", .errmsg = true, },
664 { .name = "lseek", .errmsg = true,
665 .arg_scnprintf = { [2] = SCA_STRARRAY, /* whence */ },
666 .arg_parm = { [2] = &strarray__whences, /* whence */ }, },
667 { .name = "lsetxattr", .errmsg = true, },
668 { .name = "lstat", .errmsg = true, .alias = "newlstat", },
669 { .name = "lsxattr", .errmsg = true, },
670 { .name = "madvise", .errmsg = true,
671 .arg_scnprintf = { [0] = SCA_HEX, /* start */
672 [2] = SCA_MADV_BHV, /* behavior */ }, },
673 { .name = "mkdir", .errmsg = true, },
674 { .name = "mkdirat", .errmsg = true,
675 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
676 { .name = "mknod", .errmsg = true, },
677 { .name = "mknodat", .errmsg = true,
678 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
679 { .name = "mlock", .errmsg = true,
680 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
681 { .name = "mlockall", .errmsg = true,
682 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
683 { .name = "mmap", .hexret = true,
684 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
685 [2] = SCA_MMAP_PROT, /* prot */
686 [3] = SCA_MMAP_FLAGS, /* flags */ }, },
687 { .name = "mprotect", .errmsg = true,
688 .arg_scnprintf = { [0] = SCA_HEX, /* start */
689 [2] = SCA_MMAP_PROT, /* prot */ }, },
690 { .name = "mq_unlink", .errmsg = true,
691 .arg_scnprintf = { [0] = SCA_FILENAME, /* u_name */ }, },
692 { .name = "mremap", .hexret = true,
693 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
694 [3] = SCA_MREMAP_FLAGS, /* flags */
695 [4] = SCA_HEX, /* new_addr */ }, },
696 { .name = "munlock", .errmsg = true,
697 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
698 { .name = "munmap", .errmsg = true,
699 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
700 { .name = "name_to_handle_at", .errmsg = true,
701 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
702 { .name = "newfstatat", .errmsg = true,
703 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
704 { .name = "open", .errmsg = true,
705 .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
706 { .name = "open_by_handle_at", .errmsg = true,
707 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
708 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
709 { .name = "openat", .errmsg = true,
710 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
711 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
712 { .name = "perf_event_open", .errmsg = true,
713 .arg_scnprintf = { [2] = SCA_INT, /* cpu */
714 [3] = SCA_FD, /* group_fd */
715 [4] = SCA_PERF_FLAGS, /* flags */ }, },
716 { .name = "pipe2", .errmsg = true,
717 .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
718 { .name = "poll", .errmsg = true, .timeout = true, },
719 { .name = "ppoll", .errmsg = true, .timeout = true, },
720 { .name = "pread", .errmsg = true, .alias = "pread64", },
721 { .name = "preadv", .errmsg = true, .alias = "pread", },
722 { .name = "prlimit64", .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
723 { .name = "pwrite", .errmsg = true, .alias = "pwrite64", },
724 { .name = "pwritev", .errmsg = true, },
725 { .name = "read", .errmsg = true, },
726 { .name = "readlink", .errmsg = true, },
727 { .name = "readlinkat", .errmsg = true,
728 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
729 { .name = "readv", .errmsg = true, },
730 { .name = "recvfrom", .errmsg = true,
731 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
732 { .name = "recvmmsg", .errmsg = true,
733 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
734 { .name = "recvmsg", .errmsg = true,
735 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
736 { .name = "removexattr", .errmsg = true, },
737 { .name = "renameat", .errmsg = true,
738 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
739 { .name = "rmdir", .errmsg = true, },
740 { .name = "rt_sigaction", .errmsg = true,
741 .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
742 { .name = "rt_sigprocmask", .errmsg = true, STRARRAY(0, how, sighow), },
743 { .name = "rt_sigqueueinfo", .errmsg = true,
744 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
745 { .name = "rt_tgsigqueueinfo", .errmsg = true,
746 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
747 { .name = "sched_getattr", .errmsg = true, },
748 { .name = "sched_setattr", .errmsg = true, },
749 { .name = "sched_setscheduler", .errmsg = true,
750 .arg_scnprintf = { [1] = SCA_SCHED_POLICY, /* policy */ }, },
751 { .name = "seccomp", .errmsg = true,
752 .arg_scnprintf = { [0] = SCA_SECCOMP_OP, /* op */
753 [1] = SCA_SECCOMP_FLAGS, /* flags */ }, },
754 { .name = "select", .errmsg = true, .timeout = true, },
755 { .name = "sendmmsg", .errmsg = true,
756 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
757 { .name = "sendmsg", .errmsg = true,
758 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
759 { .name = "sendto", .errmsg = true,
760 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
761 { .name = "set_tid_address", .errpid = true, },
762 { .name = "setitimer", .errmsg = true, STRARRAY(0, which, itimers), },
763 { .name = "setpgid", .errmsg = true, },
764 { .name = "setrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
765 { .name = "setxattr", .errmsg = true, },
766 { .name = "shutdown", .errmsg = true, },
767 { .name = "socket", .errmsg = true,
768 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
769 [1] = SCA_SK_TYPE, /* type */ },
770 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
771 { .name = "socketpair", .errmsg = true,
772 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
773 [1] = SCA_SK_TYPE, /* type */ },
774 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
775 { .name = "stat", .errmsg = true, .alias = "newstat", },
776 { .name = "statfs", .errmsg = true, },
777 { .name = "statx", .errmsg = true,
778 .arg_scnprintf = { [0] = SCA_FDAT, /* flags */
779 [2] = SCA_STATX_FLAGS, /* flags */
780 [3] = SCA_STATX_MASK, /* mask */ }, },
781 { .name = "swapoff", .errmsg = true,
782 .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
783 { .name = "swapon", .errmsg = true,
784 .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
785 { .name = "symlinkat", .errmsg = true,
786 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
787 { .name = "tgkill", .errmsg = true,
788 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
789 { .name = "tkill", .errmsg = true,
790 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
791 { .name = "truncate", .errmsg = true, },
792 { .name = "uname", .errmsg = true, .alias = "newuname", },
793 { .name = "unlinkat", .errmsg = true,
794 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
795 { .name = "utime", .errmsg = true, },
796 { .name = "utimensat", .errmsg = true,
797 .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, },
798 { .name = "utimes", .errmsg = true, },
799 { .name = "vmsplice", .errmsg = true, },
800 { .name = "wait4", .errpid = true,
801 .arg_scnprintf = { [2] = SCA_WAITID_OPTIONS, /* options */ }, },
802 { .name = "waitid", .errpid = true,
803 .arg_scnprintf = { [3] = SCA_WAITID_OPTIONS, /* options */ }, },
804 { .name = "write", .errmsg = true, },
805 { .name = "writev", .errmsg = true, },
808 static int syscall_fmt__cmp(const void *name, const void *fmtp)
810 const struct syscall_fmt *fmt = fmtp;
811 return strcmp(name, fmt->name);
814 static struct syscall_fmt *syscall_fmt__find(const char *name)
816 const int nmemb = ARRAY_SIZE(syscall_fmts);
817 return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
821 struct event_format *tp_format;
823 struct format_field *args;
826 struct syscall_fmt *fmt;
827 size_t (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
832 * We need to have this 'calculated' boolean because in some cases we really
833 * don't know what is the duration of a syscall, for instance, when we start
834 * a session and some threads are waiting for a syscall to finish, say 'poll',
835 * in which case all we can do is to print "( ? ) for duration and for the
838 static size_t fprintf_duration(unsigned long t, bool calculated, FILE *fp)
840 double duration = (double)t / NSEC_PER_MSEC;
841 size_t printed = fprintf(fp, "(");
844 printed += fprintf(fp, " ? ");
845 else if (duration >= 1.0)
846 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
847 else if (duration >= 0.01)
848 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
850 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
851 return printed + fprintf(fp, "): ");
855 * filename.ptr: The filename char pointer that will be vfs_getname'd
856 * filename.entry_str_pos: Where to insert the string translated from
857 * filename.ptr by the vfs_getname tracepoint/kprobe.
859 struct thread_trace {
862 unsigned long nr_events;
863 unsigned long pfmaj, pfmin;
868 short int entry_str_pos;
870 unsigned int namelen;
878 struct intlist *syscall_stats;
881 static struct thread_trace *thread_trace__new(void)
883 struct thread_trace *ttrace = zalloc(sizeof(struct thread_trace));
886 ttrace->paths.max = -1;
888 ttrace->syscall_stats = intlist__new(NULL);
893 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
895 struct thread_trace *ttrace;
900 if (thread__priv(thread) == NULL)
901 thread__set_priv(thread, thread_trace__new());
903 if (thread__priv(thread) == NULL)
906 ttrace = thread__priv(thread);
911 color_fprintf(fp, PERF_COLOR_RED,
912 "WARNING: not enough memory, dropping samples!\n");
/* Bits for --pf: which page-fault flavors to trace. */
#define TRACE_PFMAJ		(1 << 0)
#define TRACE_PFMIN		(1 << 1)

/* Size of the per-thread buffer where the entry line is staged. */
static const size_t trace__entry_str_size = 2048;
921 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
923 struct thread_trace *ttrace = thread__priv(thread);
925 if (fd > ttrace->paths.max) {
926 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
931 if (ttrace->paths.max != -1) {
932 memset(npath + ttrace->paths.max + 1, 0,
933 (fd - ttrace->paths.max) * sizeof(char *));
935 memset(npath, 0, (fd + 1) * sizeof(char *));
938 ttrace->paths.table = npath;
939 ttrace->paths.max = fd;
942 ttrace->paths.table[fd] = strdup(pathname);
944 return ttrace->paths.table[fd] != NULL ? 0 : -1;
947 static int thread__read_fd_path(struct thread *thread, int fd)
949 char linkname[PATH_MAX], pathname[PATH_MAX];
953 if (thread->pid_ == thread->tid) {
954 scnprintf(linkname, sizeof(linkname),
955 "/proc/%d/fd/%d", thread->pid_, fd);
957 scnprintf(linkname, sizeof(linkname),
958 "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
961 if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
964 ret = readlink(linkname, pathname, sizeof(pathname));
966 if (ret < 0 || ret > st.st_size)
969 pathname[ret] = '\0';
970 return trace__set_fd_pathname(thread, fd, pathname);
973 static const char *thread__fd_path(struct thread *thread, int fd,
976 struct thread_trace *ttrace = thread__priv(thread);
984 if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) {
987 ++trace->stats.proc_getname;
988 if (thread__read_fd_path(thread, fd))
992 return ttrace->paths.table[fd];
995 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
996 struct syscall_arg *arg)
999 size_t printed = scnprintf(bf, size, "%d", fd);
1000 const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1003 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1008 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1009 struct syscall_arg *arg)
1012 size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1013 struct thread_trace *ttrace = thread__priv(arg->thread);
1015 if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
1016 zfree(&ttrace->paths.table[fd]);
1021 static void thread__set_filename_pos(struct thread *thread, const char *bf,
1024 struct thread_trace *ttrace = thread__priv(thread);
1026 ttrace->filename.ptr = ptr;
1027 ttrace->filename.entry_str_pos = bf - ttrace->entry_str;
1030 static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
1031 struct syscall_arg *arg)
1033 unsigned long ptr = arg->val;
1035 if (!arg->trace->vfs_getname)
1036 return scnprintf(bf, size, "%#x", ptr);
1038 thread__set_filename_pos(arg->thread, bf, ptr);
1042 static bool trace__filter_duration(struct trace *trace, double t)
1044 return t < (trace->duration_filter * NSEC_PER_MSEC);
1047 static size_t __trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1049 double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1051 return fprintf(fp, "%10.3f ", ts);
1055 * We're handling tstamp=0 as an undefined tstamp, i.e. like when we are
1056 * using ttrace->entry_time for a thread that receives a sys_exit without
1057 * first having received a sys_enter ("poll" issued before tracing session
1058 * starts, lost sys_enter exit due to ring buffer overflow).
1060 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1063 return __trace__fprintf_tstamp(trace, tstamp, fp);
1065 return fprintf(fp, " ? ");
static bool done = false;
static bool interrupted = false;

/* Signal handler: stop the main loop; remember if it was a user ^C. */
static void sig_handler(int sig)
{
	done = true;
	interrupted = sig == SIGINT;
}
1077 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1078 u64 duration, bool duration_calculated, u64 tstamp, FILE *fp)
1080 size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1081 printed += fprintf_duration(duration, duration_calculated, fp);
1083 if (trace->multiple_threads) {
1084 if (trace->show_comm)
1085 printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1086 printed += fprintf(fp, "%d ", thread->tid);
1092 static int trace__process_event(struct trace *trace, struct machine *machine,
1093 union perf_event *event, struct perf_sample *sample)
1097 switch (event->header.type) {
1098 case PERF_RECORD_LOST:
1099 color_fprintf(trace->output, PERF_COLOR_RED,
1100 "LOST %" PRIu64 " events!\n", event->lost.lost);
1101 ret = machine__process_lost_event(machine, event, sample);
1104 ret = machine__process_event(machine, event, sample);
1111 static int trace__tool_process(struct perf_tool *tool,
1112 union perf_event *event,
1113 struct perf_sample *sample,
1114 struct machine *machine)
1116 struct trace *trace = container_of(tool, struct trace, tool);
1117 return trace__process_event(trace, machine, event, sample);
1120 static char *trace__machine__resolve_kernel_addr(void *vmachine, unsigned long long *addrp, char **modp)
1122 struct machine *machine = vmachine;
1124 if (machine->kptr_restrict_warned)
1127 if (symbol_conf.kptr_restrict) {
1128 pr_warning("Kernel address maps (/proc/{kallsyms,modules}) are restricted.\n\n"
1129 "Check /proc/sys/kernel/kptr_restrict.\n\n"
1130 "Kernel samples will not be resolved.\n");
1131 machine->kptr_restrict_warned = true;
1135 return machine__resolve_kernel_addr(vmachine, addrp, modp);
/*
 * Initialize the symbol subsystem, create the host machine representation
 * and synthesize existing threads so samples can be resolved.
 * Returns 0 on success, negative on error (error paths not fully visible
 * in this chunk).
 */
1138 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1140 int err = symbol__init(NULL);
1145 trace->host = machine__new_host();
1146 if (trace->host == NULL)
1149 if (trace_event__register_resolver(trace->host, trace__machine__resolve_kernel_addr) < 0)
1152 err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1153 evlist->threads, trace__tool_process, false,
1154 trace->opts.proc_map_timeout);
/*
 * Choose a pretty-printer (scnprintf callback) for each argument of a
 * syscall, based on the tracepoint field type/name:
 *  - explicit per-syscall formatters from sc->fmt take precedence,
 *  - "const char *" fields named filename/path/pathname -> SCA_FILENAME,
 *  - other pointers -> hex, pid_t -> SCA_PID, umode_t -> SCA_MODE_T,
 *  - int-ish fields whose name ends in "fd" -> SCA_FD.
 * Returns 0 on success, negative on allocation failure.
 */
1161 static int syscall__set_arg_fmts(struct syscall *sc)
1163 struct format_field *field;
1166 sc->arg_scnprintf = calloc(sc->nr_args, sizeof(void *));
1167 if (sc->arg_scnprintf == NULL)
1171 sc->arg_parm = sc->fmt->arg_parm;
1173 for (field = sc->args; field; field = field->next) {
1174 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1175 sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1176 else if (strcmp(field->type, "const char *") == 0 &&
1177 (strcmp(field->name, "filename") == 0 ||
1178 strcmp(field->name, "path") == 0 ||
1179 strcmp(field->name, "pathname") == 0))
1180 sc->arg_scnprintf[idx] = SCA_FILENAME;
1181 else if (field->flags & FIELD_IS_POINTER)
1182 sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
1183 else if (strcmp(field->type, "pid_t") == 0)
1184 sc->arg_scnprintf[idx] = SCA_PID;
1185 else if (strcmp(field->type, "umode_t") == 0)
1186 sc->arg_scnprintf[idx] = SCA_MODE_T;
1187 else if ((strcmp(field->type, "int") == 0 ||
1188 strcmp(field->type, "unsigned int") == 0 ||
1189 strcmp(field->type, "long") == 0) &&
1190 (len = strlen(field->name)) >= 2 &&
1191 strcmp(field->name + len - 2, "fd") == 0) {
1193 * /sys/kernel/tracing/events/syscalls/sys_enter*
1194 * egrep 'field:.*fd;' .../format|sed -r 's/.*field:([a-z ]+) [a-z_]*fd.+/\1/g'|sort|uniq -c
1199 sc->arg_scnprintf[idx] = SCA_FD;
/*
 * Lazily populate trace->syscalls.table[id]: grow the table if needed,
 * look up the syscall name and its "syscalls:sys_enter_<name>" tracepoint
 * format (falling back to a per-syscall alias), drop the leading syscall
 * number field, and set up the per-argument formatters.
 * Returns 0 on success, negative on error.
 */
1207 static int trace__read_syscall_info(struct trace *trace, int id)
1211 const char *name = syscalltbl__name(trace->sctbl, id);
1216 if (id > trace->syscalls.max) {
/* Grow the table; zero only the newly added entries. */
1217 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1219 if (nsyscalls == NULL)
1222 if (trace->syscalls.max != -1) {
1223 memset(nsyscalls + trace->syscalls.max + 1, 0,
1224 (id - trace->syscalls.max) * sizeof(*sc));
1226 memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1229 trace->syscalls.table = nsyscalls;
1230 trace->syscalls.max = id;
1233 sc = trace->syscalls.table + id;
1236 sc->fmt = syscall_fmt__find(sc->name);
1238 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1239 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
/* Some syscalls are known by an alias in the tracepoint namespace. */
1241 if (IS_ERR(sc->tp_format) && sc->fmt && sc->fmt->alias) {
1242 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1243 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1246 if (IS_ERR(sc->tp_format))
1249 sc->args = sc->tp_format->format.fields;
1250 sc->nr_args = sc->tp_format->format.nr_fields;
1252 * We need to check and discard the first variable '__syscall_nr'
1253 * or 'nr' that mean the syscall number. It is needless here.
1254 * So drop '__syscall_nr' or 'nr' field but does not exist on older kernels.
1256 if (sc->args && (!strcmp(sc->args->name, "__syscall_nr") || !strcmp(sc->args->name, "nr"))) {
1257 sc->args = sc->args->next;
1261 sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
1263 return syscall__set_arg_fmts(sc);
/*
 * Translate the user-supplied syscall-name qualifier list (-e/--expr) into
 * an array of syscall ids (trace->ev_qualifier_ids). Unknown names are
 * collected into one error message with a hint; on any failure the ids
 * array is freed and the count reset.
 * Returns 0 on success, negative if any name was invalid or on OOM.
 */
1266 static int trace__validate_ev_qualifier(struct trace *trace)
1269 struct str_node *pos;
1271 trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier);
1272 trace->ev_qualifier_ids.entries = malloc(trace->ev_qualifier_ids.nr *
1273 sizeof(trace->ev_qualifier_ids.entries[0]));
1275 if (trace->ev_qualifier_ids.entries == NULL) {
1276 fputs("Error:\tNot enough memory for allocating events qualifier ids\n",
1284 strlist__for_each_entry(pos, trace->ev_qualifier) {
1285 const char *sc = pos->s;
1286 int id = syscalltbl__id(trace->sctbl, sc);
1290 fputs("Error:\tInvalid syscall ", trace->output);
1293 fputs(", ", trace->output);
1296 fputs(sc, trace->output);
1299 trace->ev_qualifier_ids.entries[i++] = id;
1303 fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'"
1304 "\nHint:\tand: 'man syscalls'\n", trace->output);
1305 zfree(&trace->ev_qualifier_ids.entries);
1306 trace->ev_qualifier_ids.nr = 0;
1313 * args is to be interpreted as a series of longs but we need to handle
1314 * 8-byte unaligned accesses. args points to raw_data within the event
1315 * and raw_data is guaranteed to be 8-byte unaligned because it is
1316 * preceded by raw_size which is a u32. So we need to copy args to a temp
1317 * variable to read it. Most notably this avoids extended load instructions
1318 * on unaligned addresses
/*
 * Format the syscall arguments into @bf: when the tracepoint format was
 * parsed, walk the fields using the per-arg formatters set up by
 * syscall__set_arg_fmts(), suppressing zero-valued args (except string
 * arrays); otherwise fall back to printing raw "argN: value" pairs.
 * Returns the number of characters written to @bf.
 */
1321 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
1322 unsigned char *args, struct trace *trace,
1323 struct thread *thread)
1329 if (sc->args != NULL) {
1330 struct format_field *field;
1332 struct syscall_arg arg = {
1339 for (field = sc->args; field;
1340 field = field->next, ++arg.idx, bit <<= 1) {
1344 /* special care for unaligned accesses */
1345 p = args + sizeof(unsigned long) * arg.idx;
1346 memcpy(&val, p, sizeof(val));
1349 * Suppress this argument if its value is zero and
1350 * and we don't have a string associated in an
1354 !(sc->arg_scnprintf &&
1355 sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1356 sc->arg_parm[arg.idx]))
1359 printed += scnprintf(bf + printed, size - printed,
1360 "%s%s: ", printed ? ", " : "", field->name);
1361 if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
1364 arg.parm = sc->arg_parm[arg.idx];
1365 printed += sc->arg_scnprintf[arg.idx](bf + printed,
1366 size - printed, &arg);
1368 printed += scnprintf(bf + printed, size - printed,
1372 } else if (IS_ERR(sc->tp_format)) {
1374 * If we managed to read the tracepoint /format file, then we
1375 * may end up not having any args, like with gettid(), so only
1376 * print the raw args when we didn't manage to read it.
1381 /* special care for unaligned accesses */
1382 p = args + sizeof(unsigned long) * i;
1383 memcpy(&val, p, sizeof(val));
1384 printed += scnprintf(bf + printed, size - printed,
1386 printed ? ", " : "", i, val);
/*
 * Signature of the per-evsel sample handlers (trace__sys_enter,
 * trace__sys_exit, trace__vfs_getname, ...), stashed in evsel->handler.
 */
1394 typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
1395 union perf_event *event,
1396 struct perf_sample *sample);
/*
 * Return the struct syscall for @id, reading its tracepoint info on first
 * use. Returns NULL (after printing a diagnostic) for invalid ids or when
 * the syscall info could not be read.
 */
1398 static struct syscall *trace__syscall_info(struct trace *trace,
1399 struct perf_evsel *evsel, int id)
1405 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1406 * before that, leaving at a higher verbosity level till that is
1407 * explained. Reproduced with plain ftrace with:
1409 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1410 * grep "NR -1 " /t/trace_pipe
1412 * After generating some load on the machine.
1416 fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1417 id, perf_evsel__name(evsel), ++n)
1422 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1423 trace__read_syscall_info(trace, id))
1426 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1429 return &trace->syscalls.table[id];
1433 fprintf(trace->output, "Problems reading syscall %d", id);
1434 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1435 fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1436 fputs(" information\n", trace->output);
/*
 * Update the per-thread, per-syscall duration statistics (for --summary):
 * find-or-create the stats node keyed by syscall id and feed it the
 * enter->exit duration computed from the entry timestamp.
 */
1441 static void thread__update_stats(struct thread_trace *ttrace,
1442 int id, struct perf_sample *sample)
1444 struct int_node *inode;
1445 struct stats *stats;
1448 inode = intlist__findnew(ttrace->syscall_stats, id);
1452 stats = inode->priv;
1453 if (stats == NULL) {
1454 stats = malloc(sizeof(struct stats));
1458 inode->priv = stats;
/* Only compute a duration when the matching sys_enter was seen. */
1461 if (ttrace->entry_time && sample->time > ttrace->entry_time)
1462 duration = sample->time - ttrace->entry_time;
1464 update_stats(stats, duration);
/*
 * If the previously-current thread has a sys_enter printed but not yet
 * completed (we were interrupted by another thread's event), flush it with
 * a trailing " ...". Returns the number of characters printed.
 */
1467 static int trace__printf_interrupted_entry(struct trace *trace, struct perf_sample *sample)
1469 struct thread_trace *ttrace;
1473 if (trace->current == NULL)
1476 ttrace = thread__priv(trace->current);
1478 if (!ttrace->entry_pending)
1481 duration = sample->time - ttrace->entry_time;
1483 printed = trace__fprintf_entry_head(trace, trace->current, duration, true, ttrace->entry_time, trace->output);
1484 printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str);
1485 ttrace->entry_pending = false;
/*
 * raw_syscalls:sys_enter handler: format "name(args" into the per-thread
 * entry_str. For syscalls that return (non-exit), printing is deferred to
 * trace__sys_exit() so duration can be shown; otherwise (or with filters
 * disabled) the entry head is printed immediately. Tracks trace->current
 * so interleaved threads can be detected.
 */
1490 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1491 union perf_event *event __maybe_unused,
1492 struct perf_sample *sample)
1497 struct thread *thread;
1498 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
1499 struct syscall *sc = trace__syscall_info(trace, evsel, id);
1500 struct thread_trace *ttrace;
1505 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1506 ttrace = thread__trace(thread, trace->output);
1510 args = perf_evsel__sc_tp_ptr(evsel, args, sample);
/* Lazily allocate the buffer holding the formatted "name(args" string. */
1512 if (ttrace->entry_str == NULL) {
1513 ttrace->entry_str = malloc(trace__entry_str_size);
1514 if (!ttrace->entry_str)
1518 if (!(trace->duration_filter || trace->summary_only || trace->min_stack))
1519 trace__printf_interrupted_entry(trace, sample);
1521 ttrace->entry_time = sample->time;
1522 msg = ttrace->entry_str;
1523 printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name);
1525 printed += syscall__scnprintf_args(sc, msg + printed, trace__entry_str_size - printed,
1526 args, trace, thread);
1529 if (!(trace->duration_filter || trace->summary_only || trace->min_stack)) {
1530 trace__fprintf_entry_head(trace, thread, 0, false, ttrace->entry_time, trace->output);
1531 fprintf(trace->output, "%-70s)\n", ttrace->entry_str);
1534 ttrace->entry_pending = true;
1535 /* See trace__vfs_getname & trace__sys_exit */
1536 ttrace->filename.pending_open = false;
1539 if (trace->current != thread) {
1540 thread__put(trace->current);
1541 trace->current = thread__get(thread);
1545 thread__put(thread);
/*
 * Resolve the sample's callchain into @cursor, limited to
 * trace->max_stack entries. Error handling/return values beyond the
 * failure condition shown here are not visible in this chunk.
 */
1549 static int trace__resolve_callchain(struct trace *trace, struct perf_evsel *evsel,
1550 struct perf_sample *sample,
1551 struct callchain_cursor *cursor)
1553 struct addr_location al;
1555 if (machine__resolve(trace->host, &al, sample) < 0 ||
1556 thread__resolve_callchain(al.thread, cursor, evsel, sample, NULL, NULL, trace->max_stack))
/*
 * Print the previously-resolved callchain (callchain_cursor) to the trace
 * output, indented 38 columns, symbol names with unknowns shown as
 * addresses.
 */
1562 static int trace__fprintf_callchain(struct trace *trace, struct perf_sample *sample)
1564 /* TODO: user-configurable print_opts */
1565 const unsigned int print_opts = EVSEL__PRINT_SYM |
1567 EVSEL__PRINT_UNKNOWN_AS_ADDR;
1569 return sample__fprintf_callchain(sample, 38, print_opts, &callchain_cursor, trace->output);
/*
 * raw_syscalls:sys_exit handler: pair with the pending sys_enter for this
 * thread, update summary stats, apply the duration filter, optionally
 * resolve/print a callchain, and print the return value using the
 * per-syscall format rules (errno name+message, Timeout, hex, child pid).
 * Also completes the open()/vfs_getname fd->pathname association.
 */
1572 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
1573 union perf_event *event __maybe_unused,
1574 struct perf_sample *sample)
1578 bool duration_calculated = false;
1579 struct thread *thread;
1580 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1, callchain_ret = 0;
1581 struct syscall *sc = trace__syscall_info(trace, evsel, id);
1582 struct thread_trace *ttrace;
1587 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1588 ttrace = thread__trace(thread, trace->output);
1593 thread__update_stats(ttrace, id, sample);
1595 ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
/* A successful open with a getname pending: remember fd -> pathname. */
1597 if (id == trace->open_id && ret >= 0 && ttrace->filename.pending_open) {
1598 trace__set_fd_pathname(thread, ret, ttrace->filename.name);
1599 ttrace->filename.pending_open = false;
1600 ++trace->stats.vfs_getname;
1603 if (ttrace->entry_time) {
1604 duration = sample->time - ttrace->entry_time;
1605 if (trace__filter_duration(trace, duration))
1607 duration_calculated = true;
1608 } else if (trace->duration_filter)
1611 if (sample->callchain) {
1612 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
1613 if (callchain_ret == 0) {
1614 if (callchain_cursor.nr < trace->min_stack)
1620 if (trace->summary_only)
1623 trace__fprintf_entry_head(trace, thread, duration, duration_calculated, ttrace->entry_time, trace->output);
1625 if (ttrace->entry_pending) {
1626 fprintf(trace->output, "%-70s", ttrace->entry_str);
1628 fprintf(trace->output, " ... [");
1629 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1630 fprintf(trace->output, "]: %s()", sc->name);
1633 if (sc->fmt == NULL) {
1635 fprintf(trace->output, ") = %ld", ret);
1636 } else if (ret < 0 && (sc->fmt->errmsg || sc->fmt->errpid)) {
1637 char bf[STRERR_BUFSIZE];
1638 const char *emsg = str_error_r(-ret, bf, sizeof(bf)),
1639 *e = audit_errno_to_name(-ret);
1641 fprintf(trace->output, ") = -1 %s %s", e, emsg);
1642 } else if (ret == 0 && sc->fmt->timeout)
1643 fprintf(trace->output, ") = 0 Timeout");
1644 else if (sc->fmt->hexret)
1645 fprintf(trace->output, ") = %#lx", ret);
1646 else if (sc->fmt->errpid) {
/* e.g. wait4/clone: show the comm of the returned child pid. */
1647 struct thread *child = machine__find_thread(trace->host, ret, ret);
1649 if (child != NULL) {
1650 fprintf(trace->output, ") = %ld", ret);
1651 if (child->comm_set)
1652 fprintf(trace->output, " (%s)", thread__comm_str(child));
1658 fputc('\n', trace->output);
1660 if (callchain_ret > 0)
1661 trace__fprintf_callchain(trace, sample);
1662 else if (callchain_ret < 0)
1663 pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
1665 ttrace->entry_pending = false;
1668 thread__put(thread);
/*
 * probe:vfs_getname handler: capture the pathname being resolved by the
 * kernel and (a) stash it for the fd->path association done at sys_exit,
 * (b) splice it into the pending sys_enter entry_str at the position
 * recorded in filename.entry_str_pos, truncating from the left if the
 * buffer is too small.
 */
1672 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
1673 union perf_event *event __maybe_unused,
1674 struct perf_sample *sample)
1676 struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1677 struct thread_trace *ttrace;
1678 size_t filename_len, entry_str_len, to_move;
1679 ssize_t remaining_space;
1681 const char *filename = perf_evsel__rawptr(evsel, sample, "pathname");
1686 ttrace = thread__priv(thread);
1690 filename_len = strlen(filename);
1691 if (filename_len == 0)
/* Grow the per-thread filename buffer on demand. */
1694 if (ttrace->filename.namelen < filename_len) {
1695 char *f = realloc(ttrace->filename.name, filename_len + 1);
1700 ttrace->filename.namelen = filename_len;
1701 ttrace->filename.name = f;
1704 strcpy(ttrace->filename.name, filename);
1705 ttrace->filename.pending_open = true;
1707 if (!ttrace->filename.ptr)
1710 entry_str_len = strlen(ttrace->entry_str);
1711 remaining_space = trace__entry_str_size - entry_str_len - 1; /* \0 */
1712 if (remaining_space <= 0)
/* Keep the tail of the path when it does not fit. */
1715 if (filename_len > (size_t)remaining_space) {
1716 filename += filename_len - remaining_space;
1717 filename_len = remaining_space;
1720 to_move = entry_str_len - ttrace->filename.entry_str_pos + 1; /* \0 */
1721 pos = ttrace->entry_str + ttrace->filename.entry_str_pos;
1722 memmove(pos + filename_len, pos, to_move);
1723 memcpy(pos, filename, filename_len);
1725 ttrace->filename.ptr = 0;
1726 ttrace->filename.entry_str_pos = 0;
1728 thread__put(thread);
/*
 * sched:sched_stat_runtime handler: accumulate on-CPU runtime (ms) per
 * thread and globally for the summary. The fallback fprintf at the end
 * handles the case where the thread could not be set up (label not
 * visible in this chunk).
 */
1733 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
1734 union perf_event *event __maybe_unused,
1735 struct perf_sample *sample)
1737 u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1738 double runtime_ms = (double)runtime / NSEC_PER_MSEC;
1739 struct thread *thread = machine__findnew_thread(trace->host,
1742 struct thread_trace *ttrace = thread__trace(thread, trace->output);
1747 ttrace->runtime_ms += runtime_ms;
1748 trace->runtime_ms += runtime_ms;
1750 thread__put(thread);
1754 fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1756 perf_evsel__strval(evsel, sample, "comm"),
1757 (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1759 perf_evsel__intval(evsel, sample, "vruntime"));
/*
 * print_binary() callback for BPF output events: emit printable bytes
 * as-is and everything else as '.'; all other print ops are ignored.
 */
1763 static void bpf_output__printer(enum binary_printer_ops op,
1764 unsigned int val, void *extra)
1766 FILE *output = extra;
1767 unsigned char ch = (unsigned char)val;
1770 case BINARY_PRINT_CHAR_DATA:
1771 fprintf(output, "%c", isprint(ch) ? ch : '.');
1773 case BINARY_PRINT_DATA_BEGIN:
1774 case BINARY_PRINT_LINE_BEGIN:
1775 case BINARY_PRINT_ADDR:
1776 case BINARY_PRINT_NUM_DATA:
1777 case BINARY_PRINT_NUM_PAD:
1778 case BINARY_PRINT_SEP:
1779 case BINARY_PRINT_CHAR_PAD:
1780 case BINARY_PRINT_LINE_END:
1781 case BINARY_PRINT_DATA_END:
/*
 * Dump a BPF output event's raw payload to the trace output, 8 bytes per
 * line, via bpf_output__printer().
 */
1787 static void bpf_output__fprintf(struct trace *trace,
1788 struct perf_sample *sample)
1790 print_binary(sample->raw_data, sample->raw_size, 8,
1791 bpf_output__printer, trace->output);
/*
 * Generic handler for --event tracepoints and BPF output events: flush any
 * interrupted syscall entry, print timestamp + event name, then either the
 * raw BPF payload or the formatted tracepoint fields, plus an optional
 * callchain.
 */
1794 static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
1795 union perf_event *event __maybe_unused,
1796 struct perf_sample *sample)
1798 int callchain_ret = 0;
1800 if (sample->callchain) {
1801 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
1802 if (callchain_ret == 0) {
1803 if (callchain_cursor.nr < trace->min_stack)
1809 trace__printf_interrupted_entry(trace, sample);
1810 trace__fprintf_tstamp(trace, sample->time, trace->output);
/* Align with the syscall lines when syscalls are also being traced. */
1812 if (trace->trace_syscalls)
1813 fprintf(trace->output, "( ): ");
1815 fprintf(trace->output, "%s:", evsel->name);
1817 if (perf_evsel__is_bpf_output(evsel)) {
1818 bpf_output__fprintf(trace, sample);
1819 } else if (evsel->tp_format) {
1820 event_format__fprintf(evsel->tp_format, sample->cpu,
1821 sample->raw_data, sample->raw_size,
1825 fprintf(trace->output, ")\n");
1827 if (callchain_ret > 0)
1828 trace__fprintf_callchain(trace, sample);
1829 else if (callchain_ret < 0)
1830 pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
/*
 * Print a resolved address location for a page fault: optionally the DSO
 * name (dso@), the symbol+offset, and/or the raw address; falls back to
 * sample->addr when nothing was resolved.
 */
1835 static void print_location(FILE *f, struct perf_sample *sample,
1836 struct addr_location *al,
1837 bool print_dso, bool print_sym)
1840 if ((verbose > 0 || print_dso) && al->map)
1841 fprintf(f, "%s@", al->map->dso->long_name);
1843 if ((verbose > 0 || print_sym) && al->sym)
1844 fprintf(f, "%s+0x%" PRIx64, al->sym->name,
1845 al->addr - al->sym->start);
1847 fprintf(f, "0x%" PRIx64, al->addr);
1849 fprintf(f, "0x%" PRIx64, sample->addr);
/*
 * Page-fault (software event) handler: print "majfault"/"minfault" with
 * the faulting IP location and the faulted address location, resolving the
 * address first as data (MAP__VARIABLE) then as code (MAP__FUNCTION), plus
 * an optional callchain. map_type 'd'->data, presumably switched to
 * another letter for executable maps in lines not visible here.
 */
1852 static int trace__pgfault(struct trace *trace,
1853 struct perf_evsel *evsel,
1854 union perf_event *event __maybe_unused,
1855 struct perf_sample *sample)
1857 struct thread *thread;
1858 struct addr_location al;
1859 char map_type = 'd';
1860 struct thread_trace *ttrace;
1862 int callchain_ret = 0;
1864 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1866 if (sample->callchain) {
1867 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
1868 if (callchain_ret == 0) {
1869 if (callchain_cursor.nr < trace->min_stack)
1875 ttrace = thread__trace(thread, trace->output);
1879 if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
1884 if (trace->summary_only)
1887 thread__find_addr_location(thread, sample->cpumode, MAP__FUNCTION,
1890 trace__fprintf_entry_head(trace, thread, 0, true, sample->time, trace->output);
1892 fprintf(trace->output, "%sfault [",
1893 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
1896 print_location(trace->output, sample, &al, false, true);
1898 fprintf(trace->output, "] => ");
1900 thread__find_addr_location(thread, sample->cpumode, MAP__VARIABLE,
1904 thread__find_addr_location(thread, sample->cpumode,
1905 MAP__FUNCTION, sample->addr, &al);
1913 print_location(trace->output, sample, &al, true, false);
1915 fprintf(trace->output, " (%c%c)\n", map_type, al.level);
1917 if (callchain_ret > 0)
1918 trace__fprintf_callchain(trace, sample);
1919 else if (callchain_ret < 0)
1920 pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
1924 thread__put(thread);
/*
 * Record the first usable sample time as the base for relative timestamps
 * (unless --full-time was requested).
 */
1928 static void trace__set_base_time(struct trace *trace,
1929 struct perf_evsel *evsel,
1930 struct perf_sample *sample)
1933 * BPF events were not setting PERF_SAMPLE_TIME, so be more robust
1934 * and don't use sample->time unconditionally, we may end up having
1935 * some other event in the future without PERF_SAMPLE_TIME for good
1936 * reason, i.e. we may not be interested in its timestamps, just in
1937 * it taking place, picking some piece of information when it
1938 * appears in our event stream (vfs_getname comes to mind).
1940 if (trace->base_time == 0 && !trace->full_time &&
1941 (evsel->attr.sample_type & PERF_SAMPLE_TIME))
1942 trace->base_time = sample->time;
/*
 * perf_tool sample callback used in replay mode (perf trace -i): skip
 * filtered threads, establish the base time and dispatch to the evsel's
 * tracepoint handler.
 */
1945 static int trace__process_sample(struct perf_tool *tool,
1946 union perf_event *event,
1947 struct perf_sample *sample,
1948 struct perf_evsel *evsel,
1949 struct machine *machine __maybe_unused)
1951 struct trace *trace = container_of(tool, struct trace, tool);
1952 struct thread *thread;
1955 tracepoint_handler handler = evsel->handler;
1957 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1958 if (thread && thread__is_filtered(thread))
1961 trace__set_base_time(trace, evsel, sample);
1965 handler(trace, evsel, event, sample);
1968 thread__put(thread);
/*
 * 'perf trace record': build an argv for cmd_record() combining the base
 * record arguments, the syscall tracepoints (raw_syscalls preferred,
 * syscalls:* for older kernels), the requested page-fault events and the
 * user's remaining arguments.
 */
1972 static int trace__record(struct trace *trace, int argc, const char **argv)
1974 unsigned int rec_argc, i, j;
1975 const char **rec_argv;
1976 const char * const record_args[] = {
1983 const char * const sc_args[] = { "-e", };
1984 unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
1985 const char * const majpf_args[] = { "-e", "major-faults" };
1986 unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
1987 const char * const minpf_args[] = { "-e", "minor-faults" };
1988 unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
1990 /* +1 is for the event string below */
1991 rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
1992 majpf_args_nr + minpf_args_nr + argc;
1993 rec_argv = calloc(rec_argc + 1, sizeof(char *));
1995 if (rec_argv == NULL)
1999 for (i = 0; i < ARRAY_SIZE(record_args); i++)
2000 rec_argv[j++] = record_args[i];
2002 if (trace->trace_syscalls) {
2003 for (i = 0; i < sc_args_nr; i++)
2004 rec_argv[j++] = sc_args[i];
2006 /* event string may be different for older kernels - e.g., RHEL6 */
2007 if (is_valid_tracepoint("raw_syscalls:sys_enter"))
2008 rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
2009 else if (is_valid_tracepoint("syscalls:sys_enter"))
2010 rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
2012 pr_err("Neither raw_syscalls nor syscalls events exist.\n");
2017 if (trace->trace_pgfaults & TRACE_PFMAJ)
2018 for (i = 0; i < majpf_args_nr; i++)
2019 rec_argv[j++] = majpf_args[i];
2021 if (trace->trace_pgfaults & TRACE_PFMIN)
2022 for (i = 0; i < minpf_args_nr; i++)
2023 rec_argv[j++] = minpf_args[i];
2025 for (i = 0; i < (unsigned int)argc; i++)
2026 rec_argv[j++] = argv[i];
2028 return cmd_record(j, rec_argv);
/* Forward declaration: summary printer used at the end of trace__run(). */
2031 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
/*
 * Try to add the probe:vfs_getname event (needs the probe to be set up on
 * the system and to carry a "pathname" field). Returns whether it was
 * added; failure is non-fatal, filenames just won't be beautified.
 */
2033 static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
2035 struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
2040 if (perf_evsel__field(evsel, "pathname") == NULL) {
2041 perf_evsel__delete(evsel);
2045 evsel->handler = trace__vfs_getname;
2046 perf_evlist__add(evlist, evsel);
/*
 * Create a software page-fault evsel (@config is
 * PERF_COUNT_SW_PAGE_FAULTS_MAJ or _MIN) sampling every fault, with
 * trace__pgfault as its handler.
 */
2050 static struct perf_evsel *perf_evsel__new_pgfault(u64 config)
2052 struct perf_evsel *evsel;
2053 struct perf_event_attr attr = {
2054 .type = PERF_TYPE_SOFTWARE,
2058 attr.config = config;
2059 attr.sample_period = 1;
2061 event_attr_init(&attr);
2063 evsel = perf_evsel__new(&attr);
2065 evsel->handler = trace__pgfault;
/*
 * Dispatch one mmapped event: side-band events go to
 * trace__process_event(); samples are mapped to their evsel by id and
 * handed to that evsel's handler, after sanity-checking that tracepoint
 * samples carry a payload.
 */
2070 static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
2072 const u32 type = event->header.type;
2073 struct perf_evsel *evsel;
2075 if (type != PERF_RECORD_SAMPLE) {
2076 trace__process_event(trace, trace->host, event, sample);
2080 evsel = perf_evlist__id2evsel(trace->evlist, sample->id);
2081 if (evsel == NULL) {
2082 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id);
2086 trace__set_base_time(trace, evsel, sample);
2088 if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
2089 sample->raw_data == NULL) {
2090 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
2091 perf_evsel__name(evsel), sample->tid,
2092 sample->cpu, sample->raw_size);
2094 tracepoint_handler handler = evsel->handler;
2095 handler(trace, evsel, event, sample);
/*
 * Create and add the raw_syscalls:sys_enter/sys_exit tracepoint evsels,
 * wiring their handlers and cached field offsets. With callchains enabled
 * (and unless --kernel-syscall-graph), kernel frames are excluded from the
 * sys_exit callchain since only the user path to the syscall is wanted.
 * Returns 0 on success; on error the already-created evsels are deleted.
 */
2099 static int trace__add_syscall_newtp(struct trace *trace)
2102 struct perf_evlist *evlist = trace->evlist;
2103 struct perf_evsel *sys_enter, *sys_exit;
2105 sys_enter = perf_evsel__syscall_newtp("sys_enter", trace__sys_enter);
2106 if (sys_enter == NULL)
2109 if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
2110 goto out_delete_sys_enter;
2112 sys_exit = perf_evsel__syscall_newtp("sys_exit", trace__sys_exit);
2113 if (sys_exit == NULL)
2114 goto out_delete_sys_enter;
2116 if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
2117 goto out_delete_sys_exit;
2119 perf_evlist__add(evlist, sys_enter);
2120 perf_evlist__add(evlist, sys_exit);
2122 if (callchain_param.enabled && !trace->kernel_syscallchains) {
2124 * We're interested only in the user space callchain
2125 * leading to the syscall, allow overriding that for
2126 * debugging reasons using --kernel_syscall_callchains
2128 sys_exit->attr.exclude_callchain_kernel = 1;
2131 trace->syscalls.events.sys_enter = sys_enter;
2132 trace->syscalls.events.sys_exit = sys_exit;
2138 out_delete_sys_exit:
2139 perf_evsel__delete_priv(sys_exit);
2140 out_delete_sys_enter:
2141 perf_evsel__delete_priv(sys_enter);
/*
 * Build an "id in (...)" (or "id not in (...)" for negated qualifiers)
 * tracepoint filter from the validated syscall id list and append it to
 * both the sys_enter and sys_exit evsels.
 */
2145 static int trace__set_ev_qualifier_filter(struct trace *trace)
2148 struct perf_evsel *sys_exit;
2149 char *filter = asprintf_expr_inout_ints("id", !trace->not_ev_qualifier,
2150 trace->ev_qualifier_ids.nr,
2151 trace->ev_qualifier_ids.entries);
2156 if (!perf_evsel__append_tp_filter(trace->syscalls.events.sys_enter,
2158 sys_exit = trace->syscalls.events.sys_exit;
2159 err = perf_evsel__append_tp_filter(sys_exit, filter);
/*
 * Live-tracing main loop. Sets up the evlist (syscall tracepoints,
 * vfs_getname, page faults, sched_stat_runtime for --summary), creates
 * thread/cpu maps, configures callchains and sample ids, forks/starts the
 * workload, opens and mmaps the events, installs the qualifier filters,
 * then polls and drains the ring buffers until done, finally printing the
 * thread summary and tool stats. Error labels at the bottom translate
 * errno into user-facing messages.
 */
2170 static int trace__run(struct trace *trace, int argc, const char **argv)
2172 struct perf_evlist *evlist = trace->evlist;
2173 struct perf_evsel *evsel, *pgfault_maj = NULL, *pgfault_min = NULL;
2175 unsigned long before;
2176 const bool forks = argc > 0;
2177 bool draining = false;
2181 if (trace->trace_syscalls && trace__add_syscall_newtp(trace))
2182 goto out_error_raw_syscalls;
2184 if (trace->trace_syscalls)
2185 trace->vfs_getname = perf_evlist__add_vfs_getname(evlist);
2187 if ((trace->trace_pgfaults & TRACE_PFMAJ)) {
2188 pgfault_maj = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MAJ);
2189 if (pgfault_maj == NULL)
2191 perf_evlist__add(evlist, pgfault_maj);
2194 if ((trace->trace_pgfaults & TRACE_PFMIN)) {
2195 pgfault_min = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MIN);
2196 if (pgfault_min == NULL)
2198 perf_evlist__add(evlist, pgfault_min);
2202 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
2203 trace__sched_stat_runtime))
2204 goto out_error_sched_stat_runtime;
2206 err = perf_evlist__create_maps(evlist, &trace->opts.target);
2208 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
2209 goto out_delete_evlist;
2212 err = trace__symbols_init(trace, evlist);
2214 fprintf(trace->output, "Problems initializing symbol libraries!\n");
2215 goto out_delete_evlist;
2218 perf_evlist__config(evlist, &trace->opts, NULL);
2220 if (callchain_param.enabled) {
2221 bool use_identifier = false;
2223 if (trace->syscalls.events.sys_exit) {
2224 perf_evsel__config_callchain(trace->syscalls.events.sys_exit,
2225 &trace->opts, &callchain_param);
2226 use_identifier = true;
2230 perf_evsel__config_callchain(pgfault_maj, &trace->opts, &callchain_param);
2231 use_identifier = true;
2235 perf_evsel__config_callchain(pgfault_min, &trace->opts, &callchain_param);
2236 use_identifier = true;
2239 if (use_identifier) {
2241 * Now we have evsels with different sample_ids, use
2242 * PERF_SAMPLE_IDENTIFIER to map from sample to evsel
2243 * from a fixed position in each ring buffer record.
2245 * As of this the changeset introducing this comment, this
2246 * isn't strictly needed, as the fields that can come before
2247 * PERF_SAMPLE_ID are all used, but we'll probably disable
2248 * some of those for things like copying the payload of
2249 * pointer syscall arguments, and for vfs_getname we don't
2250 * need PERF_SAMPLE_ADDR and PERF_SAMPLE_IP, so do this
2251 * here as a warning we need to use PERF_SAMPLE_IDENTIFIER.
2253 perf_evlist__set_sample_bit(evlist, IDENTIFIER);
2254 perf_evlist__reset_sample_bit(evlist, ID);
2258 signal(SIGCHLD, sig_handler);
2259 signal(SIGINT, sig_handler);
2262 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
2265 fprintf(trace->output, "Couldn't run the workload!\n");
2266 goto out_delete_evlist;
2270 err = perf_evlist__open(evlist);
2272 goto out_error_open;
2274 err = bpf__apply_obj_config();
2276 char errbuf[BUFSIZ];
2278 bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
2279 pr_err("ERROR: Apply config to BPF failed: %s\n",
2281 goto out_error_open;
2285 * Better not use !target__has_task() here because we need to cover the
2286 * case where no threads were specified in the command line, but a
2287 * workload was, and in that case we will fill in the thread_map when
2288 * we fork the workload in perf_evlist__prepare_workload.
2290 if (trace->filter_pids.nr > 0)
2291 err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries);
2292 else if (thread_map__pid(evlist->threads, 0) == -1)
2293 err = perf_evlist__set_filter_pid(evlist, getpid());
2298 if (trace->ev_qualifier_ids.nr > 0) {
2299 err = trace__set_ev_qualifier_filter(trace);
2303 pr_debug("event qualifier tracepoint filter: %s\n",
2304 trace->syscalls.events.sys_exit->filter);
2307 err = perf_evlist__apply_filters(evlist, &evsel);
2309 goto out_error_apply_filters;
2311 err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
2313 goto out_error_mmap;
2315 if (!target__none(&trace->opts.target) && !trace->opts.initial_delay)
2316 perf_evlist__enable(evlist);
2319 perf_evlist__start_workload(evlist);
2321 if (trace->opts.initial_delay) {
2322 usleep(trace->opts.initial_delay * 1000);
2323 perf_evlist__enable(evlist);
/* Decide whether per-event lines need a tid prefix. */
2326 trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 ||
2327 evlist->threads->nr > 1 ||
2328 perf_evlist__first(evlist)->attr.inherit;
2330 before = trace->nr_events;
2332 for (i = 0; i < evlist->nr_mmaps; i++) {
2333 union perf_event *event;
2335 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
2336 struct perf_sample sample;
2340 err = perf_evlist__parse_sample(evlist, event, &sample);
2342 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
2346 trace__handle_event(trace, event, &sample);
2348 perf_evlist__mmap_consume(evlist, i);
/* On 'done', disable events and drain what is still buffered. */
2353 if (done && !draining) {
2354 perf_evlist__disable(evlist);
2360 if (trace->nr_events == before) {
2361 int timeout = done ? 100 : -1;
2363 if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
2364 if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
2374 thread__zput(trace->current);
2376 perf_evlist__disable(evlist);
2380 trace__fprintf_thread_summary(trace, trace->output);
2382 if (trace->show_tool_stats) {
2383 fprintf(trace->output, "Stats:\n "
2384 " vfs_getname : %" PRIu64 "\n"
2385 " proc_getname: %" PRIu64 "\n",
2386 trace->stats.vfs_getname,
2387 trace->stats.proc_getname);
2392 perf_evlist__delete(evlist);
2393 trace->evlist = NULL;
2394 trace->live = false;
2397 char errbuf[BUFSIZ];
2399 out_error_sched_stat_runtime:
2400 tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime");
2403 out_error_raw_syscalls:
2404 tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
2408 perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
2412 perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2415 fprintf(trace->output, "%s\n", errbuf);
2416 goto out_delete_evlist;
2418 out_error_apply_filters:
2419 fprintf(trace->output,
2420 "Failed to set filter \"%s\" on event %s with %d (%s)\n",
2421 evsel->filter, perf_evsel__name(evsel), errno,
2422 str_error_r(errno, errbuf, sizeof(errbuf)));
2423 goto out_delete_evlist;
2426 fprintf(trace->output, "Not enough memory to run!\n");
2427 goto out_delete_evlist;
2430 fprintf(trace->output, "errno=%d,%s\n", errno, strerror(errno));
2431 goto out_delete_evlist;
/*
 * Replay mode: instead of tracing live, read back a previously recorded
 * perf.data session and run its samples through the same formatting path
 * (trace__process_sample) as live mode.
 */
2434 static int trace__replay(struct trace *trace)
2436 const struct perf_evsel_str_handler handlers[] = {
2437 { "probe:vfs_getname", trace__vfs_getname, },
	/* Open the recorded data file read-only; --force overrides sanity checks. */
2439 struct perf_data_file file = {
2441 .mode = PERF_DATA_MODE_READ,
2442 .force = trace->force,
2444 struct perf_session *session;
2445 struct perf_evsel *evsel;
	/* Wire up the generic side-band event handlers used while processing the file. */
2448 trace->tool.sample = trace__process_sample;
2449 trace->tool.mmap = perf_event__process_mmap;
2450 trace->tool.mmap2 = perf_event__process_mmap2;
2451 trace->tool.comm = perf_event__process_comm;
2452 trace->tool.exit = perf_event__process_exit;
2453 trace->tool.fork = perf_event__process_fork;
2454 trace->tool.attr = perf_event__process_attr;
2455 trace->tool.tracing_data = perf_event__process_tracing_data;
2456 trace->tool.build_id = perf_event__process_build_id;
2457 trace->tool.namespaces = perf_event__process_namespaces;
	/* Recorded events may arrive out of order; let the session sort by timestamp. */
2459 trace->tool.ordered_events = true;
2460 trace->tool.ordering_requires_timestamps = true;
2462 /* add tid to output */
2463 trace->multiple_threads = true;
2465 session = perf_session__new(&file, false, &trace->tool);
2466 if (session == NULL)
	/* Restrict symbol resolution to the pids/tids requested on the cmdline, if any. */
2469 if (trace->opts.target.pid)
2470 symbol_conf.pid_list_str = strdup(trace->opts.target.pid);
2472 if (trace->opts.target.tid)
2473 symbol_conf.tid_list_str = strdup(trace->opts.target.tid);
2475 if (symbol__init(&session->header.env) < 0)
2478 trace->host = &session->machines.host;
2480 err = perf_session__set_tracepoints_handlers(session, handlers);
	/* Locate the recorded syscall enter/exit tracepoints and hook our printers. */
2484 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2485 "raw_syscalls:sys_enter");
2486 /* older kernels have syscalls tp versus raw_syscalls */
2488 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2489 "syscalls:sys_enter");
2492 (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
2493 perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
2494 pr_err("Error during initialize raw_syscalls:sys_enter event\n");
2498 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2499 "raw_syscalls:sys_exit");
2501 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2502 "syscalls:sys_exit");
2504 (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
2505 perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
2506 pr_err("Error during initialize raw_syscalls:sys_exit event\n");
	/* Route any recorded page-fault software events to the pagefault printer. */
2510 evlist__for_each_entry(session->evlist, evsel) {
2511 if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
2512 (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
2513 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
2514 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
2515 evsel->handler = trace__pgfault;
2520 err = perf_session__process_events(session);
2522 pr_err("Failed to process events, error %d", err);
2524 else if (trace->summary)
2525 trace__fprintf_thread_summary(trace, trace->output);
2528 perf_session__delete(session);
/* Print the banner that precedes the per-thread summaries; returns chars printed. */
2533 static size_t trace__fprintf_threads_header(FILE *fp)
2537 printed = fprintf(fp, "\n Summary of events:\n\n");
/*
 * Resort a thread's intlist of per-syscall stats into an rb tree keyed by the
 * estimated total time spent in each syscall (comparator: a->msecs > b->msecs),
 * so thread__dump_stats() can print the most expensive syscalls first.
 */
2542 DEFINE_RESORT_RB(syscall_stats, a->msecs > b->msecs,
2543 struct stats *stats;
2548 struct int_node *source = rb_entry(nd, struct int_node, rb_node);
2549 struct stats *stats = source->priv;
	/* the intlist key is the syscall id, the node priv holds its stats */
2551 entry->syscall = source->i;
2552 entry->stats = stats;
	/* total ~= nr_calls * average duration (ns -> ms); 0 when no stats collected */
2553 entry->msecs = stats ? (u64)stats->n * (avg_stats(stats) / NSEC_PER_MSEC) : 0;
/*
 * Print one thread's per-syscall statistics table: call count, total/min/avg/max
 * durations in msec and the stddev as a percentage of the average, ordered by
 * total time via the syscall_stats resort tree.  Returns chars printed.
 */
2556 static size_t thread__dump_stats(struct thread_trace *ttrace,
2557 struct trace *trace, FILE *fp)
2562 DECLARE_RESORT_RB_INTLIST(syscall_stats, ttrace->syscall_stats);
2564 if (syscall_stats == NULL)
2567 printed += fprintf(fp, "\n");
2569 printed += fprintf(fp, " syscall calls total min avg max stddev\n")
2570 printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n");
2571 printed += fprintf(fp, " --------------- -------- --------- --------- --------- --------- ------\n");
2573 resort_rb__for_each_entry(nd, syscall_stats) {
2574 struct stats *stats = syscall_stats_entry->stats;
	/* durations are accumulated in nanoseconds, convert for display */
2576 double min = (double)(stats->min) / NSEC_PER_MSEC;
2577 double max = (double)(stats->max) / NSEC_PER_MSEC;
2578 double avg = avg_stats(stats);
2580 u64 n = (u64) stats->n;
	/* relative standard deviation, as a percentage of the average */
2582 pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2583 avg /= NSEC_PER_MSEC;
	/* map the syscall id back to its name through the syscall table */
2585 sc = &trace->syscalls.table[syscall_stats_entry->syscall];
2586 printed += fprintf(fp, " %-15s", sc->name);
2587 printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f %9.3f",
2588 n, syscall_stats_entry->msecs, min, avg);
2589 printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
2593 resort_rb__delete(syscall_stats);
2594 printed += fprintf(fp, "\n\n");
/*
 * Print one thread's summary line: comm, tid, event count, its share of all
 * events, page fault counters and accumulated runtime, followed by its
 * per-syscall statistics table.  Returns chars printed.
 */
2599 static size_t trace__fprintf_thread(FILE *fp, struct thread *thread, struct trace *trace)
2602 struct thread_trace *ttrace = thread__priv(thread);
	/* this thread's share of all events seen, in percent */
2608 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2610 printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
2611 printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
2612 printed += fprintf(fp, "%.1f%%", ratio);
2614 printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
2616 printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
2618 printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2619 else if (fputc('\n', fp) != EOF)
2622 printed += thread__dump_stats(ttrace, trace, fp);
/* Events seen for a thread; 0 when no trace state was ever attached to it. */
2627 static unsigned long thread__nr_events(struct thread_trace *ttrace)
2629 return ttrace ? ttrace->nr_events : 0;
/*
 * Resort the host machine's thread rb tree by per-thread event count
 * (comparator on thread__nr_events of each thread's priv trace state),
 * used by trace__fprintf_thread_summary().
 */
2632 DEFINE_RESORT_RB(threads, (thread__nr_events(a->thread->priv) < thread__nr_events(b->thread->priv)),
2633 struct thread *thread;
2636 entry->thread = rb_entry(nd, struct thread, rb_node);
/*
 * Print the end-of-run summary: the header plus one entry per thread, with
 * threads ordered by how many events they generated.  Returns chars printed.
 */
2639 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2641 DECLARE_RESORT_RB_MACHINE_THREADS(threads, trace->host);
2642 size_t printed = trace__fprintf_threads_header(fp);
2645 if (threads == NULL) {
2646 fprintf(fp, "%s", "Error sorting output by nr_events!\n");
2650 resort_rb__for_each_entry(nd, threads)
2651 printed += trace__fprintf_thread(fp, threads_entry->thread, trace);
2653 resort_rb__delete(threads);
/* --duration: only print events whose duration exceeds N.M milliseconds. */
2658 static int trace__set_duration(const struct option *opt, const char *str,
2659 int unset __maybe_unused)
2661 struct trace *trace = opt->value;
	/* NOTE(review): atof() yields 0.0 on malformed input, silently disabling
	 * the filter; strtod() with endptr/errno checking would reject bad values. */
2663 trace->duration_filter = atof(str);
/*
 * --filter-pids: parse a CSV list of pids for the kernel-side filter.
 * Slot 0 is always our own pid, so the tracer never traces itself.
 */
2667 static int trace__set_filter_pids(const struct option *opt, const char *str,
2668 int unset __maybe_unused)
2672 struct trace *trace = opt->value;
2674 * FIXME: introduce a intarray class, plain parse csv and create a
2675 * { int nr, int entries[] } struct...
2677 struct intlist *list = intlist__new(str);
	/* +1 reserves entries[0] for the tracer's own pid */
2682 i = trace->filter_pids.nr = intlist__nr_entries(list) + 1;
2683 trace->filter_pids.entries = calloc(i, sizeof(pid_t));
2685 if (trace->filter_pids.entries == NULL)
2688 trace->filter_pids.entries[0] = getpid();
	/* copy the user-supplied pids after our own */
2690 for (i = 1; i < trace->filter_pids.nr; ++i)
2691 trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i;
2693 intlist__delete(list);
/*
 * Open the -o/--output file for writing, first preserving a pre-existing
 * non-empty file of the same name by renaming it to "<name>.old".
 * Returns 0 on success, -errno when fopen() fails.
 */
2699 static int trace__open_output(struct trace *trace, const char *filename)
2703 if (!stat(filename, &st) && st.st_size) {
2704 char oldname[PATH_MAX];
2706 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2708 rename(filename, oldname);
2711 trace->output = fopen(filename, "w");
2713 return trace->output == NULL ? -errno : 0;
/*
 * -F/--pf: select which page faults to trace.  "all" enables major and
 * minor faults, "maj"/"min" enable one kind; bits are OR-ed into the flag
 * word, so repeated use accumulates.
 */
2716 static int parse_pagefaults(const struct option *opt, const char *str,
2717 int unset __maybe_unused)
2719 int *trace_pgfaults = opt->value;
2721 if (strcmp(str, "all") == 0)
2722 *trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
2723 else if (strcmp(str, "maj") == 0)
2724 *trace_pgfaults |= TRACE_PFMAJ;
2725 else if (strcmp(str, "min") == 0)
2726 *trace_pgfaults |= TRACE_PFMIN;
/* Install the same sample handler on every event selector in the list. */
2733 static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler)
2735 struct perf_evsel *evsel;
2737 evlist__for_each_entry(evlist, evsel)
2738 evsel->handler = handler;
2742 * XXX: Hackish, just splitting the combined -e+--event (syscalls
2743 * (raw_syscalls:{sys_{enter,exit}} + events (tracepoints, HW, SW, etc) to use
2744 * existing facilities unchanged (trace->ev_qualifier + parse_options()).
2746 * It'd be better to introduce a parse_options() variant that would return a
2747 * list with the terms it didn't match to an event...
/*
 * Split the -e/--event argument: terms naming a syscall (or a strace group
 * file under STRACE_GROUPS_DIR) go into the syscall qualifier (lists[1]),
 * everything else is handed to the stock parse_events_option() (lists[0]).
 */
2749 static int trace__parse_events_option(const struct option *opt, const char *str,
2750 int unset __maybe_unused)
2752 struct trace *trace = (struct trace *)opt->value;
2753 const char *s = str;
2754 char *sep = NULL, *lists[2] = { NULL, NULL, };
2755 int len = strlen(str), err = -1, list;
2756 char *strace_groups_dir = system_path(STRACE_GROUPS_DIR);
2757 char group_name[PATH_MAX];
2759 if (strace_groups_dir == NULL)
	/* a leading '!' negates the whole syscall qualifier */
2764 trace->not_ev_qualifier = true;
	/* walk the comma-separated terms one at a time */
2768 if ((sep = strchr(s, ',')) != NULL)
	/* known syscall name? then it belongs to the qualifier list */
2772 if (syscalltbl__id(trace->sctbl, s) >= 0) {
	/* otherwise, is it a readable strace-style group file, e.g. "file"? */
2775 path__join(group_name, sizeof(group_name), strace_groups_dir, s);
2776 if (access(group_name, R_OK) == 0)
	/* append to an already-started list ... */
2781 sprintf(lists[list] + strlen(lists[list]), ",%s", s);
	/* ... or start one.  NOTE(review): malloc(len) looks one byte short when
	 * a single term spans the whole input (strcpy then writes len+1 bytes
	 * including the NUL) — verify; len+1 would be safe. */
2783 lists[list] = malloc(len);
2784 if (lists[list] == NULL)
2786 strcpy(lists[list], s);
	/* syscall terms collected: build the qualifier strlist, resolving group
	 * names relative to the strace groups directory */
2796 if (lists[1] != NULL) {
2797 struct strlist_config slist_config = {
2798 .dirname = strace_groups_dir,
2801 trace->ev_qualifier = strlist__new(lists[1], &slist_config);
2802 if (trace->ev_qualifier == NULL) {
2803 fputs("Not enough memory to parse event qualifier", trace->output);
2807 if (trace__validate_ev_qualifier(trace))
	/* feed the non-syscall terms to the regular -e/--event parser */
2814 struct option o = OPT_CALLBACK('e', "event", &trace->evlist, "event",
2815 "event selector. use 'perf list' to list available events",
2816 parse_events_option);
2817 err = parse_events_option(&o, lists[0], 0);
2826 int cmd_trace(int argc, const char **argv)
2828 const char *trace_usage[] = {
2829 "perf trace [<options>] [<command>]",
2830 "perf trace [<options>] -- <command> [<options>]",
2831 "perf trace record [<options>] [<command>]",
2832 "perf trace record [<options>] -- <command> [<options>]",
2835 struct trace trace = {
2844 .user_freq = UINT_MAX,
2845 .user_interval = ULLONG_MAX,
2846 .no_buffering = true,
2847 .mmap_pages = UINT_MAX,
2848 .proc_map_timeout = 500,
2852 .trace_syscalls = true,
2853 .kernel_syscallchains = false,
2854 .max_stack = UINT_MAX,
2856 const char *output_name = NULL;
2857 const struct option trace_options[] = {
2858 OPT_CALLBACK('e', "event", &trace, "event",
2859 "event/syscall selector. use 'perf list' to list available events",
2860 trace__parse_events_option),
2861 OPT_BOOLEAN(0, "comm", &trace.show_comm,
2862 "show the thread COMM next to its id"),
2863 OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
2864 OPT_CALLBACK(0, "expr", &trace, "expr", "list of syscalls/events to trace",
2865 trace__parse_events_option),
2866 OPT_STRING('o', "output", &output_name, "file", "output file name"),
2867 OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
2868 OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
2869 "trace events on existing process id"),
2870 OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
2871 "trace events on existing thread id"),
2872 OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids",
2873 "pids to filter (by the kernel)", trace__set_filter_pids),
2874 OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
2875 "system-wide collection from all CPUs"),
2876 OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
2877 "list of cpus to monitor"),
2878 OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
2879 "child tasks do not inherit counters"),
2880 OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
2881 "number of mmap data pages",
2882 perf_evlist__parse_mmap_pages),
2883 OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
2885 OPT_CALLBACK(0, "duration", &trace, "float",
2886 "show only events with duration > N.M ms",
2887 trace__set_duration),
2888 OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
2889 OPT_INCR('v', "verbose", &verbose, "be more verbose"),
2890 OPT_BOOLEAN('T', "time", &trace.full_time,
2891 "Show full timestamp, not time relative to first start"),
2892 OPT_BOOLEAN('s', "summary", &trace.summary_only,
2893 "Show only syscall summary with statistics"),
2894 OPT_BOOLEAN('S', "with-summary", &trace.summary,
2895 "Show all syscalls and summary with statistics"),
2896 OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
2897 "Trace pagefaults", parse_pagefaults, "maj"),
2898 OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
2899 OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
2900 OPT_CALLBACK(0, "call-graph", &trace.opts,
2901 "record_mode[,record_size]", record_callchain_help,
2902 &record_parse_callchain_opt),
2903 OPT_BOOLEAN(0, "kernel-syscall-graph", &trace.kernel_syscallchains,
2904 "Show the kernel callchains on the syscall exit path"),
2905 OPT_UINTEGER(0, "min-stack", &trace.min_stack,
2906 "Set the minimum stack depth when parsing the callchain, "
2907 "anything below the specified depth will be ignored."),
2908 OPT_UINTEGER(0, "max-stack", &trace.max_stack,
2909 "Set the maximum stack depth when parsing the callchain, "
2910 "anything beyond the specified depth will be ignored. "
2911 "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)),
2912 OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
2913 "per thread proc mmap processing timeout in ms"),
2914 OPT_UINTEGER('D', "delay", &trace.opts.initial_delay,
2915 "ms to wait before starting measurement after program "
2919 bool __maybe_unused max_stack_user_set = true;
2920 bool mmap_pages_user_set = true;
2921 const char * const trace_subcommands[] = { "record", NULL };
2925 signal(SIGSEGV, sighandler_dump_stack);
2926 signal(SIGFPE, sighandler_dump_stack);
2928 trace.evlist = perf_evlist__new();
2929 trace.sctbl = syscalltbl__new();
2931 if (trace.evlist == NULL || trace.sctbl == NULL) {
2932 pr_err("Not enough memory to run!\n");
2937 argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
2938 trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);
2940 err = bpf__setup_stdout(trace.evlist);
2942 bpf__strerror_setup_stdout(trace.evlist, err, bf, sizeof(bf));
2943 pr_err("ERROR: Setup BPF stdout failed: %s\n", bf);
2949 if (trace.trace_pgfaults) {
2950 trace.opts.sample_address = true;
2951 trace.opts.sample_time = true;
2954 if (trace.opts.mmap_pages == UINT_MAX)
2955 mmap_pages_user_set = false;
2957 if (trace.max_stack == UINT_MAX) {
2958 trace.max_stack = input_name ? PERF_MAX_STACK_DEPTH : sysctl_perf_event_max_stack;
2959 max_stack_user_set = false;
2962 #ifdef HAVE_DWARF_UNWIND_SUPPORT
2963 if ((trace.min_stack || max_stack_user_set) && !callchain_param.enabled && trace.trace_syscalls)
2964 record_opts__parse_callchain(&trace.opts, &callchain_param, "dwarf", false);
2967 if (callchain_param.enabled) {
2968 if (!mmap_pages_user_set && geteuid() == 0)
2969 trace.opts.mmap_pages = perf_event_mlock_kb_in_pages() * 4;
2971 symbol_conf.use_callchain = true;
2974 if (trace.evlist->nr_entries > 0)
2975 evlist__set_evsel_handler(trace.evlist, trace__event_handler);
2977 if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
2978 return trace__record(&trace, argc-1, &argv[1]);
2980 /* summary_only implies summary option, but don't overwrite summary if set */
2981 if (trace.summary_only)
2982 trace.summary = trace.summary_only;
2984 if (!trace.trace_syscalls && !trace.trace_pgfaults &&
2985 trace.evlist->nr_entries == 0 /* Was --events used? */) {
2986 pr_err("Please specify something to trace.\n");
2990 if (!trace.trace_syscalls && trace.ev_qualifier) {
2991 pr_err("The -e option can't be used with --no-syscalls.\n");
2995 if (output_name != NULL) {
2996 err = trace__open_output(&trace, output_name);
2998 perror("failed to create output file");
3003 trace.open_id = syscalltbl__id(trace.sctbl, "open");
3005 err = target__validate(&trace.opts.target);
3007 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
3008 fprintf(trace.output, "%s", bf);
3012 err = target__parse_uid(&trace.opts.target);
3014 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
3015 fprintf(trace.output, "%s", bf);
3019 if (!argc && target__none(&trace.opts.target))
3020 trace.opts.target.system_wide = true;
3023 err = trace__replay(&trace);
3025 err = trace__run(&trace, argc, argv);
3028 if (output_name != NULL)
3029 fclose(trace.output);