/*
 * builtin-trace.c
 *
 * Builtin 'trace' command:
 *
 * Display a continuously updated trace of any workload, CPU, specific PID,
 * system wide, etc.  Default format is loosely strace like, but any other
 * event may be specified using --event.
 *
 * Copyright (C) 2012, 2013, 2014, 2015 Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
 *
 * Initially based on the 'trace' prototype by Thomas Gleixner:
 *
 * http://lwn.net/Articles/415728/ ("Announcing a new utility: 'trace'")
 *
 * Released under the GPL v2. (and only v2, not any later version)
 */
19 #include <traceevent/event-parse.h>
20 #include <api/fs/tracing_path.h>
22 #include "util/color.h"
23 #include "util/debug.h"
24 #include "util/evlist.h"
25 #include <subcmd/exec-cmd.h>
26 #include "util/machine.h"
27 #include "util/path.h"
28 #include "util/session.h"
29 #include "util/thread.h"
30 #include <subcmd/parse-options.h>
31 #include "util/strlist.h"
32 #include "util/intlist.h"
33 #include "util/thread_map.h"
34 #include "util/stat.h"
35 #include "trace/beauty/beauty.h"
36 #include "trace-event.h"
37 #include "util/parse-events.h"
38 #include "util/bpf-loader.h"
39 #include "callchain.h"
40 #include "print_binary.h"
42 #include "syscalltbl.h"
43 #include "rb_resort.h"
47 #include <libaudit.h> /* FIXME: Still needed for audit_errno_to_name */
52 #include <linux/err.h>
53 #include <linux/filter.h>
54 #include <linux/audit.h>
55 #include <linux/kernel.h>
56 #include <linux/random.h>
57 #include <linux/stringify.h>
58 #include <linux/time64.h>
60 #include "sane_ctype.h"
63 # define O_CLOEXEC 02000000
/* NOTE(review): the following lines appear excerpted from 'struct trace';
 * the struct header and several interior members were lost in extraction —
 * compare against the original file before relying on this layout. */
struct perf_tool tool;			/* callbacks used when processing events */
struct syscalltbl *sctbl;		/* syscall id <-> name table */
struct syscall *table;			/* per-id syscall descriptors */
struct perf_evsel *sys_enter,		/* raw_syscalls:sys_enter evsel */
struct record_opts opts;		/* recording options (target, mmap, ...) */
struct perf_evlist *evlist;		/* events being traced */
struct thread *current;			/* last thread that hit a syscall */
unsigned long nr_events;		/* total samples processed */
struct strlist *ev_qualifier;		/* -e syscall name filter list */
double duration_filter;			/* --duration threshold, in ms */
unsigned int max_stack;			/* callchain depth limits */
unsigned int min_stack;
bool not_ev_qualifier;			/* true when the -e list is negated */
bool multiple_threads;			/* more than one thread being traced */
bool show_tool_stats;
bool kernel_syscallchains;
/* Accessors for one tracepoint field: exactly one of these is installed,
 * by tp_field__init_uint() or tp_field__init_ptr() respectively. */
u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
126 #define TP_UINT_FIELD(bits) \
127 static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
130 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
/*
 * Like TP_UINT_FIELD(), but byte-swaps the value: used when the data was
 * recorded on a host with the opposite endianness.
 */
#define TP_UINT_FIELD__SWAPPED(bits) \
static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
{ \
	u##bits value; \
	memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
	return bswap_##bits(value);\
}
/* Byte-swapping readers for 16/32/64-bit fields (8-bit needs no swap). */
TP_UINT_FIELD__SWAPPED(16);
TP_UINT_FIELD__SWAPPED(32);
TP_UINT_FIELD__SWAPPED(64);
151 static int tp_field__init_uint(struct tp_field *field,
152 struct format_field *format_field,
155 field->offset = format_field->offset;
157 switch (format_field->size) {
159 field->integer = tp_field__u8;
162 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
165 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
168 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
177 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
179 return sample->raw_data + field->offset;
182 static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
184 field->offset = format_field->offset;
185 field->pointer = tp_field__ptr;
192 struct tp_field args, ret;
196 static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
197 struct tp_field *field,
200 struct format_field *format_field = perf_evsel__field(evsel, name);
202 if (format_field == NULL)
205 return tp_field__init_uint(field, format_field, evsel->needs_swap);
/* Bind the syscall_tp member 'name' to the tracepoint field of the same
 * name (stringified with #name) in the evsel's format. */
#define perf_evsel__init_sc_tp_uint_field(evsel, name) \
({ struct syscall_tp *sc = evsel->priv;\
perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); })
212 static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
213 struct tp_field *field,
216 struct format_field *format_field = perf_evsel__field(evsel, name);
218 if (format_field == NULL)
221 return tp_field__init_ptr(field, format_field);
/* Pointer-accessor counterpart of perf_evsel__init_sc_tp_uint_field(). */
#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
({ struct syscall_tp *sc = evsel->priv;\
perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name); })
228 static void perf_evsel__delete_priv(struct perf_evsel *evsel)
231 perf_evsel__delete(evsel);
234 static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
236 evsel->priv = malloc(sizeof(struct syscall_tp));
237 if (evsel->priv != NULL) {
238 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
241 evsel->handler = handler;
/*
 * Create a tracepoint evsel for raw_syscalls:sys_{enter,exit}, falling back
 * to the older "syscalls" group name, then wire up its syscall_tp state.
 * Returns NULL on failure.
 */
static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
{
	struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);

	/* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */
	if (IS_ERR(evsel))
		evsel = perf_evsel__newtp("syscalls", direction);

	if (IS_ERR(evsel))
		return NULL;

	if (perf_evsel__init_syscall_tp(evsel, handler))
		goto out_delete;

	return evsel;

out_delete:
	perf_evsel__delete_priv(evsel);
	return NULL;
}
/* Read the unsigned integer tracepoint field 'name' from a sample. */
#define perf_evsel__sc_tp_uint(evsel, name, sample) \
({ struct syscall_tp *fields = evsel->priv; \
fields->name.integer(&fields->name, sample); })

/* Get a pointer into the sample's raw payload for field 'name'. */
#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
({ struct syscall_tp *fields = evsel->priv; \
fields->name.pointer(&fields->name, sample); })
284 const char **entries;
/* Define a strarray for 'array' whose strings index from zero. */
#define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
	.nr_entries = ARRAY_SIZE(array), \
	.entries = array, \
}
/* Define a strarray whose first string maps to value 'off', not zero. */
#define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
	.offset = off, \
	.nr_entries = ARRAY_SIZE(array), \
	.entries = array, \
}
298 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
300 struct syscall_arg *arg)
302 struct strarray *sa = arg->parm;
303 int idx = arg->val - sa->offset;
305 if (idx < 0 || idx >= sa->nr_entries)
306 return scnprintf(bf, size, intfmt, arg->val);
308 return scnprintf(bf, size, "%s", sa->entries[idx]);
/* strarray beautifier with decimal fallback for out-of-table values. */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
					      struct syscall_arg *arg)
{
	return __syscall_arg__scnprintf_strarray(bf, size, "%d", arg);
}

#define SCA_STRARRAY syscall_arg__scnprintf_strarray
#if defined(__i386__) || defined(__x86_64__)
/*
 * FIXME: Make this available to all arches as soon as the ioctl beautifier
 * gets rewritten to support all arches.
 */
/* strarray beautifier with hexadecimal fallback, used for ioctl cmds. */
static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
						 struct syscall_arg *arg)
{
	return __syscall_arg__scnprintf_strarray(bf, size, "%#x", arg);
}

#define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
#endif /* defined(__i386__) || defined(__x86_64__) */
/* Forward declaration: the fd beautifier needs thread__fd_path(), defined
 * further down in this file. */
static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
struct syscall_arg *arg);

#define SCA_FD syscall_arg__scnprintf_fd

/* NOTE(review): a guarding '#ifndef AT_FDCWD' line appears to have been
 * lost in extraction — confirm against the original file. */
#define AT_FDCWD -100
342 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
343 struct syscall_arg *arg)
348 return scnprintf(bf, size, "CWD");
350 return syscall_arg__scnprintf_fd(bf, size, arg);
353 #define SCA_FDAT syscall_arg__scnprintf_fd_at
/* Forward declaration: close(2)'s fd beautifier also drops the cached path. */
static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
struct syscall_arg *arg);

#define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
360 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
361 struct syscall_arg *arg)
363 return scnprintf(bf, size, "%#lx", arg->val);
366 #define SCA_HEX syscall_arg__scnprintf_hex
368 static size_t syscall_arg__scnprintf_int(char *bf, size_t size,
369 struct syscall_arg *arg)
371 return scnprintf(bf, size, "%d", arg->val);
374 #define SCA_INT syscall_arg__scnprintf_int
376 static const char *bpf_cmd[] = {
377 "MAP_CREATE", "MAP_LOOKUP_ELEM", "MAP_UPDATE_ELEM", "MAP_DELETE_ELEM",
378 "MAP_GET_NEXT_KEY", "PROG_LOAD",
380 static DEFINE_STRARRAY(bpf_cmd);
382 static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
383 static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
385 static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
386 static DEFINE_STRARRAY(itimers);
388 static const char *keyctl_options[] = {
389 "GET_KEYRING_ID", "JOIN_SESSION_KEYRING", "UPDATE", "REVOKE", "CHOWN",
390 "SETPERM", "DESCRIBE", "CLEAR", "LINK", "UNLINK", "SEARCH", "READ",
391 "INSTANTIATE", "NEGATE", "SET_REQKEY_KEYRING", "SET_TIMEOUT",
392 "ASSUME_AUTHORITY", "GET_SECURITY", "SESSION_TO_PARENT", "REJECT",
393 "INSTANTIATE_IOV", "INVALIDATE", "GET_PERSISTENT",
395 static DEFINE_STRARRAY(keyctl_options);
397 static const char *whences[] = { "SET", "CUR", "END",
405 static DEFINE_STRARRAY(whences);
407 static const char *fcntl_cmds[] = {
408 "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
409 "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
410 "F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
413 static DEFINE_STRARRAY(fcntl_cmds);
415 static const char *rlimit_resources[] = {
416 "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
417 "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
420 static DEFINE_STRARRAY(rlimit_resources);
422 static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
423 static DEFINE_STRARRAY(sighow);
425 static const char *clockid[] = {
426 "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
427 "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE", "BOOTTIME",
428 "REALTIME_ALARM", "BOOTTIME_ALARM", "SGI_CYCLE", "TAI"
430 static DEFINE_STRARRAY(clockid);
432 static const char *socket_families[] = {
433 "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
434 "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
435 "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
436 "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
437 "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
438 "ALG", "NFC", "VSOCK",
440 static DEFINE_STRARRAY(socket_families);
442 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
443 struct syscall_arg *arg)
448 if (mode == F_OK) /* 0 */
449 return scnprintf(bf, size, "F");
451 if (mode & n##_OK) { \
452 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
462 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
467 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
/* Forward declaration: filename beautifier cooperates with vfs_getname. */
static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
struct syscall_arg *arg);

#define SCA_FILENAME syscall_arg__scnprintf_filename
474 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
475 struct syscall_arg *arg)
477 int printed = 0, flags = arg->val;
480 if (flags & O_##n) { \
481 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
490 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
495 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
497 #if defined(__i386__) || defined(__x86_64__)
499 * FIXME: Make this available to all arches.
501 #define TCGETS 0x5401
503 static const char *tioctls[] = {
504 "TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
505 "TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
506 "TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
507 "TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
508 "TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
509 "TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
510 "TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK",
511 "TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
512 "TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
513 "TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
514 "TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
515 [0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
516 "TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
517 "TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
518 "TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE",
521 static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
522 #endif /* defined(__i386__) || defined(__x86_64__) */
/* getrandom(2) flags; define them when the libc headers are too old. */
#ifndef GRND_NONBLOCK
#define GRND_NONBLOCK	0x0001
#endif
#ifndef GRND_RANDOM
#define GRND_RANDOM	0x0002
#endif
531 static size_t syscall_arg__scnprintf_getrandom_flags(char *bf, size_t size,
532 struct syscall_arg *arg)
534 int printed = 0, flags = arg->val;
537 if (flags & GRND_##n) { \
538 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
539 flags &= ~GRND_##n; \
547 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
552 #define SCA_GETRANDOM_FLAGS syscall_arg__scnprintf_getrandom_flags
/* Fill both the scnprintf slot and its strarray parm for argument 'arg'
 * in a syscall_fmt initializer. */
#define STRARRAY(arg, name, array) \
.arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
.arg_parm = { [arg] = &strarray__##array, }
558 #include "trace/beauty/eventfd.c"
559 #include "trace/beauty/flock.c"
560 #include "trace/beauty/futex_op.c"
561 #include "trace/beauty/mmap.c"
562 #include "trace/beauty/mode_t.c"
563 #include "trace/beauty/msg_flags.c"
564 #include "trace/beauty/open_flags.c"
565 #include "trace/beauty/perf_event_open.c"
566 #include "trace/beauty/pid.c"
567 #include "trace/beauty/sched_policy.c"
568 #include "trace/beauty/seccomp.c"
569 #include "trace/beauty/signum.c"
570 #include "trace/beauty/socket_type.c"
571 #include "trace/beauty/waitid_options.c"
573 static struct syscall_fmt {
576 size_t (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
583 { .name = "access", .errmsg = true,
584 .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, },
585 { .name = "arch_prctl", .errmsg = true, .alias = "prctl", },
586 { .name = "bpf", .errmsg = true, STRARRAY(0, cmd, bpf_cmd), },
587 { .name = "brk", .hexret = true,
588 .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
589 { .name = "chdir", .errmsg = true, },
590 { .name = "chmod", .errmsg = true, },
591 { .name = "chroot", .errmsg = true, },
592 { .name = "clock_gettime", .errmsg = true, STRARRAY(0, clk_id, clockid), },
593 { .name = "clone", .errpid = true, },
594 { .name = "close", .errmsg = true,
595 .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
596 { .name = "connect", .errmsg = true, },
597 { .name = "creat", .errmsg = true, },
598 { .name = "dup", .errmsg = true, },
599 { .name = "dup2", .errmsg = true, },
600 { .name = "dup3", .errmsg = true, },
601 { .name = "epoll_ctl", .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
602 { .name = "eventfd2", .errmsg = true,
603 .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
604 { .name = "faccessat", .errmsg = true, },
605 { .name = "fadvise64", .errmsg = true, },
606 { .name = "fallocate", .errmsg = true, },
607 { .name = "fchdir", .errmsg = true, },
608 { .name = "fchmod", .errmsg = true, },
609 { .name = "fchmodat", .errmsg = true,
610 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
611 { .name = "fchown", .errmsg = true, },
612 { .name = "fchownat", .errmsg = true,
613 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
614 { .name = "fcntl", .errmsg = true,
615 .arg_scnprintf = { [1] = SCA_STRARRAY, /* cmd */ },
616 .arg_parm = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
617 { .name = "fdatasync", .errmsg = true, },
618 { .name = "flock", .errmsg = true,
619 .arg_scnprintf = { [1] = SCA_FLOCK, /* cmd */ }, },
620 { .name = "fsetxattr", .errmsg = true, },
621 { .name = "fstat", .errmsg = true, .alias = "newfstat", },
622 { .name = "fstatat", .errmsg = true, .alias = "newfstatat", },
623 { .name = "fstatfs", .errmsg = true, },
624 { .name = "fsync", .errmsg = true, },
625 { .name = "ftruncate", .errmsg = true, },
626 { .name = "futex", .errmsg = true,
627 .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
628 { .name = "futimesat", .errmsg = true,
629 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
630 { .name = "getdents", .errmsg = true, },
631 { .name = "getdents64", .errmsg = true, },
632 { .name = "getitimer", .errmsg = true, STRARRAY(0, which, itimers), },
633 { .name = "getpid", .errpid = true, },
634 { .name = "getpgid", .errpid = true, },
635 { .name = "getppid", .errpid = true, },
636 { .name = "getrandom", .errmsg = true,
637 .arg_scnprintf = { [2] = SCA_GETRANDOM_FLAGS, /* flags */ }, },
638 { .name = "getrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
639 { .name = "getxattr", .errmsg = true, },
640 { .name = "inotify_add_watch", .errmsg = true, },
641 { .name = "ioctl", .errmsg = true,
643 #if defined(__i386__) || defined(__x86_64__)
645 * FIXME: Make this available to all arches.
647 [1] = SCA_STRHEXARRAY, /* cmd */
648 [2] = SCA_HEX, /* arg */ },
649 .arg_parm = { [1] = &strarray__tioctls, /* cmd */ }, },
651 [2] = SCA_HEX, /* arg */ }, },
653 { .name = "keyctl", .errmsg = true, STRARRAY(0, option, keyctl_options), },
654 { .name = "kill", .errmsg = true,
655 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
656 { .name = "lchown", .errmsg = true, },
657 { .name = "lgetxattr", .errmsg = true, },
658 { .name = "linkat", .errmsg = true,
659 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
660 { .name = "listxattr", .errmsg = true, },
661 { .name = "llistxattr", .errmsg = true, },
662 { .name = "lremovexattr", .errmsg = true, },
663 { .name = "lseek", .errmsg = true,
664 .arg_scnprintf = { [2] = SCA_STRARRAY, /* whence */ },
665 .arg_parm = { [2] = &strarray__whences, /* whence */ }, },
666 { .name = "lsetxattr", .errmsg = true, },
667 { .name = "lstat", .errmsg = true, .alias = "newlstat", },
668 { .name = "lsxattr", .errmsg = true, },
669 { .name = "madvise", .errmsg = true,
670 .arg_scnprintf = { [0] = SCA_HEX, /* start */
671 [2] = SCA_MADV_BHV, /* behavior */ }, },
672 { .name = "mkdir", .errmsg = true, },
673 { .name = "mkdirat", .errmsg = true,
674 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
675 { .name = "mknod", .errmsg = true, },
676 { .name = "mknodat", .errmsg = true,
677 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
678 { .name = "mlock", .errmsg = true,
679 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
680 { .name = "mlockall", .errmsg = true,
681 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
682 { .name = "mmap", .hexret = true,
683 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
684 [2] = SCA_MMAP_PROT, /* prot */
685 [3] = SCA_MMAP_FLAGS, /* flags */ }, },
686 { .name = "mprotect", .errmsg = true,
687 .arg_scnprintf = { [0] = SCA_HEX, /* start */
688 [2] = SCA_MMAP_PROT, /* prot */ }, },
689 { .name = "mq_unlink", .errmsg = true,
690 .arg_scnprintf = { [0] = SCA_FILENAME, /* u_name */ }, },
691 { .name = "mremap", .hexret = true,
692 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
693 [3] = SCA_MREMAP_FLAGS, /* flags */
694 [4] = SCA_HEX, /* new_addr */ }, },
695 { .name = "munlock", .errmsg = true,
696 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
697 { .name = "munmap", .errmsg = true,
698 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
699 { .name = "name_to_handle_at", .errmsg = true,
700 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
701 { .name = "newfstatat", .errmsg = true,
702 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
703 { .name = "open", .errmsg = true,
704 .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
705 { .name = "open_by_handle_at", .errmsg = true,
706 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
707 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
708 { .name = "openat", .errmsg = true,
709 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
710 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
711 { .name = "perf_event_open", .errmsg = true,
712 .arg_scnprintf = { [2] = SCA_INT, /* cpu */
713 [3] = SCA_FD, /* group_fd */
714 [4] = SCA_PERF_FLAGS, /* flags */ }, },
715 { .name = "pipe2", .errmsg = true,
716 .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
717 { .name = "poll", .errmsg = true, .timeout = true, },
718 { .name = "ppoll", .errmsg = true, .timeout = true, },
719 { .name = "pread", .errmsg = true, .alias = "pread64", },
720 { .name = "preadv", .errmsg = true, .alias = "pread", },
721 { .name = "prlimit64", .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
722 { .name = "pwrite", .errmsg = true, .alias = "pwrite64", },
723 { .name = "pwritev", .errmsg = true, },
724 { .name = "read", .errmsg = true, },
725 { .name = "readlink", .errmsg = true, },
726 { .name = "readlinkat", .errmsg = true,
727 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
728 { .name = "readv", .errmsg = true, },
729 { .name = "recvfrom", .errmsg = true,
730 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
731 { .name = "recvmmsg", .errmsg = true,
732 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
733 { .name = "recvmsg", .errmsg = true,
734 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
735 { .name = "removexattr", .errmsg = true, },
736 { .name = "renameat", .errmsg = true,
737 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
738 { .name = "rmdir", .errmsg = true, },
739 { .name = "rt_sigaction", .errmsg = true,
740 .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
741 { .name = "rt_sigprocmask", .errmsg = true, STRARRAY(0, how, sighow), },
742 { .name = "rt_sigqueueinfo", .errmsg = true,
743 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
744 { .name = "rt_tgsigqueueinfo", .errmsg = true,
745 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
746 { .name = "sched_getattr", .errmsg = true, },
747 { .name = "sched_setattr", .errmsg = true, },
748 { .name = "sched_setscheduler", .errmsg = true,
749 .arg_scnprintf = { [1] = SCA_SCHED_POLICY, /* policy */ }, },
750 { .name = "seccomp", .errmsg = true,
751 .arg_scnprintf = { [0] = SCA_SECCOMP_OP, /* op */
752 [1] = SCA_SECCOMP_FLAGS, /* flags */ }, },
753 { .name = "select", .errmsg = true, .timeout = true, },
754 { .name = "sendmmsg", .errmsg = true,
755 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
756 { .name = "sendmsg", .errmsg = true,
757 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
758 { .name = "sendto", .errmsg = true,
759 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
760 { .name = "set_tid_address", .errpid = true, },
761 { .name = "setitimer", .errmsg = true, STRARRAY(0, which, itimers), },
762 { .name = "setpgid", .errmsg = true, },
763 { .name = "setrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
764 { .name = "setxattr", .errmsg = true, },
765 { .name = "shutdown", .errmsg = true, },
766 { .name = "socket", .errmsg = true,
767 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
768 [1] = SCA_SK_TYPE, /* type */ },
769 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
770 { .name = "socketpair", .errmsg = true,
771 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
772 [1] = SCA_SK_TYPE, /* type */ },
773 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
774 { .name = "stat", .errmsg = true, .alias = "newstat", },
775 { .name = "statfs", .errmsg = true, },
776 { .name = "statx", .errmsg = true,
777 .arg_scnprintf = { [0] = SCA_FDAT, /* flags */
778 [2] = SCA_STATX_FLAGS, /* flags */
779 [3] = SCA_STATX_MASK, /* mask */ }, },
780 { .name = "swapoff", .errmsg = true,
781 .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
782 { .name = "swapon", .errmsg = true,
783 .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
784 { .name = "symlinkat", .errmsg = true,
785 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
786 { .name = "tgkill", .errmsg = true,
787 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
788 { .name = "tkill", .errmsg = true,
789 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
790 { .name = "truncate", .errmsg = true, },
791 { .name = "uname", .errmsg = true, .alias = "newuname", },
792 { .name = "unlinkat", .errmsg = true,
793 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
794 { .name = "utime", .errmsg = true, },
795 { .name = "utimensat", .errmsg = true,
796 .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, },
797 { .name = "utimes", .errmsg = true, },
798 { .name = "vmsplice", .errmsg = true, },
799 { .name = "wait4", .errpid = true,
800 .arg_scnprintf = { [2] = SCA_WAITID_OPTIONS, /* options */ }, },
801 { .name = "waitid", .errpid = true,
802 .arg_scnprintf = { [3] = SCA_WAITID_OPTIONS, /* options */ }, },
803 { .name = "write", .errmsg = true, },
804 { .name = "writev", .errmsg = true, },
807 static int syscall_fmt__cmp(const void *name, const void *fmtp)
809 const struct syscall_fmt *fmt = fmtp;
810 return strcmp(name, fmt->name);
813 static struct syscall_fmt *syscall_fmt__find(const char *name)
815 const int nmemb = ARRAY_SIZE(syscall_fmts);
816 return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
820 struct event_format *tp_format;
822 struct format_field *args;
825 struct syscall_fmt *fmt;
826 size_t (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
/*
 * We need to have this 'calculated' boolean because in some cases we really
 * don't know what is the duration of a syscall, for instance, when we start
 * a session and some threads are waiting for a syscall to finish, say 'poll',
 * in which case all we can do is to print "( ? ) for duration and for the
 * timestamp.
 */
837 static size_t fprintf_duration(unsigned long t, bool calculated, FILE *fp)
839 double duration = (double)t / NSEC_PER_MSEC;
840 size_t printed = fprintf(fp, "(");
843 printed += fprintf(fp, " ? ");
844 else if (duration >= 1.0)
845 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
846 else if (duration >= 0.01)
847 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
849 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
850 return printed + fprintf(fp, "): ");
/*
 * filename.ptr: The filename char pointer that will be vfs_getname'd
 * filename.entry_str_pos: Where to insert the string translated from
 *                         filename.ptr by the vfs_getname tracepoint/kprobe.
 */
/* Per-thread state kept in thread->priv while tracing.
 * NOTE(review): several members and the closing brace appear to have been
 * lost in extraction — compare against the original file. */
struct thread_trace {
unsigned long nr_events;	/* samples seen for this thread */
unsigned long pfmaj, pfmin;	/* major/minor page fault counts */
short int entry_str_pos;
unsigned int namelen;
struct intlist *syscall_stats;	/* per-syscall-id statistics */
880 static struct thread_trace *thread_trace__new(void)
882 struct thread_trace *ttrace = zalloc(sizeof(struct thread_trace));
885 ttrace->paths.max = -1;
887 ttrace->syscall_stats = intlist__new(NULL);
892 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
894 struct thread_trace *ttrace;
899 if (thread__priv(thread) == NULL)
900 thread__set_priv(thread, thread_trace__new());
902 if (thread__priv(thread) == NULL)
905 ttrace = thread__priv(thread);
910 color_fprintf(fp, PERF_COLOR_RED,
911 "WARNING: not enough memory, dropping samples!\n");
/* Selection bits for page-fault tracing. */
#define TRACE_PFMAJ (1 << 0)
#define TRACE_PFMIN (1 << 1)

/* Size of the per-thread buffer used to assemble the sys_enter line. */
static const size_t trace__entry_str_size = 2048;
920 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
922 struct thread_trace *ttrace = thread__priv(thread);
924 if (fd > ttrace->paths.max) {
925 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
930 if (ttrace->paths.max != -1) {
931 memset(npath + ttrace->paths.max + 1, 0,
932 (fd - ttrace->paths.max) * sizeof(char *));
934 memset(npath, 0, (fd + 1) * sizeof(char *));
937 ttrace->paths.table = npath;
938 ttrace->paths.max = fd;
941 ttrace->paths.table[fd] = strdup(pathname);
943 return ttrace->paths.table[fd] != NULL ? 0 : -1;
946 static int thread__read_fd_path(struct thread *thread, int fd)
948 char linkname[PATH_MAX], pathname[PATH_MAX];
952 if (thread->pid_ == thread->tid) {
953 scnprintf(linkname, sizeof(linkname),
954 "/proc/%d/fd/%d", thread->pid_, fd);
956 scnprintf(linkname, sizeof(linkname),
957 "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
960 if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
963 ret = readlink(linkname, pathname, sizeof(pathname));
965 if (ret < 0 || ret > st.st_size)
968 pathname[ret] = '\0';
969 return trace__set_fd_pathname(thread, fd, pathname);
972 static const char *thread__fd_path(struct thread *thread, int fd,
975 struct thread_trace *ttrace = thread__priv(thread);
983 if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) {
986 ++trace->stats.proc_getname;
987 if (thread__read_fd_path(thread, fd))
991 return ttrace->paths.table[fd];
994 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
995 struct syscall_arg *arg)
998 size_t printed = scnprintf(bf, size, "%d", fd);
999 const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1002 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1007 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1008 struct syscall_arg *arg)
1011 size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1012 struct thread_trace *ttrace = thread__priv(arg->thread);
1014 if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
1015 zfree(&ttrace->paths.table[fd]);
1020 static void thread__set_filename_pos(struct thread *thread, const char *bf,
1023 struct thread_trace *ttrace = thread__priv(thread);
1025 ttrace->filename.ptr = ptr;
1026 ttrace->filename.entry_str_pos = bf - ttrace->entry_str;
1029 static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
1030 struct syscall_arg *arg)
1032 unsigned long ptr = arg->val;
1034 if (!arg->trace->vfs_getname)
1035 return scnprintf(bf, size, "%#x", ptr);
1037 thread__set_filename_pos(arg->thread, bf, ptr);
1041 static bool trace__filter_duration(struct trace *trace, double t)
1043 return t < (trace->duration_filter * NSEC_PER_MSEC);
1046 static size_t __trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1048 double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1050 return fprintf(fp, "%10.3f ", ts);
/*
 * We're handling tstamp=0 as an undefined tstamp, i.e. like when we are
 * using ttrace->entry_time for a thread that receives a sys_exit without
 * first having received a sys_enter ("poll" issued before tracing session
 * starts, lost sys_enter exit due to ring buffer overflow).
 */
1059 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1062 return __trace__fprintf_tstamp(trace, tstamp, fp);
1064 return fprintf(fp, " ? ");
/* Set by sig_handler() to end the main loop; 'interrupted' distinguishes
 * SIGINT (user ^C) from other termination signals. */
static bool done = false;
static bool interrupted = false;

static void sig_handler(int sig)
{
	done = true;
	interrupted = sig == SIGINT;
}
1076 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1077 u64 duration, bool duration_calculated, u64 tstamp, FILE *fp)
1079 size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1080 printed += fprintf_duration(duration, duration_calculated, fp);
1082 if (trace->multiple_threads) {
1083 if (trace->show_comm)
1084 printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1085 printed += fprintf(fp, "%d ", thread->tid);
1091 static int trace__process_event(struct trace *trace, struct machine *machine,
1092 union perf_event *event, struct perf_sample *sample)
1096 switch (event->header.type) {
1097 case PERF_RECORD_LOST:
1098 color_fprintf(trace->output, PERF_COLOR_RED,
1099 "LOST %" PRIu64 " events!\n", event->lost.lost);
1100 ret = machine__process_lost_event(machine, event, sample);
1103 ret = machine__process_event(machine, event, sample);
1110 static int trace__tool_process(struct perf_tool *tool,
1111 union perf_event *event,
1112 struct perf_sample *sample,
1113 struct machine *machine)
1115 struct trace *trace = container_of(tool, struct trace, tool);
1116 return trace__process_event(trace, machine, event, sample);
1119 static char *trace__machine__resolve_kernel_addr(void *vmachine, unsigned long long *addrp, char **modp)
1121 struct machine *machine = vmachine;
1123 if (machine->kptr_restrict_warned)
1126 if (symbol_conf.kptr_restrict) {
1127 pr_warning("Kernel address maps (/proc/{kallsyms,modules}) are restricted.\n\n"
1128 "Check /proc/sys/kernel/kptr_restrict.\n\n"
1129 "Kernel samples will not be resolved.\n");
1130 machine->kptr_restrict_warned = true;
1134 return machine__resolve_kernel_addr(vmachine, addrp, modp);
/*
 * Initialize the symbol machinery for live tracing: create a host machine,
 * register the kptr_restrict-aware kernel address resolver and synthesize
 * the pre-existing threads we were asked to trace.
 */
1137 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1139 int err = symbol__init(NULL);
1144 trace->host = machine__new_host();
1145 if (trace->host == NULL)
1148 if (trace_event__register_resolver(trace->host, trace__machine__resolve_kernel_addr) < 0)
1151 err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1152 evlist->threads, trace__tool_process, false,
1153 trace->opts.proc_map_timeout);
/*
 * Choose a pretty-printer (scnprintf callback) for each argument of a
 * syscall, based on the tracepoint field type/name: explicit per-syscall
 * formatters first, then heuristics (filenames, pointers as hex, pid_t,
 * umode_t, and "*fd"-named integers as file descriptors).
 */
1160 static int syscall__set_arg_fmts(struct syscall *sc)
1162 struct format_field *field;
1165 sc->arg_scnprintf = calloc(sc->nr_args, sizeof(void *));
1166 if (sc->arg_scnprintf == NULL)
1170 sc->arg_parm = sc->fmt->arg_parm;
1172 for (field = sc->args; field; field = field->next) {
/* a per-syscall table entry wins over the generic heuristics below */
1173 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1174 sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1175 else if (strcmp(field->type, "const char *") == 0 &&
1176 (strcmp(field->name, "filename") == 0 ||
1177 strcmp(field->name, "path") == 0 ||
1178 strcmp(field->name, "pathname") == 0))
1179 sc->arg_scnprintf[idx] = SCA_FILENAME;
1180 else if (field->flags & FIELD_IS_POINTER)
1181 sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
1182 else if (strcmp(field->type, "pid_t") == 0)
1183 sc->arg_scnprintf[idx] = SCA_PID;
1184 else if (strcmp(field->type, "umode_t") == 0)
1185 sc->arg_scnprintf[idx] = SCA_MODE_T;
/* integer fields whose name ends in "fd" are file descriptors */
1186 else if ((strcmp(field->type, "int") == 0 ||
1187 strcmp(field->type, "unsigned int") == 0 ||
1188 strcmp(field->type, "long") == 0) &&
1189 (len = strlen(field->name)) >= 2 &&
1190 strcmp(field->name + len - 2, "fd") == 0) {
1192 * /sys/kernel/tracing/events/syscalls/sys_enter*
1193 * egrep 'field:.*fd;' .../format|sed -r 's/.*field:([a-z ]+) [a-z_]*fd.+/\1/g'|sort|uniq -c
1198 sc->arg_scnprintf[idx] = SCA_FD;
/*
 * Lazily populate trace->syscalls.table[id]: grow the table if needed
 * (zeroing the newly exposed slots), look up the tracepoint format for
 * sys_enter_<name> (falling back to the fmt alias for renamed syscalls),
 * drop the leading syscall-number field and install the arg formatters.
 */
1206 static int trace__read_syscall_info(struct trace *trace, int id)
1210 const char *name = syscalltbl__name(trace->sctbl, id);
1215 if (id > trace->syscalls.max) {
1216 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1218 if (nsyscalls == NULL)
/* zero only the slots added beyond the previous max */
1221 if (trace->syscalls.max != -1) {
1222 memset(nsyscalls + trace->syscalls.max + 1, 0,
1223 (id - trace->syscalls.max) * sizeof(*sc));
1225 memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1228 trace->syscalls.table = nsyscalls;
1229 trace->syscalls.max = id;
1232 sc = trace->syscalls.table + id;
1235 sc->fmt = syscall_fmt__find(sc->name);
1237 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1238 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
/* some syscalls are exposed under an alias tracepoint name */
1240 if (IS_ERR(sc->tp_format) && sc->fmt && sc->fmt->alias) {
1241 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1242 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1245 if (IS_ERR(sc->tp_format))
1248 sc->args = sc->tp_format->format.fields;
1249 sc->nr_args = sc->tp_format->format.nr_fields;
1251 * We need to check and discard the first variable '__syscall_nr'
1252 * or 'nr' that mean the syscall number. It is needless here.
1253 * So drop '__syscall_nr' or 'nr' field but does not exist on older kernels.
1255 if (sc->args && (!strcmp(sc->args->name, "__syscall_nr") || !strcmp(sc->args->name, "nr"))) {
1256 sc->args = sc->args->next;
/* exit/exit_group never return, so no sys_exit pairing is expected */
1260 sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
1262 return syscall__set_arg_fmts(sc);
/*
 * Translate the -e/--event syscall-name qualifier list into syscall ids,
 * reporting every invalid name (with a hint) and freeing the id array on
 * failure so the caller sees a consistent empty state.
 */
1265 static int trace__validate_ev_qualifier(struct trace *trace)
1268 struct str_node *pos;
1270 trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier);
1271 trace->ev_qualifier_ids.entries = malloc(trace->ev_qualifier_ids.nr *
1272 sizeof(trace->ev_qualifier_ids.entries[0]));
1274 if (trace->ev_qualifier_ids.entries == NULL) {
1275 fputs("Error:\tNot enough memory for allocating events qualifier ids\n",
1283 strlist__for_each_entry(pos, trace->ev_qualifier) {
1284 const char *sc = pos->s;
1285 int id = syscalltbl__id(trace->sctbl, sc);
/* accumulate all bad names into one error line */
1289 fputs("Error:\tInvalid syscall ", trace->output);
1292 fputs(", ", trace->output);
1295 fputs(sc, trace->output);
1298 trace->ev_qualifier_ids.entries[i++] = id;
1302 fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'"
1303 "\nHint:\tand: 'man syscalls'\n", trace->output);
/* roll back: leave no partially-filled qualifier id list behind */
1304 zfree(&trace->ev_qualifier_ids.entries);
1305 trace->ev_qualifier_ids.nr = 0;
1312 * args is to be interpreted as a series of longs but we need to handle
1313 * 8-byte unaligned accesses. args points to raw_data within the event
1314 * and raw_data is guaranteed to be 8-byte unaligned because it is
1315 * preceded by raw_size which is a u32. So we need to copy args to a temp
1316 * variable to read it. Most notably this avoids extended load instructions
1317 * on unaligned addresses
/*
 * Format a syscall's argument list into bf. With tracepoint format info
 * each named field is printed through its chosen formatter; zero-valued
 * args may be suppressed. Without format info (older kernels), args are
 * dumped as raw values.
 */
1320 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
1321 unsigned char *args, struct trace *trace,
1322 struct thread *thread)
1328 if (sc->args != NULL) {
1329 struct format_field *field;
1331 struct syscall_arg arg = {
1338 for (field = sc->args; field;
1339 field = field->next, ++arg.idx, bit <<= 1) {
1343 /* special care for unaligned accesses */
1344 p = args + sizeof(unsigned long) * arg.idx;
1345 memcpy(&val, p, sizeof(val));
1348 * Suppress this argument if its value is zero and
1349 * and we don't have a string associated in an
/* ...except for strarray args, where 0 may be a named value */
1353 !(sc->arg_scnprintf &&
1354 sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1355 sc->arg_parm[arg.idx]))
1358 printed += scnprintf(bf + printed, size - printed,
1359 "%s%s: ", printed ? ", " : "", field->name);
1360 if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
1363 arg.parm = sc->arg_parm[arg.idx];
1364 printed += sc->arg_scnprintf[arg.idx](bf + printed,
1365 size - printed, &arg);
1367 printed += scnprintf(bf + printed, size - printed,
1371 } else if (IS_ERR(sc->tp_format)) {
1373 * If we managed to read the tracepoint /format file, then we
1374 * may end up not having any args, like with gettid(), so only
1375 * print the raw args when we didn't manage to read it.
1380 /* special care for unaligned accesses */
1381 p = args + sizeof(unsigned long) * i;
1382 memcpy(&val, p, sizeof(val));
1383 printed += scnprintf(bf + printed, size - printed,
1385 printed ? ", " : "", i, val);
/* Per-event handler signature, stashed in evsel->handler and invoked from
 * the sample dispatch paths. */
1393 typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
1394 union perf_event *event,
1395 struct perf_sample *sample);
/*
 * Resolve a syscall id to its struct syscall, lazily reading its info on
 * first use. Returns NULL (after logging) for invalid ids or when the
 * tracepoint format could not be read.
 */
1397 static struct syscall *trace__syscall_info(struct trace *trace,
1398 struct perf_evsel *evsel, int id)
1404 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1405 * before that, leaving at a higher verbosity level till that is
1406 * explained. Reproduced with plain ftrace with:
1408 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1409 * grep "NR -1 " /t/trace_pipe
1411 * After generating some load on the machine.
1415 fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1416 id, perf_evsel__name(evsel), ++n);
/* fill the slot on demand; fall through to the error path on failure */
1421 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1422 trace__read_syscall_info(trace, id))
1425 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1428 return &trace->syscalls.table[id];
1432 fprintf(trace->output, "Problems reading syscall %d", id);
1433 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1434 fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1435 fputs(" information\n", trace->output);
/*
 * Accumulate per-thread, per-syscall duration statistics (for --summary):
 * find or create the stats node for this syscall id and feed it the
 * elapsed time since the matching sys_enter.
 */
1440 static void thread__update_stats(struct thread_trace *ttrace,
1441 int id, struct perf_sample *sample)
1443 struct int_node *inode;
1444 struct stats *stats;
1447 inode = intlist__findnew(ttrace->syscall_stats, id);
1451 stats = inode->priv;
/* first sample for this syscall id: allocate its stats lazily */
1452 if (stats == NULL) {
1453 stats = malloc(sizeof(struct stats));
1457 inode->priv = stats;
/* guard against clock skew / missing entry timestamps */
1460 if (ttrace->entry_time && sample->time > ttrace->entry_time)
1461 duration = sample->time - ttrace->entry_time;
1463 update_stats(stats, duration);
/*
 * If the previously-current thread has a sys_enter printed but not yet
 * completed, flush it as an interrupted "...) ..." line before we print
 * output for another thread/event.
 */
1466 static int trace__printf_interrupted_entry(struct trace *trace, struct perf_sample *sample)
1468 struct thread_trace *ttrace;
1472 if (trace->current == NULL)
1475 ttrace = thread__priv(trace->current);
1477 if (!ttrace->entry_pending)
1480 duration = sample->time - ttrace->entry_time;
1482 printed = trace__fprintf_entry_head(trace, trace->current, duration, true, ttrace->entry_time, trace->output);
1483 printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str);
1484 ttrace->entry_pending = false;
/*
 * raw_syscalls:sys_enter handler: resolve the syscall, format its args
 * into the per-thread entry string and either print it immediately (no
 * duration filtering active) or keep it pending until sys_exit so the
 * duration can be shown on the same line.
 */
1489 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1490 union perf_event *event __maybe_unused,
1491 struct perf_sample *sample)
1496 struct thread *thread;
1497 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
1498 struct syscall *sc = trace__syscall_info(trace, evsel, id);
1499 struct thread_trace *ttrace;
1504 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1505 ttrace = thread__trace(thread, trace->output);
1509 args = perf_evsel__sc_tp_ptr(evsel, args, sample);
/* lazily allocate the per-thread formatting buffer */
1511 if (ttrace->entry_str == NULL) {
1512 ttrace->entry_str = malloc(trace__entry_str_size);
1513 if (!ttrace->entry_str)
1517 if (!(trace->duration_filter || trace->summary_only || trace->min_stack))
1518 trace__printf_interrupted_entry(trace, sample);
1520 ttrace->entry_time = sample->time;
1521 msg = ttrace->entry_str;
1522 printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name);
1524 printed += syscall__scnprintf_args(sc, msg + printed, trace__entry_str_size - printed,
1525 args, trace, thread);
/* syscalls that never return (exit/exit_group) are printed right away */
1528 if (!(trace->duration_filter || trace->summary_only || trace->min_stack)) {
1529 trace__fprintf_entry_head(trace, thread, 0, false, ttrace->entry_time, trace->output);
1530 fprintf(trace->output, "%-70s)\n", ttrace->entry_str);
1533 ttrace->entry_pending = true;
1534 /* See trace__vfs_getname & trace__sys_exit */
1535 ttrace->filename.pending_open = false;
/* remember the last thread we saw, for interrupted-entry handling */
1538 if (trace->current != thread) {
1539 thread__put(trace->current);
1540 trace->current = thread__get(thread);
1544 thread__put(thread);
/*
 * Resolve the sample's callchain into cursor, bounded by --max-stack.
 * Returns non-zero on resolution failure.
 */
1548 static int trace__resolve_callchain(struct trace *trace, struct perf_evsel *evsel,
1549 struct perf_sample *sample,
1550 struct callchain_cursor *cursor)
1552 struct addr_location al;
1554 if (machine__resolve(trace->host, &al, sample) < 0 ||
1555 thread__resolve_callchain(al.thread, cursor, evsel, sample, NULL, NULL, trace->max_stack))
/*
 * Print the previously-resolved callchain (symbol names, unknowns as
 * addresses), indented 38 columns to line up under the trace output.
 */
1561 static int trace__fprintf_callchain(struct trace *trace, struct perf_sample *sample)
1563 /* TODO: user-configurable print_opts */
1564 const unsigned int print_opts = EVSEL__PRINT_SYM |
1566 EVSEL__PRINT_UNKNOWN_AS_ADDR;
1568 return sample__fprintf_callchain(sample, 38, print_opts, &callchain_cursor, trace->output);
/*
 * raw_syscalls:sys_exit handler: compute the syscall duration, update
 * summary stats, complete (or reprint as "continued") the pending entry
 * line, pretty-print the return value per the syscall's fmt (errno name,
 * timeout, hex, child pid/comm) and optionally dump the callchain.
 */
1571 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
1572 union perf_event *event __maybe_unused,
1573 struct perf_sample *sample)
1577 bool duration_calculated = false;
1578 struct thread *thread;
1579 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1, callchain_ret = 0;
1580 struct syscall *sc = trace__syscall_info(trace, evsel, id);
1581 struct thread_trace *ttrace;
1586 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1587 ttrace = thread__trace(thread, trace->output);
1592 thread__update_stats(ttrace, id, sample);
1594 ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
/* a successful open() completes the pathname captured by vfs_getname */
1596 if (id == trace->open_id && ret >= 0 && ttrace->filename.pending_open) {
1597 trace__set_fd_pathname(thread, ret, ttrace->filename.name);
1598 ttrace->filename.pending_open = false;
1599 ++trace->stats.vfs_getname;
1602 if (ttrace->entry_time) {
1603 duration = sample->time - ttrace->entry_time;
1604 if (trace__filter_duration(trace, duration))
1606 duration_calculated = true;
1607 } else if (trace->duration_filter)
1610 if (sample->callchain) {
1611 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
/* skip stacks shallower than --min-stack */
1612 if (callchain_ret == 0) {
1613 if (callchain_cursor.nr < trace->min_stack)
1619 if (trace->summary_only)
1622 trace__fprintf_entry_head(trace, thread, duration, duration_calculated, ttrace->entry_time, trace->output);
1624 if (ttrace->entry_pending) {
1625 fprintf(trace->output, "%-70s", ttrace->entry_str);
/* entry was flushed as interrupted earlier; mark this as its tail */
1627 fprintf(trace->output, " ... [");
1628 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1629 fprintf(trace->output, "]: %s()", sc->name);
1632 if (sc->fmt == NULL) {
1634 fprintf(trace->output, ") = %ld", ret);
1635 } else if (ret < 0 && (sc->fmt->errmsg || sc->fmt->errpid)) {
1636 char bf[STRERR_BUFSIZE];
1637 const char *emsg = str_error_r(-ret, bf, sizeof(bf)),
1638 *e = audit_errno_to_name(-ret);
1640 fprintf(trace->output, ") = -1 %s %s", e, emsg);
1641 } else if (ret == 0 && sc->fmt->timeout)
1642 fprintf(trace->output, ") = 0 Timeout");
1643 else if (sc->fmt->hexret)
1644 fprintf(trace->output, ") = %#lx", ret);
/* fork/clone-style returns: show the child's comm when known */
1645 else if (sc->fmt->errpid) {
1646 struct thread *child = machine__find_thread(trace->host, ret, ret);
1648 if (child != NULL) {
1649 fprintf(trace->output, ") = %ld", ret);
1650 if (child->comm_set)
1651 fprintf(trace->output, " (%s)", thread__comm_str(child));
1657 fputc('\n', trace->output);
1659 if (callchain_ret > 0)
1660 trace__fprintf_callchain(trace, sample);
1661 else if (callchain_ret < 0)
1662 pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
1664 ttrace->entry_pending = false;
1667 thread__put(thread);
/*
 * probe:vfs_getname handler: capture the pathname being resolved so the
 * matching sys_exit (for open-like syscalls) can associate it with the
 * returned fd, and splice the filename into the pending entry string at
 * the position recorded for the pointer argument (truncating from the
 * left if it doesn't fit).
 */
1671 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
1672 union perf_event *event __maybe_unused,
1673 struct perf_sample *sample)
1675 struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1676 struct thread_trace *ttrace;
1677 size_t filename_len, entry_str_len, to_move;
1678 ssize_t remaining_space;
1680 const char *filename = perf_evsel__rawptr(evsel, sample, "pathname");
1685 ttrace = thread__priv(thread);
1689 filename_len = strlen(filename);
1690 if (filename_len == 0)
/* grow the per-thread filename buffer as needed */
1693 if (ttrace->filename.namelen < filename_len) {
1694 char *f = realloc(ttrace->filename.name, filename_len + 1);
1699 ttrace->filename.namelen = filename_len;
1700 ttrace->filename.name = f;
1703 strcpy(ttrace->filename.name, filename);
1704 ttrace->filename.pending_open = true;
/* no pointer arg recorded in the entry string: nothing to splice */
1706 if (!ttrace->filename.ptr)
1709 entry_str_len = strlen(ttrace->entry_str);
1710 remaining_space = trace__entry_str_size - entry_str_len - 1; /* \0 */
1711 if (remaining_space <= 0)
/* keep the tail of the path, which is the most informative part */
1714 if (filename_len > (size_t)remaining_space) {
1715 filename += filename_len - remaining_space;
1716 filename_len = remaining_space;
1719 to_move = entry_str_len - ttrace->filename.entry_str_pos + 1; /* \0 */
1720 pos = ttrace->entry_str + ttrace->filename.entry_str_pos;
1721 memmove(pos + filename_len, pos, to_move);
1722 memcpy(pos, filename, filename_len);
1724 ttrace->filename.ptr = 0;
1725 ttrace->filename.entry_str_pos = 0;
1727 thread__put(thread);
/*
 * sched:sched_stat_runtime handler: accumulate on-CPU runtime (in ms)
 * per thread and globally; on failure to get a thread_trace, dump the
 * raw event fields instead.
 */
1732 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
1733 union perf_event *event __maybe_unused,
1734 struct perf_sample *sample)
1736 u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1737 double runtime_ms = (double)runtime / NSEC_PER_MSEC;
1738 struct thread *thread = machine__findnew_thread(trace->host,
1741 struct thread_trace *ttrace = thread__trace(thread, trace->output);
1746 ttrace->runtime_ms += runtime_ms;
1747 trace->runtime_ms += runtime_ms;
1749 thread__put(thread);
/* fallback path: print the raw tracepoint payload */
1753 fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1755 perf_evsel__strval(evsel, sample, "comm"),
1756 (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1758 perf_evsel__intval(evsel, sample, "vruntime"));
/*
 * print_binary() callback for BPF output events: emit printable bytes
 * as-is and everything else as '.'; all layout-related ops are no-ops.
 */
1762 static void bpf_output__printer(enum binary_printer_ops op,
1763 unsigned int val, void *extra)
1765 FILE *output = extra;
1766 unsigned char ch = (unsigned char)val;
1769 case BINARY_PRINT_CHAR_DATA:
1770 fprintf(output, "%c", isprint(ch) ? ch : '.');
/* layout ops: intentionally ignored */
1772 case BINARY_PRINT_DATA_BEGIN:
1773 case BINARY_PRINT_LINE_BEGIN:
1774 case BINARY_PRINT_ADDR:
1775 case BINARY_PRINT_NUM_DATA:
1776 case BINARY_PRINT_NUM_PAD:
1777 case BINARY_PRINT_SEP:
1778 case BINARY_PRINT_CHAR_PAD:
1779 case BINARY_PRINT_LINE_END:
1780 case BINARY_PRINT_DATA_END:
/* Dump a BPF output event's raw payload, 8 bytes per line. */
1786 static void bpf_output__fprintf(struct trace *trace,
1787 struct perf_sample *sample)
1789 print_binary(sample->raw_data, sample->raw_size, 8,
1790 bpf_output__printer, trace->output);
/*
 * Generic handler for --event tracepoints and BPF output events: flush
 * any interrupted syscall entry, print timestamp and event name, then
 * the payload (hexdump for BPF output, formatted fields for tracepoints)
 * and optionally the callchain.
 */
1793 static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
1794 union perf_event *event __maybe_unused,
1795 struct perf_sample *sample)
1797 int callchain_ret = 0;
1799 if (sample->callchain) {
1800 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
1801 if (callchain_ret == 0) {
1802 if (callchain_cursor.nr < trace->min_stack)
1808 trace__printf_interrupted_entry(trace, sample);
1809 trace__fprintf_tstamp(trace, sample->time, trace->output);
/* keep column alignment with the syscall lines */
1811 if (trace->trace_syscalls)
1812 fprintf(trace->output, "( ): ");
1814 fprintf(trace->output, "%s:", evsel->name);
1816 if (perf_evsel__is_bpf_output(evsel)) {
1817 bpf_output__fprintf(trace, sample);
1818 } else if (evsel->tp_format) {
1819 event_format__fprintf(evsel->tp_format, sample->cpu,
1820 sample->raw_data, sample->raw_size,
1824 fprintf(trace->output, ")\n");
1826 if (callchain_ret > 0)
1827 trace__fprintf_callchain(trace, sample);
1828 else if (callchain_ret < 0)
1829 pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
/*
 * Print a resolved address location for page faults: dso (when requested
 * or verbose), symbol+offset when resolved, otherwise the raw address.
 */
1834 static void print_location(FILE *f, struct perf_sample *sample,
1835 struct addr_location *al,
1836 bool print_dso, bool print_sym)
1839 if ((verbose > 0 || print_dso) && al->map)
1840 fprintf(f, "%s@", al->map->dso->long_name);
1842 if ((verbose > 0 || print_sym) && al->sym)
1843 fprintf(f, "%s+0x%" PRIx64, al->sym->name,
1844 al->addr - al->sym->start);
/* fall back to raw addresses when no symbol/map is available */
1846 fprintf(f, "0x%" PRIx64, al->addr);
1848 fprintf(f, "0x%" PRIx64, sample->addr);
/*
 * Page fault event handler (--pf): print "majfault"/"minfault" with the
 * faulting IP location, then the faulted data/code address ('d' or 'x'
 * map type), plus the callchain when available.
 */
1851 static int trace__pgfault(struct trace *trace,
1852 struct perf_evsel *evsel,
1853 union perf_event *event __maybe_unused,
1854 struct perf_sample *sample)
1856 struct thread *thread;
1857 struct addr_location al;
1858 char map_type = 'd';
1859 struct thread_trace *ttrace;
1861 int callchain_ret = 0;
1863 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1865 if (sample->callchain) {
1866 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
1867 if (callchain_ret == 0) {
1868 if (callchain_cursor.nr < trace->min_stack)
1874 ttrace = thread__trace(thread, trace->output);
/* count major vs minor faults separately */
1878 if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
1883 if (trace->summary_only)
1886 thread__find_addr_location(thread, sample->cpumode, MAP__FUNCTION,
1889 trace__fprintf_entry_head(trace, thread, 0, true, sample->time, trace->output);
1891 fprintf(trace->output, "%sfault [",
1892 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
1895 print_location(trace->output, sample, &al, false, true);
1897 fprintf(trace->output, "] => ");
/* try the data maps first, then executable maps for the fault target */
1899 thread__find_addr_location(thread, sample->cpumode, MAP__VARIABLE,
1903 thread__find_addr_location(thread, sample->cpumode,
1904 MAP__FUNCTION, sample->addr, &al);
1912 print_location(trace->output, sample, &al, true, false);
1914 fprintf(trace->output, " (%c%c)\n", map_type, al.level);
1916 if (callchain_ret > 0)
1917 trace__fprintf_callchain(trace, sample);
1918 else if (callchain_ret < 0)
1919 pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
1923 thread__put(thread);
/*
 * Record the first usable sample timestamp as the base for relative
 * timestamps, unless --full-time was requested.
 */
1927 static void trace__set_base_time(struct trace *trace,
1928 struct perf_evsel *evsel,
1929 struct perf_sample *sample)
1932 * BPF events were not setting PERF_SAMPLE_TIME, so be more robust
1933 * and don't use sample->time unconditionally, we may end up having
1934 * some other event in the future without PERF_SAMPLE_TIME for good
1935 * reason, i.e. we may not be interested in its timestamps, just in
1936 * it taking place, picking some piece of information when it
1937 * appears in our event stream (vfs_getname comes to mind).
1939 if (trace->base_time == 0 && !trace->full_time &&
1940 (evsel->attr.sample_type & PERF_SAMPLE_TIME))
1941 trace->base_time = sample->time;
/*
 * perf_tool sample callback used in replay mode: skip filtered threads,
 * establish the base time and dispatch to the evsel's handler.
 */
1944 static int trace__process_sample(struct perf_tool *tool,
1945 union perf_event *event,
1946 struct perf_sample *sample,
1947 struct perf_evsel *evsel,
1948 struct machine *machine __maybe_unused)
1950 struct trace *trace = container_of(tool, struct trace, tool);
1951 struct thread *thread;
1954 tracepoint_handler handler = evsel->handler;
1956 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1957 if (thread && thread__is_filtered(thread))
1960 trace__set_base_time(trace, evsel, sample);
1964 handler(trace, evsel, event, sample);
1967 thread__put(thread);
/*
 * 'perf trace record': build an argv for cmd_record() with the syscall
 * tracepoints (raw_syscalls preferred, syscalls:* for older kernels) and
 * any requested page-fault events, then append the user's arguments.
 */
1971 static int trace__record(struct trace *trace, int argc, const char **argv)
1973 unsigned int rec_argc, i, j;
1974 const char **rec_argv;
1975 const char * const record_args[] = {
1982 const char * const sc_args[] = { "-e", };
1983 unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
1984 const char * const majpf_args[] = { "-e", "major-faults" };
1985 unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
1986 const char * const minpf_args[] = { "-e", "minor-faults" };
1987 unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
1989 /* +1 is for the event string below */
1990 rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
1991 majpf_args_nr + minpf_args_nr + argc;
1992 rec_argv = calloc(rec_argc + 1, sizeof(char *));
1994 if (rec_argv == NULL)
1998 for (i = 0; i < ARRAY_SIZE(record_args); i++)
1999 rec_argv[j++] = record_args[i];
2001 if (trace->trace_syscalls) {
2002 for (i = 0; i < sc_args_nr; i++)
2003 rec_argv[j++] = sc_args[i];
2005 /* event string may be different for older kernels - e.g., RHEL6 */
2006 if (is_valid_tracepoint("raw_syscalls:sys_enter"))
2007 rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
2008 else if (is_valid_tracepoint("syscalls:sys_enter"))
2009 rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
2011 pr_err("Neither raw_syscalls nor syscalls events exist.\n");
2016 if (trace->trace_pgfaults & TRACE_PFMAJ)
2017 for (i = 0; i < majpf_args_nr; i++)
2018 rec_argv[j++] = majpf_args[i];
2020 if (trace->trace_pgfaults & TRACE_PFMIN)
2021 for (i = 0; i < minpf_args_nr; i++)
2022 rec_argv[j++] = minpf_args[i];
2024 for (i = 0; i < (unsigned int)argc; i++)
2025 rec_argv[j++] = argv[i];
2027 return cmd_record(j, rec_argv);
2030 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
/*
 * Try to add the probe:vfs_getname event (used to map fds to pathnames);
 * reject probes that lack the expected "pathname" field. Returns whether
 * the event was added.
 */
2032 static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
2034 struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
2039 if (perf_evsel__field(evsel, "pathname") == NULL) {
2040 perf_evsel__delete(evsel);
2044 evsel->handler = trace__vfs_getname;
2045 perf_evlist__add(evlist, evsel);
/*
 * Create a software page-fault evsel (major or minor per 'config'),
 * sampling every fault, with its handler preset to trace__pgfault.
 */
2049 static struct perf_evsel *perf_evsel__new_pgfault(u64 config)
2051 struct perf_evsel *evsel;
2052 struct perf_event_attr attr = {
2053 .type = PERF_TYPE_SOFTWARE,
2057 attr.config = config;
/* sample every single fault */
2058 attr.sample_period = 1;
2060 event_attr_init(&attr);
2062 evsel = perf_evsel__new(&attr);
2064 evsel->handler = trace__pgfault;
/*
 * Live-mode event dispatch: side-band events go to trace__process_event();
 * samples are mapped back to their evsel by id, sanity-checked (payload
 * present for tracepoints) and handed to the evsel's handler.
 */
2069 static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
2071 const u32 type = event->header.type;
2072 struct perf_evsel *evsel;
2074 if (type != PERF_RECORD_SAMPLE) {
2075 trace__process_event(trace, trace->host, event, sample);
2079 evsel = perf_evlist__id2evsel(trace->evlist, sample->id);
2080 if (evsel == NULL) {
2081 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id);
2085 trace__set_base_time(trace, evsel, sample);
/* a tracepoint sample without raw payload cannot be decoded */
2087 if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
2088 sample->raw_data == NULL) {
2089 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
2090 perf_evsel__name(evsel), sample->tid,
2091 sample->cpu, sample->raw_size);
2093 tracepoint_handler handler = evsel->handler;
2094 handler(trace, evsel, event, sample);
/*
 * Create and add the raw_syscalls sys_enter/sys_exit tracepoint evsels,
 * wire their field accessors, restrict callchains to user space unless
 * --kernel-syscall-graph was given, and remember them in trace->syscalls.
 * Uses goto-cleanup to release partially-created evsels on error.
 */
2098 static int trace__add_syscall_newtp(struct trace *trace)
2101 struct perf_evlist *evlist = trace->evlist;
2102 struct perf_evsel *sys_enter, *sys_exit;
2104 sys_enter = perf_evsel__syscall_newtp("sys_enter", trace__sys_enter);
2105 if (sys_enter == NULL)
2108 if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
2109 goto out_delete_sys_enter;
2111 sys_exit = perf_evsel__syscall_newtp("sys_exit", trace__sys_exit);
2112 if (sys_exit == NULL)
2113 goto out_delete_sys_enter;
2115 if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
2116 goto out_delete_sys_exit;
2118 perf_evlist__add(evlist, sys_enter);
2119 perf_evlist__add(evlist, sys_exit);
2121 if (callchain_param.enabled && !trace->kernel_syscallchains) {
2123 * We're interested only in the user space callchain
2124 * leading to the syscall, allow overriding that for
2125 * debugging reasons using --kernel_syscall_callchains
2127 sys_exit->attr.exclude_callchain_kernel = 1;
2130 trace->syscalls.events.sys_enter = sys_enter;
2131 trace->syscalls.events.sys_exit = sys_exit;
2137 out_delete_sys_exit:
2138 perf_evsel__delete_priv(sys_exit);
2139 out_delete_sys_enter:
2140 perf_evsel__delete_priv(sys_enter);
/*
 * Build an "id in (...)" (or "not in", with --not) tracepoint filter from
 * the validated qualifier ids and append it to both the sys_enter and
 * sys_exit evsels.
 */
2144 static int trace__set_ev_qualifier_filter(struct trace *trace)
2147 struct perf_evsel *sys_exit;
2148 char *filter = asprintf_expr_inout_ints("id", !trace->not_ev_qualifier,
2149 trace->ev_qualifier_ids.nr,
2150 trace->ev_qualifier_ids.entries);
2155 if (!perf_evsel__append_tp_filter(trace->syscalls.events.sys_enter,
2157 sys_exit = trace->syscalls.events.sys_exit;
2158 err = perf_evsel__append_tp_filter(sys_exit, filter);
/*
 * Live tracing main loop: set up the evlist (syscall tracepoints,
 * vfs_getname, page faults, sched_stat_runtime), configure callchains
 * and sample identifiers, open/mmap the events, optionally fork the
 * workload, then consume ring-buffer events until done/drained.
 * Error labels at the bottom translate errno into user-facing messages.
 */
2169 static int trace__run(struct trace *trace, int argc, const char **argv)
2171 struct perf_evlist *evlist = trace->evlist;
2172 struct perf_evsel *evsel, *pgfault_maj = NULL, *pgfault_min = NULL;
2174 unsigned long before;
2175 const bool forks = argc > 0;
2176 bool draining = false;
2180 if (trace->trace_syscalls && trace__add_syscall_newtp(trace))
2181 goto out_error_raw_syscalls;
2183 if (trace->trace_syscalls)
2184 trace->vfs_getname = perf_evlist__add_vfs_getname(evlist);
2186 if ((trace->trace_pgfaults & TRACE_PFMAJ)) {
2187 pgfault_maj = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MAJ);
2188 if (pgfault_maj == NULL)
2190 perf_evlist__add(evlist, pgfault_maj);
2193 if ((trace->trace_pgfaults & TRACE_PFMIN)) {
2194 pgfault_min = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MIN);
2195 if (pgfault_min == NULL)
2197 perf_evlist__add(evlist, pgfault_min);
2201 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
2202 trace__sched_stat_runtime))
2203 goto out_error_sched_stat_runtime;
2205 err = perf_evlist__create_maps(evlist, &trace->opts.target);
2207 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
2208 goto out_delete_evlist;
2211 err = trace__symbols_init(trace, evlist);
2213 fprintf(trace->output, "Problems initializing symbol libraries!\n");
2214 goto out_delete_evlist;
2217 perf_evlist__config(evlist, &trace->opts, NULL);
2219 if (callchain_param.enabled) {
2220 bool use_identifier = false;
2222 if (trace->syscalls.events.sys_exit) {
2223 perf_evsel__config_callchain(trace->syscalls.events.sys_exit,
2224 &trace->opts, &callchain_param);
2225 use_identifier = true;
2229 perf_evsel__config_callchain(pgfault_maj, &trace->opts, &callchain_param);
2230 use_identifier = true;
2234 perf_evsel__config_callchain(pgfault_min, &trace->opts, &callchain_param);
2235 use_identifier = true;
2238 if (use_identifier) {
2240 * Now we have evsels with different sample_ids, use
2241 * PERF_SAMPLE_IDENTIFIER to map from sample to evsel
2242 * from a fixed position in each ring buffer record.
2244 * As of this the changeset introducing this comment, this
2245 * isn't strictly needed, as the fields that can come before
2246 * PERF_SAMPLE_ID are all used, but we'll probably disable
2247 * some of those for things like copying the payload of
2248 * pointer syscall arguments, and for vfs_getname we don't
2249 * need PERF_SAMPLE_ADDR and PERF_SAMPLE_IP, so do this
2250 * here as a warning we need to use PERF_SAMPLE_IDENTIFIER.
2252 perf_evlist__set_sample_bit(evlist, IDENTIFIER);
2253 perf_evlist__reset_sample_bit(evlist, ID);
2257 signal(SIGCHLD, sig_handler);
2258 signal(SIGINT, sig_handler);
2261 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
2264 fprintf(trace->output, "Couldn't run the workload!\n");
2265 goto out_delete_evlist;
2269 err = perf_evlist__open(evlist);
2271 goto out_error_open;
2273 err = bpf__apply_obj_config();
2275 char errbuf[BUFSIZ];
2277 bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
2278 pr_err("ERROR: Apply config to BPF failed: %s\n",
2280 goto out_error_open;
2284 * Better not use !target__has_task() here because we need to cover the
2285 * case where no threads were specified in the command line, but a
2286 * workload was, and in that case we will fill in the thread_map when
2287 * we fork the workload in perf_evlist__prepare_workload.
2289 if (trace->filter_pids.nr > 0)
2290 err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries);
2291 else if (thread_map__pid(evlist->threads, 0) == -1)
2292 err = perf_evlist__set_filter_pid(evlist, getpid());
2297 if (trace->ev_qualifier_ids.nr > 0) {
2298 err = trace__set_ev_qualifier_filter(trace);
2302 pr_debug("event qualifier tracepoint filter: %s\n",
2303 trace->syscalls.events.sys_exit->filter);
2306 err = perf_evlist__apply_filters(evlist, &evsel);
2308 goto out_error_apply_filters;
2310 err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
2312 goto out_error_mmap;
2314 if (!target__none(&trace->opts.target) && !trace->opts.initial_delay)
2315 perf_evlist__enable(evlist);
2318 perf_evlist__start_workload(evlist);
2320 if (trace->opts.initial_delay) {
2321 usleep(trace->opts.initial_delay * 1000);
2322 perf_evlist__enable(evlist);
/* tid is only printed when more than one thread may appear */
2325 trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 ||
2326 evlist->threads->nr > 1 ||
2327 perf_evlist__first(evlist)->attr.inherit;
2329 before = trace->nr_events;
2331 for (i = 0; i < evlist->nr_mmaps; i++) {
2332 union perf_event *event;
2334 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
2335 struct perf_sample sample;
2339 err = perf_evlist__parse_sample(evlist, event, &sample);
2341 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
2345 trace__handle_event(trace, event, &sample);
2347 perf_evlist__mmap_consume(evlist, i);
/* on shutdown, disable and drain the buffers before exiting */
2352 if (done && !draining) {
2353 perf_evlist__disable(evlist);
2359 if (trace->nr_events == before) {
2360 int timeout = done ? 100 : -1;
2362 if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
2363 if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
2373 thread__zput(trace->current);
2375 perf_evlist__disable(evlist);
2379 trace__fprintf_thread_summary(trace, trace->output);
2381 if (trace->show_tool_stats) {
2382 fprintf(trace->output, "Stats:\n "
2383 " vfs_getname : %" PRIu64 "\n"
2384 " proc_getname: %" PRIu64 "\n",
2385 trace->stats.vfs_getname,
2386 trace->stats.proc_getname);
2391 perf_evlist__delete(evlist);
2392 trace->evlist = NULL;
2393 trace->live = false;
2396 char errbuf[BUFSIZ];
2398 out_error_sched_stat_runtime:
2399 tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime");
2402 out_error_raw_syscalls:
2403 tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
2407 perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
2411 perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2414 fprintf(trace->output, "%s\n", errbuf);
2415 goto out_delete_evlist;
2417 out_error_apply_filters:
2418 fprintf(trace->output,
2419 "Failed to set filter \"%s\" on event %s with %d (%s)\n",
2420 evsel->filter, perf_evsel__name(evsel), errno,
2421 str_error_r(errno, errbuf, sizeof(errbuf)));
2422 goto out_delete_evlist;
2425 fprintf(trace->output, "Not enough memory to run!\n");
2426 goto out_delete_evlist;
2429 fprintf(trace->output, "errno=%d,%s\n", errno, strerror(errno));
2430 goto out_delete_evlist;
/*
 * Replay a previously recorded session ('perf trace -i perf.data'):
 * hook up the stock perf event handlers plus the syscall enter/exit
 * tracepoint handlers, then process every event in the file.
 */
static int trace__replay(struct trace *trace)
	const struct perf_evsel_str_handler handlers[] = {
		{ "probe:vfs_getname", trace__vfs_getname, },
	struct perf_data_file file = {
		.mode = PERF_DATA_MODE_READ,
		.force = trace->force,	/* --force: read files not owned by us */
	struct perf_session *session;
	struct perf_evsel *evsel;

	/* Route the side-band events to the generic perf handlers. */
	trace->tool.sample = trace__process_sample;
	trace->tool.mmap = perf_event__process_mmap;
	trace->tool.mmap2 = perf_event__process_mmap2;
	trace->tool.comm = perf_event__process_comm;
	trace->tool.exit = perf_event__process_exit;
	trace->tool.fork = perf_event__process_fork;
	trace->tool.attr = perf_event__process_attr;
	trace->tool.tracing_data = perf_event__process_tracing_data;
	trace->tool.build_id = perf_event__process_build_id;
	trace->tool.namespaces = perf_event__process_namespaces;

	/* Replayed events must be delivered in timestamp order. */
	trace->tool.ordered_events = true;
	trace->tool.ordering_requires_timestamps = true;

	/* add tid to output */
	trace->multiple_threads = true;

	session = perf_session__new(&file, false, &trace->tool);
	if (session == NULL)

	/* Restrict symbol resolution to the pids/tids being replayed. */
	if (trace->opts.target.pid)
		symbol_conf.pid_list_str = strdup(trace->opts.target.pid);

	if (trace->opts.target.tid)
		symbol_conf.tid_list_str = strdup(trace->opts.target.tid);

	if (symbol__init(&session->header.env) < 0)

	trace->host = &session->machines.host;

	err = perf_session__set_tracepoints_handlers(session, handlers);

	evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
						     "raw_syscalls:sys_enter");
	/* older kernels have syscalls tp versus raw_syscalls */
		evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
							     "syscalls:sys_enter");

	/* Wire up the sys_enter handler and locate its "args" field. */
	    (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
	     perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
		pr_err("Error during initialize raw_syscalls:sys_enter event\n");

	evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
						     "raw_syscalls:sys_exit");
		evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
							     "syscalls:sys_exit");
	/* Wire up the sys_exit handler and locate its "ret" field. */
	    (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
	     perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
		pr_err("Error during initialize raw_syscalls:sys_exit event\n");

	/* Route page-fault software events to the pgfault handler. */
	evlist__for_each_entry(session->evlist, evsel) {
		if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
		    (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
		     evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
		     evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
			evsel->handler = trace__pgfault;

	err = perf_session__process_events(session);
		pr_err("Failed to process events, error %d", err);

	else if (trace->summary)
		trace__fprintf_thread_summary(trace, trace->output);

	perf_session__delete(session);
/* Print the header that precedes the end-of-run per-thread summary. */
static size_t trace__fprintf_threads_header(FILE *fp)
	printed = fprintf(fp, "\n Summary of events:\n\n");
/*
 * Resort one thread's per-syscall stats (an intlist keyed by syscall
 * number) into an rb tree ordered by total time spent (msecs), for the
 * summary table below.
 */
DEFINE_RESORT_RB(syscall_stats, a->msecs > b->msecs,
	struct stats *stats;
	/* Fill one sort entry from an intlist node keyed by syscall id. */
	struct int_node *source = rb_entry(nd, struct int_node, rb_node);
	struct stats *stats = source->priv;

	entry->syscall = source->i;	/* syscall number */
	entry->stats = stats;
	/* total time = number of calls * average duration, converted to msecs */
	entry->msecs = stats ? (u64)stats->n * (avg_stats(stats) / NSEC_PER_MSEC) : 0;
/*
 * Print one thread's per-syscall statistics table (calls, total, min,
 * avg, max, stddev), ordered by total time spent per syscall.
 * Returns the number of characters printed.
 */
static size_t thread__dump_stats(struct thread_trace *ttrace,
				 struct trace *trace, FILE *fp)
	DECLARE_RESORT_RB_INTLIST(syscall_stats, ttrace->syscall_stats);

	if (syscall_stats == NULL)

	printed += fprintf(fp, "\n");

	printed += fprintf(fp, " syscall calls total min avg max stddev\n");
	printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n");
	printed += fprintf(fp, " --------------- -------- --------- --------- --------- --------- ------\n");

	/* Walk the resorted entries, most expensive syscalls per the cmp above. */
	resort_rb__for_each_entry(nd, syscall_stats) {
		struct stats *stats = syscall_stats_entry->stats;
		/* stats keep nanoseconds; convert to milliseconds for display */
		double min = (double)(stats->min) / NSEC_PER_MSEC;
		double max = (double)(stats->max) / NSEC_PER_MSEC;
		double avg = avg_stats(stats);
		u64 n = (u64) stats->n;

		/* relative standard deviation, in percent of the average */
		pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
		avg /= NSEC_PER_MSEC;

		/* syscall number -> name via the syscall table */
		sc = &trace->syscalls.table[syscall_stats_entry->syscall];
		printed += fprintf(fp, " %-15s", sc->name);
		printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f %9.3f",
				   n, syscall_stats_entry->msecs, min, avg);
		printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);

	resort_rb__delete(syscall_stats);
	printed += fprintf(fp, "\n\n");
/*
 * Print the summary line for one thread: comm, tid, event count, its
 * share of all events, page fault counts and runtime, followed by the
 * thread's per-syscall stats table.
 */
static size_t trace__fprintf_thread(FILE *fp, struct thread *thread, struct trace *trace)
	struct thread_trace *ttrace = thread__priv(thread);

	/* this thread's share of all events seen, in percent */
	ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;

	printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
	printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
	printed += fprintf(fp, "%.1f%%", ratio);
		printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
		printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
		printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
	else if (fputc('\n', fp) != EOF)

	printed += thread__dump_stats(ttrace, trace, fp);
2626 static unsigned long thread__nr_events(struct thread_trace *ttrace)
2628 return ttrace ? ttrace->nr_events : 0;
/*
 * Resort the machine's thread rb tree by per-thread event count, for the
 * end-of-run summary.
 */
DEFINE_RESORT_RB(threads, (thread__nr_events(a->thread->priv) < thread__nr_events(b->thread->priv)),
	struct thread *thread;
	entry->thread = rb_entry(nd, struct thread, rb_node);
/*
 * Print the end-of-run summary: one section per thread, ordered by the
 * per-thread event count (see the threads resort cmp above).
 */
static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
	DECLARE_RESORT_RB_MACHINE_THREADS(threads, trace->host);
	size_t printed = trace__fprintf_threads_header(fp);

	if (threads == NULL) {
		fprintf(fp, "%s", "Error sorting output by nr_events!\n");

	resort_rb__for_each_entry(nd, threads)
		printed += trace__fprintf_thread(fp, threads_entry->thread, trace);

	resort_rb__delete(threads);
2657 static int trace__set_duration(const struct option *opt, const char *str,
2658 int unset __maybe_unused)
2660 struct trace *trace = opt->value;
2662 trace->duration_filter = atof(str);
/*
 * --filter-pids CSV: parse the list of pids whose events should be
 * filtered out by the kernel. Our own pid is always stored as entry 0 so
 * that 'perf trace' doesn't trace itself.
 */
static int trace__set_filter_pids(const struct option *opt, const char *str,
				  int unset __maybe_unused)
	struct trace *trace = opt->value;
	/*
	 * FIXME: introduce a intarray class, plain parse csv and create a
	 * { int nr, int entries[] } struct...
	 */
	struct intlist *list = intlist__new(str);

	/* +1 for our own pid at slot 0 */
	i = trace->filter_pids.nr = intlist__nr_entries(list) + 1;
	trace->filter_pids.entries = calloc(i, sizeof(pid_t));

	if (trace->filter_pids.entries == NULL)

	trace->filter_pids.entries[0] = getpid();

	/* copy the user-supplied pids after our own */
	for (i = 1; i < trace->filter_pids.nr; ++i)
		trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i;

	intlist__delete(list);
/*
 * Open the -o/--output file for writing, first rotating an existing
 * non-empty file of the same name to "<name>.old".
 * Returns 0 on success, -errno if the file can't be opened.
 */
static int trace__open_output(struct trace *trace, const char *filename)
	if (!stat(filename, &st) && st.st_size) {
		char oldname[PATH_MAX];

		scnprintf(oldname, sizeof(oldname), "%s.old", filename);
		/* best effort: a failed rename just loses the backup copy */
		rename(filename, oldname);

	trace->output = fopen(filename, "w");

	return trace->output == NULL ? -errno : 0;
/*
 * -F/--pf all|maj|min: select which page fault events to trace.
 * Flags are OR-ed into the caller's flag word, so repeated uses of the
 * option accumulate.
 */
static int parse_pagefaults(const struct option *opt, const char *str,
			    int unset __maybe_unused)
	int *trace_pgfaults = opt->value;

	if (strcmp(str, "all") == 0)
		*trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
	else if (strcmp(str, "maj") == 0)
		*trace_pgfaults |= TRACE_PFMAJ;
	else if (strcmp(str, "min") == 0)
		*trace_pgfaults |= TRACE_PFMIN;
/* Set the same sample handler on every event selector in the list. */
static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler)
	struct perf_evsel *evsel;

	evlist__for_each_entry(evlist, evsel)
		evsel->handler = handler;
 * XXX: Hackish, just splitting the combined -e+--event (syscalls
 * (raw_syscalls:{sys_{enter,exit}}) + events (tracepoints, HW, SW, etc.)) to
 * use existing facilities unchanged (trace->ev_qualifier + parse_options()).
 *
 * It'd be better to introduce a parse_options() variant that would return a
 * list with the terms it didn't match to an event...
/*
 * Split a combined -e/--event argument into two comma-separated lists:
 * syscall names and strace group files go into lists[1] (they become
 * trace->ev_qualifier), everything else goes into lists[0] and is handed
 * to the regular parse_events_option() as perf events.
 */
static int trace__parse_events_option(const struct option *opt, const char *str,
				      int unset __maybe_unused)
	struct trace *trace = (struct trace *)opt->value;
	const char *s = str;
	char *sep = NULL, *lists[2] = { NULL, NULL, };
	int len = strlen(str), err = -1, list;
	char *strace_groups_dir = system_path(STRACE_GROUPS_DIR);
	char group_name[PATH_MAX];

	if (strace_groups_dir == NULL)

		/* a leading '!' negates the whole qualifier list */
		trace->not_ev_qualifier = true;

		/* process one comma-separated term at a time */
		if ((sep = strchr(s, ',')) != NULL)

		/* known syscall name or readable strace group file? -> qualifier list */
		if (syscalltbl__id(trace->sctbl, s) >= 0) {
			path__join(group_name, sizeof(group_name), strace_groups_dir, s);
			if (access(group_name, R_OK) == 0)

			/* append to an already-started list */
			sprintf(lists[list] + strlen(lists[list]), ",%s", s);
			/* first term of this list: len bounds all later appends */
			lists[list] = malloc(len);
			if (lists[list] == NULL)
			strcpy(lists[list], s);

	if (lists[1] != NULL) {
		struct strlist_config slist_config = {
			.dirname = strace_groups_dir,

		/* syscalls/groups become the event qualifier */
		trace->ev_qualifier = strlist__new(lists[1], &slist_config);
		if (trace->ev_qualifier == NULL) {
			fputs("Not enough memory to parse event qualifier", trace->output);

		if (trace__validate_ev_qualifier(trace))

		/* hand the non-syscall terms to the stock -e parser */
		struct option o = OPT_CALLBACK('e', "event", &trace->evlist, "event",
					       "event selector. use 'perf list' to list available events",
					       parse_events_option);
		err = parse_events_option(&o, lists[0], 0);
2825 int cmd_trace(int argc, const char **argv)
2827 const char *trace_usage[] = {
2828 "perf trace [<options>] [<command>]",
2829 "perf trace [<options>] -- <command> [<options>]",
2830 "perf trace record [<options>] [<command>]",
2831 "perf trace record [<options>] -- <command> [<options>]",
2834 struct trace trace = {
2843 .user_freq = UINT_MAX,
2844 .user_interval = ULLONG_MAX,
2845 .no_buffering = true,
2846 .mmap_pages = UINT_MAX,
2847 .proc_map_timeout = 500,
2851 .trace_syscalls = true,
2852 .kernel_syscallchains = false,
2853 .max_stack = UINT_MAX,
2855 const char *output_name = NULL;
2856 const struct option trace_options[] = {
2857 OPT_CALLBACK('e', "event", &trace, "event",
2858 "event/syscall selector. use 'perf list' to list available events",
2859 trace__parse_events_option),
2860 OPT_BOOLEAN(0, "comm", &trace.show_comm,
2861 "show the thread COMM next to its id"),
2862 OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
2863 OPT_CALLBACK(0, "expr", &trace, "expr", "list of syscalls/events to trace",
2864 trace__parse_events_option),
2865 OPT_STRING('o', "output", &output_name, "file", "output file name"),
2866 OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
2867 OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
2868 "trace events on existing process id"),
2869 OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
2870 "trace events on existing thread id"),
2871 OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids",
2872 "pids to filter (by the kernel)", trace__set_filter_pids),
2873 OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
2874 "system-wide collection from all CPUs"),
2875 OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
2876 "list of cpus to monitor"),
2877 OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
2878 "child tasks do not inherit counters"),
2879 OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
2880 "number of mmap data pages",
2881 perf_evlist__parse_mmap_pages),
2882 OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
2884 OPT_CALLBACK(0, "duration", &trace, "float",
2885 "show only events with duration > N.M ms",
2886 trace__set_duration),
2887 OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
2888 OPT_INCR('v', "verbose", &verbose, "be more verbose"),
2889 OPT_BOOLEAN('T', "time", &trace.full_time,
2890 "Show full timestamp, not time relative to first start"),
2891 OPT_BOOLEAN('s', "summary", &trace.summary_only,
2892 "Show only syscall summary with statistics"),
2893 OPT_BOOLEAN('S', "with-summary", &trace.summary,
2894 "Show all syscalls and summary with statistics"),
2895 OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
2896 "Trace pagefaults", parse_pagefaults, "maj"),
2897 OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
2898 OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
2899 OPT_CALLBACK(0, "call-graph", &trace.opts,
2900 "record_mode[,record_size]", record_callchain_help,
2901 &record_parse_callchain_opt),
2902 OPT_BOOLEAN(0, "kernel-syscall-graph", &trace.kernel_syscallchains,
2903 "Show the kernel callchains on the syscall exit path"),
2904 OPT_UINTEGER(0, "min-stack", &trace.min_stack,
2905 "Set the minimum stack depth when parsing the callchain, "
2906 "anything below the specified depth will be ignored."),
2907 OPT_UINTEGER(0, "max-stack", &trace.max_stack,
2908 "Set the maximum stack depth when parsing the callchain, "
2909 "anything beyond the specified depth will be ignored. "
2910 "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)),
2911 OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
2912 "per thread proc mmap processing timeout in ms"),
2913 OPT_UINTEGER('D', "delay", &trace.opts.initial_delay,
2914 "ms to wait before starting measurement after program "
2918 bool __maybe_unused max_stack_user_set = true;
2919 bool mmap_pages_user_set = true;
2920 const char * const trace_subcommands[] = { "record", NULL };
2924 signal(SIGSEGV, sighandler_dump_stack);
2925 signal(SIGFPE, sighandler_dump_stack);
2927 trace.evlist = perf_evlist__new();
2928 trace.sctbl = syscalltbl__new();
2930 if (trace.evlist == NULL || trace.sctbl == NULL) {
2931 pr_err("Not enough memory to run!\n");
2936 argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
2937 trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);
2939 err = bpf__setup_stdout(trace.evlist);
2941 bpf__strerror_setup_stdout(trace.evlist, err, bf, sizeof(bf));
2942 pr_err("ERROR: Setup BPF stdout failed: %s\n", bf);
2948 if (trace.trace_pgfaults) {
2949 trace.opts.sample_address = true;
2950 trace.opts.sample_time = true;
2953 if (trace.opts.mmap_pages == UINT_MAX)
2954 mmap_pages_user_set = false;
2956 if (trace.max_stack == UINT_MAX) {
2957 trace.max_stack = input_name ? PERF_MAX_STACK_DEPTH : sysctl_perf_event_max_stack;
2958 max_stack_user_set = false;
2961 #ifdef HAVE_DWARF_UNWIND_SUPPORT
2962 if ((trace.min_stack || max_stack_user_set) && !callchain_param.enabled && trace.trace_syscalls)
2963 record_opts__parse_callchain(&trace.opts, &callchain_param, "dwarf", false);
2966 if (callchain_param.enabled) {
2967 if (!mmap_pages_user_set && geteuid() == 0)
2968 trace.opts.mmap_pages = perf_event_mlock_kb_in_pages() * 4;
2970 symbol_conf.use_callchain = true;
2973 if (trace.evlist->nr_entries > 0)
2974 evlist__set_evsel_handler(trace.evlist, trace__event_handler);
2976 if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
2977 return trace__record(&trace, argc-1, &argv[1]);
2979 /* summary_only implies summary option, but don't overwrite summary if set */
2980 if (trace.summary_only)
2981 trace.summary = trace.summary_only;
2983 if (!trace.trace_syscalls && !trace.trace_pgfaults &&
2984 trace.evlist->nr_entries == 0 /* Was --events used? */) {
2985 pr_err("Please specify something to trace.\n");
2989 if (!trace.trace_syscalls && trace.ev_qualifier) {
2990 pr_err("The -e option can't be used with --no-syscalls.\n");
2994 if (output_name != NULL) {
2995 err = trace__open_output(&trace, output_name);
2997 perror("failed to create output file");
3002 trace.open_id = syscalltbl__id(trace.sctbl, "open");
3004 err = target__validate(&trace.opts.target);
3006 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
3007 fprintf(trace.output, "%s", bf);
3011 err = target__parse_uid(&trace.opts.target);
3013 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
3014 fprintf(trace.output, "%s", bf);
3018 if (!argc && target__none(&trace.opts.target))
3019 trace.opts.target.system_wide = true;
3022 err = trace__replay(&trace);
3024 err = trace__run(&trace, argc, argv);
3027 if (output_name != NULL)
3028 fclose(trace.output);