Merge tag 'mmc-v4.14-rc4-3' of git://git.kernel.org/pub/scm/linux/kernel/git/ulfh/mmc
[linux-2.6-microblaze.git] / tools / perf / builtin-trace.c
1 /*
2  * builtin-trace.c
3  *
4  * Builtin 'trace' command:
5  *
6  * Display a continuously updated trace of any workload, CPU, specific PID,
7  * system wide, etc.  Default format is loosely strace like, but any other
8  * event may be specified using --event.
9  *
10  * Copyright (C) 2012, 2013, 2014, 2015 Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
11  *
12  * Initially based on the 'trace' prototype by Thomas Gleixner:
13  *
14  * http://lwn.net/Articles/415728/ ("Announcing a new utility: 'trace'")
15  *
16  * Released under the GPL v2. (and only v2, not any later version)
17  */
18
19 #include <traceevent/event-parse.h>
20 #include <api/fs/tracing_path.h>
21 #include "builtin.h"
22 #include "util/color.h"
23 #include "util/debug.h"
24 #include "util/event.h"
25 #include "util/evlist.h"
26 #include <subcmd/exec-cmd.h>
27 #include "util/machine.h"
28 #include "util/path.h"
29 #include "util/session.h"
30 #include "util/thread.h"
31 #include <subcmd/parse-options.h>
32 #include "util/strlist.h"
33 #include "util/intlist.h"
34 #include "util/thread_map.h"
35 #include "util/stat.h"
36 #include "trace/beauty/beauty.h"
37 #include "trace-event.h"
38 #include "util/parse-events.h"
39 #include "util/bpf-loader.h"
40 #include "callchain.h"
41 #include "print_binary.h"
42 #include "string2.h"
43 #include "syscalltbl.h"
44 #include "rb_resort.h"
45
46 #include <errno.h>
47 #include <inttypes.h>
48 #include <libaudit.h> /* FIXME: Still needed for audit_errno_to_name */
49 #include <poll.h>
50 #include <signal.h>
51 #include <stdlib.h>
52 #include <string.h>
53 #include <linux/err.h>
54 #include <linux/filter.h>
55 #include <linux/audit.h>
56 #include <linux/kernel.h>
57 #include <linux/random.h>
58 #include <linux/stringify.h>
59 #include <linux/time64.h>
60
61 #include "sane_ctype.h"
62
63 #ifndef O_CLOEXEC
64 # define O_CLOEXEC              02000000
65 #endif
66
67 #ifndef F_LINUX_SPECIFIC_BASE
68 # define F_LINUX_SPECIFIC_BASE  1024
69 #endif
70
/*
 * All the state for one 'perf trace' session: the perf_tool callbacks,
 * the evlist being monitored, the per-syscall table and the
 * user-selected filters and output options.
 */
struct trace {
	struct perf_tool	tool;
	struct syscalltbl	*sctbl;
	struct {
		int		max;	/* presumably the highest valid index in 'table' — verify against users */
		struct syscall	*table;
		struct {	/* the sys_enter/sys_exit tracepoint evsels, see perf_evsel__syscall_newtp() */
			struct perf_evsel *sys_enter,
					  *sys_exit;
		}		events;
	} syscalls;
	struct record_opts	opts;
	struct perf_evlist	*evlist;
	struct machine		*host;
	struct thread		*current;	/* thread whose events we are currently printing */
	u64			base_time;
	FILE			*output;	/* where formatted trace lines go */
	unsigned long		nr_events;
	struct strlist		*ev_qualifier;	/* user-supplied list of syscalls to show (or hide) */
	struct {
		size_t		nr;
		int		*entries;	/* ev_qualifier resolved to syscall ids */
	}			ev_qualifier_ids;
	struct {
		size_t		nr;
		pid_t		*entries;	/* pids to filter out of the trace */
	}			filter_pids;
	double			duration_filter;	/* only show syscalls lasting at least this long */
	double			runtime_ms;
	struct {
		u64		vfs_getname,	/* tool self-stats — counters, semantics set by users elsewhere */
				proc_getname;
	} stats;
	unsigned int		max_stack;	/* callchain depth limits */
	unsigned int		min_stack;
	bool			not_ev_qualifier;	/* ev_qualifier lists syscalls to *hide* */
	bool			live;
	bool			full_time;
	bool			sched;
	bool			multiple_threads;
	bool			summary;
	bool			summary_only;
	bool			show_comm;
	bool			show_tool_stats;
	bool			trace_syscalls;
	bool			kernel_syscallchains;
	bool			force;
	bool			vfs_getname;
	int			trace_pgfaults;
	int			open_id;	/* syscall id for "open", cached — TODO confirm against users */
};
122
/*
 * Accessor for one field of a tracepoint's raw payload: 'offset' locates
 * the field inside sample->raw_data, and one of the union members decodes
 * it, either as an integer (possibly byte-swapped) or as a raw pointer.
 */
struct tp_field {
	int offset;
	union {
		u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
		void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
	};
};

/*
 * Generate tp_field__u{8,16,32,64}(): copy a fixed-width unsigned integer
 * out of the raw sample (memcpy, so unaligned payloads are fine) and widen
 * it to u64.
 */
#define TP_UINT_FIELD(bits) \
static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
{ \
	u##bits value; \
	memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
	return value;  \
}

TP_UINT_FIELD(8);
TP_UINT_FIELD(16);
TP_UINT_FIELD(32);
TP_UINT_FIELD(64);

/*
 * Same as TP_UINT_FIELD, but byte-swapping the value — used when the
 * recorded data and the host differ in endianness (evsel->needs_swap).
 */
#define TP_UINT_FIELD__SWAPPED(bits) \
static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
{ \
	u##bits value; \
	memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
	return bswap_##bits(value);\
}

TP_UINT_FIELD__SWAPPED(16);
TP_UINT_FIELD__SWAPPED(32);
TP_UINT_FIELD__SWAPPED(64);
155
156 static int tp_field__init_uint(struct tp_field *field,
157                                struct format_field *format_field,
158                                bool needs_swap)
159 {
160         field->offset = format_field->offset;
161
162         switch (format_field->size) {
163         case 1:
164                 field->integer = tp_field__u8;
165                 break;
166         case 2:
167                 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
168                 break;
169         case 4:
170                 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
171                 break;
172         case 8:
173                 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
174                 break;
175         default:
176                 return -1;
177         }
178
179         return 0;
180 }
181
182 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
183 {
184         return sample->raw_data + field->offset;
185 }
186
187 static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
188 {
189         field->offset = format_field->offset;
190         field->pointer = tp_field__ptr;
191         return 0;
192 }
193
/*
 * Per-evsel (->priv) decoding state for the syscall tracepoints: where
 * to find the syscall id and — depending on direction — either the
 * entry args or the return value in the raw sample.
 */
struct syscall_tp {
	struct tp_field id;
	union {
		struct tp_field args, ret;	/* sys_enter: args; sys_exit: ret — presumably; set up by callers */
	};
};
200
/*
 * Look up tracepoint field 'name' in the evsel's format and bind an
 * integer accessor for it on 'field'. Returns -1 if the field doesn't
 * exist or has an unsupported size.
 */
static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
					  struct tp_field *field,
					  const char *name)
{
	struct format_field *format_field = perf_evsel__field(evsel, name);

	if (format_field == NULL)
		return -1;

	return tp_field__init_uint(field, format_field, evsel->needs_swap);
}

/*
 * Same, but targets the struct syscall_tp member of the same name
 * (evsel->priv must already point to one). Uses a GNU statement
 * expression so it can be used as an rvalue.
 */
#define perf_evsel__init_sc_tp_uint_field(evsel, name) \
	({ struct syscall_tp *sc = evsel->priv;\
	   perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); })

/* Pointer-field counterpart of perf_evsel__init_tp_uint_field(). */
static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
					 struct tp_field *field,
					 const char *name)
{
	struct format_field *format_field = perf_evsel__field(evsel, name);

	if (format_field == NULL)
		return -1;

	return tp_field__init_ptr(field, format_field);
}

/* Pointer-field counterpart of perf_evsel__init_sc_tp_uint_field(). */
#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
	({ struct syscall_tp *sc = evsel->priv;\
	   perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name); })
232
/* Free the evsel's ->priv (syscall_tp) state, then the evsel itself. */
static void perf_evsel__delete_priv(struct perf_evsel *evsel)
{
	zfree(&evsel->priv);
	perf_evsel__delete(evsel);
}
238
239 static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
240 {
241         evsel->priv = malloc(sizeof(struct syscall_tp));
242         if (evsel->priv != NULL) {
243                 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
244                         goto out_delete;
245
246                 evsel->handler = handler;
247                 return 0;
248         }
249
250         return -ENOMEM;
251
252 out_delete:
253         zfree(&evsel->priv);
254         return -ENOENT;
255 }
256
257 static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
258 {
259         struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);
260
261         /* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */
262         if (IS_ERR(evsel))
263                 evsel = perf_evsel__newtp("syscalls", direction);
264
265         if (IS_ERR(evsel))
266                 return NULL;
267
268         if (perf_evsel__init_syscall_tp(evsel, handler))
269                 goto out_delete;
270
271         return evsel;
272
273 out_delete:
274         perf_evsel__delete_priv(evsel);
275         return NULL;
276 }
277
/*
 * Read tracepoint field 'name' from 'sample' using the accessors bound
 * at perf_evsel__init_syscall_tp() time (evsel->priv is a syscall_tp).
 * Statement expressions, so both can be used as rvalues.
 */
#define perf_evsel__sc_tp_uint(evsel, name, sample) \
	({ struct syscall_tp *fields = evsel->priv; \
	   fields->name.integer(&fields->name, sample); })

#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
	({ struct syscall_tp *fields = evsel->priv; \
	   fields->name.pointer(&fields->name, sample); })
285
286 size_t strarray__scnprintf(struct strarray *sa, char *bf, size_t size, const char *intfmt, int val)
287 {
288         int idx = val - sa->offset;
289
290         if (idx < 0 || idx >= sa->nr_entries)
291                 return scnprintf(bf, size, intfmt, val);
292
293         return scnprintf(bf, size, "%s", sa->entries[idx]);
294 }
295
296 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
297                                                 const char *intfmt,
298                                                 struct syscall_arg *arg)
299 {
300         return strarray__scnprintf(arg->parm, bf, size, intfmt, arg->val);
301 }
302
303 static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
304                                               struct syscall_arg *arg)
305 {
306         return __syscall_arg__scnprintf_strarray(bf, size, "%d", arg);
307 }
308
309 #define SCA_STRARRAY syscall_arg__scnprintf_strarray
310
/* A set of strarray tables to be tried in order, see DEFINE_STRARRAYS. */
struct strarrays {
	int		nr_entries;
	struct strarray **entries;
};

/* Wrap an array of strarray pointers into a named struct strarrays. */
#define DEFINE_STRARRAYS(array) struct strarrays strarrays__##array = { \
	.nr_entries = ARRAY_SIZE(array), \
	.entries = array, \
}
320
321 size_t syscall_arg__scnprintf_strarrays(char *bf, size_t size,
322                                         struct syscall_arg *arg)
323 {
324         struct strarrays *sas = arg->parm;
325         int i;
326
327         for (i = 0; i < sas->nr_entries; ++i) {
328                 struct strarray *sa = sas->entries[i];
329                 int idx = arg->val - sa->offset;
330
331                 if (idx >= 0 && idx < sa->nr_entries) {
332                         if (sa->entries[idx] == NULL)
333                                 break;
334                         return scnprintf(bf, size, "%s", sa->entries[idx]);
335                 }
336         }
337
338         return scnprintf(bf, size, "%d", arg->val);
339 }
340
#ifndef AT_FDCWD
#define AT_FDCWD	-100
#endif

/* Decode *at() dirfd arguments: AT_FDCWD prints as "CWD", else as a plain fd. */
static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
					   struct syscall_arg *arg)
{
	int fd = arg->val;

	if (fd == AT_FDCWD)
		return scnprintf(bf, size, "CWD");

	return syscall_arg__scnprintf_fd(bf, size, arg);
}

#define SCA_FDAT syscall_arg__scnprintf_fd_at

/* Forward declaration — the close(fd) beautifier is defined later in this file. */
static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
					      struct syscall_arg *arg);

#define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
362
363 size_t syscall_arg__scnprintf_hex(char *bf, size_t size, struct syscall_arg *arg)
364 {
365         return scnprintf(bf, size, "%#lx", arg->val);
366 }
367
368 size_t syscall_arg__scnprintf_int(char *bf, size_t size, struct syscall_arg *arg)
369 {
370         return scnprintf(bf, size, "%d", arg->val);
371 }
372
373 size_t syscall_arg__scnprintf_long(char *bf, size_t size, struct syscall_arg *arg)
374 {
375         return scnprintf(bf, size, "%ld", arg->val);
376 }
377
/*
 * Symbolic-name tables for enum-like syscall arguments, consumed via
 * SCA_STRARRAY / STRARRAY() below. Each index (plus the DEFINE_STRARRAY
 * offset, default 0) must match the kernel's numeric value for that name.
 */

/* bpf(2) cmd values: BPF_MAP_CREATE = 0, ... */
static const char *bpf_cmd[] = {
	"MAP_CREATE", "MAP_LOOKUP_ELEM", "MAP_UPDATE_ELEM", "MAP_DELETE_ELEM",
	"MAP_GET_NEXT_KEY", "PROG_LOAD",
};
static DEFINE_STRARRAY(bpf_cmd);

/* epoll_ctl(2) op values start at 1 (EPOLL_CTL_ADD), hence the offset. */
static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);

/* {get,set}itimer(2) 'which': ITIMER_REAL/VIRTUAL/PROF. */
static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
static DEFINE_STRARRAY(itimers);

/* keyctl(2) operation codes. */
static const char *keyctl_options[] = {
	"GET_KEYRING_ID", "JOIN_SESSION_KEYRING", "UPDATE", "REVOKE", "CHOWN",
	"SETPERM", "DESCRIBE", "CLEAR", "LINK", "UNLINK", "SEARCH", "READ",
	"INSTANTIATE", "NEGATE", "SET_REQKEY_KEYRING", "SET_TIMEOUT",
	"ASSUME_AUTHORITY", "GET_SECURITY", "SESSION_TO_PARENT", "REJECT",
	"INSTANTIATE_IOV", "INVALIDATE", "GET_PERSISTENT",
};
static DEFINE_STRARRAY(keyctl_options);

/* lseek(2) 'whence'; DATA/HOLE only when the libc headers define them. */
static const char *whences[] = { "SET", "CUR", "END",
#ifdef SEEK_DATA
"DATA",
#endif
#ifdef SEEK_HOLE
"HOLE",
#endif
};
static DEFINE_STRARRAY(whences);

/* fcntl(2) commands in the classic (non Linux-specific) range. */
static const char *fcntl_cmds[] = {
	"DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
	"SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "GETLK64",
	"SETLK64", "SETLKW64", "SETOWN_EX", "GETOWN_EX",
	"GETOWNER_UIDS",
};
static DEFINE_STRARRAY(fcntl_cmds);

/* fcntl(2) commands starting at F_LINUX_SPECIFIC_BASE (1024); [5] skips a hole. */
static const char *fcntl_linux_specific_cmds[] = {
	"SETLEASE", "GETLEASE", "NOTIFY", [5] = "CANCELLK", "DUPFD_CLOEXEC",
	"SETPIPE_SZ", "GETPIPE_SZ", "ADD_SEALS", "GET_SEALS",
	"GET_RW_HINT", "SET_RW_HINT", "GET_FILE_RW_HINT", "SET_FILE_RW_HINT",
};

static DEFINE_STRARRAY_OFFSET(fcntl_linux_specific_cmds, F_LINUX_SPECIFIC_BASE);

/* Both fcntl ranges, tried in order via SCA_FCNTL_CMD. */
static struct strarray *fcntl_cmds_arrays[] = {
	&strarray__fcntl_cmds,
	&strarray__fcntl_linux_specific_cmds,
};

static DEFINE_STRARRAYS(fcntl_cmds_arrays);

/* {get,set}rlimit(2)/prlimit64(2) resources. */
static const char *rlimit_resources[] = {
	"CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
	"MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
	"RTTIME",
};
static DEFINE_STRARRAY(rlimit_resources);

/* rt_sigprocmask(2) 'how': SIG_BLOCK/UNBLOCK/SETMASK. */
static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
static DEFINE_STRARRAY(sighow);

/* clock_gettime(2) clock ids. */
static const char *clockid[] = {
	"REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
	"MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE", "BOOTTIME",
	"REALTIME_ALARM", "BOOTTIME_ALARM", "SGI_CYCLE", "TAI"
};
static DEFINE_STRARRAY(clockid);

/* socket(2)/socketpair(2) address families (AF_*). */
static const char *socket_families[] = {
	"UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
	"BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
	"SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
	"RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
	"BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
	"ALG", "NFC", "VSOCK",
};
static DEFINE_STRARRAY(socket_families);
458
459 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
460                                                  struct syscall_arg *arg)
461 {
462         size_t printed = 0;
463         int mode = arg->val;
464
465         if (mode == F_OK) /* 0 */
466                 return scnprintf(bf, size, "F");
467 #define P_MODE(n) \
468         if (mode & n##_OK) { \
469                 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
470                 mode &= ~n##_OK; \
471         }
472
473         P_MODE(R);
474         P_MODE(W);
475         P_MODE(X);
476 #undef P_MODE
477
478         if (mode)
479                 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
480
481         return printed;
482 }
483
484 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
485
/* Forward declaration — the filename beautifier is defined later in this file. */
static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
					      struct syscall_arg *arg);

#define SCA_FILENAME syscall_arg__scnprintf_filename
490
491 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
492                                                 struct syscall_arg *arg)
493 {
494         int printed = 0, flags = arg->val;
495
496 #define P_FLAG(n) \
497         if (flags & O_##n) { \
498                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
499                 flags &= ~O_##n; \
500         }
501
502         P_FLAG(CLOEXEC);
503         P_FLAG(NONBLOCK);
504 #undef P_FLAG
505
506         if (flags)
507                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
508
509         return printed;
510 }
511
512 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
513
514 #ifndef GRND_NONBLOCK
515 #define GRND_NONBLOCK   0x0001
516 #endif
517 #ifndef GRND_RANDOM
518 #define GRND_RANDOM     0x0002
519 #endif
520
521 static size_t syscall_arg__scnprintf_getrandom_flags(char *bf, size_t size,
522                                                    struct syscall_arg *arg)
523 {
524         int printed = 0, flags = arg->val;
525
526 #define P_FLAG(n) \
527         if (flags & GRND_##n) { \
528                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
529                 flags &= ~GRND_##n; \
530         }
531
532         P_FLAG(RANDOM);
533         P_FLAG(NONBLOCK);
534 #undef P_FLAG
535
536         if (flags)
537                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
538
539         return printed;
540 }
541
542 #define SCA_GETRANDOM_FLAGS syscall_arg__scnprintf_getrandom_flags
543
/* Shorthand initializer for a syscall_arg_fmt printing via a strarray table. */
#define STRARRAY(name, array) \
	  { .scnprintf	= SCA_STRARRAY, \
	    .parm	= &strarray__##array, }

/*
 * The beauty/ formatters are #included as .c files so they become part
 * of this compilation unit — presumably to share the static helpers and
 * SCA_* macros above; they define the SCA_* names used in syscall_fmts[].
 */
#include "trace/beauty/eventfd.c"
#include "trace/beauty/flock.c"
#include "trace/beauty/futex_op.c"
#include "trace/beauty/mmap.c"
#include "trace/beauty/mode_t.c"
#include "trace/beauty/msg_flags.c"
#include "trace/beauty/open_flags.c"
#include "trace/beauty/perf_event_open.c"
#include "trace/beauty/pid.c"
#include "trace/beauty/sched_policy.c"
#include "trace/beauty/seccomp.c"
#include "trace/beauty/signum.c"
#include "trace/beauty/socket_type.c"
#include "trace/beauty/waitid_options.c"
562
/* How to pretty-print one syscall argument. */
struct syscall_arg_fmt {
	size_t	   (*scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
	void	   *parm;	/* formatter-private data, e.g. a strarray */
	const char *name;
	bool	   show_zero;	/* print the arg even when its value is 0 */
};
569
/*
 * Per-syscall pretty-printing overrides, keyed by syscall name.
 *
 * NOTE: entries MUST be kept sorted by ->name — syscall_fmt__find()
 * below bsearch()es this array.
 */
static struct syscall_fmt {
	const char *name;
	const char *alias;	/* alternate syscall name to also match */
	struct syscall_arg_fmt arg[6];
	u8	   nr_args;	/* set when arg names are supplied here (e.g. clone) */
	bool	   errpid;	/* return value is a pid (negative means errno) — TODO confirm against users */
	bool	   timeout;	/* syscall takes a timeout argument */
	bool	   hexret;	/* print the return value in hex */
} syscall_fmts[] = {
	{ .name	    = "access",
	  .arg = { [1] = { .scnprintf = SCA_ACCMODE,  /* mode */ }, }, },
	{ .name	    = "arch_prctl", .alias = "prctl", },
	{ .name	    = "bpf",
	  .arg = { [0] = STRARRAY(cmd, bpf_cmd), }, },
	{ .name	    = "brk",	    .hexret = true,
	  .arg = { [0] = { .scnprintf = SCA_HEX, /* brk */ }, }, },
	{ .name     = "clock_gettime",
	  .arg = { [0] = STRARRAY(clk_id, clockid), }, },
	{ .name	    = "clone",	    .errpid = true, .nr_args = 5,
	  .arg = { [0] = { .name = "flags",	    .scnprintf = SCA_CLONE_FLAGS, },
		   [1] = { .name = "child_stack",   .scnprintf = SCA_HEX, },
		   [2] = { .name = "parent_tidptr", .scnprintf = SCA_HEX, },
		   [3] = { .name = "child_tidptr",  .scnprintf = SCA_HEX, },
		   [4] = { .name = "tls",	    .scnprintf = SCA_HEX, }, }, },
	{ .name	    = "close",
	  .arg = { [0] = { .scnprintf = SCA_CLOSE_FD, /* fd */ }, }, },
	{ .name	    = "epoll_ctl",
	  .arg = { [1] = STRARRAY(op, epoll_ctl_ops), }, },
	{ .name	    = "eventfd2",
	  .arg = { [1] = { .scnprintf = SCA_EFD_FLAGS, /* flags */ }, }, },
	{ .name	    = "fchmodat",
	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
	{ .name	    = "fchownat",
	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
	{ .name	    = "fcntl",
	  .arg = { [1] = { .scnprintf = SCA_FCNTL_CMD, /* cmd */
			   .parm      = &strarrays__fcntl_cmds_arrays,
			   .show_zero = true, },
		   [2] = { .scnprintf =  SCA_FCNTL_ARG, /* arg */ }, }, },
	{ .name	    = "flock",
	  .arg = { [1] = { .scnprintf = SCA_FLOCK, /* cmd */ }, }, },
	{ .name	    = "fstat", .alias = "newfstat", },
	{ .name	    = "fstatat", .alias = "newfstatat", },
	{ .name	    = "futex",
	  .arg = { [1] = { .scnprintf = SCA_FUTEX_OP, /* op */ }, }, },
	{ .name	    = "futimesat",
	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
	{ .name	    = "getitimer",
	  .arg = { [0] = STRARRAY(which, itimers), }, },
	{ .name	    = "getpid",	    .errpid = true, },
	{ .name	    = "getpgid",    .errpid = true, },
	{ .name	    = "getppid",    .errpid = true, },
	{ .name	    = "getrandom",
	  .arg = { [2] = { .scnprintf = SCA_GETRANDOM_FLAGS, /* flags */ }, }, },
	{ .name	    = "getrlimit",
	  .arg = { [0] = STRARRAY(resource, rlimit_resources), }, },
	{ .name	    = "ioctl",
	  .arg = {
#if defined(__i386__) || defined(__x86_64__)
/*
 * FIXME: Make this available to all arches.
 */
		   [1] = { .scnprintf = SCA_IOCTL_CMD, /* cmd */ },
		   [2] = { .scnprintf = SCA_HEX, /* arg */ }, }, },
#else
		   [2] = { .scnprintf = SCA_HEX, /* arg */ }, }, },
#endif
	{ .name	    = "keyctl",
	  .arg = { [0] = STRARRAY(option, keyctl_options), }, },
	{ .name	    = "kill",
	  .arg = { [1] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
	{ .name	    = "linkat",
	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
	{ .name	    = "lseek",
	  .arg = { [2] = STRARRAY(whence, whences), }, },
	{ .name	    = "lstat", .alias = "newlstat", },
	{ .name     = "madvise",
	  .arg = { [0] = { .scnprintf = SCA_HEX,      /* start */ },
		   [2] = { .scnprintf = SCA_MADV_BHV, /* behavior */ }, }, },
	{ .name	    = "mkdirat",
	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
	{ .name	    = "mknodat",
	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
	{ .name	    = "mlock",
	  .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ }, }, },
	{ .name	    = "mlockall",
	  .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ }, }, },
	{ .name	    = "mmap",	    .hexret = true,
/* The standard mmap maps to old_mmap on s390x */
#if defined(__s390x__)
	.alias = "old_mmap",
#endif
	  .arg = { [0] = { .scnprintf = SCA_HEX,	/* addr */ },
		   [2] = { .scnprintf = SCA_MMAP_PROT,	/* prot */ },
		   [3] = { .scnprintf = SCA_MMAP_FLAGS,	/* flags */ }, }, },
	{ .name	    = "mprotect",
	  .arg = { [0] = { .scnprintf = SCA_HEX,	/* start */ },
		   [2] = { .scnprintf = SCA_MMAP_PROT,	/* prot */ }, }, },
	{ .name	    = "mq_unlink",
	  .arg = { [0] = { .scnprintf = SCA_FILENAME, /* u_name */ }, }, },
	{ .name	    = "mremap",	    .hexret = true,
	  .arg = { [0] = { .scnprintf = SCA_HEX,	  /* addr */ },
		   [3] = { .scnprintf = SCA_MREMAP_FLAGS, /* flags */ },
		   [4] = { .scnprintf = SCA_HEX,	  /* new_addr */ }, }, },
	{ .name	    = "munlock",
	  .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ }, }, },
	{ .name	    = "munmap",
	  .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ }, }, },
	{ .name	    = "name_to_handle_at",
	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
	{ .name	    = "newfstatat",
	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
	{ .name	    = "open",
	  .arg = { [1] = { .scnprintf = SCA_OPEN_FLAGS, /* flags */ }, }, },
	{ .name	    = "open_by_handle_at",
	  .arg = { [0] = { .scnprintf = SCA_FDAT,	/* dfd */ },
		   [2] = { .scnprintf = SCA_OPEN_FLAGS, /* flags */ }, }, },
	{ .name	    = "openat",
	  .arg = { [0] = { .scnprintf = SCA_FDAT,	/* dfd */ },
		   [2] = { .scnprintf = SCA_OPEN_FLAGS, /* flags */ }, }, },
	{ .name	    = "perf_event_open",
	  .arg = { [2] = { .scnprintf = SCA_INT,	/* cpu */ },
		   [3] = { .scnprintf = SCA_FD,		/* group_fd */ },
		   [4] = { .scnprintf = SCA_PERF_FLAGS, /* flags */ }, }, },
	{ .name	    = "pipe2",
	  .arg = { [1] = { .scnprintf = SCA_PIPE_FLAGS, /* flags */ }, }, },
	{ .name	    = "pkey_alloc",
	  .arg = { [1] = { .scnprintf = SCA_PKEY_ALLOC_ACCESS_RIGHTS,	/* access_rights */ }, }, },
	{ .name	    = "pkey_free",
	  .arg = { [0] = { .scnprintf = SCA_INT,	/* key */ }, }, },
	{ .name	    = "pkey_mprotect",
	  .arg = { [0] = { .scnprintf = SCA_HEX,	/* start */ },
		   [2] = { .scnprintf = SCA_MMAP_PROT,	/* prot */ },
		   [3] = { .scnprintf = SCA_INT,	/* pkey */ }, }, },
	{ .name	    = "poll", .timeout = true, },
	{ .name	    = "ppoll", .timeout = true, },
	{ .name	    = "pread", .alias = "pread64", },
	{ .name	    = "preadv", .alias = "pread", },
	{ .name	    = "prlimit64",
	  .arg = { [1] = STRARRAY(resource, rlimit_resources), }, },
	{ .name	    = "pwrite", .alias = "pwrite64", },
	{ .name	    = "readlinkat",
	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
	{ .name	    = "recvfrom",
	  .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
	{ .name	    = "recvmmsg",
	  .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
	{ .name	    = "recvmsg",
	  .arg = { [2] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
	{ .name	    = "renameat",
	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
	{ .name	    = "rt_sigaction",
	  .arg = { [0] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
	{ .name	    = "rt_sigprocmask",
	  .arg = { [0] = STRARRAY(how, sighow), }, },
	{ .name	    = "rt_sigqueueinfo",
	  .arg = { [1] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
	{ .name	    = "rt_tgsigqueueinfo",
	  .arg = { [2] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
	{ .name	    = "sched_setscheduler",
	  .arg = { [1] = { .scnprintf = SCA_SCHED_POLICY, /* policy */ }, }, },
	{ .name	    = "seccomp",
	  .arg = { [0] = { .scnprintf = SCA_SECCOMP_OP,	   /* op */ },
		   [1] = { .scnprintf = SCA_SECCOMP_FLAGS, /* flags */ }, }, },
	{ .name	    = "select", .timeout = true, },
	{ .name	    = "sendmmsg",
	  .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
	{ .name	    = "sendmsg",
	  .arg = { [2] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
	{ .name	    = "sendto",
	  .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
	{ .name	    = "set_tid_address", .errpid = true, },
	{ .name	    = "setitimer",
	  .arg = { [0] = STRARRAY(which, itimers), }, },
	{ .name	    = "setrlimit",
	  .arg = { [0] = STRARRAY(resource, rlimit_resources), }, },
	{ .name	    = "socket",
	  .arg = { [0] = STRARRAY(family, socket_families),
		   [1] = { .scnprintf = SCA_SK_TYPE, /* type */ }, }, },
	{ .name	    = "socketpair",
	  .arg = { [0] = STRARRAY(family, socket_families),
		   [1] = { .scnprintf = SCA_SK_TYPE, /* type */ }, }, },
	{ .name	    = "stat", .alias = "newstat", },
	{ .name	    = "statx",
	  .arg = { [0] = { .scnprintf = SCA_FDAT,	 /* fdat */ },
		   [2] = { .scnprintf = SCA_STATX_FLAGS, /* flags */ } ,
		   [3] = { .scnprintf = SCA_STATX_MASK,	 /* mask */ }, }, },
	{ .name	    = "swapoff",
	  .arg = { [0] = { .scnprintf = SCA_FILENAME, /* specialfile */ }, }, },
	{ .name	    = "swapon",
	  .arg = { [0] = { .scnprintf = SCA_FILENAME, /* specialfile */ }, }, },
	{ .name	    = "symlinkat",
	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
	{ .name	    = "tgkill",
	  .arg = { [2] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
	{ .name	    = "tkill",
	  .arg = { [1] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
	{ .name	    = "uname", .alias = "newuname", },
	{ .name	    = "unlinkat",
	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
	{ .name	    = "utimensat",
	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* dirfd */ }, }, },
	{ .name	    = "wait4",	    .errpid = true,
	  .arg = { [2] = { .scnprintf = SCA_WAITID_OPTIONS, /* options */ }, }, },
	{ .name	    = "waitid",	    .errpid = true,
	  .arg = { [3] = { .scnprintf = SCA_WAITID_OPTIONS, /* options */ }, }, },
};
777
778 static int syscall_fmt__cmp(const void *name, const void *fmtp)
779 {
780         const struct syscall_fmt *fmt = fmtp;
781         return strcmp(name, fmt->name);
782 }
783
784 static struct syscall_fmt *syscall_fmt__find(const char *name)
785 {
786         const int nmemb = ARRAY_SIZE(syscall_fmts);
787         return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
788 }
789
/* Everything known about one syscall: tracepoint format, args and printers. */
struct syscall {
	struct event_format *tp_format;
	int		    nr_args;
	struct format_field *args;	/* tracepoint fields describing the args */
	const char	    *name;
	bool		    is_exit;	/* e.g. exit/exit_group: no sys_exit will follow — TODO confirm against users */
	struct syscall_fmt  *fmt;	/* entry in syscall_fmts[], if any */
	struct syscall_arg_fmt *arg_fmt;
};
799
800 /*
801  * We need to have this 'calculated' boolean because in some cases we really
802  * don't know what is the duration of a syscall, for instance, when we start
803  * a session and some threads are waiting for a syscall to finish, say 'poll',
804  * in which case all we can do is to print "( ? ) for duration and for the
805  * start timestamp.
806  */
807 static size_t fprintf_duration(unsigned long t, bool calculated, FILE *fp)
808 {
809         double duration = (double)t / NSEC_PER_MSEC;
810         size_t printed = fprintf(fp, "(");
811
812         if (!calculated)
813                 printed += fprintf(fp, "     ?   ");
814         else if (duration >= 1.0)
815                 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
816         else if (duration >= 0.01)
817                 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
818         else
819                 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
820         return printed + fprintf(fp, "): ");
821 }
822
823 /**
824  * filename.ptr: The filename char pointer that will be vfs_getname'd
825  * filename.entry_str_pos: Where to insert the string translated from
826  *                         filename.ptr by the vfs_getname tracepoint/kprobe.
827  * ret_scnprintf: syscall args may set this to a different syscall return
828  *                formatter, for instance, fcntl may return fds, file flags, etc.
829  */
830 struct thread_trace {
831         u64               entry_time;
832         bool              entry_pending;
833         unsigned long     nr_events;
834         unsigned long     pfmaj, pfmin;
835         char              *entry_str;
836         double            runtime_ms;
837         size_t            (*ret_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
838         struct {
839                 unsigned long ptr;
840                 short int     entry_str_pos;
841                 bool          pending_open;
842                 unsigned int  namelen;
843                 char          *name;
844         } filename;
845         struct {
846                 int       max;
847                 char      **table;
848         } paths;
849
850         struct intlist *syscall_stats;
851 };
852
853 static struct thread_trace *thread_trace__new(void)
854 {
855         struct thread_trace *ttrace =  zalloc(sizeof(struct thread_trace));
856
857         if (ttrace)
858                 ttrace->paths.max = -1;
859
860         ttrace->syscall_stats = intlist__new(NULL);
861
862         return ttrace;
863 }
864
865 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
866 {
867         struct thread_trace *ttrace;
868
869         if (thread == NULL)
870                 goto fail;
871
872         if (thread__priv(thread) == NULL)
873                 thread__set_priv(thread, thread_trace__new());
874
875         if (thread__priv(thread) == NULL)
876                 goto fail;
877
878         ttrace = thread__priv(thread);
879         ++ttrace->nr_events;
880
881         return ttrace;
882 fail:
883         color_fprintf(fp, PERF_COLOR_RED,
884                       "WARNING: not enough memory, dropping samples!\n");
885         return NULL;
886 }
887
888
889 void syscall_arg__set_ret_scnprintf(struct syscall_arg *arg,
890                                     size_t (*ret_scnprintf)(char *bf, size_t size, struct syscall_arg *arg))
891 {
892         struct thread_trace *ttrace = thread__priv(arg->thread);
893
894         ttrace->ret_scnprintf = ret_scnprintf;
895 }
896
/* Flag bits selecting major/minor page-fault tracing. */
#define TRACE_PFMAJ		(1 << 0)
#define TRACE_PFMIN		(1 << 1)

/* Size of the per-thread buffer syscall entry lines are staged in. */
static const size_t trace__entry_str_size = 2048;
901
902 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
903 {
904         struct thread_trace *ttrace = thread__priv(thread);
905
906         if (fd > ttrace->paths.max) {
907                 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
908
909                 if (npath == NULL)
910                         return -1;
911
912                 if (ttrace->paths.max != -1) {
913                         memset(npath + ttrace->paths.max + 1, 0,
914                                (fd - ttrace->paths.max) * sizeof(char *));
915                 } else {
916                         memset(npath, 0, (fd + 1) * sizeof(char *));
917                 }
918
919                 ttrace->paths.table = npath;
920                 ttrace->paths.max   = fd;
921         }
922
923         ttrace->paths.table[fd] = strdup(pathname);
924
925         return ttrace->paths.table[fd] != NULL ? 0 : -1;
926 }
927
928 static int thread__read_fd_path(struct thread *thread, int fd)
929 {
930         char linkname[PATH_MAX], pathname[PATH_MAX];
931         struct stat st;
932         int ret;
933
934         if (thread->pid_ == thread->tid) {
935                 scnprintf(linkname, sizeof(linkname),
936                           "/proc/%d/fd/%d", thread->pid_, fd);
937         } else {
938                 scnprintf(linkname, sizeof(linkname),
939                           "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
940         }
941
942         if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
943                 return -1;
944
945         ret = readlink(linkname, pathname, sizeof(pathname));
946
947         if (ret < 0 || ret > st.st_size)
948                 return -1;
949
950         pathname[ret] = '\0';
951         return trace__set_fd_pathname(thread, fd, pathname);
952 }
953
954 static const char *thread__fd_path(struct thread *thread, int fd,
955                                    struct trace *trace)
956 {
957         struct thread_trace *ttrace = thread__priv(thread);
958
959         if (ttrace == NULL)
960                 return NULL;
961
962         if (fd < 0)
963                 return NULL;
964
965         if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) {
966                 if (!trace->live)
967                         return NULL;
968                 ++trace->stats.proc_getname;
969                 if (thread__read_fd_path(thread, fd))
970                         return NULL;
971         }
972
973         return ttrace->paths.table[fd];
974 }
975
976 size_t syscall_arg__scnprintf_fd(char *bf, size_t size, struct syscall_arg *arg)
977 {
978         int fd = arg->val;
979         size_t printed = scnprintf(bf, size, "%d", fd);
980         const char *path = thread__fd_path(arg->thread, fd, arg->trace);
981
982         if (path)
983                 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
984
985         return printed;
986 }
987
988 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
989                                               struct syscall_arg *arg)
990 {
991         int fd = arg->val;
992         size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
993         struct thread_trace *ttrace = thread__priv(arg->thread);
994
995         if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
996                 zfree(&ttrace->paths.table[fd]);
997
998         return printed;
999 }
1000
1001 static void thread__set_filename_pos(struct thread *thread, const char *bf,
1002                                      unsigned long ptr)
1003 {
1004         struct thread_trace *ttrace = thread__priv(thread);
1005
1006         ttrace->filename.ptr = ptr;
1007         ttrace->filename.entry_str_pos = bf - ttrace->entry_str;
1008 }
1009
1010 static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
1011                                               struct syscall_arg *arg)
1012 {
1013         unsigned long ptr = arg->val;
1014
1015         if (!arg->trace->vfs_getname)
1016                 return scnprintf(bf, size, "%#x", ptr);
1017
1018         thread__set_filename_pos(arg->thread, bf, ptr);
1019         return 0;
1020 }
1021
1022 static bool trace__filter_duration(struct trace *trace, double t)
1023 {
1024         return t < (trace->duration_filter * NSEC_PER_MSEC);
1025 }
1026
1027 static size_t __trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1028 {
1029         double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1030
1031         return fprintf(fp, "%10.3f ", ts);
1032 }
1033
1034 /*
1035  * We're handling tstamp=0 as an undefined tstamp, i.e. like when we are
1036  * using ttrace->entry_time for a thread that receives a sys_exit without
1037  * first having received a sys_enter ("poll" issued before tracing session
1038  * starts, lost sys_enter exit due to ring buffer overflow).
1039  */
1040 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1041 {
1042         if (tstamp > 0)
1043                 return __trace__fprintf_tstamp(trace, tstamp, fp);
1044
1045         return fprintf(fp, "         ? ");
1046 }
1047
/*
 * Flags set from the asynchronous signal handler and polled by the main
 * loop.  'volatile sig_atomic_t' is the only object type the C standard
 * guarantees can be safely written from a signal handler and read
 * elsewhere (C11 7.14.1.1); plain 'bool' carries no such guarantee.
 */
static volatile sig_atomic_t done;
static volatile sig_atomic_t interrupted;

static void sig_handler(int sig)
{
	done = 1;
	/* SIGINT (ctrl-C) is distinguished so the summary can still be printed. */
	interrupted = sig == SIGINT;
}
1056
1057 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1058                                         u64 duration, bool duration_calculated, u64 tstamp, FILE *fp)
1059 {
1060         size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1061         printed += fprintf_duration(duration, duration_calculated, fp);
1062
1063         if (trace->multiple_threads) {
1064                 if (trace->show_comm)
1065                         printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1066                 printed += fprintf(fp, "%d ", thread->tid);
1067         }
1068
1069         return printed;
1070 }
1071
1072 static int trace__process_event(struct trace *trace, struct machine *machine,
1073                                 union perf_event *event, struct perf_sample *sample)
1074 {
1075         int ret = 0;
1076
1077         switch (event->header.type) {
1078         case PERF_RECORD_LOST:
1079                 color_fprintf(trace->output, PERF_COLOR_RED,
1080                               "LOST %" PRIu64 " events!\n", event->lost.lost);
1081                 ret = machine__process_lost_event(machine, event, sample);
1082                 break;
1083         default:
1084                 ret = machine__process_event(machine, event, sample);
1085                 break;
1086         }
1087
1088         return ret;
1089 }
1090
1091 static int trace__tool_process(struct perf_tool *tool,
1092                                union perf_event *event,
1093                                struct perf_sample *sample,
1094                                struct machine *machine)
1095 {
1096         struct trace *trace = container_of(tool, struct trace, tool);
1097         return trace__process_event(trace, machine, event, sample);
1098 }
1099
1100 static char *trace__machine__resolve_kernel_addr(void *vmachine, unsigned long long *addrp, char **modp)
1101 {
1102         struct machine *machine = vmachine;
1103
1104         if (machine->kptr_restrict_warned)
1105                 return NULL;
1106
1107         if (symbol_conf.kptr_restrict) {
1108                 pr_warning("Kernel address maps (/proc/{kallsyms,modules}) are restricted.\n\n"
1109                            "Check /proc/sys/kernel/kptr_restrict.\n\n"
1110                            "Kernel samples will not be resolved.\n");
1111                 machine->kptr_restrict_warned = true;
1112                 return NULL;
1113         }
1114
1115         return machine__resolve_kernel_addr(vmachine, addrp, modp);
1116 }
1117
1118 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1119 {
1120         int err = symbol__init(NULL);
1121
1122         if (err)
1123                 return err;
1124
1125         trace->host = machine__new_host();
1126         if (trace->host == NULL)
1127                 return -ENOMEM;
1128
1129         if (trace_event__register_resolver(trace->host, trace__machine__resolve_kernel_addr) < 0)
1130                 return -errno;
1131
1132         err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1133                                             evlist->threads, trace__tool_process, false,
1134                                             trace->opts.proc_map_timeout);
1135         if (err)
1136                 symbol__exit();
1137
1138         return err;
1139 }
1140
1141 static int syscall__alloc_arg_fmts(struct syscall *sc, int nr_args)
1142 {
1143         int idx;
1144
1145         if (nr_args == 6 && sc->fmt && sc->fmt->nr_args != 0)
1146                 nr_args = sc->fmt->nr_args;
1147
1148         sc->arg_fmt = calloc(nr_args, sizeof(*sc->arg_fmt));
1149         if (sc->arg_fmt == NULL)
1150                 return -1;
1151
1152         for (idx = 0; idx < nr_args; ++idx) {
1153                 if (sc->fmt)
1154                         sc->arg_fmt[idx] = sc->fmt->arg[idx];
1155         }
1156
1157         sc->nr_args = nr_args;
1158         return 0;
1159 }
1160
1161 static int syscall__set_arg_fmts(struct syscall *sc)
1162 {
1163         struct format_field *field;
1164         int idx = 0, len;
1165
1166         for (field = sc->args; field; field = field->next, ++idx) {
1167                 if (sc->fmt && sc->fmt->arg[idx].scnprintf)
1168                         continue;
1169
1170                 if (strcmp(field->type, "const char *") == 0 &&
1171                          (strcmp(field->name, "filename") == 0 ||
1172                           strcmp(field->name, "path") == 0 ||
1173                           strcmp(field->name, "pathname") == 0))
1174                         sc->arg_fmt[idx].scnprintf = SCA_FILENAME;
1175                 else if (field->flags & FIELD_IS_POINTER)
1176                         sc->arg_fmt[idx].scnprintf = syscall_arg__scnprintf_hex;
1177                 else if (strcmp(field->type, "pid_t") == 0)
1178                         sc->arg_fmt[idx].scnprintf = SCA_PID;
1179                 else if (strcmp(field->type, "umode_t") == 0)
1180                         sc->arg_fmt[idx].scnprintf = SCA_MODE_T;
1181                 else if ((strcmp(field->type, "int") == 0 ||
1182                           strcmp(field->type, "unsigned int") == 0 ||
1183                           strcmp(field->type, "long") == 0) &&
1184                          (len = strlen(field->name)) >= 2 &&
1185                          strcmp(field->name + len - 2, "fd") == 0) {
1186                         /*
1187                          * /sys/kernel/tracing/events/syscalls/sys_enter*
1188                          * egrep 'field:.*fd;' .../format|sed -r 's/.*field:([a-z ]+) [a-z_]*fd.+/\1/g'|sort|uniq -c
1189                          * 65 int
1190                          * 23 unsigned int
1191                          * 7 unsigned long
1192                          */
1193                         sc->arg_fmt[idx].scnprintf = SCA_FD;
1194                 }
1195         }
1196
1197         return 0;
1198 }
1199
/*
 * Lazily fill trace->syscalls.table[id]: resolve the syscall name, look up
 * its static formatter entry and its syscalls:sys_enter_* tracepoint
 * format, then set up the per-argument formatters.  Returns 0 on success,
 * -1 on failure (unknown id, allocation failure or missing tracepoint).
 */
static int trace__read_syscall_info(struct trace *trace, int id)
{
	char tp_name[128];
	struct syscall *sc;
	const char *name = syscalltbl__name(trace->sctbl, id);

	if (name == NULL)
		return -1;

	/* Grow the table on demand, zeroing the newly added slots. */
	if (id > trace->syscalls.max) {
		struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));

		if (nsyscalls == NULL)
			return -1;

		if (trace->syscalls.max != -1) {
			memset(nsyscalls + trace->syscalls.max + 1, 0,
			       (id - trace->syscalls.max) * sizeof(*sc));
		} else {
			memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
		}

		trace->syscalls.table = nsyscalls;
		trace->syscalls.max   = id;
	}

	sc = trace->syscalls.table + id;
	sc->name = name;

	sc->fmt  = syscall_fmt__find(sc->name);

	snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
	sc->tp_format = trace_event__tp_format("syscalls", tp_name);

	/* Some syscalls are known only by an alias, e.g. stat -> newstat. */
	if (IS_ERR(sc->tp_format) && sc->fmt && sc->fmt->alias) {
		snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
		sc->tp_format = trace_event__tp_format("syscalls", tp_name);
	}

	/*
	 * Allocate the arg formatters even when the tracepoint format is
	 * unavailable (6 args assumed then), before the IS_ERR() bail-out.
	 */
	if (syscall__alloc_arg_fmts(sc, IS_ERR(sc->tp_format) ? 6 : sc->tp_format->format.nr_fields))
		return -1;

	if (IS_ERR(sc->tp_format))
		return -1;

	sc->args = sc->tp_format->format.fields;
	/*
	 * We need to check and discard the first variable '__syscall_nr'
	 * or 'nr' that mean the syscall number. It is needless here.
	 * So drop '__syscall_nr' or 'nr' field but does not exist on older kernels.
	 */
	if (sc->args && (!strcmp(sc->args->name, "__syscall_nr") || !strcmp(sc->args->name, "nr"))) {
		sc->args = sc->args->next;
		--sc->nr_args;
	}

	sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");

	return syscall__set_arg_fmts(sc);
}
1260
/*
 * Turn the event-qualifier syscall name list (trace->ev_qualifier) into an
 * array of syscall ids (trace->ev_qualifier_ids).  Each entry may be an
 * exact name or a glob matching several syscalls.  On failure all invalid
 * names are reported in a single message, the ids array is freed and
 * -EINVAL/-ENOMEM is returned.
 */
static int trace__validate_ev_qualifier(struct trace *trace)
{
	int err = 0, i;
	size_t nr_allocated;
	struct str_node *pos;

	/* Start with one id slot per name; glob matches may grow this. */
	trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier);
	trace->ev_qualifier_ids.entries = malloc(trace->ev_qualifier_ids.nr *
						 sizeof(trace->ev_qualifier_ids.entries[0]));

	if (trace->ev_qualifier_ids.entries == NULL) {
		fputs("Error:\tNot enough memory for allocating events qualifier ids\n",
		       trace->output);
		err = -EINVAL;
		goto out;
	}

	nr_allocated = trace->ev_qualifier_ids.nr;
	i = 0;

	strlist__for_each_entry(pos, trace->ev_qualifier) {
		const char *sc = pos->s;
		int id = syscalltbl__id(trace->sctbl, sc), match_next = -1;

		if (id < 0) {
			/* Not an exact name -- try it as a glob. */
			id = syscalltbl__strglobmatch_first(trace->sctbl, sc, &match_next);
			if (id >= 0)
				goto matches;

			/*
			 * Invalid name: keep going so all bad entries are
			 * reported in one comma-separated message.
			 */
			if (err == 0) {
				fputs("Error:\tInvalid syscall ", trace->output);
				err = -EINVAL;
			} else {
				fputs(", ", trace->output);
			}

			fputs(sc, trace->output);
		}
matches:
		trace->ev_qualifier_ids.entries[i++] = id;
		if (match_next == -1)
			continue;

		/* A glob matched: collect the rest, growing the array as needed. */
		while (1) {
			id = syscalltbl__strglobmatch_next(trace->sctbl, sc, &match_next);
			if (id < 0)
				break;
			if (nr_allocated == trace->ev_qualifier_ids.nr) {
				void *entries;

				nr_allocated += 8;
				entries = realloc(trace->ev_qualifier_ids.entries,
						  nr_allocated * sizeof(trace->ev_qualifier_ids.entries[0]));
				if (entries == NULL) {
					err = -ENOMEM;
					fputs("\nError:\t Not enough memory for parsing\n", trace->output);
					goto out_free;
				}
				trace->ev_qualifier_ids.entries = entries;
			}
			trace->ev_qualifier_ids.nr++;
			trace->ev_qualifier_ids.entries[i++] = id;
		}
	}

	if (err < 0) {
		fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'"
		      "\nHint:\tand: 'man syscalls'\n", trace->output);
out_free:
		zfree(&trace->ev_qualifier_ids.entries);
		trace->ev_qualifier_ids.nr = 0;
	}
out:
	return err;
}
1336
1337 /*
1338  * args is to be interpreted as a series of longs but we need to handle
1339  * 8-byte unaligned accesses. args points to raw_data within the event
1340  * and raw_data is guaranteed to be 8-byte unaligned because it is
1341  * preceded by raw_size which is a u32. So we need to copy args to a temp
1342  * variable to read it. Most notably this avoids extended load instructions
1343  * on unaligned addresses
1344  */
1345 unsigned long syscall_arg__val(struct syscall_arg *arg, u8 idx)
1346 {
1347         unsigned long val;
1348         unsigned char *p = arg->args + sizeof(unsigned long) * idx;
1349
1350         memcpy(&val, p, sizeof(val));
1351         return val;
1352 }
1353
1354 static size_t syscall__scnprintf_name(struct syscall *sc, char *bf, size_t size,
1355                                       struct syscall_arg *arg)
1356 {
1357         if (sc->arg_fmt && sc->arg_fmt[arg->idx].name)
1358                 return scnprintf(bf, size, "%s: ", sc->arg_fmt[arg->idx].name);
1359
1360         return scnprintf(bf, size, "arg%d: ", arg->idx);
1361 }
1362
1363 static size_t syscall__scnprintf_val(struct syscall *sc, char *bf, size_t size,
1364                                      struct syscall_arg *arg, unsigned long val)
1365 {
1366         if (sc->arg_fmt && sc->arg_fmt[arg->idx].scnprintf) {
1367                 arg->val = val;
1368                 if (sc->arg_fmt[arg->idx].parm)
1369                         arg->parm = sc->arg_fmt[arg->idx].parm;
1370                 return sc->arg_fmt[arg->idx].scnprintf(bf, size, arg);
1371         }
1372         return scnprintf(bf, size, "%ld", val);
1373 }
1374
/*
 * Format all of a syscall's arguments into 'bf', either driven by the
 * tracepoint format fields (preferred) or, lacking that, as raw "argN"
 * values.  Returns the number of characters written.
 */
static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
				      unsigned char *args, struct trace *trace,
				      struct thread *thread)
{
	size_t printed = 0;
	unsigned long val;
	u8 bit = 1;	/* bit for arg.idx, tested against arg.mask below */
	struct syscall_arg arg = {
		.args	= args,
		.idx	= 0,
		.mask	= 0,
		.trace  = trace,
		.thread = thread,
	};
	struct thread_trace *ttrace = thread__priv(thread);

	/*
	 * Things like fcntl will set this in its 'cmd' formatter to pick the
	 * right formatter for the return value (an fd? file flags?), which is
	 * not needed for syscalls that always return a given type, say an fd.
	 */
	ttrace->ret_scnprintf = NULL;

	if (sc->args != NULL) {
		struct format_field *field;

		for (field = sc->args; field;
		     field = field->next, ++arg.idx, bit <<= 1) {
			/* Skip argument slots flagged in arg.mask. */
			if (arg.mask & bit)
				continue;

			val = syscall_arg__val(&arg, arg.idx);

			/*
			 * Suppress this argument if its value is zero and
			 * we don't have a string associated in an
			 * strarray for it.
			 */
			if (val == 0 &&
			    !(sc->arg_fmt &&
			      (sc->arg_fmt[arg.idx].show_zero ||
			       sc->arg_fmt[arg.idx].scnprintf == SCA_STRARRAY ||
			       sc->arg_fmt[arg.idx].scnprintf == SCA_STRARRAYS) &&
			      sc->arg_fmt[arg.idx].parm))
				continue;

			printed += scnprintf(bf + printed, size - printed,
					     "%s%s: ", printed ? ", " : "", field->name);
			printed += syscall__scnprintf_val(sc, bf + printed, size - printed, &arg, val);
		}
	} else if (IS_ERR(sc->tp_format)) {
		/*
		 * If we managed to read the tracepoint /format file, then we
		 * may end up not having any args, like with gettid(), so only
		 * print the raw args when we didn't manage to read it.
		 */
		while (arg.idx < sc->nr_args) {
			if (arg.mask & bit)
				goto next_arg;
			val = syscall_arg__val(&arg, arg.idx);
			if (printed)
				printed += scnprintf(bf + printed, size - printed, ", ");
			printed += syscall__scnprintf_name(sc, bf + printed, size - printed, &arg);
			printed += syscall__scnprintf_val(sc, bf + printed, size - printed, &arg, val);
next_arg:
			++arg.idx;
			bit <<= 1;
		}
	}

	return printed;
}
1447
/* Handler invoked for each tracepoint sample routed to 'trace'. */
typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
				  union perf_event *event,
				  struct perf_sample *sample);
1451
/*
 * Return the struct syscall for 'id', reading its info lazily via
 * trace__read_syscall_info() on first use.  Returns NULL (with a message,
 * verbosity permitting) for invalid ids or when the info can't be read.
 */
static struct syscall *trace__syscall_info(struct trace *trace,
					   struct perf_evsel *evsel, int id)
{

	if (id < 0) {

		/*
		 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
		 * before that, leaving at a higher verbosity level till that is
		 * explained. Reproduced with plain ftrace with:
		 *
		 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
		 * grep "NR -1 " /t/trace_pipe
		 *
		 * After generating some load on the machine.
		 */
		if (verbose > 1) {
			static u64 n;
			fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
				id, perf_evsel__name(evsel), ++n);
		}
		return NULL;
	}

	/* First time we see this id?  Read its info into the table. */
	if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
	    trace__read_syscall_info(trace, id))
		goto out_cant_read;

	/* Re-check: trace__read_syscall_info() may have failed part-way. */
	if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
		goto out_cant_read;

	return &trace->syscalls.table[id];

out_cant_read:
	if (verbose > 0) {
		fprintf(trace->output, "Problems reading syscall %d", id);
		if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
			fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
		fputs(" information\n", trace->output);
	}
	return NULL;
}
1494
1495 static void thread__update_stats(struct thread_trace *ttrace,
1496                                  int id, struct perf_sample *sample)
1497 {
1498         struct int_node *inode;
1499         struct stats *stats;
1500         u64 duration = 0;
1501
1502         inode = intlist__findnew(ttrace->syscall_stats, id);
1503         if (inode == NULL)
1504                 return;
1505
1506         stats = inode->priv;
1507         if (stats == NULL) {
1508                 stats = malloc(sizeof(struct stats));
1509                 if (stats == NULL)
1510                         return;
1511                 init_stats(stats);
1512                 inode->priv = stats;
1513         }
1514
1515         if (ttrace->entry_time && sample->time > ttrace->entry_time)
1516                 duration = sample->time - ttrace->entry_time;
1517
1518         update_stats(stats, duration);
1519 }
1520
1521 static int trace__printf_interrupted_entry(struct trace *trace, struct perf_sample *sample)
1522 {
1523         struct thread_trace *ttrace;
1524         u64 duration;
1525         size_t printed;
1526
1527         if (trace->current == NULL)
1528                 return 0;
1529
1530         ttrace = thread__priv(trace->current);
1531
1532         if (!ttrace->entry_pending)
1533                 return 0;
1534
1535         duration = sample->time - ttrace->entry_time;
1536
1537         printed  = trace__fprintf_entry_head(trace, trace->current, duration, true, ttrace->entry_time, trace->output);
1538         printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str);
1539         ttrace->entry_pending = false;
1540
1541         return printed;
1542 }
1543
/*
 * raw_syscalls:sys_enter handler: format the syscall name and arguments
 * into the thread's entry_str.  For most syscalls the line is held back
 * until the matching sys_exit arrives so the return value can be appended;
 * exit/exit_group are printed right away since no sys_exit will follow.
 * Returns 0 on success, -1 on failure.
 */
static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
			    union perf_event *event __maybe_unused,
			    struct perf_sample *sample)
{
	char *msg;
	void *args;
	size_t printed = 0;
	struct thread *thread;
	int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
	struct syscall *sc = trace__syscall_info(trace, evsel, id);
	struct thread_trace *ttrace;

	if (sc == NULL)
		return -1;

	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
	ttrace = thread__trace(thread, trace->output);
	if (ttrace == NULL)
		goto out_put;

	args = perf_evsel__sc_tp_ptr(evsel, args, sample);

	/* Lazily allocate the buffer the entry line is staged in. */
	if (ttrace->entry_str == NULL) {
		ttrace->entry_str = malloc(trace__entry_str_size);
		if (!ttrace->entry_str)
			goto out_put;
	}

	/* Flush any other thread's still-pending entry before printing. */
	if (!(trace->duration_filter || trace->summary_only || trace->min_stack))
		trace__printf_interrupted_entry(trace, sample);

	ttrace->entry_time = sample->time;
	msg = ttrace->entry_str;
	printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name);

	printed += syscall__scnprintf_args(sc, msg + printed, trace__entry_str_size - printed,
					   args, trace, thread);

	if (sc->is_exit) {
		/* No sys_exit will come: emit the completed line now. */
		if (!(trace->duration_filter || trace->summary_only || trace->min_stack)) {
			trace__fprintf_entry_head(trace, thread, 0, false, ttrace->entry_time, trace->output);
			fprintf(trace->output, "%-70s)\n", ttrace->entry_str);
		}
	} else {
		ttrace->entry_pending = true;
		/* See trace__vfs_getname & trace__sys_exit */
		ttrace->filename.pending_open = false;
	}

	/* Keep a reference to the last thread seen, for interrupted entries. */
	if (trace->current != thread) {
		thread__put(trace->current);
		trace->current = thread__get(thread);
	}
	err = 0;
out_put:
	thread__put(thread);
	return err;
}
1602
1603 static int trace__resolve_callchain(struct trace *trace, struct perf_evsel *evsel,
1604                                     struct perf_sample *sample,
1605                                     struct callchain_cursor *cursor)
1606 {
1607         struct addr_location al;
1608
1609         if (machine__resolve(trace->host, &al, sample) < 0 ||
1610             thread__resolve_callchain(al.thread, cursor, evsel, sample, NULL, NULL, trace->max_stack))
1611                 return -1;
1612
1613         return 0;
1614 }
1615
1616 static int trace__fprintf_callchain(struct trace *trace, struct perf_sample *sample)
1617 {
1618         /* TODO: user-configurable print_opts */
1619         const unsigned int print_opts = EVSEL__PRINT_SYM |
1620                                         EVSEL__PRINT_DSO |
1621                                         EVSEL__PRINT_UNKNOWN_AS_ADDR;
1622
1623         return sample__fprintf_callchain(sample, 38, print_opts, &callchain_cursor, trace->output);
1624 }
1625
/*
 * Handler for the syscalls sys_exit tracepoint: print the pending entry
 * (formatted by trace__sys_enter()) together with a beautified return
 * value, accumulate summary stats, and optionally print the callchain.
 *
 * Returns 0 on success, -1 if the syscall id is unknown or the
 * per-thread state can't be set up.
 */
static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
			   union perf_event *event __maybe_unused,
			   struct perf_sample *sample)
{
	long ret;
	u64 duration = 0;
	bool duration_calculated = false;
	struct thread *thread;
	int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1, callchain_ret = 0;
	struct syscall *sc = trace__syscall_info(trace, evsel, id);
	struct thread_trace *ttrace;

	if (sc == NULL)
		return -1;

	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
	ttrace = thread__trace(thread, trace->output);
	if (ttrace == NULL)
		goto out_put;

	if (trace->summary)
		thread__update_stats(ttrace, id, sample);

	ret = perf_evsel__sc_tp_uint(evsel, ret, sample);

	/*
	 * A successful open with a name captured by trace__vfs_getname:
	 * bind the returned fd to that pathname for later fd beautifying.
	 */
	if (id == trace->open_id && ret >= 0 && ttrace->filename.pending_open) {
		trace__set_fd_pathname(thread, ret, ttrace->filename.name);
		ttrace->filename.pending_open = false;
		++trace->stats.vfs_getname;
	}

	if (ttrace->entry_time) {
		duration = sample->time - ttrace->entry_time;
		if (trace__filter_duration(trace, duration))
			goto out;
		duration_calculated = true;
	} else if (trace->duration_filter)
		goto out;

	if (sample->callchain) {
		callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
		if (callchain_ret == 0) {
			/* Below the minimum requested depth: skip this event entirely. */
			if (callchain_cursor.nr < trace->min_stack)
				goto out;
			callchain_ret = 1;
		}
	}

	if (trace->summary_only)
		goto out;

	trace__fprintf_entry_head(trace, thread, duration, duration_calculated, ttrace->entry_time, trace->output);

	if (ttrace->entry_pending) {
		fprintf(trace->output, "%-70s", ttrace->entry_str);
	} else {
		/* Entry was printed earlier (interrupted); mark this as a continuation. */
		fprintf(trace->output, " ... [");
		color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
		fprintf(trace->output, "]: %s()", sc->name);
	}

	/*
	 * Return-value beautifying. Note the cross-branch gotos:
	 * syscalls without a fmt entry print signed, unless negative,
	 * in which case they jump into the errno branch below.
	 */
	if (sc->fmt == NULL) {
		if (ret < 0)
			goto errno_print;
signed_print:
		fprintf(trace->output, ") = %ld", ret);
	} else if (ret < 0) {
errno_print: {
		char bf[STRERR_BUFSIZE];
		const char *emsg = str_error_r(-ret, bf, sizeof(bf)),
			   *e = audit_errno_to_name(-ret);

		fprintf(trace->output, ") = -1 %s %s", e, emsg);
	}
	} else if (ret == 0 && sc->fmt->timeout)
		fprintf(trace->output, ") = 0 Timeout");
	else if (ttrace->ret_scnprintf) {
		/* One-shot per-thread return formatter, armed during arg beautifying. */
		char bf[1024];
		struct syscall_arg arg = {
			.val	= ret,
			.thread	= thread,
			.trace	= trace,
		};
		ttrace->ret_scnprintf(bf, sizeof(bf), &arg);
		ttrace->ret_scnprintf = NULL;
		fprintf(trace->output, ") = %s", bf);
	} else if (sc->fmt->hexret)
		fprintf(trace->output, ") = %#lx", ret);
	else if (sc->fmt->errpid) {
		/* Return value is a pid (fork/clone/wait*): show the child's comm. */
		struct thread *child = machine__find_thread(trace->host, ret, ret);

		if (child != NULL) {
			fprintf(trace->output, ") = %ld", ret);
			if (child->comm_set)
				fprintf(trace->output, " (%s)", thread__comm_str(child));
			thread__put(child);
		}
	} else
		goto signed_print;

	fputc('\n', trace->output);

	if (callchain_ret > 0)
		trace__fprintf_callchain(trace, sample);
	else if (callchain_ret < 0)
		pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
out:
	ttrace->entry_pending = false;
	err = 0;
out_put:
	thread__put(thread);
	return err;
}
1739
/*
 * Handler for the probe:vfs_getname tracepoint: capture the pathname
 * being resolved by the kernel so that a following sys_exit for open()
 * can associate the returned fd with it, and splice the name into the
 * already-formatted entry string at the position recorded by the
 * pointer-argument beautifier (ttrace->filename.entry_str_pos).
 *
 * Always returns 0; failures just mean the name isn't captured.
 */
static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
			      union perf_event *event __maybe_unused,
			      struct perf_sample *sample)
{
	struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
	struct thread_trace *ttrace;
	size_t filename_len, entry_str_len, to_move;
	ssize_t remaining_space;
	char *pos;
	const char *filename = perf_evsel__rawptr(evsel, sample, "pathname");

	if (!thread)
		goto out;

	ttrace = thread__priv(thread);
	if (!ttrace)
		goto out_put;

	filename_len = strlen(filename);
	if (filename_len == 0)
		goto out_put;

	/* Grow the per-thread copy buffer if this name doesn't fit. */
	if (ttrace->filename.namelen < filename_len) {
		char *f = realloc(ttrace->filename.name, filename_len + 1);

		if (f == NULL)
			goto out_put;

		ttrace->filename.namelen = filename_len;
		ttrace->filename.name = f;
	}

	strcpy(ttrace->filename.name, filename);
	ttrace->filename.pending_open = true;

	/* No entry_str position waiting for a name: nothing to splice. */
	if (!ttrace->filename.ptr)
		goto out_put;

	entry_str_len = strlen(ttrace->entry_str);
	remaining_space = trace__entry_str_size - entry_str_len - 1; /* \0 */
	if (remaining_space <= 0)
		goto out_put;

	/* If the name doesn't fit, keep its tail (the most specific part). */
	if (filename_len > (size_t)remaining_space) {
		filename += filename_len - remaining_space;
		filename_len = remaining_space;
	}

	/* Open a gap at entry_str_pos and copy the name into it. */
	to_move = entry_str_len - ttrace->filename.entry_str_pos + 1; /* \0 */
	pos = ttrace->entry_str + ttrace->filename.entry_str_pos;
	memmove(pos + filename_len, pos, to_move);
	memcpy(pos, filename, filename_len);

	ttrace->filename.ptr = 0;
	ttrace->filename.entry_str_pos = 0;
out_put:
	thread__put(thread);
out:
	return 0;
}
1800
/*
 * Handler for sched:sched_stat_runtime: accumulate per-thread and global
 * runtime (in ms) for the summary. If per-thread state can't be set up,
 * dump the raw event fields instead of accounting them.
 *
 * Always returns 0.
 */
static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
				     union perf_event *event __maybe_unused,
				     struct perf_sample *sample)
{
	u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
	double runtime_ms = (double)runtime / NSEC_PER_MSEC;
	struct thread *thread = machine__findnew_thread(trace->host,
							sample->pid,
							sample->tid);
	struct thread_trace *ttrace = thread__trace(thread, trace->output);

	if (ttrace == NULL)
		goto out_dump;

	ttrace->runtime_ms += runtime_ms;
	trace->runtime_ms += runtime_ms;
out_put:
	thread__put(thread);
	return 0;

out_dump:
	/* NOTE(review): trailing ')' has no matching '(' in this format string. */
	fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
	       evsel->name,
	       perf_evsel__strval(evsel, sample, "comm"),
	       (pid_t)perf_evsel__intval(evsel, sample, "pid"),
	       runtime,
	       perf_evsel__intval(evsel, sample, "vruntime"));
	goto out_put;
}
1830
1831 static void bpf_output__printer(enum binary_printer_ops op,
1832                                 unsigned int val, void *extra)
1833 {
1834         FILE *output = extra;
1835         unsigned char ch = (unsigned char)val;
1836
1837         switch (op) {
1838         case BINARY_PRINT_CHAR_DATA:
1839                 fprintf(output, "%c", isprint(ch) ? ch : '.');
1840                 break;
1841         case BINARY_PRINT_DATA_BEGIN:
1842         case BINARY_PRINT_LINE_BEGIN:
1843         case BINARY_PRINT_ADDR:
1844         case BINARY_PRINT_NUM_DATA:
1845         case BINARY_PRINT_NUM_PAD:
1846         case BINARY_PRINT_SEP:
1847         case BINARY_PRINT_CHAR_PAD:
1848         case BINARY_PRINT_LINE_END:
1849         case BINARY_PRINT_DATA_END:
1850         default:
1851                 break;
1852         }
1853 }
1854
1855 static void bpf_output__fprintf(struct trace *trace,
1856                                 struct perf_sample *sample)
1857 {
1858         print_binary(sample->raw_data, sample->raw_size, 8,
1859                      bpf_output__printer, trace->output);
1860 }
1861
/*
 * Generic handler for non-syscall tracepoints and BPF output events
 * added via --event: print a timestamp, the event name, and its payload
 * (libtraceevent-formatted for tracepoints, raw character dump for BPF
 * output), plus the callchain when sampled.
 *
 * Always returns 0.
 */
static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
				union perf_event *event __maybe_unused,
				struct perf_sample *sample)
{
	int callchain_ret = 0;

	if (sample->callchain) {
		callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
		if (callchain_ret == 0) {
			/* Below the minimum requested depth: skip this event entirely. */
			if (callchain_cursor.nr < trace->min_stack)
				goto out;
			callchain_ret = 1;
		}
	}

	/* Flush any half-printed syscall entry this event interrupted. */
	trace__printf_interrupted_entry(trace, sample);
	trace__fprintf_tstamp(trace, sample->time, trace->output);

	/* Keep columns aligned with syscall lines, which print a duration here. */
	if (trace->trace_syscalls)
		fprintf(trace->output, "(         ): ");

	fprintf(trace->output, "%s:", evsel->name);

	if (perf_evsel__is_bpf_output(evsel)) {
		bpf_output__fprintf(trace, sample);
	} else if (evsel->tp_format) {
		event_format__fprintf(evsel->tp_format, sample->cpu,
				      sample->raw_data, sample->raw_size,
				      trace->output);
	}

	fprintf(trace->output, ")\n");

	if (callchain_ret > 0)
		trace__fprintf_callchain(trace, sample);
	else if (callchain_ret < 0)
		pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
out:
	return 0;
}
1902
1903 static void print_location(FILE *f, struct perf_sample *sample,
1904                            struct addr_location *al,
1905                            bool print_dso, bool print_sym)
1906 {
1907
1908         if ((verbose > 0 || print_dso) && al->map)
1909                 fprintf(f, "%s@", al->map->dso->long_name);
1910
1911         if ((verbose > 0 || print_sym) && al->sym)
1912                 fprintf(f, "%s+0x%" PRIx64, al->sym->name,
1913                         al->addr - al->sym->start);
1914         else if (al->map)
1915                 fprintf(f, "0x%" PRIx64, al->addr);
1916         else
1917                 fprintf(f, "0x%" PRIx64, sample->addr);
1918 }
1919
/*
 * Handler for page-fault software events: count major/minor faults per
 * thread and, unless summarizing, print a line showing the faulting
 * instruction location and the target data address, with a map-type
 * annotation ('d' data, 'x' executable, '?' unmapped).
 *
 * Returns 0 on success, -1 if per-thread state can't be set up or the
 * callchain depth is below the requested minimum.
 */
static int trace__pgfault(struct trace *trace,
			  struct perf_evsel *evsel,
			  union perf_event *event __maybe_unused,
			  struct perf_sample *sample)
{
	struct thread *thread;
	struct addr_location al;
	char map_type = 'd';
	struct thread_trace *ttrace;
	int err = -1;
	int callchain_ret = 0;

	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);

	if (sample->callchain) {
		callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
		if (callchain_ret == 0) {
			/* Below the minimum requested depth: skip this event entirely. */
			if (callchain_cursor.nr < trace->min_stack)
				goto out_put;
			callchain_ret = 1;
		}
	}

	ttrace = thread__trace(thread, trace->output);
	if (ttrace == NULL)
		goto out_put;

	if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
		ttrace->pfmaj++;
	else
		ttrace->pfmin++;

	if (trace->summary_only)
		goto out;

	/* Where the faulting instruction lives. */
	thread__find_addr_location(thread, sample->cpumode, MAP__FUNCTION,
			      sample->ip, &al);

	trace__fprintf_entry_head(trace, thread, 0, true, sample->time, trace->output);

	fprintf(trace->output, "%sfault [",
		evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
		"maj" : "min");

	print_location(trace->output, sample, &al, false, true);

	fprintf(trace->output, "] => ");

	/* Where the faulting access points: try data maps first ... */
	thread__find_addr_location(thread, sample->cpumode, MAP__VARIABLE,
				   sample->addr, &al);

	if (!al.map) {
		/* ... then executable maps (e.g. fault on code), else unknown. */
		thread__find_addr_location(thread, sample->cpumode,
					   MAP__FUNCTION, sample->addr, &al);

		if (al.map)
			map_type = 'x';
		else
			map_type = '?';
	}

	print_location(trace->output, sample, &al, true, false);

	fprintf(trace->output, " (%c%c)\n", map_type, al.level);

	if (callchain_ret > 0)
		trace__fprintf_callchain(trace, sample);
	else if (callchain_ret < 0)
		pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
out:
	err = 0;
out_put:
	thread__put(thread);
	return err;
}
1995
1996 static void trace__set_base_time(struct trace *trace,
1997                                  struct perf_evsel *evsel,
1998                                  struct perf_sample *sample)
1999 {
2000         /*
2001          * BPF events were not setting PERF_SAMPLE_TIME, so be more robust
2002          * and don't use sample->time unconditionally, we may end up having
2003          * some other event in the future without PERF_SAMPLE_TIME for good
2004          * reason, i.e. we may not be interested in its timestamps, just in
2005          * it taking place, picking some piece of information when it
2006          * appears in our event stream (vfs_getname comes to mind).
2007          */
2008         if (trace->base_time == 0 && !trace->full_time &&
2009             (evsel->attr.sample_type & PERF_SAMPLE_TIME))
2010                 trace->base_time = sample->time;
2011 }
2012
/*
 * perf_tool sample callback used when replaying a perf.data file
 * (perf trace -i): dispatch each sample to the evsel's registered
 * tracepoint handler, skipping filtered threads.
 *
 * Always returns 0.
 */
static int trace__process_sample(struct perf_tool *tool,
				 union perf_event *event,
				 struct perf_sample *sample,
				 struct perf_evsel *evsel,
				 struct machine *machine __maybe_unused)
{
	struct trace *trace = container_of(tool, struct trace, tool);
	struct thread *thread;
	int err = 0;

	tracepoint_handler handler = evsel->handler;

	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
	if (thread && thread__is_filtered(thread))
		goto out;

	trace__set_base_time(trace, evsel, sample);

	if (handler) {
		++trace->nr_events;
		handler(trace, evsel, event, sample);
	}
out:
	thread__put(thread);
	return err;
}
2039
2040 static int trace__record(struct trace *trace, int argc, const char **argv)
2041 {
2042         unsigned int rec_argc, i, j;
2043         const char **rec_argv;
2044         const char * const record_args[] = {
2045                 "record",
2046                 "-R",
2047                 "-m", "1024",
2048                 "-c", "1",
2049         };
2050
2051         const char * const sc_args[] = { "-e", };
2052         unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
2053         const char * const majpf_args[] = { "-e", "major-faults" };
2054         unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
2055         const char * const minpf_args[] = { "-e", "minor-faults" };
2056         unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
2057
2058         /* +1 is for the event string below */
2059         rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
2060                 majpf_args_nr + minpf_args_nr + argc;
2061         rec_argv = calloc(rec_argc + 1, sizeof(char *));
2062
2063         if (rec_argv == NULL)
2064                 return -ENOMEM;
2065
2066         j = 0;
2067         for (i = 0; i < ARRAY_SIZE(record_args); i++)
2068                 rec_argv[j++] = record_args[i];
2069
2070         if (trace->trace_syscalls) {
2071                 for (i = 0; i < sc_args_nr; i++)
2072                         rec_argv[j++] = sc_args[i];
2073
2074                 /* event string may be different for older kernels - e.g., RHEL6 */
2075                 if (is_valid_tracepoint("raw_syscalls:sys_enter"))
2076                         rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
2077                 else if (is_valid_tracepoint("syscalls:sys_enter"))
2078                         rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
2079                 else {
2080                         pr_err("Neither raw_syscalls nor syscalls events exist.\n");
2081                         return -1;
2082                 }
2083         }
2084
2085         if (trace->trace_pgfaults & TRACE_PFMAJ)
2086                 for (i = 0; i < majpf_args_nr; i++)
2087                         rec_argv[j++] = majpf_args[i];
2088
2089         if (trace->trace_pgfaults & TRACE_PFMIN)
2090                 for (i = 0; i < minpf_args_nr; i++)
2091                         rec_argv[j++] = minpf_args[i];
2092
2093         for (i = 0; i < (unsigned int)argc; i++)
2094                 rec_argv[j++] = argv[i];
2095
2096         return cmd_record(j, rec_argv);
2097 }
2098
2099 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
2100
2101 static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
2102 {
2103         struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
2104
2105         if (IS_ERR(evsel))
2106                 return false;
2107
2108         if (perf_evsel__field(evsel, "pathname") == NULL) {
2109                 perf_evsel__delete(evsel);
2110                 return false;
2111         }
2112
2113         evsel->handler = trace__vfs_getname;
2114         perf_evlist__add(evlist, evsel);
2115         return true;
2116 }
2117
2118 static struct perf_evsel *perf_evsel__new_pgfault(u64 config)
2119 {
2120         struct perf_evsel *evsel;
2121         struct perf_event_attr attr = {
2122                 .type = PERF_TYPE_SOFTWARE,
2123                 .mmap_data = 1,
2124         };
2125
2126         attr.config = config;
2127         attr.sample_period = 1;
2128
2129         event_attr_init(&attr);
2130
2131         evsel = perf_evsel__new(&attr);
2132         if (evsel)
2133                 evsel->handler = trace__pgfault;
2134
2135         return evsel;
2136 }
2137
/*
 * Live-mode event dispatcher: route non-sample records to the generic
 * machinery and samples to the handler registered on their evsel,
 * guarding against tracepoint samples that arrive without a payload.
 */
static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
{
	const u32 type = event->header.type;
	struct perf_evsel *evsel;

	/* MMAP, COMM, FORK, EXIT etc.: bookkeeping, not samples. */
	if (type != PERF_RECORD_SAMPLE) {
		trace__process_event(trace, trace->host, event, sample);
		return;
	}

	evsel = perf_evlist__id2evsel(trace->evlist, sample->id);
	if (evsel == NULL) {
		fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id);
		return;
	}

	trace__set_base_time(trace, evsel, sample);

	if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
	    sample->raw_data == NULL) {
		fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
		       perf_evsel__name(evsel), sample->tid,
		       sample->cpu, sample->raw_size);
	} else {
		tracepoint_handler handler = evsel->handler;
		handler(trace, evsel, event, sample);
	}
}
2166
/*
 * Create the raw_syscalls:sys_enter/sys_exit evsels, wire up their
 * handlers and tracepoint field accessors, and add them to the evlist.
 * Uses the classic goto-unwind pattern for partial-failure cleanup.
 *
 * Returns 0 on success, -1 on failure.
 */
static int trace__add_syscall_newtp(struct trace *trace)
{
	int ret = -1;
	struct perf_evlist *evlist = trace->evlist;
	struct perf_evsel *sys_enter, *sys_exit;

	sys_enter = perf_evsel__syscall_newtp("sys_enter", trace__sys_enter);
	if (sys_enter == NULL)
		goto out;

	if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
		goto out_delete_sys_enter;

	sys_exit = perf_evsel__syscall_newtp("sys_exit", trace__sys_exit);
	if (sys_exit == NULL)
		goto out_delete_sys_enter;

	if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
		goto out_delete_sys_exit;

	perf_evlist__add(evlist, sys_enter);
	perf_evlist__add(evlist, sys_exit);

	if (callchain_param.enabled && !trace->kernel_syscallchains) {
		/*
		 * We're interested only in the user space callchain
		 * leading to the syscall, allow overriding that for
		 * debugging reasons using --kernel_syscall_callchains
		 */
		sys_exit->attr.exclude_callchain_kernel = 1;
	}

	trace->syscalls.events.sys_enter = sys_enter;
	trace->syscalls.events.sys_exit  = sys_exit;

	ret = 0;
out:
	return ret;

out_delete_sys_exit:
	perf_evsel__delete_priv(sys_exit);
out_delete_sys_enter:
	perf_evsel__delete_priv(sys_enter);
	goto out;
}
2212
/*
 * Turn the -e/--expr syscall qualifier list into a tracepoint filter
 * expression on the syscall id ("id == N || ..." or its negation) and
 * append it to both the sys_enter and sys_exit evsels.
 *
 * Returns 0 on success, -1 on failure (errno set to ENOMEM when the
 * filter string couldn't be built).
 */
static int trace__set_ev_qualifier_filter(struct trace *trace)
{
	int err = -1;
	struct perf_evsel *sys_exit;
	char *filter = asprintf_expr_inout_ints("id", !trace->not_ev_qualifier,
						trace->ev_qualifier_ids.nr,
						trace->ev_qualifier_ids.entries);

	if (filter == NULL)
		goto out_enomem;

	/* Only filter sys_exit if attaching the filter to sys_enter worked. */
	if (!perf_evsel__append_tp_filter(trace->syscalls.events.sys_enter,
					  filter)) {
		sys_exit = trace->syscalls.events.sys_exit;
		err = perf_evsel__append_tp_filter(sys_exit, filter);
	}

	free(filter);
out:
	return err;
out_enomem:
	errno = ENOMEM;
	goto out;
}
2237
/*
 * Avoid a tracing feedback loop when running system-wide over ssh:
 * filter out our own pid and, if an "sshd" ancestor is found while
 * walking up the parent chain, its pid too (the sshd writing our output
 * to the network would otherwise generate events we then trace).
 *
 * NOTE(review): machine__find_thread() results are walked without a
 * matching thread__put() here — presumably borrowed references; confirm
 * against machine__find_thread()'s refcounting contract.
 *
 * Returns the result of perf_evlist__set_filter_pids().
 */
static int trace__set_filter_loop_pids(struct trace *trace)
{
	unsigned int nr = 1;
	pid_t pids[32] = {
		getpid(),
	};
	struct thread *thread = machine__find_thread(trace->host, pids[0], pids[0]);

	while (thread && nr < ARRAY_SIZE(pids)) {
		struct thread *parent = machine__find_thread(trace->host, thread->ppid, thread->ppid);

		if (parent == NULL)
			break;

		if (!strcmp(thread__comm_str(parent), "sshd")) {
			pids[nr++] = parent->tid;
			break;
		}
		thread = parent;
	}

	return perf_evlist__set_filter_pids(trace->evlist, nr, pids);
}
2261
2262 static int trace__run(struct trace *trace, int argc, const char **argv)
2263 {
2264         struct perf_evlist *evlist = trace->evlist;
2265         struct perf_evsel *evsel, *pgfault_maj = NULL, *pgfault_min = NULL;
2266         int err = -1, i;
2267         unsigned long before;
2268         const bool forks = argc > 0;
2269         bool draining = false;
2270
2271         trace->live = true;
2272
2273         if (trace->trace_syscalls && trace__add_syscall_newtp(trace))
2274                 goto out_error_raw_syscalls;
2275
2276         if (trace->trace_syscalls)
2277                 trace->vfs_getname = perf_evlist__add_vfs_getname(evlist);
2278
2279         if ((trace->trace_pgfaults & TRACE_PFMAJ)) {
2280                 pgfault_maj = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MAJ);
2281                 if (pgfault_maj == NULL)
2282                         goto out_error_mem;
2283                 perf_evlist__add(evlist, pgfault_maj);
2284         }
2285
2286         if ((trace->trace_pgfaults & TRACE_PFMIN)) {
2287                 pgfault_min = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MIN);
2288                 if (pgfault_min == NULL)
2289                         goto out_error_mem;
2290                 perf_evlist__add(evlist, pgfault_min);
2291         }
2292
2293         if (trace->sched &&
2294             perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
2295                                    trace__sched_stat_runtime))
2296                 goto out_error_sched_stat_runtime;
2297
2298         err = perf_evlist__create_maps(evlist, &trace->opts.target);
2299         if (err < 0) {
2300                 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
2301                 goto out_delete_evlist;
2302         }
2303
2304         err = trace__symbols_init(trace, evlist);
2305         if (err < 0) {
2306                 fprintf(trace->output, "Problems initializing symbol libraries!\n");
2307                 goto out_delete_evlist;
2308         }
2309
2310         perf_evlist__config(evlist, &trace->opts, NULL);
2311
2312         if (callchain_param.enabled) {
2313                 bool use_identifier = false;
2314
2315                 if (trace->syscalls.events.sys_exit) {
2316                         perf_evsel__config_callchain(trace->syscalls.events.sys_exit,
2317                                                      &trace->opts, &callchain_param);
2318                         use_identifier = true;
2319                 }
2320
2321                 if (pgfault_maj) {
2322                         perf_evsel__config_callchain(pgfault_maj, &trace->opts, &callchain_param);
2323                         use_identifier = true;
2324                 }
2325
2326                 if (pgfault_min) {
2327                         perf_evsel__config_callchain(pgfault_min, &trace->opts, &callchain_param);
2328                         use_identifier = true;
2329                 }
2330
2331                 if (use_identifier) {
2332                        /*
2333                         * Now we have evsels with different sample_ids, use
2334                         * PERF_SAMPLE_IDENTIFIER to map from sample to evsel
2335                         * from a fixed position in each ring buffer record.
2336                         *
2337                         * As of this the changeset introducing this comment, this
2338                         * isn't strictly needed, as the fields that can come before
2339                         * PERF_SAMPLE_ID are all used, but we'll probably disable
2340                         * some of those for things like copying the payload of
2341                         * pointer syscall arguments, and for vfs_getname we don't
2342                         * need PERF_SAMPLE_ADDR and PERF_SAMPLE_IP, so do this
2343                         * here as a warning we need to use PERF_SAMPLE_IDENTIFIER.
2344                         */
2345                         perf_evlist__set_sample_bit(evlist, IDENTIFIER);
2346                         perf_evlist__reset_sample_bit(evlist, ID);
2347                 }
2348         }
2349
2350         signal(SIGCHLD, sig_handler);
2351         signal(SIGINT, sig_handler);
2352
2353         if (forks) {
2354                 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
2355                                                     argv, false, NULL);
2356                 if (err < 0) {
2357                         fprintf(trace->output, "Couldn't run the workload!\n");
2358                         goto out_delete_evlist;
2359                 }
2360         }
2361
2362         err = perf_evlist__open(evlist);
2363         if (err < 0)
2364                 goto out_error_open;
2365
2366         err = bpf__apply_obj_config();
2367         if (err) {
2368                 char errbuf[BUFSIZ];
2369
2370                 bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
2371                 pr_err("ERROR: Apply config to BPF failed: %s\n",
2372                          errbuf);
2373                 goto out_error_open;
2374         }
2375
2376         /*
2377          * Better not use !target__has_task() here because we need to cover the
2378          * case where no threads were specified in the command line, but a
2379          * workload was, and in that case we will fill in the thread_map when
2380          * we fork the workload in perf_evlist__prepare_workload.
2381          */
2382         if (trace->filter_pids.nr > 0)
2383                 err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries);
2384         else if (thread_map__pid(evlist->threads, 0) == -1)
2385                 err = trace__set_filter_loop_pids(trace);
2386
2387         if (err < 0)
2388                 goto out_error_mem;
2389
2390         if (trace->ev_qualifier_ids.nr > 0) {
2391                 err = trace__set_ev_qualifier_filter(trace);
2392                 if (err < 0)
2393                         goto out_errno;
2394
2395                 pr_debug("event qualifier tracepoint filter: %s\n",
2396                          trace->syscalls.events.sys_exit->filter);
2397         }
2398
2399         err = perf_evlist__apply_filters(evlist, &evsel);
2400         if (err < 0)
2401                 goto out_error_apply_filters;
2402
2403         err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
2404         if (err < 0)
2405                 goto out_error_mmap;
2406
2407         if (!target__none(&trace->opts.target) && !trace->opts.initial_delay)
2408                 perf_evlist__enable(evlist);
2409
2410         if (forks)
2411                 perf_evlist__start_workload(evlist);
2412
2413         if (trace->opts.initial_delay) {
2414                 usleep(trace->opts.initial_delay * 1000);
2415                 perf_evlist__enable(evlist);
2416         }
2417
2418         trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 ||
2419                                   evlist->threads->nr > 1 ||
2420                                   perf_evlist__first(evlist)->attr.inherit;
2421 again:
2422         before = trace->nr_events;
2423
2424         for (i = 0; i < evlist->nr_mmaps; i++) {
2425                 union perf_event *event;
2426
2427                 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
2428                         struct perf_sample sample;
2429
2430                         ++trace->nr_events;
2431
2432                         err = perf_evlist__parse_sample(evlist, event, &sample);
2433                         if (err) {
2434                                 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
2435                                 goto next_event;
2436                         }
2437
2438                         trace__handle_event(trace, event, &sample);
2439 next_event:
2440                         perf_evlist__mmap_consume(evlist, i);
2441
2442                         if (interrupted)
2443                                 goto out_disable;
2444
2445                         if (done && !draining) {
2446                                 perf_evlist__disable(evlist);
2447                                 draining = true;
2448                         }
2449                 }
2450         }
2451
2452         if (trace->nr_events == before) {
2453                 int timeout = done ? 100 : -1;
2454
2455                 if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
2456                         if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
2457                                 draining = true;
2458
2459                         goto again;
2460                 }
2461         } else {
2462                 goto again;
2463         }
2464
2465 out_disable:
2466         thread__zput(trace->current);
2467
2468         perf_evlist__disable(evlist);
2469
2470         if (!err) {
2471                 if (trace->summary)
2472                         trace__fprintf_thread_summary(trace, trace->output);
2473
2474                 if (trace->show_tool_stats) {
2475                         fprintf(trace->output, "Stats:\n "
2476                                                " vfs_getname : %" PRIu64 "\n"
2477                                                " proc_getname: %" PRIu64 "\n",
2478                                 trace->stats.vfs_getname,
2479                                 trace->stats.proc_getname);
2480                 }
2481         }
2482
2483 out_delete_evlist:
2484         perf_evlist__delete(evlist);
2485         trace->evlist = NULL;
2486         trace->live = false;
2487         return err;
2488 {
2489         char errbuf[BUFSIZ];
2490
2491 out_error_sched_stat_runtime:
2492         tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime");
2493         goto out_error;
2494
2495 out_error_raw_syscalls:
2496         tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
2497         goto out_error;
2498
2499 out_error_mmap:
2500         perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
2501         goto out_error;
2502
2503 out_error_open:
2504         perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2505
2506 out_error:
2507         fprintf(trace->output, "%s\n", errbuf);
2508         goto out_delete_evlist;
2509
2510 out_error_apply_filters:
2511         fprintf(trace->output,
2512                 "Failed to set filter \"%s\" on event %s with %d (%s)\n",
2513                 evsel->filter, perf_evsel__name(evsel), errno,
2514                 str_error_r(errno, errbuf, sizeof(errbuf)));
2515         goto out_delete_evlist;
2516 }
2517 out_error_mem:
2518         fprintf(trace->output, "Not enough memory to run!\n");
2519         goto out_delete_evlist;
2520
2521 out_errno:
2522         fprintf(trace->output, "errno=%d,%s\n", errno, strerror(errno));
2523         goto out_delete_evlist;
2524 }
2525
/*
 * Replay mode (-i): instead of tracing live, process a previously recorded
 * perf.data file and pretty-print the syscall/pagefault samples it contains.
 *
 * Returns 0 on success, negative on error.
 */
static int trace__replay(struct trace *trace)
{
	/* Tracepoints we know how to beautify beyond the raw payload. */
	const struct perf_evsel_str_handler handlers[] = {
		{ "probe:vfs_getname",	     trace__vfs_getname, },
	};
	struct perf_data_file file = {
		.path  = input_name,
		.mode  = PERF_DATA_MODE_READ,
		.force = trace->force,
	};
	struct perf_session *session;
	struct perf_evsel *evsel;
	int err = -1;

	/* Route the session's events through trace's own handlers. */
	trace->tool.sample	  = trace__process_sample;
	trace->tool.mmap	  = perf_event__process_mmap;
	trace->tool.mmap2	  = perf_event__process_mmap2;
	trace->tool.comm	  = perf_event__process_comm;
	trace->tool.exit	  = perf_event__process_exit;
	trace->tool.fork	  = perf_event__process_fork;
	trace->tool.attr	  = perf_event__process_attr;
	trace->tool.tracing_data  = perf_event__process_tracing_data;
	trace->tool.build_id	  = perf_event__process_build_id;
	trace->tool.namespaces	  = perf_event__process_namespaces;

	/* Samples from a file may be out of order; let the session sort them. */
	trace->tool.ordered_events = true;
	trace->tool.ordering_requires_timestamps = true;

	/* add tid to output */
	trace->multiple_threads = true;

	session = perf_session__new(&file, false, &trace->tool);
	if (session == NULL)
		return -1;

	if (trace->opts.target.pid)
		symbol_conf.pid_list_str = strdup(trace->opts.target.pid);

	if (trace->opts.target.tid)
		symbol_conf.tid_list_str = strdup(trace->opts.target.tid);

	if (symbol__init(&session->header.env) < 0)
		goto out;

	trace->host = &session->machines.host;

	err = perf_session__set_tracepoints_handlers(session, handlers);
	if (err)
		goto out;

	evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
						     "raw_syscalls:sys_enter");
	/* older kernels have syscalls tp versus raw_syscalls */
	if (evsel == NULL)
		evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
							     "syscalls:sys_enter");

	if (evsel &&
	    (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
	    perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
		pr_err("Error during initialize raw_syscalls:sys_enter event\n");
		goto out;
	}

	evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
						     "raw_syscalls:sys_exit");
	if (evsel == NULL)
		evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
							     "syscalls:sys_exit");
	if (evsel &&
	    (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
	    perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
		pr_err("Error during initialize raw_syscalls:sys_exit event\n");
		goto out;
	}

	/* Recorded page fault software events get the pagefault handler. */
	evlist__for_each_entry(session->evlist, evsel) {
		if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
		    (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
		     evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
		     evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
			evsel->handler = trace__pgfault;
	}

	setup_pager();

	err = perf_session__process_events(session);
	if (err)
		pr_err("Failed to process events, error %d", err);

	else if (trace->summary)
		trace__fprintf_thread_summary(trace, trace->output);

out:
	perf_session__delete(session);

	return err;
}
2624
/*
 * Print the banner that precedes the per-thread summary entries.
 * Returns the number of characters written.
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	return fprintf(fp, "\n Summary of events:\n\n");
}
2633
/*
 * rb_resort helper: re-sort the per-thread syscall stats (an intlist keyed by
 * syscall number, with a struct stats hanging off ->priv) using the total
 * time spent in each syscall (msecs) as the comparison key.  The body below
 * fills one sorted entry from one intlist node.
 */
DEFINE_RESORT_RB(syscall_stats, a->msecs > b->msecs,
	struct stats	*stats;
	double		msecs;
	int		syscall;
)
{
	struct int_node *source = rb_entry(nd, struct int_node, rb_node);
	struct stats *stats = source->priv;

	entry->syscall = source->i;
	entry->stats   = stats;
	/* total time = nr of calls * average duration, converted ns -> msecs */
	entry->msecs   = stats ? (u64)stats->n * (avg_stats(stats) / NSEC_PER_MSEC) : 0;
}
2647
/*
 * Print one thread's per-syscall statistics table: calls, total/min/avg/max
 * in msecs and the relative stddev, ordered by total time spent.
 * Returns the number of characters written.
 */
static size_t thread__dump_stats(struct thread_trace *ttrace,
				 struct trace *trace, FILE *fp)
{
	size_t printed = 0;
	struct syscall *sc;
	struct rb_node *nd;
	/* Re-sort ttrace->syscall_stats by total msecs, see DEFINE_RESORT_RB above. */
	DECLARE_RESORT_RB_INTLIST(syscall_stats, ttrace->syscall_stats);

	if (syscall_stats == NULL)
		return 0;

	printed += fprintf(fp, "\n");

	printed += fprintf(fp, "   syscall            calls    total       min       avg       max      stddev\n");
	printed += fprintf(fp, "                               (msec)    (msec)    (msec)    (msec)        (%%)\n");
	printed += fprintf(fp, "   --------------- -------- --------- --------- --------- ---------     ------\n");

	resort_rb__for_each_entry(nd, syscall_stats) {
		struct stats *stats = syscall_stats_entry->stats;
		if (stats) {
			/* stats are accumulated in nanoseconds, the table shows msecs */
			double min = (double)(stats->min) / NSEC_PER_MSEC;
			double max = (double)(stats->max) / NSEC_PER_MSEC;
			double avg = avg_stats(stats);
			double pct;
			u64 n = (u64) stats->n;

			/* relative standard deviation, as a percentage of the average */
			pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
			avg /= NSEC_PER_MSEC;

			sc = &trace->syscalls.table[syscall_stats_entry->syscall];
			printed += fprintf(fp, "   %-15s", sc->name);
			printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f %9.3f",
					   n, syscall_stats_entry->msecs, min, avg);
			printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
		}
	}

	resort_rb__delete(syscall_stats);
	printed += fprintf(fp, "\n\n");

	return printed;
}
2690
/*
 * Print one thread's summary line: comm (tid), event count, its share of all
 * events in the session, major/minor fault counts if any, scheduler runtime
 * when --sched was used, then the per-syscall statistics table.
 * Returns the number of characters written, 0 if the thread has no trace state.
 */
static size_t trace__fprintf_thread(FILE *fp, struct thread *thread, struct trace *trace)
{
	size_t printed = 0;
	struct thread_trace *ttrace = thread__priv(thread);
	double ratio;

	if (ttrace == NULL)
		return 0;

	/* this thread's share of all events seen in the session */
	ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;

	printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
	printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
	printed += fprintf(fp, "%.1f%%", ratio);
	if (ttrace->pfmaj)
		printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
	if (ttrace->pfmin)
		printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
	if (trace->sched)
		printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
	else if (fputc('\n', fp) != EOF)
		++printed;

	printed += thread__dump_stats(ttrace, trace, fp);

	return printed;
}
2718
2719 static unsigned long thread__nr_events(struct thread_trace *ttrace)
2720 {
2721         return ttrace ? ttrace->nr_events : 0;
2722 }
2723
/*
 * rb_resort helper: re-sort the machine's threads rb_tree keyed by how many
 * events each thread had (via the thread_trace hanging off thread->priv),
 * used to order the end-of-run summary.
 */
DEFINE_RESORT_RB(threads, (thread__nr_events(a->thread->priv) < thread__nr_events(b->thread->priv)),
	struct thread *thread;
)
{
	entry->thread = rb_entry(nd, struct thread, rb_node);
}
2730
/*
 * Print the end-of-run summary: a header followed by one entry per thread,
 * ordered by number of events.  Returns the number of characters written.
 */
static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
{
	/* Re-sort trace->host's threads by event count, see DEFINE_RESORT_RB above. */
	DECLARE_RESORT_RB_MACHINE_THREADS(threads, trace->host);
	size_t printed = trace__fprintf_threads_header(fp);
	struct rb_node *nd;

	if (threads == NULL) {
		fprintf(fp, "%s", "Error sorting output by nr_events!\n");
		return 0;
	}

	resort_rb__for_each_entry(nd, threads)
		printed += trace__fprintf_thread(fp, threads_entry->thread, trace);

	resort_rb__delete(threads);

	return printed;
}
2749
2750 static int trace__set_duration(const struct option *opt, const char *str,
2751                                int unset __maybe_unused)
2752 {
2753         struct trace *trace = opt->value;
2754
2755         trace->duration_filter = atof(str);
2756         return 0;
2757 }
2758
2759 static int trace__set_filter_pids(const struct option *opt, const char *str,
2760                                   int unset __maybe_unused)
2761 {
2762         int ret = -1;
2763         size_t i;
2764         struct trace *trace = opt->value;
2765         /*
2766          * FIXME: introduce a intarray class, plain parse csv and create a
2767          * { int nr, int entries[] } struct...
2768          */
2769         struct intlist *list = intlist__new(str);
2770
2771         if (list == NULL)
2772                 return -1;
2773
2774         i = trace->filter_pids.nr = intlist__nr_entries(list) + 1;
2775         trace->filter_pids.entries = calloc(i, sizeof(pid_t));
2776
2777         if (trace->filter_pids.entries == NULL)
2778                 goto out;
2779
2780         trace->filter_pids.entries[0] = getpid();
2781
2782         for (i = 1; i < trace->filter_pids.nr; ++i)
2783                 trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i;
2784
2785         intlist__delete(list);
2786         ret = 0;
2787 out:
2788         return ret;
2789 }
2790
2791 static int trace__open_output(struct trace *trace, const char *filename)
2792 {
2793         struct stat st;
2794
2795         if (!stat(filename, &st) && st.st_size) {
2796                 char oldname[PATH_MAX];
2797
2798                 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2799                 unlink(oldname);
2800                 rename(filename, oldname);
2801         }
2802
2803         trace->output = fopen(filename, "w");
2804
2805         return trace->output == NULL ? -errno : 0;
2806 }
2807
2808 static int parse_pagefaults(const struct option *opt, const char *str,
2809                             int unset __maybe_unused)
2810 {
2811         int *trace_pgfaults = opt->value;
2812
2813         if (strcmp(str, "all") == 0)
2814                 *trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
2815         else if (strcmp(str, "maj") == 0)
2816                 *trace_pgfaults |= TRACE_PFMAJ;
2817         else if (strcmp(str, "min") == 0)
2818                 *trace_pgfaults |= TRACE_PFMIN;
2819         else
2820                 return -1;
2821
2822         return 0;
2823 }
2824
/* Set the same sample handler on every evsel in the list. */
static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler)
{
	struct perf_evsel *evsel;

	evlist__for_each_entry(evlist, evsel)
		evsel->handler = handler;
}
2832
2833 /*
2834  * XXX: Hackish, just splitting the combined -e+--event (syscalls
2835  * (raw_syscalls:{sys_{enter,exit}} + events (tracepoints, HW, SW, etc) to use
2836  * existing facilities unchanged (trace->ev_qualifier + parse_options()).
2837  *
2838  * It'd be better to introduce a parse_options() variant that would return a
2839  * list with the terms it didn't match to an event...
2840  */
2841 static int trace__parse_events_option(const struct option *opt, const char *str,
2842                                       int unset __maybe_unused)
2843 {
2844         struct trace *trace = (struct trace *)opt->value;
2845         const char *s = str;
2846         char *sep = NULL, *lists[2] = { NULL, NULL, };
2847         int len = strlen(str) + 1, err = -1, list, idx;
2848         char *strace_groups_dir = system_path(STRACE_GROUPS_DIR);
2849         char group_name[PATH_MAX];
2850
2851         if (strace_groups_dir == NULL)
2852                 return -1;
2853
2854         if (*s == '!') {
2855                 ++s;
2856                 trace->not_ev_qualifier = true;
2857         }
2858
2859         while (1) {
2860                 if ((sep = strchr(s, ',')) != NULL)
2861                         *sep = '\0';
2862
2863                 list = 0;
2864                 if (syscalltbl__id(trace->sctbl, s) >= 0 ||
2865                     syscalltbl__strglobmatch_first(trace->sctbl, s, &idx) >= 0) {
2866                         list = 1;
2867                 } else {
2868                         path__join(group_name, sizeof(group_name), strace_groups_dir, s);
2869                         if (access(group_name, R_OK) == 0)
2870                                 list = 1;
2871                 }
2872
2873                 if (lists[list]) {
2874                         sprintf(lists[list] + strlen(lists[list]), ",%s", s);
2875                 } else {
2876                         lists[list] = malloc(len);
2877                         if (lists[list] == NULL)
2878                                 goto out;
2879                         strcpy(lists[list], s);
2880                 }
2881
2882                 if (!sep)
2883                         break;
2884
2885                 *sep = ',';
2886                 s = sep + 1;
2887         }
2888
2889         if (lists[1] != NULL) {
2890                 struct strlist_config slist_config = {
2891                         .dirname = strace_groups_dir,
2892                 };
2893
2894                 trace->ev_qualifier = strlist__new(lists[1], &slist_config);
2895                 if (trace->ev_qualifier == NULL) {
2896                         fputs("Not enough memory to parse event qualifier", trace->output);
2897                         goto out;
2898                 }
2899
2900                 if (trace__validate_ev_qualifier(trace))
2901                         goto out;
2902         }
2903
2904         err = 0;
2905
2906         if (lists[0]) {
2907                 struct option o = OPT_CALLBACK('e', "event", &trace->evlist, "event",
2908                                                "event selector. use 'perf list' to list available events",
2909                                                parse_events_option);
2910                 err = parse_events_option(&o, lists[0], 0);
2911         }
2912 out:
2913         if (sep)
2914                 *sep = ',';
2915
2916         return err;
2917 }
2918
/*
 * Entry point for 'perf trace': parse options, set up the evlist and syscall
 * table, then either delegate to 'perf trace record', replay a perf.data
 * file (-i), or run a live trace.
 */
int cmd_trace(int argc, const char **argv)
{
	const char *trace_usage[] = {
		"perf trace [<options>] [<command>]",
		"perf trace [<options>] -- <command> [<options>]",
		"perf trace record [<options>] [<command>]",
		"perf trace record [<options>] -- <command> [<options>]",
		NULL
	};
	/*
	 * Defaults: strace-like syscall tracing to stderr; UINT_MAX/ULLONG_MAX
	 * act as "not set by the user" sentinels, resolved further below.
	 */
	struct trace trace = {
		.syscalls = {
			. max = -1,
		},
		.opts = {
			.target = {
				.uid	   = UINT_MAX,
				.uses_mmap = true,
			},
			.user_freq     = UINT_MAX,
			.user_interval = ULLONG_MAX,
			.no_buffering  = true,
			.mmap_pages    = UINT_MAX,
			.proc_map_timeout  = 500,
		},
		.output = stderr,
		.show_comm = true,
		.trace_syscalls = true,
		.kernel_syscallchains = false,
		.max_stack = UINT_MAX,
	};
	const char *output_name = NULL;
	const struct option trace_options[] = {
	OPT_CALLBACK('e', "event", &trace, "event",
		     "event/syscall selector. use 'perf list' to list available events",
		     trace__parse_events_option),
	OPT_BOOLEAN(0, "comm", &trace.show_comm,
		    "show the thread COMM next to its id"),
	OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
	OPT_CALLBACK(0, "expr", &trace, "expr", "list of syscalls/events to trace",
		     trace__parse_events_option),
	OPT_STRING('o', "output", &output_name, "file", "output file name"),
	OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
	OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
		    "trace events on existing process id"),
	OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
		    "trace events on existing thread id"),
	OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids",
		     "pids to filter (by the kernel)", trace__set_filter_pids),
	OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
		    "system-wide collection from all CPUs"),
	OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
		    "list of cpus to monitor"),
	OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
		    "child tasks do not inherit counters"),
	OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
		     "number of mmap data pages",
		     perf_evlist__parse_mmap_pages),
	OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
		   "user to profile"),
	OPT_CALLBACK(0, "duration", &trace, "float",
		     "show only events with duration > N.M ms",
		     trace__set_duration),
	OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
	OPT_INCR('v', "verbose", &verbose, "be more verbose"),
	OPT_BOOLEAN('T', "time", &trace.full_time,
		    "Show full timestamp, not time relative to first start"),
	OPT_BOOLEAN('s', "summary", &trace.summary_only,
		    "Show only syscall summary with statistics"),
	OPT_BOOLEAN('S', "with-summary", &trace.summary,
		    "Show all syscalls and summary with statistics"),
	OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
		     "Trace pagefaults", parse_pagefaults, "maj"),
	OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
	OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
	OPT_CALLBACK(0, "call-graph", &trace.opts,
		     "record_mode[,record_size]", record_callchain_help,
		     &record_parse_callchain_opt),
	OPT_BOOLEAN(0, "kernel-syscall-graph", &trace.kernel_syscallchains,
		    "Show the kernel callchains on the syscall exit path"),
	OPT_UINTEGER(0, "min-stack", &trace.min_stack,
		     "Set the minimum stack depth when parsing the callchain, "
		     "anything below the specified depth will be ignored."),
	OPT_UINTEGER(0, "max-stack", &trace.max_stack,
		     "Set the maximum stack depth when parsing the callchain, "
		     "anything beyond the specified depth will be ignored. "
		     "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)),
	OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
			"per thread proc mmap processing timeout in ms"),
	OPT_UINTEGER('D', "delay", &trace.opts.initial_delay,
		     "ms to wait before starting measurement after program "
		     "start"),
	OPT_END()
	};
	bool __maybe_unused max_stack_user_set = true;
	bool mmap_pages_user_set = true;
	const char * const trace_subcommands[] = { "record", NULL };
	int err;
	char bf[BUFSIZ];

	/* Dump our own stack on hard crashes, to help debugging perf itself. */
	signal(SIGSEGV, sighandler_dump_stack);
	signal(SIGFPE, sighandler_dump_stack);

	trace.evlist = perf_evlist__new();
	trace.sctbl = syscalltbl__new();

	if (trace.evlist == NULL || trace.sctbl == NULL) {
		pr_err("Not enough memory to run!\n");
		err = -ENOMEM;
		goto out;
	}

	argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
				 trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);

	err = bpf__setup_stdout(trace.evlist);
	if (err) {
		bpf__strerror_setup_stdout(trace.evlist, err, bf, sizeof(bf));
		pr_err("ERROR: Setup BPF stdout failed: %s\n", bf);
		goto out;
	}

	err = -1;

	/* Pagefault beautifying needs the faulting address and a timestamp. */
	if (trace.trace_pgfaults) {
		trace.opts.sample_address = true;
		trace.opts.sample_time = true;
	}

	/* Resolve the "not set by the user" sentinels from the initializer. */
	if (trace.opts.mmap_pages == UINT_MAX)
		mmap_pages_user_set = false;

	if (trace.max_stack == UINT_MAX) {
		trace.max_stack = input_name ? PERF_MAX_STACK_DEPTH : sysctl_perf_event_max_stack;
		max_stack_user_set = false;
	}

#ifdef HAVE_DWARF_UNWIND_SUPPORT
	/* --min/--max-stack without --call-graph implies dwarf callchains. */
	if ((trace.min_stack || max_stack_user_set) && !callchain_param.enabled && trace.trace_syscalls)
		record_opts__parse_callchain(&trace.opts, &callchain_param, "dwarf", false);
#endif

	if (callchain_param.enabled) {
		/* Callchains need more buffer space; root can afford bigger mmaps. */
		if (!mmap_pages_user_set && geteuid() == 0)
			trace.opts.mmap_pages = perf_event_mlock_kb_in_pages() * 4;

		symbol_conf.use_callchain = true;
	}

	if (trace.evlist->nr_entries > 0)
		evlist__set_evsel_handler(trace.evlist, trace__event_handler);

	if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
		return trace__record(&trace, argc-1, &argv[1]);

	/* summary_only implies summary option, but don't overwrite summary if set */
	if (trace.summary_only)
		trace.summary = trace.summary_only;

	if (!trace.trace_syscalls && !trace.trace_pgfaults &&
	    trace.evlist->nr_entries == 0 /* Was --events used? */) {
		pr_err("Please specify something to trace.\n");
		return -1;
	}

	if (!trace.trace_syscalls && trace.ev_qualifier) {
		pr_err("The -e option can't be used with --no-syscalls.\n");
		goto out;
	}

	if (output_name != NULL) {
		err = trace__open_output(&trace, output_name);
		if (err < 0) {
			perror("failed to create output file");
			goto out;
		}
	}

	/* Cache the "open" syscall id; used when beautifying its flags. */
	trace.open_id = syscalltbl__id(trace.sctbl, "open");

	err = target__validate(&trace.opts.target);
	if (err) {
		target__strerror(&trace.opts.target, err, bf, sizeof(bf));
		fprintf(trace.output, "%s", bf);
		goto out_close;
	}

	err = target__parse_uid(&trace.opts.target);
	if (err) {
		target__strerror(&trace.opts.target, err, bf, sizeof(bf));
		fprintf(trace.output, "%s", bf);
		goto out_close;
	}

	/* No workload and no target specified: trace the whole system. */
	if (!argc && target__none(&trace.opts.target))
		trace.opts.target.system_wide = true;

	if (input_name)
		err = trace__replay(&trace);
	else
		err = trace__run(&trace, argc, argv);

out_close:
	if (output_name != NULL)
		fclose(trace.output);
out:
	return err;
}