1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/notifier.h>
23 #include <linux/irqflags.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/panic_notifier.h>
43 #include <linux/poll.h>
44 #include <linux/nmi.h>
45 #include <linux/fs.h>
46 #include <linux/trace.h>
47 #include <linux/sched/clock.h>
48 #include <linux/sched/rt.h>
49 #include <linux/fsnotify.h>
50 #include <linux/irq_work.h>
51 #include <linux/workqueue.h>
52
53 #include "trace.h"
54 #include "trace_output.h"
55
56 /*
57  * On boot up, the ring buffer is set to the minimum size, so that
58  * we do not waste memory on systems that are not using tracing.
59  */
60 bool ring_buffer_expanded;
61
62 /*
63  * We need to change this state when a selftest is running.
64  * A selftest will look into the ring-buffer to count the
65  * entries inserted during the selftest, although some concurrent
66  * insertions into the ring-buffer, such as trace_printk, could occur
67  * at the same time, giving false positive or negative results.
68  */
69 static bool __read_mostly tracing_selftest_running;
70
71 /*
72  * If boot-time tracing including tracers/events via kernel cmdline
73  * is running, we do not want to run SELFTEST.
74  */
75 bool __read_mostly tracing_selftest_disabled;
76
77 #ifdef CONFIG_FTRACE_STARTUP_TEST
78 void __init disable_tracing_selftest(const char *reason)
79 {
80         if (!tracing_selftest_disabled) {
81                 tracing_selftest_disabled = true;
82                 pr_info("Ftrace startup test is disabled due to %s\n", reason);
83         }
84 }
85 #endif
86
87 /* Pipe tracepoints to printk */
88 struct trace_iterator *tracepoint_print_iter;
89 int tracepoint_printk;
90 static bool tracepoint_printk_stop_on_boot __initdata;
91 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
92
93 /* For tracers that don't implement custom flags */
94 static struct tracer_opt dummy_tracer_opt[] = {
95         { }
96 };
97
98 static int
99 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
100 {
101         return 0;
102 }
103
104 /*
105  * To prevent the comm cache from being overwritten when no
106  * tracing is active, only save the comm when a trace event
107  * occurred.
108  */
109 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
110
111 /*
112  * Kill all tracing for good (never come back).
113  * It is initialized to 1 but will be set to zero if the initialization
114  * of the tracer is successful. But that is the only place that sets
115  * this back to zero.
116  */
117 static int tracing_disabled = 1;
118
119 cpumask_var_t __read_mostly     tracing_buffer_mask;
120
121 /*
122  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
123  *
124  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
125  * is set, then ftrace_dump is called. This will output the contents
126  * of the ftrace buffers to the console.  This is very useful for
127  * capturing traces that lead to crashes and outputting them to a
128  * serial console.
129  *
130  * It is off by default, but you can enable it either by specifying
131  * "ftrace_dump_on_oops" on the kernel command line, or by setting
132  * /proc/sys/kernel/ftrace_dump_on_oops.
133  * Set it to 1 to dump the buffers of all CPUs.
134  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
135  */
136
137 enum ftrace_dump_mode ftrace_dump_on_oops;
138
139 /* When set, tracing will stop when a WARN*() is hit */
140 int __disable_trace_on_warning;
141
142 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
143 /* Map of enums to their values, for "eval_map" file */
144 struct trace_eval_map_head {
145         struct module                   *mod;
146         unsigned long                   length;
147 };
148
149 union trace_eval_map_item;
150
151 struct trace_eval_map_tail {
152         /*
153          * "end" is first and points to NULL as it must be different
154          * than "mod" or "eval_string"
155          */
156         union trace_eval_map_item       *next;
157         const char                      *end;   /* points to NULL */
158 };
159
160 static DEFINE_MUTEX(trace_eval_mutex);
161
162 /*
163  * The trace_eval_maps are saved in an array with two extra elements,
164  * one at the beginning, and one at the end. The beginning item contains
165  * the count of the saved maps (head.length), and the module they
166  * belong to if not built in (head.mod). The ending item contains a
167  * pointer to the next array of saved eval_map items.
168  */
169 union trace_eval_map_item {
170         struct trace_eval_map           map;
171         struct trace_eval_map_head      head;
172         struct trace_eval_map_tail      tail;
173 };
174
175 static union trace_eval_map_item *trace_eval_maps;
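
/*
 * Illustrative sketch (not part of the original file): for a module that
 * registered three eval maps, the saved array described by the comment
 * above would be laid out as:
 *
 *	trace_eval_maps -> [0] head  (head.mod, head.length == 3)
 *	                   [1] map   (first trace_eval_map)
 *	                   [2] map
 *	                   [3] map
 *	                   [4] tail  (tail.next -> next saved array, or NULL)
 *
 * The count and contents here are only an example of the layout.
 */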
176 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
177
178 int tracing_set_tracer(struct trace_array *tr, const char *buf);
179 static void ftrace_trace_userstack(struct trace_array *tr,
180                                    struct trace_buffer *buffer,
181                                    unsigned int trace_ctx);
182
183 #define MAX_TRACER_SIZE         100
184 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
185 static char *default_bootup_tracer;
186
187 static bool allocate_snapshot;
188 static bool snapshot_at_boot;
189
190 static int __init set_cmdline_ftrace(char *str)
191 {
192         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
193         default_bootup_tracer = bootup_tracer_buf;
194         /* We are using ftrace early, expand it */
195         ring_buffer_expanded = true;
196         return 1;
197 }
198 __setup("ftrace=", set_cmdline_ftrace);
199
200 static int __init set_ftrace_dump_on_oops(char *str)
201 {
202         if (*str++ != '=' || !*str || !strcmp("1", str)) {
203                 ftrace_dump_on_oops = DUMP_ALL;
204                 return 1;
205         }
206
207         if (!strcmp("orig_cpu", str) || !strcmp("2", str)) {
208                 ftrace_dump_on_oops = DUMP_ORIG;
209                 return 1;
210         }
211
212         return 0;
213 }
214 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
215
216 static int __init stop_trace_on_warning(char *str)
217 {
218         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
219                 __disable_trace_on_warning = 1;
220         return 1;
221 }
222 __setup("traceoff_on_warning", stop_trace_on_warning);
223
224 static int __init boot_alloc_snapshot(char *str)
225 {
226         allocate_snapshot = true;
227         /* We also need the main ring buffer expanded */
228         ring_buffer_expanded = true;
229         return 1;
230 }
231 __setup("alloc_snapshot", boot_alloc_snapshot);
232
233
234 static int __init boot_snapshot(char *str)
235 {
236         snapshot_at_boot = true;
237         boot_alloc_snapshot(str);
238         return 1;
239 }
240 __setup("ftrace_boot_snapshot", boot_snapshot);
241
242
243 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
244
245 static int __init set_trace_boot_options(char *str)
246 {
247         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
248         return 1;
249 }
250 __setup("trace_options=", set_trace_boot_options);
251
252 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
253 static char *trace_boot_clock __initdata;
254
255 static int __init set_trace_boot_clock(char *str)
256 {
257         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
258         trace_boot_clock = trace_boot_clock_buf;
259         return 1;
260 }
261 __setup("trace_clock=", set_trace_boot_clock);
262
263 static int __init set_tracepoint_printk(char *str)
264 {
265         /* Ignore the "tp_printk_stop_on_boot" param */
266         if (*str == '_')
267                 return 0;
268
269         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
270                 tracepoint_printk = 1;
271         return 1;
272 }
273 __setup("tp_printk", set_tracepoint_printk);
274
275 static int __init set_tracepoint_printk_stop(char *str)
276 {
277         tracepoint_printk_stop_on_boot = true;
278         return 1;
279 }
280 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
281
282 unsigned long long ns2usecs(u64 nsec)
283 {
284         nsec += 500;
285         do_div(nsec, 1000);
286         return nsec;
287 }
288
289 static void
290 trace_process_export(struct trace_export *export,
291                struct ring_buffer_event *event, int flag)
292 {
293         struct trace_entry *entry;
294         unsigned int size = 0;
295
296         if (export->flags & flag) {
297                 entry = ring_buffer_event_data(event);
298                 size = ring_buffer_event_length(event);
299                 export->write(export, entry, size);
300         }
301 }
302
303 static DEFINE_MUTEX(ftrace_export_lock);
304
305 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
306
307 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
308 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
309 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
310
311 static inline void ftrace_exports_enable(struct trace_export *export)
312 {
313         if (export->flags & TRACE_EXPORT_FUNCTION)
314                 static_branch_inc(&trace_function_exports_enabled);
315
316         if (export->flags & TRACE_EXPORT_EVENT)
317                 static_branch_inc(&trace_event_exports_enabled);
318
319         if (export->flags & TRACE_EXPORT_MARKER)
320                 static_branch_inc(&trace_marker_exports_enabled);
321 }
322
323 static inline void ftrace_exports_disable(struct trace_export *export)
324 {
325         if (export->flags & TRACE_EXPORT_FUNCTION)
326                 static_branch_dec(&trace_function_exports_enabled);
327
328         if (export->flags & TRACE_EXPORT_EVENT)
329                 static_branch_dec(&trace_event_exports_enabled);
330
331         if (export->flags & TRACE_EXPORT_MARKER)
332                 static_branch_dec(&trace_marker_exports_enabled);
333 }
334
335 static void ftrace_exports(struct ring_buffer_event *event, int flag)
336 {
337         struct trace_export *export;
338
339         preempt_disable_notrace();
340
341         export = rcu_dereference_raw_check(ftrace_exports_list);
342         while (export) {
343                 trace_process_export(export, event, flag);
344                 export = rcu_dereference_raw_check(export->next);
345         }
346
347         preempt_enable_notrace();
348 }
349
350 static inline void
351 add_trace_export(struct trace_export **list, struct trace_export *export)
352 {
353         rcu_assign_pointer(export->next, *list);
354         /*
355          * We are adding export to the list, but another
356          * CPU might be walking that list. We need to make sure
357          * the export->next pointer is valid before another CPU sees
358          * the export pointer included in the list.
359          */
360         rcu_assign_pointer(*list, export);
361 }
362
363 static inline int
364 rm_trace_export(struct trace_export **list, struct trace_export *export)
365 {
366         struct trace_export **p;
367
368         for (p = list; *p != NULL; p = &(*p)->next)
369                 if (*p == export)
370                         break;
371
372         if (*p != export)
373                 return -1;
374
375         rcu_assign_pointer(*p, (*p)->next);
376
377         return 0;
378 }
379
380 static inline void
381 add_ftrace_export(struct trace_export **list, struct trace_export *export)
382 {
383         ftrace_exports_enable(export);
384
385         add_trace_export(list, export);
386 }
387
388 static inline int
389 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
390 {
391         int ret;
392
393         ret = rm_trace_export(list, export);
394         ftrace_exports_disable(export);
395
396         return ret;
397 }
398
399 int register_ftrace_export(struct trace_export *export)
400 {
401         if (WARN_ON_ONCE(!export->write))
402                 return -1;
403
404         mutex_lock(&ftrace_export_lock);
405
406         add_ftrace_export(&ftrace_exports_list, export);
407
408         mutex_unlock(&ftrace_export_lock);
409
410         return 0;
411 }
412 EXPORT_SYMBOL_GPL(register_ftrace_export);
413
414 int unregister_ftrace_export(struct trace_export *export)
415 {
416         int ret;
417
418         mutex_lock(&ftrace_export_lock);
419
420         ret = rm_ftrace_export(&ftrace_exports_list, export);
421
422         mutex_unlock(&ftrace_export_lock);
423
424         return ret;
425 }
426 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
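
/*
 * Illustrative sketch (not part of this file): a module can mirror trace
 * data to its own sink by registering a trace_export. The callback name,
 * the sink and the flag choice below are only an example; see
 * include/linux/trace.h for the exact trace_export definition and the
 * TRACE_EXPORT_* flags.
 *
 *	static void my_export_write(struct trace_export *export,
 *				    const void *entry, unsigned int size)
 *	{
 *		forward_to_my_sink(entry, size);	(hypothetical helper)
 *	}
 *
 *	static struct trace_export my_export = {
 *		.write	= my_export_write,
 *		.flags	= TRACE_EXPORT_FUNCTION,
 *	};
 *
 *	register_ftrace_export(&my_export);
 *	...
 *	unregister_ftrace_export(&my_export);
 */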
427
428 /* trace_flags holds trace_options default values */
429 #define TRACE_DEFAULT_FLAGS                                             \
430         (FUNCTION_DEFAULT_FLAGS |                                       \
431          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
432          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
433          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
434          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |                     \
435          TRACE_ITER_HASH_PTR)
436
437 /* trace_options that are only supported by global_trace */
438 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
439                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
440
441 /* trace_flags that are default zero for instances */
442 #define ZEROED_TRACE_FLAGS \
443         (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
444
445 /*
446  * The global_trace is the descriptor that holds the top-level tracing
447  * buffers for the live tracing.
448  */
449 static struct trace_array global_trace = {
450         .trace_flags = TRACE_DEFAULT_FLAGS,
451 };
452
453 LIST_HEAD(ftrace_trace_arrays);
454
455 int trace_array_get(struct trace_array *this_tr)
456 {
457         struct trace_array *tr;
458         int ret = -ENODEV;
459
460         mutex_lock(&trace_types_lock);
461         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
462                 if (tr == this_tr) {
463                         tr->ref++;
464                         ret = 0;
465                         break;
466                 }
467         }
468         mutex_unlock(&trace_types_lock);
469
470         return ret;
471 }
472
473 static void __trace_array_put(struct trace_array *this_tr)
474 {
475         WARN_ON(!this_tr->ref);
476         this_tr->ref--;
477 }
478
479 /**
480  * trace_array_put - Decrement the reference counter for this trace array.
481  * @this_tr : pointer to the trace array
482  *
483  * NOTE: Use this when we no longer need the trace array returned by
484  * trace_array_get_by_name(). This ensures the trace array can be later
485  * destroyed.
486  *
487  */
488 void trace_array_put(struct trace_array *this_tr)
489 {
490         if (!this_tr)
491                 return;
492
493         mutex_lock(&trace_types_lock);
494         __trace_array_put(this_tr);
495         mutex_unlock(&trace_types_lock);
496 }
497 EXPORT_SYMBOL_GPL(trace_array_put);
498
499 int tracing_check_open_get_tr(struct trace_array *tr)
500 {
501         int ret;
502
503         ret = security_locked_down(LOCKDOWN_TRACEFS);
504         if (ret)
505                 return ret;
506
507         if (tracing_disabled)
508                 return -ENODEV;
509
510         if (tr && trace_array_get(tr) < 0)
511                 return -ENODEV;
512
513         return 0;
514 }
515
516 int call_filter_check_discard(struct trace_event_call *call, void *rec,
517                               struct trace_buffer *buffer,
518                               struct ring_buffer_event *event)
519 {
520         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
521             !filter_match_preds(call->filter, rec)) {
522                 __trace_event_discard_commit(buffer, event);
523                 return 1;
524         }
525
526         return 0;
527 }
528
529 /**
530  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
531  * @filtered_pids: The list of pids to check
532  * @search_pid: The PID to find in @filtered_pids
533  *
534  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
535  */
536 bool
537 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
538 {
539         return trace_pid_list_is_set(filtered_pids, search_pid);
540 }
541
542 /**
543  * trace_ignore_this_task - should a task be ignored for tracing
544  * @filtered_pids: The list of pids to check
545  * @filtered_no_pids: The list of pids not to be traced
546  * @task: The task that should be ignored if not filtered
547  *
548  * Checks if @task should be traced or not from @filtered_pids.
549  * Returns true if @task should *NOT* be traced.
550  * Returns false if @task should be traced.
551  */
552 bool
553 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
554                        struct trace_pid_list *filtered_no_pids,
555                        struct task_struct *task)
556 {
557         /*
558          * If filtered_no_pids is not empty, and the task's pid is listed
559          * in filtered_no_pids, then return true.
560          * Otherwise, if filtered_pids is empty, that means we can
561          * trace all tasks. If it has content, then only trace pids
562          * within filtered_pids.
563          */
564
565         return (filtered_pids &&
566                 !trace_find_filtered_pid(filtered_pids, task->pid)) ||
567                 (filtered_no_pids &&
568                  trace_find_filtered_pid(filtered_no_pids, task->pid));
569 }
570
571 /**
572  * trace_filter_add_remove_task - Add or remove a task from a pid_list
573  * @pid_list: The list to modify
574  * @self: The current task for fork or NULL for exit
575  * @task: The task to add or remove
576  *
577  * If adding a task, if @self is defined, the task is only added if @self
578  * is also included in @pid_list. This happens on fork and tasks should
579  * only be added when the parent is listed. If @self is NULL, then the
580  * @task pid will be removed from the list, which would happen on exit
581  * of a task.
582  */
583 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
584                                   struct task_struct *self,
585                                   struct task_struct *task)
586 {
587         if (!pid_list)
588                 return;
589
590         /* For forks, we only add if the forking task is listed */
591         if (self) {
592                 if (!trace_find_filtered_pid(pid_list, self->pid))
593                         return;
594         }
595
596         /* "self" is set for forks, and NULL for exits */
597         if (self)
598                 trace_pid_list_set(pid_list, task->pid);
599         else
600                 trace_pid_list_clear(pid_list, task->pid);
601 }
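
/*
 * For illustration: callers typically hook this to the sched_process_fork
 * tracepoint with @self = the parent and @task = the child, and to the
 * sched_process_exit tracepoint with @self = NULL and @task = the exiting
 * task, so that the pid list follows forks and exits of filtered tasks.
 */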
602
603 /**
604  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
605  * @pid_list: The pid list to show
606  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
607  * @pos: The position of the file
608  *
609  * This is used by the seq_file "next" operation to iterate the pids
610  * listed in a trace_pid_list structure.
611  *
612  * Returns the pid+1 as we want to display pid of zero, but NULL would
613  * stop the iteration.
614  */
615 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
616 {
617         long pid = (unsigned long)v;
618         unsigned int next;
619
620         (*pos)++;
621
622         /* pid already is +1 of the actual previous bit */
623         if (trace_pid_list_next(pid_list, pid, &next) < 0)
624                 return NULL;
625
626         pid = next;
627
628         /* Return pid + 1 to allow zero to be represented */
629         return (void *)(pid + 1);
630 }
631
632 /**
633  * trace_pid_start - Used for seq_file to start reading pid lists
634  * @pid_list: The pid list to show
635  * @pos: The position of the file
636  *
637  * This is used by seq_file "start" operation to start the iteration
638  * of listing pids.
639  *
640  * Returns the pid+1 as we want to display pid of zero, but NULL would
641  * stop the iteration.
642  */
643 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
644 {
645         unsigned long pid;
646         unsigned int first;
647         loff_t l = 0;
648
649         if (trace_pid_list_first(pid_list, &first) < 0)
650                 return NULL;
651
652         pid = first;
653
654         /* Return pid + 1 so that zero can be the exit value */
655         for (pid++; pid && l < *pos;
656              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
657                 ;
658         return (void *)pid;
659 }
660
661 /**
662  * trace_pid_show - show the current pid in seq_file processing
663  * @m: The seq_file structure to write into
664  * @v: A void pointer of the pid (+1) value to display
665  *
666  * Can be directly used by seq_file operations to display the current
667  * pid value.
668  */
669 int trace_pid_show(struct seq_file *m, void *v)
670 {
671         unsigned long pid = (unsigned long)v - 1;
672
673         seq_printf(m, "%lu\n", pid);
674         return 0;
675 }
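
/*
 * Illustrative sketch: trace_pid_start(), trace_pid_next() and
 * trace_pid_show() are meant to back a seq_file, roughly as in the
 * (hypothetical) wiring below, where p_start() looks up the pid_list
 * and calls trace_pid_start(), and stop/refcount handling is omitted:
 *
 *	static void *p_next(struct seq_file *m, void *v, loff_t *pos)
 *	{
 *		return trace_pid_next(m->private, v, pos);
 *	}
 *
 *	static const struct seq_operations show_pid_seq_ops = {
 *		.start	= p_start,
 *		.next	= p_next,
 *		.stop	= p_stop,
 *		.show	= trace_pid_show,
 *	};
 */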
676
677 /* 128 should be much more than enough */
678 #define PID_BUF_SIZE            127
679
680 int trace_pid_write(struct trace_pid_list *filtered_pids,
681                     struct trace_pid_list **new_pid_list,
682                     const char __user *ubuf, size_t cnt)
683 {
684         struct trace_pid_list *pid_list;
685         struct trace_parser parser;
686         unsigned long val;
687         int nr_pids = 0;
688         ssize_t read = 0;
689         ssize_t ret;
690         loff_t pos;
691         pid_t pid;
692
693         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
694                 return -ENOMEM;
695
696         /*
697          * Always recreate a new array. The write is an all-or-nothing
698          * operation: when the user adds new pids, a new array is built,
699          * so if the operation fails, the current list is left
700          * unmodified.
701          */
702         pid_list = trace_pid_list_alloc();
703         if (!pid_list) {
704                 trace_parser_put(&parser);
705                 return -ENOMEM;
706         }
707
708         if (filtered_pids) {
709                 /* copy the current bits to the new max */
710                 ret = trace_pid_list_first(filtered_pids, &pid);
711                 while (!ret) {
712                         trace_pid_list_set(pid_list, pid);
713                         ret = trace_pid_list_next(filtered_pids, pid + 1, &pid);
714                         nr_pids++;
715                 }
716         }
717
718         ret = 0;
719         while (cnt > 0) {
720
721                 pos = 0;
722
723                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
724                 if (ret < 0 || !trace_parser_loaded(&parser))
725                         break;
726
727                 read += ret;
728                 ubuf += ret;
729                 cnt -= ret;
730
731                 ret = -EINVAL;
732                 if (kstrtoul(parser.buffer, 0, &val))
733                         break;
734
735                 pid = (pid_t)val;
736
737                 if (trace_pid_list_set(pid_list, pid) < 0) {
738                         ret = -1;
739                         break;
740                 }
741                 nr_pids++;
742
743                 trace_parser_clear(&parser);
744                 ret = 0;
745         }
746         trace_parser_put(&parser);
747
748         if (ret < 0) {
749                 trace_pid_list_free(pid_list);
750                 return ret;
751         }
752
753         if (!nr_pids) {
754                 /* Cleared the list of pids */
755                 trace_pid_list_free(pid_list);
756                 read = ret;
757                 pid_list = NULL;
758         }
759
760         *new_pid_list = pid_list;
761
762         return read;
763 }
764
765 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
766 {
767         u64 ts;
768
769         /* Early boot up does not have a buffer yet */
770         if (!buf->buffer)
771                 return trace_clock_local();
772
773         ts = ring_buffer_time_stamp(buf->buffer);
774         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
775
776         return ts;
777 }
778
779 u64 ftrace_now(int cpu)
780 {
781         return buffer_ftrace_now(&global_trace.array_buffer, cpu);
782 }
783
784 /**
785  * tracing_is_enabled - Show if global_trace has been enabled
786  *
787  * Shows if the global trace has been enabled or not. It uses the
788  * mirror flag "buffer_disabled" to be used in fast paths such as for
789  * the irqsoff tracer. But it may be inaccurate due to races. If you
790  * need to know the accurate state, use tracing_is_on() which is a little
791  * slower, but accurate.
792  */
793 int tracing_is_enabled(void)
794 {
795         /*
796          * For quick access (irqsoff uses this in fast path), just
797          * return the mirror variable of the state of the ring buffer.
798          * It's a little racy, but we don't really care.
799          */
800         smp_rmb();
801         return !global_trace.buffer_disabled;
802 }
803
804 /*
805  * trace_buf_size is the size in bytes that is allocated
806  * for a buffer. Note, the number of bytes is always rounded
807  * to page size.
808  *
809  * This number is purposely set to a low number of 16384.
810  * If a dump on oops happens, it will be much appreciated
811  * not to have to wait for all that output. Anyway, this can be
812  * configured at boot time and at run time.
813  */
814 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
815
816 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
817
818 /* trace_types holds a link list of available tracers. */
819 static struct tracer            *trace_types __read_mostly;
820
821 /*
822  * trace_types_lock is used to protect the trace_types list.
823  */
824 DEFINE_MUTEX(trace_types_lock);
825
826 /*
827  * Serialize access to the ring buffer.
828  *
829  * The ring buffer serializes readers, but that is only low-level protection.
830  * The validity of the events (which are returned by ring_buffer_peek() etc.)
831  * is not protected by the ring buffer.
832  *
833  * The content of events may become garbage if we allow other processes to
834  * consume these events concurrently:
835  *   A) the page of the consumed events may become a normal page
836  *      (not a reader page) in the ring buffer, and this page will be rewritten
837  *      by the events producer.
838  *   B) the page of the consumed events may become a page for splice_read,
839  *      and this page will be returned to the system.
840  *
841  * These primitives allow multiple processes to access different CPU ring
842  * buffers concurrently.
843  *
844  * These primitives don't distinguish read-only from read-consume access.
845  * Multiple read-only accesses are also serialized.
846  */
847
848 #ifdef CONFIG_SMP
849 static DECLARE_RWSEM(all_cpu_access_lock);
850 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
851
852 static inline void trace_access_lock(int cpu)
853 {
854         if (cpu == RING_BUFFER_ALL_CPUS) {
855                 /* gain it for accessing the whole ring buffer. */
856                 down_write(&all_cpu_access_lock);
857         } else {
858                 /* gain it for accessing a cpu ring buffer. */
859
860                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
861                 down_read(&all_cpu_access_lock);
862
863                 /* Secondly block other access to this @cpu ring buffer. */
864                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
865         }
866 }
867
868 static inline void trace_access_unlock(int cpu)
869 {
870         if (cpu == RING_BUFFER_ALL_CPUS) {
871                 up_write(&all_cpu_access_lock);
872         } else {
873                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
874                 up_read(&all_cpu_access_lock);
875         }
876 }
877
878 static inline void trace_access_lock_init(void)
879 {
880         int cpu;
881
882         for_each_possible_cpu(cpu)
883                 mutex_init(&per_cpu(cpu_access_lock, cpu));
884 }
885
886 #else
887
888 static DEFINE_MUTEX(access_lock);
889
890 static inline void trace_access_lock(int cpu)
891 {
892         (void)cpu;
893         mutex_lock(&access_lock);
894 }
895
896 static inline void trace_access_unlock(int cpu)
897 {
898         (void)cpu;
899         mutex_unlock(&access_lock);
900 }
901
902 static inline void trace_access_lock_init(void)
903 {
904 }
905
906 #endif
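
/*
 * Usage sketch (illustration): a consuming reader of a single CPU buffer
 * brackets its accesses with
 *
 *	trace_access_lock(cpu);
 *	... consume events from that cpu's buffer ...
 *	trace_access_unlock(cpu);
 *
 * while passing RING_BUFFER_ALL_CPUS takes exclusive access to every
 * per-cpu buffer at once.
 */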
907
908 #ifdef CONFIG_STACKTRACE
909 static void __ftrace_trace_stack(struct trace_buffer *buffer,
910                                  unsigned int trace_ctx,
911                                  int skip, struct pt_regs *regs);
912 static inline void ftrace_trace_stack(struct trace_array *tr,
913                                       struct trace_buffer *buffer,
914                                       unsigned int trace_ctx,
915                                       int skip, struct pt_regs *regs);
916
917 #else
918 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
919                                         unsigned int trace_ctx,
920                                         int skip, struct pt_regs *regs)
921 {
922 }
923 static inline void ftrace_trace_stack(struct trace_array *tr,
924                                       struct trace_buffer *buffer,
925                                       unsigned long trace_ctx,
926                                       int skip, struct pt_regs *regs)
927 {
928 }
929
930 #endif
931
932 static __always_inline void
933 trace_event_setup(struct ring_buffer_event *event,
934                   int type, unsigned int trace_ctx)
935 {
936         struct trace_entry *ent = ring_buffer_event_data(event);
937
938         tracing_generic_entry_update(ent, type, trace_ctx);
939 }
940
941 static __always_inline struct ring_buffer_event *
942 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
943                           int type,
944                           unsigned long len,
945                           unsigned int trace_ctx)
946 {
947         struct ring_buffer_event *event;
948
949         event = ring_buffer_lock_reserve(buffer, len);
950         if (event != NULL)
951                 trace_event_setup(event, type, trace_ctx);
952
953         return event;
954 }
955
956 void tracer_tracing_on(struct trace_array *tr)
957 {
958         if (tr->array_buffer.buffer)
959                 ring_buffer_record_on(tr->array_buffer.buffer);
960         /*
961          * This flag is looked at when buffers haven't been allocated
962          * yet, or by some tracers (like irqsoff), that just want to
963          * know if the ring buffer has been disabled, but it can handle
964          * races where it gets disabled but we still do a record.
965          * As the check is in the fast path of the tracers, it is more
966          * important to be fast than accurate.
967          */
968         tr->buffer_disabled = 0;
969         /* Make the flag seen by readers */
970         smp_wmb();
971 }
972
973 /**
974  * tracing_on - enable tracing buffers
975  *
976  * This function enables tracing buffers that may have been
977  * disabled with tracing_off.
978  */
979 void tracing_on(void)
980 {
981         tracer_tracing_on(&global_trace);
982 }
983 EXPORT_SYMBOL_GPL(tracing_on);
984
985
986 static __always_inline void
987 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
988 {
989         __this_cpu_write(trace_taskinfo_save, true);
990
991         /* If this is the temp buffer, we need to commit fully */
992         if (this_cpu_read(trace_buffered_event) == event) {
993                 /* Length is in event->array[0] */
994                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
995                 /* Release the temp buffer */
996                 this_cpu_dec(trace_buffered_event_cnt);
997                 /* ring_buffer_unlock_commit() enables preemption */
998                 preempt_enable_notrace();
999         } else
1000                 ring_buffer_unlock_commit(buffer, event);
1001 }
1002
1003 /**
1004  * __trace_puts - write a constant string into the trace buffer.
1005  * @ip:    The address of the caller
1006  * @str:   The constant string to write
1007  * @size:  The size of the string.
1008  */
1009 int __trace_puts(unsigned long ip, const char *str, int size)
1010 {
1011         struct ring_buffer_event *event;
1012         struct trace_buffer *buffer;
1013         struct print_entry *entry;
1014         unsigned int trace_ctx;
1015         int alloc;
1016
1017         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1018                 return 0;
1019
1020         if (unlikely(tracing_selftest_running || tracing_disabled))
1021                 return 0;
1022
1023         alloc = sizeof(*entry) + size + 2; /* possible \n added */
1024
1025         trace_ctx = tracing_gen_ctx();
1026         buffer = global_trace.array_buffer.buffer;
1027         ring_buffer_nest_start(buffer);
1028         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1029                                             trace_ctx);
1030         if (!event) {
1031                 size = 0;
1032                 goto out;
1033         }
1034
1035         entry = ring_buffer_event_data(event);
1036         entry->ip = ip;
1037
1038         memcpy(&entry->buf, str, size);
1039
1040         /* Add a newline if necessary */
1041         if (entry->buf[size - 1] != '\n') {
1042                 entry->buf[size] = '\n';
1043                 entry->buf[size + 1] = '\0';
1044         } else
1045                 entry->buf[size] = '\0';
1046
1047         __buffer_unlock_commit(buffer, event);
1048         ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1049  out:
1050         ring_buffer_nest_end(buffer);
1051         return size;
1052 }
1053 EXPORT_SYMBOL_GPL(__trace_puts);
1054
1055 /**
1056  * __trace_bputs - write the pointer to a constant string into trace buffer
1057  * @ip:    The address of the caller
1058  * @str:   The constant string to write into the buffer
1059  */
1060 int __trace_bputs(unsigned long ip, const char *str)
1061 {
1062         struct ring_buffer_event *event;
1063         struct trace_buffer *buffer;
1064         struct bputs_entry *entry;
1065         unsigned int trace_ctx;
1066         int size = sizeof(struct bputs_entry);
1067         int ret = 0;
1068
1069         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1070                 return 0;
1071
1072         if (unlikely(tracing_selftest_running || tracing_disabled))
1073                 return 0;
1074
1075         trace_ctx = tracing_gen_ctx();
1076         buffer = global_trace.array_buffer.buffer;
1077
1078         ring_buffer_nest_start(buffer);
1079         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1080                                             trace_ctx);
1081         if (!event)
1082                 goto out;
1083
1084         entry = ring_buffer_event_data(event);
1085         entry->ip                       = ip;
1086         entry->str                      = str;
1087
1088         __buffer_unlock_commit(buffer, event);
1089         ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1090
1091         ret = 1;
1092  out:
1093         ring_buffer_nest_end(buffer);
1094         return ret;
1095 }
1096 EXPORT_SYMBOL_GPL(__trace_bputs);
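
/*
 * Note for illustration: kernel code normally does not call __trace_puts()
 * or __trace_bputs() directly; the trace_puts() macro picks between them
 * depending on whether the string is a compile-time constant, e.g.:
 *
 *	trace_puts("reached the slow path\n");
 */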
1097
1098 #ifdef CONFIG_TRACER_SNAPSHOT
1099 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1100                                            void *cond_data)
1101 {
1102         struct tracer *tracer = tr->current_trace;
1103         unsigned long flags;
1104
1105         if (in_nmi()) {
1106                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1107                 internal_trace_puts("*** snapshot is being ignored        ***\n");
1108                 return;
1109         }
1110
1111         if (!tr->allocated_snapshot) {
1112                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
1113                 internal_trace_puts("*** stopping trace here!   ***\n");
1114                 tracing_off();
1115                 return;
1116         }
1117
1118         /* Note, snapshot can not be used when the tracer uses it */
1119         if (tracer->use_max_tr) {
1120                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
1121                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
1122                 return;
1123         }
1124
1125         local_irq_save(flags);
1126         update_max_tr(tr, current, smp_processor_id(), cond_data);
1127         local_irq_restore(flags);
1128 }
1129
1130 void tracing_snapshot_instance(struct trace_array *tr)
1131 {
1132         tracing_snapshot_instance_cond(tr, NULL);
1133 }
1134
1135 /**
1136  * tracing_snapshot - take a snapshot of the current buffer.
1137  *
1138  * This causes a swap between the snapshot buffer and the current live
1139  * tracing buffer. You can use this to take snapshots of the live
1140  * trace when some condition is triggered, but continue to trace.
1141  *
1142  * Note, make sure to allocate the snapshot either with
1143  * tracing_snapshot_alloc(), or manually with:
1144  *   echo 1 > /sys/kernel/debug/tracing/snapshot
1145  *
1146  * If the snapshot buffer is not allocated, it will stop tracing.
1147  * Basically making a permanent snapshot.
1148  */
1149 void tracing_snapshot(void)
1150 {
1151         struct trace_array *tr = &global_trace;
1152
1153         tracing_snapshot_instance(tr);
1154 }
1155 EXPORT_SYMBOL_GPL(tracing_snapshot);
1156
1157 /**
1158  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1159  * @tr:         The tracing instance to snapshot
1160  * @cond_data:  The data to be tested conditionally, and possibly saved
1161  *
1162  * This is the same as tracing_snapshot() except that the snapshot is
1163  * conditional - the snapshot will only happen if the
1164  * cond_snapshot.update() implementation receiving the cond_data
1165  * returns true, which means that the trace array's cond_snapshot
1166  * update() operation used the cond_data to determine whether the
1167  * snapshot should be taken, and if it was, presumably saved it along
1168  * with the snapshot.
1169  */
1170 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1171 {
1172         tracing_snapshot_instance_cond(tr, cond_data);
1173 }
1174 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1175
1176 /**
1177  * tracing_cond_snapshot_data - get the user data associated with a snapshot
1178  * @tr:         The tracing instance
1179  *
1180  * When the user enables a conditional snapshot using
1181  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1182  * with the snapshot.  This accessor is used to retrieve it.
1183  *
1184  * Should not be called from cond_snapshot.update(), since it takes
1185  * the tr->max_lock lock, which the code calling
1186  * cond_snapshot.update() already holds.
1187  *
1188  * Returns the cond_data associated with the trace array's snapshot.
1189  */
1190 void *tracing_cond_snapshot_data(struct trace_array *tr)
1191 {
1192         void *cond_data = NULL;
1193
1194         arch_spin_lock(&tr->max_lock);
1195
1196         if (tr->cond_snapshot)
1197                 cond_data = tr->cond_snapshot->cond_data;
1198
1199         arch_spin_unlock(&tr->max_lock);
1200
1201         return cond_data;
1202 }
1203 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1204
1205 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1206                                         struct array_buffer *size_buf, int cpu_id);
1207 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1208
1209 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1210 {
1211         int ret;
1212
1213         if (!tr->allocated_snapshot) {
1214
1215                 /* allocate spare buffer */
1216                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1217                                    &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1218                 if (ret < 0)
1219                         return ret;
1220
1221                 tr->allocated_snapshot = true;
1222         }
1223
1224         return 0;
1225 }
1226
1227 static void free_snapshot(struct trace_array *tr)
1228 {
1229         /*
1230          * We don't free the ring buffer; instead, we resize it, because
1231          * the max_tr ring buffer has some state (e.g. ring->clock) and
1232          * we want to preserve it.
1233          */
1234         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1235         set_buffer_entries(&tr->max_buffer, 1);
1236         tracing_reset_online_cpus(&tr->max_buffer);
1237         tr->allocated_snapshot = false;
1238 }
1239
1240 /**
1241  * tracing_alloc_snapshot - allocate snapshot buffer.
1242  *
1243  * This only allocates the snapshot buffer if it isn't already
1244  * allocated - it doesn't also take a snapshot.
1245  *
1246  * This is meant to be used in cases where the snapshot buffer needs
1247  * to be set up for events that can't sleep but need to be able to
1248  * trigger a snapshot.
1249  */
1250 int tracing_alloc_snapshot(void)
1251 {
1252         struct trace_array *tr = &global_trace;
1253         int ret;
1254
1255         ret = tracing_alloc_snapshot_instance(tr);
1256         WARN_ON(ret < 0);
1257
1258         return ret;
1259 }
1260 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1261
1262 /**
1263  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1264  *
1265  * This is similar to tracing_snapshot(), but it will allocate the
1266  * snapshot buffer if it isn't already allocated. Use this only
1267  * where it is safe to sleep, as the allocation may sleep.
1268  *
1269  * This causes a swap between the snapshot buffer and the current live
1270  * tracing buffer. You can use this to take snapshots of the live
1271  * trace when some condition is triggered, but continue to trace.
1272  */
1273 void tracing_snapshot_alloc(void)
1274 {
1275         int ret;
1276
1277         ret = tracing_alloc_snapshot();
1278         if (ret < 0)
1279                 return;
1280
1281         tracing_snapshot();
1282 }
1283 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
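
/*
 * Usage sketch (illustration only, hypothetical condition): a caller that
 * wants to capture the trace around a rare event can allocate the spare
 * buffer up front from sleepable context and then trigger the snapshot
 * from a context that cannot sleep (but not from NMI):
 *
 *	tracing_alloc_snapshot();
 *	...
 *	if (rare_condition)
 *		tracing_snapshot();
 *
 * or simply call tracing_snapshot_alloc() when sleeping is allowed at the
 * point of capture.
 */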
1284
1285 /**
1286  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1287  * @tr:         The tracing instance
1288  * @cond_data:  User data to associate with the snapshot
1289  * @update:     Implementation of the cond_snapshot update function
1290  *
1291  * Check whether the conditional snapshot for the given instance has
1292  * already been enabled, or if the current tracer is already using a
1293  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1294  * save the cond_data and update function inside.
1295  *
1296  * Returns 0 if successful, error otherwise.
1297  */
1298 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1299                                  cond_update_fn_t update)
1300 {
1301         struct cond_snapshot *cond_snapshot;
1302         int ret = 0;
1303
1304         cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1305         if (!cond_snapshot)
1306                 return -ENOMEM;
1307
1308         cond_snapshot->cond_data = cond_data;
1309         cond_snapshot->update = update;
1310
1311         mutex_lock(&trace_types_lock);
1312
1313         ret = tracing_alloc_snapshot_instance(tr);
1314         if (ret)
1315                 goto fail_unlock;
1316
1317         if (tr->current_trace->use_max_tr) {
1318                 ret = -EBUSY;
1319                 goto fail_unlock;
1320         }
1321
1322         /*
1323          * The cond_snapshot can only change to NULL without the
1324          * trace_types_lock. We don't care if we race with it going
1325          * to NULL, but we want to make sure that it's not set to
1326          * something other than NULL when we get here, which we can
1327          * do safely with only holding the trace_types_lock and not
1328          * having to take the max_lock.
1329          */
1330         if (tr->cond_snapshot) {
1331                 ret = -EBUSY;
1332                 goto fail_unlock;
1333         }
1334
1335         arch_spin_lock(&tr->max_lock);
1336         tr->cond_snapshot = cond_snapshot;
1337         arch_spin_unlock(&tr->max_lock);
1338
1339         mutex_unlock(&trace_types_lock);
1340
1341         return ret;
1342
1343  fail_unlock:
1344         mutex_unlock(&trace_types_lock);
1345         kfree(cond_snapshot);
1346         return ret;
1347 }
1348 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
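
/*
 * Illustrative sketch (hypothetical callback and data): a user of the
 * conditional snapshot API supplies a cond_update_fn_t that decides, per
 * call, whether the snapshot should actually be taken:
 *
 *	static bool my_update(struct trace_array *tr, void *cond_data)
 *	{
 *		return my_condition_holds(cond_data);
 *	}
 *
 *	tracing_snapshot_cond_enable(tr, my_data, my_update);
 *	...
 *	tracing_snapshot_cond(tr, my_data);
 *	...
 *	tracing_snapshot_cond_disable(tr);
 */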
1349
1350 /**
1351  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1352  * @tr:         The tracing instance
1353  *
1354  * Check whether the conditional snapshot for the given instance is
1355  * enabled; if so, free the cond_snapshot associated with it,
1356  * otherwise return -EINVAL.
1357  *
1358  * Returns 0 if successful, error otherwise.
1359  */
1360 int tracing_snapshot_cond_disable(struct trace_array *tr)
1361 {
1362         int ret = 0;
1363
1364         arch_spin_lock(&tr->max_lock);
1365
1366         if (!tr->cond_snapshot)
1367                 ret = -EINVAL;
1368         else {
1369                 kfree(tr->cond_snapshot);
1370                 tr->cond_snapshot = NULL;
1371         }
1372
1373         arch_spin_unlock(&tr->max_lock);
1374
1375         return ret;
1376 }
1377 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1378 #else
1379 void tracing_snapshot(void)
1380 {
1381         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1382 }
1383 EXPORT_SYMBOL_GPL(tracing_snapshot);
1384 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1385 {
1386         WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1387 }
1388 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1389 int tracing_alloc_snapshot(void)
1390 {
1391         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1392         return -ENODEV;
1393 }
1394 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1395 void tracing_snapshot_alloc(void)
1396 {
1397         /* Give warning */
1398         tracing_snapshot();
1399 }
1400 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1401 void *tracing_cond_snapshot_data(struct trace_array *tr)
1402 {
1403         return NULL;
1404 }
1405 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1406 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1407 {
1408         return -ENODEV;
1409 }
1410 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1411 int tracing_snapshot_cond_disable(struct trace_array *tr)
1412 {
1413         return false;
1414 }
1415 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1416 #endif /* CONFIG_TRACER_SNAPSHOT */
1417
1418 void tracer_tracing_off(struct trace_array *tr)
1419 {
1420         if (tr->array_buffer.buffer)
1421                 ring_buffer_record_off(tr->array_buffer.buffer);
1422         /*
1423          * This flag is looked at when buffers haven't been allocated
1424          * yet, or by some tracers (like irqsoff), that just want to
1425          * know if the ring buffer has been disabled, but it can handle
1426          * races where it gets disabled but we still do a record.
1427          * As the check is in the fast path of the tracers, it is more
1428          * important to be fast than accurate.
1429          */
1430         tr->buffer_disabled = 1;
1431         /* Make the flag seen by readers */
1432         smp_wmb();
1433 }
1434
1435 /**
1436  * tracing_off - turn off tracing buffers
1437  *
1438  * This function stops the tracing buffers from recording data.
1439  * It does not disable any overhead the tracers themselves may
1440  * be causing. This function simply causes all recording to
1441  * the ring buffers to fail.
1442  */
1443 void tracing_off(void)
1444 {
1445         tracer_tracing_off(&global_trace);
1446 }
1447 EXPORT_SYMBOL_GPL(tracing_off);
1448
1449 void disable_trace_on_warning(void)
1450 {
1451         if (__disable_trace_on_warning) {
1452                 trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1453                         "Disabling tracing due to warning\n");
1454                 tracing_off();
1455         }
1456 }
1457
1458 /**
1459  * tracer_tracing_is_on - show real state of ring buffer enabled
1460  * @tr : the trace array to know if ring buffer is enabled
1461  *
1462  * Shows real state of the ring buffer if it is enabled or not.
1463  */
1464 bool tracer_tracing_is_on(struct trace_array *tr)
1465 {
1466         if (tr->array_buffer.buffer)
1467                 return ring_buffer_record_is_on(tr->array_buffer.buffer);
1468         return !tr->buffer_disabled;
1469 }
1470
1471 /**
1472  * tracing_is_on - show state of ring buffers enabled
1473  */
1474 int tracing_is_on(void)
1475 {
1476         return tracer_tracing_is_on(&global_trace);
1477 }
1478 EXPORT_SYMBOL_GPL(tracing_is_on);
1479
1480 static int __init set_buf_size(char *str)
1481 {
1482         unsigned long buf_size;
1483
1484         if (!str)
1485                 return 0;
1486         buf_size = memparse(str, &str);
1487         /*
1488          * nr_entries can not be zero and the startup
1489          * tests require some buffer space. Therefore
1490          * ensure we have at least 4096 bytes of buffer.
1491          */
1492         trace_buf_size = max(4096UL, buf_size);
1493         return 1;
1494 }
1495 __setup("trace_buf_size=", set_buf_size);
1496
1497 static int __init set_tracing_thresh(char *str)
1498 {
1499         unsigned long threshold;
1500         int ret;
1501
1502         if (!str)
1503                 return 0;
1504         ret = kstrtoul(str, 0, &threshold);
1505         if (ret < 0)
1506                 return 0;
1507         tracing_thresh = threshold * 1000;
1508         return 1;
1509 }
1510 __setup("tracing_thresh=", set_tracing_thresh);
1511
1512 unsigned long nsecs_to_usecs(unsigned long nsecs)
1513 {
1514         return nsecs / 1000;
1515 }
1516
1517 /*
1518  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1519  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1520  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1521  * of strings in the order that the evals (enum) were defined.
1522  */
1523 #undef C
1524 #define C(a, b) b
1525
1526 /* These must match the bit positions in trace_iterator_flags */
1527 static const char *trace_options[] = {
1528         TRACE_FLAGS
1529         NULL
1530 };
1531
1532 static struct {
1533         u64 (*func)(void);
1534         const char *name;
1535         int in_ns;              /* is this clock in nanoseconds? */
1536 } trace_clocks[] = {
1537         { trace_clock_local,            "local",        1 },
1538         { trace_clock_global,           "global",       1 },
1539         { trace_clock_counter,          "counter",      0 },
1540         { trace_clock_jiffies,          "uptime",       0 },
1541         { trace_clock,                  "perf",         1 },
1542         { ktime_get_mono_fast_ns,       "mono",         1 },
1543         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1544         { ktime_get_boot_fast_ns,       "boot",         1 },
1545         ARCH_TRACE_CLOCKS
1546 };
1547
1548 bool trace_clock_in_ns(struct trace_array *tr)
1549 {
1550         if (trace_clocks[tr->clock_id].in_ns)
1551                 return true;
1552
1553         return false;
1554 }
1555
1556 /*
1557  * trace_parser_get_init - gets the buffer for trace parser
1558  */
1559 int trace_parser_get_init(struct trace_parser *parser, int size)
1560 {
1561         memset(parser, 0, sizeof(*parser));
1562
1563         parser->buffer = kmalloc(size, GFP_KERNEL);
1564         if (!parser->buffer)
1565                 return 1;
1566
1567         parser->size = size;
1568         return 0;
1569 }
1570
1571 /*
1572  * trace_parser_put - frees the buffer for trace parser
1573  */
1574 void trace_parser_put(struct trace_parser *parser)
1575 {
1576         kfree(parser->buffer);
1577         parser->buffer = NULL;
1578 }
1579
1580 /*
1581  * trace_get_user - reads the user input string separated by space
1582  * (matched by isspace(ch))
1583  *
1584  * For each string found the 'struct trace_parser' is updated,
1585  * and the function returns.
1586  *
1587  * Returns number of bytes read.
1588  *
1589  * See kernel/trace/trace.h for 'struct trace_parser' details.
1590  */
1591 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1592         size_t cnt, loff_t *ppos)
1593 {
1594         char ch;
1595         size_t read = 0;
1596         ssize_t ret;
1597
1598         if (!*ppos)
1599                 trace_parser_clear(parser);
1600
1601         ret = get_user(ch, ubuf++);
1602         if (ret)
1603                 goto out;
1604
1605         read++;
1606         cnt--;
1607
1608         /*
1609          * The parser is not finished with the last write,
1610          * continue reading the user input without skipping spaces.
1611          */
1612         if (!parser->cont) {
1613                 /* skip white space */
1614                 while (cnt && isspace(ch)) {
1615                         ret = get_user(ch, ubuf++);
1616                         if (ret)
1617                                 goto out;
1618                         read++;
1619                         cnt--;
1620                 }
1621
1622                 parser->idx = 0;
1623
1624                 /* only spaces were written */
1625                 if (isspace(ch) || !ch) {
1626                         *ppos += read;
1627                         ret = read;
1628                         goto out;
1629                 }
1630         }
1631
1632         /* read the non-space input */
1633         while (cnt && !isspace(ch) && ch) {
1634                 if (parser->idx < parser->size - 1)
1635                         parser->buffer[parser->idx++] = ch;
1636                 else {
1637                         ret = -EINVAL;
1638                         goto out;
1639                 }
1640                 ret = get_user(ch, ubuf++);
1641                 if (ret)
1642                         goto out;
1643                 read++;
1644                 cnt--;
1645         }
1646
1647         /* We either got finished input or we have to wait for another call. */
1648         if (isspace(ch) || !ch) {
1649                 parser->buffer[parser->idx] = 0;
1650                 parser->cont = false;
1651         } else if (parser->idx < parser->size - 1) {
1652                 parser->cont = true;
1653                 parser->buffer[parser->idx++] = ch;
1654                 /* Make sure the parsed string always terminates with '\0'. */
1655                 parser->buffer[parser->idx] = 0;
1656         } else {
1657                 ret = -EINVAL;
1658                 goto out;
1659         }
1660
1661         *ppos += read;
1662         ret = read;
1663
1664 out:
1665         return ret;
1666 }
1667
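/*
 * A minimal usage sketch for the parser helpers above: a write() handler
 * that pulls one space-separated token per call. The handler name, the
 * 64-byte buffer size and the pr_info() consumption step are illustrative
 * only.
 */
static ssize_t __maybe_unused example_token_write(struct file *filp,
                                                  const char __user *ubuf,
                                                  size_t cnt, loff_t *ppos)
{
        struct trace_parser parser;
        ssize_t ret;

        if (trace_parser_get_init(&parser, 64))
                return -ENOMEM;

        ret = trace_get_user(&parser, ubuf, cnt, ppos);
        if (ret >= 0 && trace_parser_loaded(&parser)) {
                /* parser.buffer now holds one NUL-terminated token */
                pr_info("parsed token: %s\n", parser.buffer);
        }

        trace_parser_put(&parser);
        return ret;
}
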
1668 /* TODO add a seq_buf_to_buffer() */
1669 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1670 {
1671         int len;
1672
1673         if (trace_seq_used(s) <= s->seq.readpos)
1674                 return -EBUSY;
1675
1676         len = trace_seq_used(s) - s->seq.readpos;
1677         if (cnt > len)
1678                 cnt = len;
1679         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1680
1681         s->seq.readpos += cnt;
1682         return cnt;
1683 }
1684
1685 unsigned long __read_mostly     tracing_thresh;
1686 static const struct file_operations tracing_max_lat_fops;
1687
1688 #ifdef LATENCY_FS_NOTIFY
1689
1690 static struct workqueue_struct *fsnotify_wq;
1691
1692 static void latency_fsnotify_workfn(struct work_struct *work)
1693 {
1694         struct trace_array *tr = container_of(work, struct trace_array,
1695                                               fsnotify_work);
1696         fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1697 }
1698
1699 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1700 {
1701         struct trace_array *tr = container_of(iwork, struct trace_array,
1702                                               fsnotify_irqwork);
1703         queue_work(fsnotify_wq, &tr->fsnotify_work);
1704 }
1705
1706 static void trace_create_maxlat_file(struct trace_array *tr,
1707                                      struct dentry *d_tracer)
1708 {
1709         INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1710         init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1711         tr->d_max_latency = trace_create_file("tracing_max_latency",
1712                                               TRACE_MODE_WRITE,
1713                                               d_tracer, &tr->max_latency,
1714                                               &tracing_max_lat_fops);
1715 }
1716
1717 __init static int latency_fsnotify_init(void)
1718 {
1719         fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1720                                       WQ_UNBOUND | WQ_HIGHPRI, 0);
1721         if (!fsnotify_wq) {
1722                 pr_err("Unable to allocate tr_max_lat_wq\n");
1723                 return -ENOMEM;
1724         }
1725         return 0;
1726 }
1727
1728 late_initcall_sync(latency_fsnotify_init);
1729
1730 void latency_fsnotify(struct trace_array *tr)
1731 {
1732         if (!fsnotify_wq)
1733                 return;
1734         /*
1735          * We cannot call queue_work(fsnotify_wq, &tr->fsnotify_work) from
1736          * here because it's possible that we are called from __schedule()
1737          * or do_idle(), which could cause a deadlock.
1738          */
1739         irq_work_queue(&tr->fsnotify_irqwork);
1740 }
1741
1742 #elif defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)  \
1743         || defined(CONFIG_OSNOISE_TRACER)
1744
1745 #define trace_create_maxlat_file(tr, d_tracer)                          \
1746         trace_create_file("tracing_max_latency", TRACE_MODE_WRITE,      \
1747                           d_tracer, &tr->max_latency, &tracing_max_lat_fops)
1748
1749 #else
1750 #define trace_create_maxlat_file(tr, d_tracer)   do { } while (0)
1751 #endif
1752
1753 #ifdef CONFIG_TRACER_MAX_TRACE
1754 /*
1755  * Copy the new maximum trace into the separate maximum-trace
1756  * structure. (this way the maximum trace is permanently saved,
1757  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1758  */
1759 static void
1760 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1761 {
1762         struct array_buffer *trace_buf = &tr->array_buffer;
1763         struct array_buffer *max_buf = &tr->max_buffer;
1764         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1765         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1766
1767         max_buf->cpu = cpu;
1768         max_buf->time_start = data->preempt_timestamp;
1769
1770         max_data->saved_latency = tr->max_latency;
1771         max_data->critical_start = data->critical_start;
1772         max_data->critical_end = data->critical_end;
1773
1774         strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1775         max_data->pid = tsk->pid;
1776         /*
1777          * If tsk == current, then use current_uid(), as that does not use
1778          * RCU. The irq tracer can be called out of RCU scope.
1779          */
1780         if (tsk == current)
1781                 max_data->uid = current_uid();
1782         else
1783                 max_data->uid = task_uid(tsk);
1784
1785         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1786         max_data->policy = tsk->policy;
1787         max_data->rt_priority = tsk->rt_priority;
1788
1789         /* record this task's comm */
1790         tracing_record_cmdline(tsk);
1791         latency_fsnotify(tr);
1792 }
1793
1794 /**
1795  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1796  * @tr: trace array that holds the buffers to swap
1797  * @tsk: the task with the latency
1798  * @cpu: The cpu that initiated the trace.
1799  * @cond_data: User data associated with a conditional snapshot
1800  *
1801  * Flip the buffers between the @tr and the max_tr and record information
1802  * about which task was the cause of this latency.
1803  */
1804 void
1805 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1806               void *cond_data)
1807 {
1808         if (tr->stop_count)
1809                 return;
1810
1811         WARN_ON_ONCE(!irqs_disabled());
1812
1813         if (!tr->allocated_snapshot) {
1814                 /* Only the nop tracer should hit this when disabling */
1815                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1816                 return;
1817         }
1818
1819         arch_spin_lock(&tr->max_lock);
1820
1821         /* Inherit the recordable setting from array_buffer */
1822         if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1823                 ring_buffer_record_on(tr->max_buffer.buffer);
1824         else
1825                 ring_buffer_record_off(tr->max_buffer.buffer);
1826
1827 #ifdef CONFIG_TRACER_SNAPSHOT
1828         if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1829                 goto out_unlock;
1830 #endif
1831         swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1832
1833         __update_max_tr(tr, tsk, cpu);
1834
1835  out_unlock:
1836         arch_spin_unlock(&tr->max_lock);
1837 }
1838
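/*
 * A sketch of how a latency tracer might use update_max_tr() when it sees a
 * new worst case. The function name and the way 'delta' is measured are
 * illustrative only; tr->max_latency and the snapshot swap behave as the
 * kernel-doc above describes.
 */
static void __maybe_unused example_report_latency(struct trace_array *tr,
                                                  int cpu, unsigned long delta)
{
        if (delta <= tr->max_latency)
                return;

        tr->max_latency = delta;
        /* Swap in the snapshot buffer and record the offending task */
        update_max_tr(tr, current, cpu, NULL);
}
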
1839 /**
1840  * update_max_tr_single - only copy one trace over, and reset the rest
1841  * @tr: trace array that holds the buffers to swap
1842  * @tsk: task with the latency
1843  * @cpu: the cpu of the buffer to copy.
1844  *
1845  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1846  */
1847 void
1848 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1849 {
1850         int ret;
1851
1852         if (tr->stop_count)
1853                 return;
1854
1855         WARN_ON_ONCE(!irqs_disabled());
1856         if (!tr->allocated_snapshot) {
1857                 /* Only the nop tracer should hit this when disabling */
1858                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1859                 return;
1860         }
1861
1862         arch_spin_lock(&tr->max_lock);
1863
1864         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1865
1866         if (ret == -EBUSY) {
1867                 /*
1868                  * We failed to swap the buffer due to a commit taking
1869                  * place on this CPU. We fail to record the new max, but we
1870                  * print a note into the max trace buffer (no one writes
1871                  * directly to it) to flag that it failed.
1872                  */
1873                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1874                         "Failed to swap buffers due to commit in progress\n");
1875         }
1876
1877         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1878
1879         __update_max_tr(tr, tsk, cpu);
1880         arch_spin_unlock(&tr->max_lock);
1881 }
1882 #endif /* CONFIG_TRACER_MAX_TRACE */
1883
1884 static int wait_on_pipe(struct trace_iterator *iter, int full)
1885 {
1886         /* Iterators are static, they should be filled or empty */
1887         if (trace_buffer_iter(iter, iter->cpu_file))
1888                 return 0;
1889
1890         return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1891                                 full);
1892 }
1893
1894 #ifdef CONFIG_FTRACE_STARTUP_TEST
1895 static bool selftests_can_run;
1896
1897 struct trace_selftests {
1898         struct list_head                list;
1899         struct tracer                   *type;
1900 };
1901
1902 static LIST_HEAD(postponed_selftests);
1903
1904 static int save_selftest(struct tracer *type)
1905 {
1906         struct trace_selftests *selftest;
1907
1908         selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1909         if (!selftest)
1910                 return -ENOMEM;
1911
1912         selftest->type = type;
1913         list_add(&selftest->list, &postponed_selftests);
1914         return 0;
1915 }
1916
1917 static int run_tracer_selftest(struct tracer *type)
1918 {
1919         struct trace_array *tr = &global_trace;
1920         struct tracer *saved_tracer = tr->current_trace;
1921         int ret;
1922
1923         if (!type->selftest || tracing_selftest_disabled)
1924                 return 0;
1925
1926         /*
1927          * If a tracer registers early in boot up (before scheduling is
1928          * initialized and such), then do not run its selftest yet.
1929          * Instead, run it a little later in the boot process.
1930          */
1931         if (!selftests_can_run)
1932                 return save_selftest(type);
1933
1934         if (!tracing_is_on()) {
1935                 pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
1936                         type->name);
1937                 return 0;
1938         }
1939
1940         /*
1941          * Run a selftest on this tracer.
1942          * Here we reset the trace buffer, and set the current
1943          * tracer to be this tracer. The tracer can then run some
1944          * internal tracing to verify that everything is in order.
1945          * If we fail, we do not register this tracer.
1946          */
1947         tracing_reset_online_cpus(&tr->array_buffer);
1948
1949         tr->current_trace = type;
1950
1951 #ifdef CONFIG_TRACER_MAX_TRACE
1952         if (type->use_max_tr) {
1953                 /* If we expanded the buffers, make sure the max is expanded too */
1954                 if (ring_buffer_expanded)
1955                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1956                                            RING_BUFFER_ALL_CPUS);
1957                 tr->allocated_snapshot = true;
1958         }
1959 #endif
1960
1961         /* the test is responsible for initializing and enabling */
1962         pr_info("Testing tracer %s: ", type->name);
1963         ret = type->selftest(type, tr);
1964         /* the test is responsible for resetting too */
1965         tr->current_trace = saved_tracer;
1966         if (ret) {
1967                 printk(KERN_CONT "FAILED!\n");
1968                 /* Add the warning after printing 'FAILED' */
1969                 WARN_ON(1);
1970                 return -1;
1971         }
1972         /* Only reset on passing, to avoid touching corrupted buffers */
1973         tracing_reset_online_cpus(&tr->array_buffer);
1974
1975 #ifdef CONFIG_TRACER_MAX_TRACE
1976         if (type->use_max_tr) {
1977                 tr->allocated_snapshot = false;
1978
1979                 /* Shrink the max buffer again */
1980                 if (ring_buffer_expanded)
1981                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1982                                            RING_BUFFER_ALL_CPUS);
1983         }
1984 #endif
1985
1986         printk(KERN_CONT "PASSED\n");
1987         return 0;
1988 }
1989
1990 static __init int init_trace_selftests(void)
1991 {
1992         struct trace_selftests *p, *n;
1993         struct tracer *t, **last;
1994         int ret;
1995
1996         selftests_can_run = true;
1997
1998         mutex_lock(&trace_types_lock);
1999
2000         if (list_empty(&postponed_selftests))
2001                 goto out;
2002
2003         pr_info("Running postponed tracer tests:\n");
2004
2005         tracing_selftest_running = true;
2006         list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2007                 /* This loop can take minutes when sanitizers are enabled, so
2008                  * let's make sure we allow RCU processing.
2009                  */
2010                 cond_resched();
2011                 ret = run_tracer_selftest(p->type);
2012                 /* If the test fails, then warn and remove from available_tracers */
2013                 if (ret < 0) {
2014                         WARN(1, "tracer: %s failed selftest, disabling\n",
2015                              p->type->name);
2016                         last = &trace_types;
2017                         for (t = trace_types; t; t = t->next) {
2018                                 if (t == p->type) {
2019                                         *last = t->next;
2020                                         break;
2021                                 }
2022                                 last = &t->next;
2023                         }
2024                 }
2025                 list_del(&p->list);
2026                 kfree(p);
2027         }
2028         tracing_selftest_running = false;
2029
2030  out:
2031         mutex_unlock(&trace_types_lock);
2032
2033         return 0;
2034 }
2035 core_initcall(init_trace_selftests);
2036 #else
2037 static inline int run_tracer_selftest(struct tracer *type)
2038 {
2039         return 0;
2040 }
2041 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2042
2043 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2044
2045 static void __init apply_trace_boot_options(void);
2046
2047 /**
2048  * register_tracer - register a tracer with the ftrace system.
2049  * @type: the plugin for the tracer
2050  *
2051  * Register a new plugin tracer.
2052  */
2053 int __init register_tracer(struct tracer *type)
2054 {
2055         struct tracer *t;
2056         int ret = 0;
2057
2058         if (!type->name) {
2059                 pr_info("Tracer must have a name\n");
2060                 return -1;
2061         }
2062
2063         if (strlen(type->name) >= MAX_TRACER_SIZE) {
2064                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2065                 return -1;
2066         }
2067
2068         if (security_locked_down(LOCKDOWN_TRACEFS)) {
2069                 pr_warn("Can not register tracer %s due to lockdown\n",
2070                            type->name);
2071                 return -EPERM;
2072         }
2073
2074         mutex_lock(&trace_types_lock);
2075
2076         tracing_selftest_running = true;
2077
2078         for (t = trace_types; t; t = t->next) {
2079                 if (strcmp(type->name, t->name) == 0) {
2080                         /* already found */
2081                         pr_info("Tracer %s already registered\n",
2082                                 type->name);
2083                         ret = -1;
2084                         goto out;
2085                 }
2086         }
2087
2088         if (!type->set_flag)
2089                 type->set_flag = &dummy_set_flag;
2090         if (!type->flags) {
2091                 /* allocate a dummy tracer_flags */
2092                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2093                 if (!type->flags) {
2094                         ret = -ENOMEM;
2095                         goto out;
2096                 }
2097                 type->flags->val = 0;
2098                 type->flags->opts = dummy_tracer_opt;
2099         } else
2100                 if (!type->flags->opts)
2101                         type->flags->opts = dummy_tracer_opt;
2102
2103         /* store the tracer for __set_tracer_option */
2104         type->flags->trace = type;
2105
2106         ret = run_tracer_selftest(type);
2107         if (ret < 0)
2108                 goto out;
2109
2110         type->next = trace_types;
2111         trace_types = type;
2112         add_tracer_options(&global_trace, type);
2113
2114  out:
2115         tracing_selftest_running = false;
2116         mutex_unlock(&trace_types_lock);
2117
2118         if (ret || !default_bootup_tracer)
2119                 goto out_unlock;
2120
2121         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2122                 goto out_unlock;
2123
2124         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2125         /* Do we want this tracer to start on bootup? */
2126         tracing_set_tracer(&global_trace, type->name);
2127         default_bootup_tracer = NULL;
2128
2129         apply_trace_boot_options();
2130
2131         /* Disable other selftests, since they would interfere with this running tracer. */
2132         disable_tracing_selftest("running a tracer");
2133
2134  out_unlock:
2135         return ret;
2136 }
2137
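/*
 * The minimal shape of a tracer plugin handed to register_tracer(); the
 * "example" tracer and its callbacks are purely illustrative. A real,
 * built-in tracer would pass the struct to register_tracer() from an
 * __init path.
 */
static int example_tracer_init(struct trace_array *tr)
{
        return 0;
}

static void example_tracer_reset(struct trace_array *tr)
{
}

static struct tracer example_tracer __maybe_unused = {
        .name   = "example",
        .init   = example_tracer_init,
        .reset  = example_tracer_reset,
};
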
2138 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2139 {
2140         struct trace_buffer *buffer = buf->buffer;
2141
2142         if (!buffer)
2143                 return;
2144
2145         ring_buffer_record_disable(buffer);
2146
2147         /* Make sure all commits have finished */
2148         synchronize_rcu();
2149         ring_buffer_reset_cpu(buffer, cpu);
2150
2151         ring_buffer_record_enable(buffer);
2152 }
2153
2154 void tracing_reset_online_cpus(struct array_buffer *buf)
2155 {
2156         struct trace_buffer *buffer = buf->buffer;
2157
2158         if (!buffer)
2159                 return;
2160
2161         ring_buffer_record_disable(buffer);
2162
2163         /* Make sure all commits have finished */
2164         synchronize_rcu();
2165
2166         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2167
2168         ring_buffer_reset_online_cpus(buffer);
2169
2170         ring_buffer_record_enable(buffer);
2171 }
2172
2173 /* Must have trace_types_lock held */
2174 void tracing_reset_all_online_cpus(void)
2175 {
2176         struct trace_array *tr;
2177
2178         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2179                 if (!tr->clear_trace)
2180                         continue;
2181                 tr->clear_trace = false;
2182                 tracing_reset_online_cpus(&tr->array_buffer);
2183 #ifdef CONFIG_TRACER_MAX_TRACE
2184                 tracing_reset_online_cpus(&tr->max_buffer);
2185 #endif
2186         }
2187 }
2188
2189 /*
2190  * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
2191  * is the tgid last observed corresponding to pid=i.
2192  */
2193 static int *tgid_map;
2194
2195 /* The maximum valid index into tgid_map. */
2196 static size_t tgid_map_max;
2197
2198 #define SAVED_CMDLINES_DEFAULT 128
2199 #define NO_CMDLINE_MAP UINT_MAX
2200 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2201 struct saved_cmdlines_buffer {
2202         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2203         unsigned *map_cmdline_to_pid;
2204         unsigned cmdline_num;
2205         int cmdline_idx;
2206         char *saved_cmdlines;
2207 };
2208 static struct saved_cmdlines_buffer *savedcmd;
2209
2210 static inline char *get_saved_cmdlines(int idx)
2211 {
2212         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2213 }
2214
2215 static inline void set_cmdline(int idx, const char *cmdline)
2216 {
2217         strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2218 }
2219
2220 static int allocate_cmdlines_buffer(unsigned int val,
2221                                     struct saved_cmdlines_buffer *s)
2222 {
2223         s->map_cmdline_to_pid = kmalloc_array(val,
2224                                               sizeof(*s->map_cmdline_to_pid),
2225                                               GFP_KERNEL);
2226         if (!s->map_cmdline_to_pid)
2227                 return -ENOMEM;
2228
2229         s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2230         if (!s->saved_cmdlines) {
2231                 kfree(s->map_cmdline_to_pid);
2232                 return -ENOMEM;
2233         }
2234
2235         s->cmdline_idx = 0;
2236         s->cmdline_num = val;
2237         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2238                sizeof(s->map_pid_to_cmdline));
2239         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2240                val * sizeof(*s->map_cmdline_to_pid));
2241
2242         return 0;
2243 }
2244
2245 static int trace_create_savedcmd(void)
2246 {
2247         int ret;
2248
2249         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2250         if (!savedcmd)
2251                 return -ENOMEM;
2252
2253         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2254         if (ret < 0) {
2255                 kfree(savedcmd);
2256                 savedcmd = NULL;
2257                 return -ENOMEM;
2258         }
2259
2260         return 0;
2261 }
2262
2263 int is_tracing_stopped(void)
2264 {
2265         return global_trace.stop_count;
2266 }
2267
2268 /**
2269  * tracing_start - quick start of the tracer
2270  *
2271  * If tracing is enabled but was stopped by tracing_stop,
2272  * this will start the tracer back up.
2273  */
2274 void tracing_start(void)
2275 {
2276         struct trace_buffer *buffer;
2277         unsigned long flags;
2278
2279         if (tracing_disabled)
2280                 return;
2281
2282         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2283         if (--global_trace.stop_count) {
2284                 if (global_trace.stop_count < 0) {
2285                         /* Someone screwed up their debugging */
2286                         WARN_ON_ONCE(1);
2287                         global_trace.stop_count = 0;
2288                 }
2289                 goto out;
2290         }
2291
2292         /* Prevent the buffers from switching */
2293         arch_spin_lock(&global_trace.max_lock);
2294
2295         buffer = global_trace.array_buffer.buffer;
2296         if (buffer)
2297                 ring_buffer_record_enable(buffer);
2298
2299 #ifdef CONFIG_TRACER_MAX_TRACE
2300         buffer = global_trace.max_buffer.buffer;
2301         if (buffer)
2302                 ring_buffer_record_enable(buffer);
2303 #endif
2304
2305         arch_spin_unlock(&global_trace.max_lock);
2306
2307  out:
2308         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2309 }
2310
2311 static void tracing_start_tr(struct trace_array *tr)
2312 {
2313         struct trace_buffer *buffer;
2314         unsigned long flags;
2315
2316         if (tracing_disabled)
2317                 return;
2318
2319         /* If global, we need to also start the max tracer */
2320         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2321                 return tracing_start();
2322
2323         raw_spin_lock_irqsave(&tr->start_lock, flags);
2324
2325         if (--tr->stop_count) {
2326                 if (tr->stop_count < 0) {
2327                         /* Someone screwed up their debugging */
2328                         WARN_ON_ONCE(1);
2329                         tr->stop_count = 0;
2330                 }
2331                 goto out;
2332         }
2333
2334         buffer = tr->array_buffer.buffer;
2335         if (buffer)
2336                 ring_buffer_record_enable(buffer);
2337
2338  out:
2339         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2340 }
2341
2342 /**
2343  * tracing_stop - quick stop of the tracer
2344  *
2345  * Light weight way to stop tracing. Use in conjunction with
2346  * tracing_start.
2347  */
2348 void tracing_stop(void)
2349 {
2350         struct trace_buffer *buffer;
2351         unsigned long flags;
2352
2353         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2354         if (global_trace.stop_count++)
2355                 goto out;
2356
2357         /* Prevent the buffers from switching */
2358         arch_spin_lock(&global_trace.max_lock);
2359
2360         buffer = global_trace.array_buffer.buffer;
2361         if (buffer)
2362                 ring_buffer_record_disable(buffer);
2363
2364 #ifdef CONFIG_TRACER_MAX_TRACE
2365         buffer = global_trace.max_buffer.buffer;
2366         if (buffer)
2367                 ring_buffer_record_disable(buffer);
2368 #endif
2369
2370         arch_spin_unlock(&global_trace.max_lock);
2371
2372  out:
2373         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2374 }
2375
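/*
 * A sketch of the stop/start pairing described in the kernel-doc above:
 * suppress tracing around a section whose events are not interesting. The
 * calls nest via stop_count, so this is safe even if tracing was already
 * stopped elsewhere.
 */
static void __maybe_unused example_quiet_section(void)
{
        tracing_stop();
        /* ... work that should not appear in the trace ... */
        tracing_start();
}
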
2376 static void tracing_stop_tr(struct trace_array *tr)
2377 {
2378         struct trace_buffer *buffer;
2379         unsigned long flags;
2380
2381         /* If global, we need to also stop the max tracer */
2382         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2383                 return tracing_stop();
2384
2385         raw_spin_lock_irqsave(&tr->start_lock, flags);
2386         if (tr->stop_count++)
2387                 goto out;
2388
2389         buffer = tr->array_buffer.buffer;
2390         if (buffer)
2391                 ring_buffer_record_disable(buffer);
2392
2393  out:
2394         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2395 }
2396
2397 static int trace_save_cmdline(struct task_struct *tsk)
2398 {
2399         unsigned tpid, idx;
2400
2401         /* treat recording of idle task as a success */
2402         if (!tsk->pid)
2403                 return 1;
2404
2405         tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
2406
2407         /*
2408          * It's not the end of the world if we don't get
2409          * the lock, but we also don't want to spin
2410          * nor do we want to disable interrupts,
2411          * so if we miss here, then better luck next time.
2412          */
2413         if (!arch_spin_trylock(&trace_cmdline_lock))
2414                 return 0;
2415
2416         idx = savedcmd->map_pid_to_cmdline[tpid];
2417         if (idx == NO_CMDLINE_MAP) {
2418                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2419
2420                 savedcmd->map_pid_to_cmdline[tpid] = idx;
2421                 savedcmd->cmdline_idx = idx;
2422         }
2423
2424         savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2425         set_cmdline(idx, tsk->comm);
2426
2427         arch_spin_unlock(&trace_cmdline_lock);
2428
2429         return 1;
2430 }
2431
2432 static void __trace_find_cmdline(int pid, char comm[])
2433 {
2434         unsigned map;
2435         int tpid;
2436
2437         if (!pid) {
2438                 strcpy(comm, "<idle>");
2439                 return;
2440         }
2441
2442         if (WARN_ON_ONCE(pid < 0)) {
2443                 strcpy(comm, "<XXX>");
2444                 return;
2445         }
2446
2447         tpid = pid & (PID_MAX_DEFAULT - 1);
2448         map = savedcmd->map_pid_to_cmdline[tpid];
2449         if (map != NO_CMDLINE_MAP) {
2450                 tpid = savedcmd->map_cmdline_to_pid[map];
2451                 if (tpid == pid) {
2452                         strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2453                         return;
2454                 }
2455         }
2456         strcpy(comm, "<...>");
2457 }
2458
2459 void trace_find_cmdline(int pid, char comm[])
2460 {
2461         preempt_disable();
2462         arch_spin_lock(&trace_cmdline_lock);
2463
2464         __trace_find_cmdline(pid, comm);
2465
2466         arch_spin_unlock(&trace_cmdline_lock);
2467         preempt_enable();
2468 }
2469
2470 static int *trace_find_tgid_ptr(int pid)
2471 {
2472         /*
2473          * Pairs with the smp_store_release in set_tracer_flag() to ensure that
2474          * if we observe a non-NULL tgid_map then we also observe the correct
2475          * tgid_map_max.
2476          */
2477         int *map = smp_load_acquire(&tgid_map);
2478
2479         if (unlikely(!map || pid > tgid_map_max))
2480                 return NULL;
2481
2482         return &map[pid];
2483 }
2484
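/*
 * A sketch of the publish side that the smp_load_acquire() above pairs
 * with, roughly what set_tracer_flag() does when the record-tgid option is
 * turned on; 'map' and 'max' stand for a freshly allocated, zeroed table.
 */
static void __maybe_unused example_publish_tgid_map(int *map, size_t max)
{
        tgid_map_max = max;
        /* Readers that observe tgid_map must also observe tgid_map_max */
        smp_store_release(&tgid_map, map);
}
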
2485 int trace_find_tgid(int pid)
2486 {
2487         int *ptr = trace_find_tgid_ptr(pid);
2488
2489         return ptr ? *ptr : 0;
2490 }
2491
2492 static int trace_save_tgid(struct task_struct *tsk)
2493 {
2494         int *ptr;
2495
2496         /* treat recording of idle task as a success */
2497         if (!tsk->pid)
2498                 return 1;
2499
2500         ptr = trace_find_tgid_ptr(tsk->pid);
2501         if (!ptr)
2502                 return 0;
2503
2504         *ptr = tsk->tgid;
2505         return 1;
2506 }
2507
2508 static bool tracing_record_taskinfo_skip(int flags)
2509 {
2510         if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2511                 return true;
2512         if (!__this_cpu_read(trace_taskinfo_save))
2513                 return true;
2514         return false;
2515 }
2516
2517 /**
2518  * tracing_record_taskinfo - record the task info of a task
2519  *
2520  * @task:  task to record
2521  * @flags: TRACE_RECORD_CMDLINE for recording comm
2522  *         TRACE_RECORD_TGID for recording tgid
2523  */
2524 void tracing_record_taskinfo(struct task_struct *task, int flags)
2525 {
2526         bool done;
2527
2528         if (tracing_record_taskinfo_skip(flags))
2529                 return;
2530
2531         /*
2532          * Record as much task information as possible. If some fail, continue
2533          * to try to record the others.
2534          */
2535         done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2536         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2537
2538         /* If recording any information failed, retry soon. */
2539         if (!done)
2540                 return;
2541
2542         __this_cpu_write(trace_taskinfo_save, false);
2543 }
2544
2545 /**
2546  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2547  *
2548  * @prev: previous task during sched_switch
2549  * @next: next task during sched_switch
2550  * @flags: TRACE_RECORD_CMDLINE for recording comm
2551  *         TRACE_RECORD_TGID for recording tgid
2552  */
2553 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2554                                           struct task_struct *next, int flags)
2555 {
2556         bool done;
2557
2558         if (tracing_record_taskinfo_skip(flags))
2559                 return;
2560
2561         /*
2562          * Record as much task information as possible. If some fail, continue
2563          * to try to record the others.
2564          */
2565         done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2566         done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2567         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2568         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2569
2570         /* If recording any information failed, retry soon. */
2571         if (!done)
2572                 return;
2573
2574         __this_cpu_write(trace_taskinfo_save, false);
2575 }
2576
2577 /* Helpers to record a specific task information */
2578 void tracing_record_cmdline(struct task_struct *task)
2579 {
2580         tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2581 }
2582
2583 void tracing_record_tgid(struct task_struct *task)
2584 {
2585         tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2586 }
2587
2588 /*
2589  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2590  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2591  * simplifies those functions and keeps them in sync.
2592  */
2593 enum print_line_t trace_handle_return(struct trace_seq *s)
2594 {
2595         return trace_seq_has_overflowed(s) ?
2596                 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2597 }
2598 EXPORT_SYMBOL_GPL(trace_handle_return);
2599
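/*
 * A sketch of the pattern trace_handle_return() exists for: an event output
 * callback writes to iter->seq and lets the helper translate any overflow.
 * The callback and its output format are illustrative only.
 */
static enum print_line_t __maybe_unused
example_trace_output(struct trace_iterator *iter, int flags,
                     struct trace_event *event)
{
        trace_seq_printf(&iter->seq, "example: cpu=%d\n", iter->cpu);

        return trace_handle_return(&iter->seq);
}
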
2600 static unsigned short migration_disable_value(void)
2601 {
2602 #if defined(CONFIG_SMP)
2603         return current->migration_disabled;
2604 #else
2605         return 0;
2606 #endif
2607 }
2608
2609 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2610 {
2611         unsigned int trace_flags = irqs_status;
2612         unsigned int pc;
2613
2614         pc = preempt_count();
2615
2616         if (pc & NMI_MASK)
2617                 trace_flags |= TRACE_FLAG_NMI;
2618         if (pc & HARDIRQ_MASK)
2619                 trace_flags |= TRACE_FLAG_HARDIRQ;
2620         if (in_serving_softirq())
2621                 trace_flags |= TRACE_FLAG_SOFTIRQ;
2622         if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
2623                 trace_flags |= TRACE_FLAG_BH_OFF;
2624
2625         if (tif_need_resched())
2626                 trace_flags |= TRACE_FLAG_NEED_RESCHED;
2627         if (test_preempt_need_resched())
2628                 trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2629         return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2630                 (min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
2631 }
2632
2633 struct ring_buffer_event *
2634 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2635                           int type,
2636                           unsigned long len,
2637                           unsigned int trace_ctx)
2638 {
2639         return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2640 }
2641
2642 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2643 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2644 static int trace_buffered_event_ref;
2645
2646 /**
2647  * trace_buffered_event_enable - enable buffering events
2648  *
2649  * When events are being filtered, it is quicker to use a temporary
2650  * buffer to write the event data into if there's a likely chance
2651  * that it will not be committed. Discarding an event from the ring
2652  * buffer is not as fast as committing one, and is much slower than
2653  * copying the data and committing the copy.
2654  *
2655  * When an event is to be filtered, per-CPU buffers are allocated to
2656  * write the event data into. If the event is then filtered and
2657  * discarded, it is simply dropped; otherwise the entire data is
2658  * committed in one shot.
2659  */
2660 void trace_buffered_event_enable(void)
2661 {
2662         struct ring_buffer_event *event;
2663         struct page *page;
2664         int cpu;
2665
2666         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2667
2668         if (trace_buffered_event_ref++)
2669                 return;
2670
2671         for_each_tracing_cpu(cpu) {
2672                 page = alloc_pages_node(cpu_to_node(cpu),
2673                                         GFP_KERNEL | __GFP_NORETRY, 0);
2674                 if (!page)
2675                         goto failed;
2676
2677                 event = page_address(page);
2678                 memset(event, 0, sizeof(*event));
2679
2680                 per_cpu(trace_buffered_event, cpu) = event;
2681
2682                 preempt_disable();
2683                 if (cpu == smp_processor_id() &&
2684                     __this_cpu_read(trace_buffered_event) !=
2685                     per_cpu(trace_buffered_event, cpu))
2686                         WARN_ON_ONCE(1);
2687                 preempt_enable();
2688         }
2689
2690         return;
2691  failed:
2692         trace_buffered_event_disable();
2693 }
2694
2695 static void enable_trace_buffered_event(void *data)
2696 {
2697         /* Probably not needed, but do it anyway */
2698         smp_rmb();
2699         this_cpu_dec(trace_buffered_event_cnt);
2700 }
2701
2702 static void disable_trace_buffered_event(void *data)
2703 {
2704         this_cpu_inc(trace_buffered_event_cnt);
2705 }
2706
2707 /**
2708  * trace_buffered_event_disable - disable buffering events
2709  *
2710  * When a filter is removed, it is faster to not use the buffered
2711  * events, and to commit directly into the ring buffer. Free up
2712  * the temp buffers when there are no more users. This requires
2713  * special synchronization with current events.
2714  */
2715 void trace_buffered_event_disable(void)
2716 {
2717         int cpu;
2718
2719         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2720
2721         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2722                 return;
2723
2724         if (--trace_buffered_event_ref)
2725                 return;
2726
2727         preempt_disable();
2728         /* For each CPU, set the buffer as used. */
2729         smp_call_function_many(tracing_buffer_mask,
2730                                disable_trace_buffered_event, NULL, 1);
2731         preempt_enable();
2732
2733         /* Wait for all current users to finish */
2734         synchronize_rcu();
2735
2736         for_each_tracing_cpu(cpu) {
2737                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2738                 per_cpu(trace_buffered_event, cpu) = NULL;
2739         }
2740         /*
2741          * Make sure trace_buffered_event is NULL before clearing
2742          * trace_buffered_event_cnt.
2743          */
2744         smp_wmb();
2745
2746         preempt_disable();
2747         /* Do the work on each cpu */
2748         smp_call_function_many(tracing_buffer_mask,
2749                                enable_trace_buffered_event, NULL, 1);
2750         preempt_enable();
2751 }
2752
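/*
 * A sketch of the enable/disable pairing that the refcount above expects,
 * e.g. around the lifetime of an event filter; event_mutex must be held,
 * as the WARN_ON_ONCE() checks document.
 */
static void __maybe_unused example_buffered_event_lifetime(void)
{
        mutex_lock(&event_mutex);
        trace_buffered_event_enable();          /* filter added */
        mutex_unlock(&event_mutex);

        /* ... events may now be staged in the per-CPU pages ... */

        mutex_lock(&event_mutex);
        trace_buffered_event_disable();         /* last filter removed */
        mutex_unlock(&event_mutex);
}
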
2753 static struct trace_buffer *temp_buffer;
2754
2755 struct ring_buffer_event *
2756 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2757                           struct trace_event_file *trace_file,
2758                           int type, unsigned long len,
2759                           unsigned int trace_ctx)
2760 {
2761         struct ring_buffer_event *entry;
2762         struct trace_array *tr = trace_file->tr;
2763         int val;
2764
2765         *current_rb = tr->array_buffer.buffer;
2766
2767         if (!tr->no_filter_buffering_ref &&
2768             (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) {
2769                 preempt_disable_notrace();
2770                 /*
2771                  * Filtering is on, so try to use the per cpu buffer first.
2772                  * This buffer will simulate a ring_buffer_event,
2773                  * where the type_len is zero and the array[0] will
2774                  * hold the full length.
2775                  * (see include/linux/ring_buffer.h for details on
2776                  *  how the ring_buffer_event is structured).
2777                  *
2778                  * Using a temp buffer during filtering and copying it
2779                  * on a matched filter is quicker than writing directly
2780                  * into the ring buffer and then discarding it when
2781                  * it doesn't match. That is because the discard
2782                  * requires several atomic operations to get right.
2783                  * Copying on a match and doing nothing on a failed match
2784                  * is still quicker than skipping the copy but then having
2785                  * to discard from the ring buffer on a failed match.
2786                  */
2787                 if ((entry = __this_cpu_read(trace_buffered_event))) {
2788                         int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2789
2790                         val = this_cpu_inc_return(trace_buffered_event_cnt);
2791
2792                         /*
2793                          * Preemption is disabled, but interrupts and NMIs
2794                          * can still come in now. If that happens after
2795                          * the above increment, then it will have to go
2796                          * back to the old method of allocating the event
2797                          * on the ring buffer, and if the filter fails, it
2798                          * will have to call ring_buffer_discard_commit()
2799                          * to remove it.
2800                          *
2801                          * Need to also check the unlikely case that the
2802                          * length is bigger than the temp buffer size.
2803                          * If that happens, then the reserve is pretty much
2804                          * guaranteed to fail, as the ring buffer currently
2805                          * only allows events less than a page. But that may
2806                          * change in the future, so let the ring buffer reserve
2807                          * handle the failure in that case.
2808                          */
2809                         if (val == 1 && likely(len <= max_len)) {
2810                                 trace_event_setup(entry, type, trace_ctx);
2811                                 entry->array[0] = len;
2812                                 /* Return with preemption disabled */
2813                                 return entry;
2814                         }
2815                         this_cpu_dec(trace_buffered_event_cnt);
2816                 }
2817                 /* __trace_buffer_lock_reserve() disables preemption */
2818                 preempt_enable_notrace();
2819         }
2820
2821         entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2822                                             trace_ctx);
2823         /*
2824          * If tracing is off, but we have triggers enabled
2825          * we still need to look at the event data. Use the temp_buffer
2826          * to store the trace event for the trigger to use. It's recursion
2827          * safe and will not be recorded anywhere.
2828          */
2829         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2830                 *current_rb = temp_buffer;
2831                 entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2832                                                     trace_ctx);
2833         }
2834         return entry;
2835 }
2836 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2837
2838 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2839 static DEFINE_MUTEX(tracepoint_printk_mutex);
2840
2841 static void output_printk(struct trace_event_buffer *fbuffer)
2842 {
2843         struct trace_event_call *event_call;
2844         struct trace_event_file *file;
2845         struct trace_event *event;
2846         unsigned long flags;
2847         struct trace_iterator *iter = tracepoint_print_iter;
2848
2849         /* We should never get here if iter is NULL */
2850         if (WARN_ON_ONCE(!iter))
2851                 return;
2852
2853         event_call = fbuffer->trace_file->event_call;
2854         if (!event_call || !event_call->event.funcs ||
2855             !event_call->event.funcs->trace)
2856                 return;
2857
2858         file = fbuffer->trace_file;
2859         if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2860             (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2861              !filter_match_preds(file->filter, fbuffer->entry)))
2862                 return;
2863
2864         event = &fbuffer->trace_file->event_call->event;
2865
2866         spin_lock_irqsave(&tracepoint_iter_lock, flags);
2867         trace_seq_init(&iter->seq);
2868         iter->ent = fbuffer->entry;
2869         event_call->event.funcs->trace(iter, 0, event);
2870         trace_seq_putc(&iter->seq, 0);
2871         printk("%s", iter->seq.buffer);
2872
2873         spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2874 }
2875
2876 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2877                              void *buffer, size_t *lenp,
2878                              loff_t *ppos)
2879 {
2880         int save_tracepoint_printk;
2881         int ret;
2882
2883         mutex_lock(&tracepoint_printk_mutex);
2884         save_tracepoint_printk = tracepoint_printk;
2885
2886         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2887
2888         /*
2889          * This will force exiting early, as tracepoint_printk
2890          * is always zero when tracepoint_print_iter is not allocated.
2891          */
2892         if (!tracepoint_print_iter)
2893                 tracepoint_printk = 0;
2894
2895         if (save_tracepoint_printk == tracepoint_printk)
2896                 goto out;
2897
2898         if (tracepoint_printk)
2899                 static_key_enable(&tracepoint_printk_key.key);
2900         else
2901                 static_key_disable(&tracepoint_printk_key.key);
2902
2903  out:
2904         mutex_unlock(&tracepoint_printk_mutex);
2905
2906         return ret;
2907 }
2908
2909 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2910 {
2911         enum event_trigger_type tt = ETT_NONE;
2912         struct trace_event_file *file = fbuffer->trace_file;
2913
2914         if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
2915                         fbuffer->entry, &tt))
2916                 goto discard;
2917
2918         if (static_key_false(&tracepoint_printk_key.key))
2919                 output_printk(fbuffer);
2920
2921         if (static_branch_unlikely(&trace_event_exports_enabled))
2922                 ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2923
2924         trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
2925                         fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
2926
2927 discard:
2928         if (tt)
2929                 event_triggers_post_call(file, tt);
2930
2931 }
2932 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2933
2934 /*
2935  * Skip 3:
2936  *
2937  *   trace_buffer_unlock_commit_regs()
2938  *   trace_event_buffer_commit()
2939  *   trace_event_raw_event_xxx()
2940  */
2941 # define STACK_SKIP 3
2942
2943 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2944                                      struct trace_buffer *buffer,
2945                                      struct ring_buffer_event *event,
2946                                      unsigned int trace_ctx,
2947                                      struct pt_regs *regs)
2948 {
2949         __buffer_unlock_commit(buffer, event);
2950
2951         /*
2952          * If regs is not set, then skip the necessary functions.
2953          * Note, we can still get here via blktrace, wakeup tracer
2954          * and mmiotrace, but that's ok if they lose a function or
2955          * two. They are not that meaningful.
2956          */
2957         ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
2958         ftrace_trace_userstack(tr, buffer, trace_ctx);
2959 }
2960
2961 /*
2962  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2963  */
2964 void
2965 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2966                                    struct ring_buffer_event *event)
2967 {
2968         __buffer_unlock_commit(buffer, event);
2969 }
2970
2971 void
2972 trace_function(struct trace_array *tr, unsigned long ip,
2973                unsigned long parent_ip, unsigned int trace_ctx)
2974 {
2975         struct trace_event_call *call = &event_function;
2976         struct trace_buffer *buffer = tr->array_buffer.buffer;
2977         struct ring_buffer_event *event;
2978         struct ftrace_entry *entry;
2979
2980         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2981                                             trace_ctx);
2982         if (!event)
2983                 return;
2984         entry   = ring_buffer_event_data(event);
2985         entry->ip                       = ip;
2986         entry->parent_ip                = parent_ip;
2987
2988         if (!call_filter_check_discard(call, entry, buffer, event)) {
2989                 if (static_branch_unlikely(&trace_function_exports_enabled))
2990                         ftrace_exports(event, TRACE_EXPORT_FUNCTION);
2991                 __buffer_unlock_commit(buffer, event);
2992         }
2993 }
2994
2995 #ifdef CONFIG_STACKTRACE
2996
2997 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2998 #define FTRACE_KSTACK_NESTING   4
2999
3000 #define FTRACE_KSTACK_ENTRIES   (PAGE_SIZE / FTRACE_KSTACK_NESTING)
3001
3002 struct ftrace_stack {
3003         unsigned long           calls[FTRACE_KSTACK_ENTRIES];
3004 };
3005
3006
3007 struct ftrace_stacks {
3008         struct ftrace_stack     stacks[FTRACE_KSTACK_NESTING];
3009 };
3010
3011 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
3012 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
3013
3014 static void __ftrace_trace_stack(struct trace_buffer *buffer,
3015                                  unsigned int trace_ctx,
3016                                  int skip, struct pt_regs *regs)
3017 {
3018         struct trace_event_call *call = &event_kernel_stack;
3019         struct ring_buffer_event *event;
3020         unsigned int size, nr_entries;
3021         struct ftrace_stack *fstack;
3022         struct stack_entry *entry;
3023         int stackidx;
3024
3025         /*
3026          * Add one, for this function and the call to stack_trace_save().
3027          * If regs is set, then these functions will not be in the way.
3028          */
3029 #ifndef CONFIG_UNWINDER_ORC
3030         if (!regs)
3031                 skip++;
3032 #endif
3033
3034         preempt_disable_notrace();
3035
3036         stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
3037
3038         /* This should never happen. If it does, yell once and skip */
3039         if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
3040                 goto out;
3041
3042         /*
3043          * The above __this_cpu_inc_return() is 'atomic' cpu local. An
3044          * interrupt will either see the value pre increment or post
3045          * increment. If the interrupt happens pre increment it will have
3046          * restored the counter when it returns.  We just need a barrier to
3047          * keep gcc from moving things around.
3048          */
3049         barrier();
3050
3051         fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
3052         size = ARRAY_SIZE(fstack->calls);
3053
3054         if (regs) {
3055                 nr_entries = stack_trace_save_regs(regs, fstack->calls,
3056                                                    size, skip);
3057         } else {
3058                 nr_entries = stack_trace_save(fstack->calls, size, skip);
3059         }
3060
3061         size = nr_entries * sizeof(unsigned long);
3062         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3063                                     (sizeof(*entry) - sizeof(entry->caller)) + size,
3064                                     trace_ctx);
3065         if (!event)
3066                 goto out;
3067         entry = ring_buffer_event_data(event);
3068
3069         memcpy(&entry->caller, fstack->calls, size);
3070         entry->size = nr_entries;
3071
3072         if (!call_filter_check_discard(call, entry, buffer, event))
3073                 __buffer_unlock_commit(buffer, event);
3074
3075  out:
3076         /* Again, don't let gcc optimize things here */
3077         barrier();
3078         __this_cpu_dec(ftrace_stack_reserve);
3079         preempt_enable_notrace();
3080
3081 }
3082
3083 static inline void ftrace_trace_stack(struct trace_array *tr,
3084                                       struct trace_buffer *buffer,
3085                                       unsigned int trace_ctx,
3086                                       int skip, struct pt_regs *regs)
3087 {
3088         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3089                 return;
3090
3091         __ftrace_trace_stack(buffer, trace_ctx, skip, regs);
3092 }
3093
3094 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3095                    int skip)
3096 {
3097         struct trace_buffer *buffer = tr->array_buffer.buffer;
3098
3099         if (rcu_is_watching()) {
3100                 __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3101                 return;
3102         }
3103
3104         /*
3105          * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
3106          * but if the above rcu_is_watching() failed, then the NMI
3107          * triggered someplace critical, and rcu_irq_enter() should
3108          * not be called from NMI.
3109          */
3110         if (unlikely(in_nmi()))
3111                 return;
3112
3113         rcu_irq_enter_irqson();
3114         __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3115         rcu_irq_exit_irqson();
3116 }
3117
3118 /**
3119  * trace_dump_stack - record a stack back trace in the trace buffer
3120  * @skip: Number of functions to skip (helper handlers)
3121  */
3122 void trace_dump_stack(int skip)
3123 {
3124         if (tracing_disabled || tracing_selftest_running)
3125                 return;
3126
3127 #ifndef CONFIG_UNWINDER_ORC
3128         /* Skip 1 to skip this function. */
3129         skip++;
3130 #endif
3131         __ftrace_trace_stack(global_trace.array_buffer.buffer,
3132                              tracing_gen_ctx(), skip, NULL);
3133 }
3134 EXPORT_SYMBOL_GPL(trace_dump_stack);
3135
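/*
 * A sketch of the typical use of trace_dump_stack(): drop a backtrace into
 * the trace buffer from a suspect code path. The condition is illustrative;
 * skip is 0 so the caller itself shows up in the trace.
 */
static void __maybe_unused example_dump_on_anomaly(bool something_odd)
{
        if (something_odd)
                trace_dump_stack(0);
}
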
3136 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3137 static DEFINE_PER_CPU(int, user_stack_count);
3138
3139 static void
3140 ftrace_trace_userstack(struct trace_array *tr,
3141                        struct trace_buffer *buffer, unsigned int trace_ctx)
3142 {
3143         struct trace_event_call *call = &event_user_stack;
3144         struct ring_buffer_event *event;
3145         struct userstack_entry *entry;
3146
3147         if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3148                 return;
3149
3150         /*
3151          * NMIs can not handle page faults, even with fixups.
3152          * Saving the user stack can (and often does) fault.
3153          */
3154         if (unlikely(in_nmi()))
3155                 return;
3156
3157         /*
3158          * prevent recursion, since the user stack tracing may
3159          * trigger other kernel events.
3160          */
3161         preempt_disable();
3162         if (__this_cpu_read(user_stack_count))
3163                 goto out;
3164
3165         __this_cpu_inc(user_stack_count);
3166
3167         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3168                                             sizeof(*entry), trace_ctx);
3169         if (!event)
3170                 goto out_drop_count;
3171         entry   = ring_buffer_event_data(event);
3172
3173         entry->tgid             = current->tgid;
3174         memset(&entry->caller, 0, sizeof(entry->caller));
3175
3176         stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3177         if (!call_filter_check_discard(call, entry, buffer, event))
3178                 __buffer_unlock_commit(buffer, event);
3179
3180  out_drop_count:
3181         __this_cpu_dec(user_stack_count);
3182  out:
3183         preempt_enable();
3184 }
3185 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3186 static void ftrace_trace_userstack(struct trace_array *tr,
3187                                    struct trace_buffer *buffer,
3188                                    unsigned int trace_ctx)
3189 {
3190 }
3191 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3192
3193 #endif /* CONFIG_STACKTRACE */
3194
3195 static inline void
3196 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3197                           unsigned long long delta)
3198 {
3199         entry->bottom_delta_ts = delta & U32_MAX;
3200         entry->top_delta_ts = (delta >> 32);
3201 }
3202
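/*
 * A sketch of reading back the split timestamp delta; shown here only to
 * document the encoding used by func_repeats_set_delta_ts() above (the
 * output side performs the equivalent reconstruction).
 */
static inline u64 __maybe_unused
example_func_repeats_get_delta_ts(struct func_repeats_entry *entry)
{
        return ((u64)entry->top_delta_ts << 32) | entry->bottom_delta_ts;
}
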
3203 void trace_last_func_repeats(struct trace_array *tr,
3204                              struct trace_func_repeats *last_info,
3205                              unsigned int trace_ctx)
3206 {
3207         struct trace_buffer *buffer = tr->array_buffer.buffer;
3208         struct func_repeats_entry *entry;
3209         struct ring_buffer_event *event;
3210         u64 delta;
3211
3212         event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3213                                             sizeof(*entry), trace_ctx);
3214         if (!event)
3215                 return;
3216
3217         delta = ring_buffer_event_time_stamp(buffer, event) -
3218                 last_info->ts_last_call;
3219
3220         entry = ring_buffer_event_data(event);
3221         entry->ip = last_info->ip;
3222         entry->parent_ip = last_info->parent_ip;
3223         entry->count = last_info->count;
3224         func_repeats_set_delta_ts(entry, delta);
3225
3226         __buffer_unlock_commit(buffer, event);
3227 }
3228
3229 /* created for use with alloc_percpu */
3230 struct trace_buffer_struct {
3231         int nesting;
3232         char buffer[4][TRACE_BUF_SIZE];
3233 };
3234
3235 static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3236
3237 /*
3238  * This allows for lockless recording.  If we're nested too deeply, then
3239  * this returns NULL.
3240  */
3241 static char *get_trace_buf(void)
3242 {
3243         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3244
3245         if (!trace_percpu_buffer || buffer->nesting >= 4)
3246                 return NULL;
3247
3248         buffer->nesting++;
3249
3250         /* Interrupts must see nesting incremented before we use the buffer */
3251         barrier();
3252         return &buffer->buffer[buffer->nesting - 1][0];
3253 }
3254
3255 static void put_trace_buf(void)
3256 {
3257         /* Don't let the decrement of nesting leak before this */
3258         barrier();
3259         this_cpu_dec(trace_percpu_buffer->nesting);
3260 }
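
/*
 * Illustrative sketch (not from the original source): get_trace_buf() and
 * put_trace_buf() are expected to be paired with preemption disabled, as
 * trace_vbprintk() below does:
 *
 *	preempt_disable_notrace();
 *	buf = get_trace_buf();
 *	if (buf) {
 *		... format at most TRACE_BUF_SIZE bytes into buf ...
 *		put_trace_buf();
 *	}
 *	preempt_enable_notrace();
 *
 * The four nesting levels are assumed to cover normal, softirq, irq and
 * NMI context on the same CPU.
 */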
3261
3262 static int alloc_percpu_trace_buffer(void)
3263 {
3264         struct trace_buffer_struct __percpu *buffers;
3265
3266         if (trace_percpu_buffer)
3267                 return 0;
3268
3269         buffers = alloc_percpu(struct trace_buffer_struct);
3270         if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3271                 return -ENOMEM;
3272
3273         trace_percpu_buffer = buffers;
3274         return 0;
3275 }
3276
3277 static int buffers_allocated;
3278
3279 void trace_printk_init_buffers(void)
3280 {
3281         if (buffers_allocated)
3282                 return;
3283
3284         if (alloc_percpu_trace_buffer())
3285                 return;
3286
3287         /* trace_printk() is for debug use only. Don't use it in production. */
3288
3289         pr_warn("\n");
3290         pr_warn("**********************************************************\n");
3291         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3292         pr_warn("**                                                      **\n");
3293         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3294         pr_warn("**                                                      **\n");
3295         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3296         pr_warn("** unsafe for production use.                           **\n");
3297         pr_warn("**                                                      **\n");
3298         pr_warn("** If you see this message and you are not debugging    **\n");
3299         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3300         pr_warn("**                                                      **\n");
3301         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3302         pr_warn("**********************************************************\n");
3303
3304         /* Expand the buffers to the configured size */
3305         tracing_update_buffers();
3306
3307         buffers_allocated = 1;
3308
3309         /*
3310          * trace_printk_init_buffers() can be called by modules.
3311          * If that happens, then we need to start cmdline recording
3312          * directly here. If global_trace.array_buffer.buffer is already
3313          * allocated at this point, then this was called by module code.
3314          */
3315         if (global_trace.array_buffer.buffer)
3316                 tracing_start_cmdline_record();
3317 }
3318 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
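
/*
 * Illustrative usage (not from the original source): any trace_printk() call
 * in kernel or module code ends up going through the buffers allocated here
 * and triggers the notice above, e.g.:
 *
 *	trace_printk("processed %d packets on %s\n", count, dev->name);
 *
 * "count" and "dev" are made-up names; the output goes into the top level
 * ring buffer, not to the console.
 */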
3319
3320 void trace_printk_start_comm(void)
3321 {
3322         /* Start tracing comms if trace printk is set */
3323         if (!buffers_allocated)
3324                 return;
3325         tracing_start_cmdline_record();
3326 }
3327
3328 static void trace_printk_start_stop_comm(int enabled)
3329 {
3330         if (!buffers_allocated)
3331                 return;
3332
3333         if (enabled)
3334                 tracing_start_cmdline_record();
3335         else
3336                 tracing_stop_cmdline_record();
3337 }
3338
3339 /**
3340  * trace_vbprintk - write binary msg to tracing buffer
3341  * @ip:    The address of the caller
3342  * @fmt:   The string format to write to the buffer
3343  * @args:  Arguments for @fmt
3344  */
3345 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3346 {
3347         struct trace_event_call *call = &event_bprint;
3348         struct ring_buffer_event *event;
3349         struct trace_buffer *buffer;
3350         struct trace_array *tr = &global_trace;
3351         struct bprint_entry *entry;
3352         unsigned int trace_ctx;
3353         char *tbuffer;
3354         int len = 0, size;
3355
3356         if (unlikely(tracing_selftest_running || tracing_disabled))
3357                 return 0;
3358
3359         /* Don't pollute graph traces with trace_vprintk internals */
3360         pause_graph_tracing();
3361
3362         trace_ctx = tracing_gen_ctx();
3363         preempt_disable_notrace();
3364
3365         tbuffer = get_trace_buf();
3366         if (!tbuffer) {
3367                 len = 0;
3368                 goto out_nobuffer;
3369         }
3370
3371         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3372
3373         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3374                 goto out_put;
3375
3376         size = sizeof(*entry) + sizeof(u32) * len;
3377         buffer = tr->array_buffer.buffer;
3378         ring_buffer_nest_start(buffer);
3379         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3380                                             trace_ctx);
3381         if (!event)
3382                 goto out;
3383         entry = ring_buffer_event_data(event);
3384         entry->ip                       = ip;
3385         entry->fmt                      = fmt;
3386
3387         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3388         if (!call_filter_check_discard(call, entry, buffer, event)) {
3389                 __buffer_unlock_commit(buffer, event);
3390                 ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3391         }
3392
3393 out:
3394         ring_buffer_nest_end(buffer);
3395 out_put:
3396         put_trace_buf();
3397
3398 out_nobuffer:
3399         preempt_enable_notrace();
3400         unpause_graph_tracing();
3401
3402         return len;
3403 }
3404 EXPORT_SYMBOL_GPL(trace_vbprintk);
3405
3406 __printf(3, 0)
3407 static int
3408 __trace_array_vprintk(struct trace_buffer *buffer,
3409                       unsigned long ip, const char *fmt, va_list args)
3410 {
3411         struct trace_event_call *call = &event_print;
3412         struct ring_buffer_event *event;
3413         int len = 0, size;
3414         struct print_entry *entry;
3415         unsigned int trace_ctx;
3416         char *tbuffer;
3417
3418         if (tracing_disabled || tracing_selftest_running)
3419                 return 0;
3420
3421         /* Don't pollute graph traces with trace_vprintk internals */
3422         pause_graph_tracing();
3423
3424         trace_ctx = tracing_gen_ctx();
3425         preempt_disable_notrace();
3426
3427
3428         tbuffer = get_trace_buf();
3429         if (!tbuffer) {
3430                 len = 0;
3431                 goto out_nobuffer;
3432         }
3433
3434         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3435
3436         size = sizeof(*entry) + len + 1;
3437         ring_buffer_nest_start(buffer);
3438         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3439                                             trace_ctx);
3440         if (!event)
3441                 goto out;
3442         entry = ring_buffer_event_data(event);
3443         entry->ip = ip;
3444
3445         memcpy(&entry->buf, tbuffer, len + 1);
3446         if (!call_filter_check_discard(call, entry, buffer, event)) {
3447                 __buffer_unlock_commit(buffer, event);
3448                 ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
3449         }
3450
3451 out:
3452         ring_buffer_nest_end(buffer);
3453         put_trace_buf();
3454
3455 out_nobuffer:
3456         preempt_enable_notrace();
3457         unpause_graph_tracing();
3458
3459         return len;
3460 }
3461
3462 __printf(3, 0)
3463 int trace_array_vprintk(struct trace_array *tr,
3464                         unsigned long ip, const char *fmt, va_list args)
3465 {
3466         return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3467 }
3468
3469 /**
3470  * trace_array_printk - Print a message to a specific instance
3471  * @tr: The instance trace_array descriptor
3472  * @ip: The instruction pointer that this is called from.
3473  * @fmt: The format to print (printf format)
3474  *
3475  * If a subsystem sets up its own instance, it has the right to
3476  * printk strings into its tracing instance buffer using this
3477  * function. Note, this function will not write into the top level
3478  * buffer (use trace_printk() for that), as the top level buffer
3479  * should only contain events that can be individually disabled.
3480  * trace_printk() is only meant for debugging a kernel, and should
3481  * never be incorporated into normal use.
3482  *
3483  * trace_array_printk() can be used instead, as it will not add noise
3484  * to the top level tracing buffer.
3485  *
3486  * Note, trace_array_init_printk() must be called on @tr before this
3487  * can be used.
3488  */
3489 __printf(3, 0)
3490 int trace_array_printk(struct trace_array *tr,
3491                        unsigned long ip, const char *fmt, ...)
3492 {
3493         int ret;
3494         va_list ap;
3495
3496         if (!tr)
3497                 return -ENOENT;
3498
3499         /* This is only allowed for created instances */
3500         if (tr == &global_trace)
3501                 return 0;
3502
3503         if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3504                 return 0;
3505
3506         va_start(ap, fmt);
3507         ret = trace_array_vprintk(tr, ip, fmt, ap);
3508         va_end(ap);
3509         return ret;
3510 }
3511 EXPORT_SYMBOL_GPL(trace_array_printk);
3512
3513 /**
3514  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3515  * @tr: The trace array to initialize the buffers for
3516  *
3517  * As trace_array_printk() only writes into instances, calls to it are OK
3518  * to have in the kernel (unlike trace_printk()). This needs to be called
3519  * before trace_array_printk() can be used on a trace_array.
3520  */
3521 int trace_array_init_printk(struct trace_array *tr)
3522 {
3523         if (!tr)
3524                 return -ENOENT;
3525
3526         /* This is only allowed for created instances */
3527         if (tr == &global_trace)
3528                 return -EINVAL;
3529
3530         return alloc_percpu_trace_buffer();
3531 }
3532 EXPORT_SYMBOL_GPL(trace_array_init_printk);
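
/*
 * Illustrative sketch (not from the original source), assuming a subsystem
 * that created its own instance named "my_instance":
 *
 *	struct trace_array *tr = trace_array_get_by_name("my_instance");
 *
 *	if (tr && !trace_array_init_printk(tr))
 *		trace_array_printk(tr, _THIS_IP_, "queued %d requests\n", nr);
 *
 * "my_instance" and "nr" are made up for the example.
 */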
3533
3534 __printf(3, 4)
3535 int trace_array_printk_buf(struct trace_buffer *buffer,
3536                            unsigned long ip, const char *fmt, ...)
3537 {
3538         int ret;
3539         va_list ap;
3540
3541         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3542                 return 0;
3543
3544         va_start(ap, fmt);
3545         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3546         va_end(ap);
3547         return ret;
3548 }
3549
3550 __printf(2, 0)
3551 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3552 {
3553         return trace_array_vprintk(&global_trace, ip, fmt, args);
3554 }
3555 EXPORT_SYMBOL_GPL(trace_vprintk);
3556
3557 static void trace_iterator_increment(struct trace_iterator *iter)
3558 {
3559         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3560
3561         iter->idx++;
3562         if (buf_iter)
3563                 ring_buffer_iter_advance(buf_iter);
3564 }
3565
3566 static struct trace_entry *
3567 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3568                 unsigned long *lost_events)
3569 {
3570         struct ring_buffer_event *event;
3571         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3572
3573         if (buf_iter) {
3574                 event = ring_buffer_iter_peek(buf_iter, ts);
3575                 if (lost_events)
3576                         *lost_events = ring_buffer_iter_dropped(buf_iter) ?
3577                                 (unsigned long)-1 : 0;
3578         } else {
3579                 event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3580                                          lost_events);
3581         }
3582
3583         if (event) {
3584                 iter->ent_size = ring_buffer_event_length(event);
3585                 return ring_buffer_event_data(event);
3586         }
3587         iter->ent_size = 0;
3588         return NULL;
3589 }
3590
3591 static struct trace_entry *
3592 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3593                   unsigned long *missing_events, u64 *ent_ts)
3594 {
3595         struct trace_buffer *buffer = iter->array_buffer->buffer;
3596         struct trace_entry *ent, *next = NULL;
3597         unsigned long lost_events = 0, next_lost = 0;
3598         int cpu_file = iter->cpu_file;
3599         u64 next_ts = 0, ts;
3600         int next_cpu = -1;
3601         int next_size = 0;
3602         int cpu;
3603
3604         /*
3605          * If we are in a per_cpu trace file, don't bother iterating over
3606          * all CPUs; peek at the requested CPU directly.
3607          */
3608         if (cpu_file > RING_BUFFER_ALL_CPUS) {
3609                 if (ring_buffer_empty_cpu(buffer, cpu_file))
3610                         return NULL;
3611                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3612                 if (ent_cpu)
3613                         *ent_cpu = cpu_file;
3614
3615                 return ent;
3616         }
3617
3618         for_each_tracing_cpu(cpu) {
3619
3620                 if (ring_buffer_empty_cpu(buffer, cpu))
3621                         continue;
3622
3623                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3624
3625                 /*
3626                  * Pick the entry with the smallest timestamp:
3627                  */
3628                 if (ent && (!next || ts < next_ts)) {
3629                         next = ent;
3630                         next_cpu = cpu;
3631                         next_ts = ts;
3632                         next_lost = lost_events;
3633                         next_size = iter->ent_size;
3634                 }
3635         }
3636
3637         iter->ent_size = next_size;
3638
3639         if (ent_cpu)
3640                 *ent_cpu = next_cpu;
3641
3642         if (ent_ts)
3643                 *ent_ts = next_ts;
3644
3645         if (missing_events)
3646                 *missing_events = next_lost;
3647
3648         return next;
3649 }
3650
3651 #define STATIC_FMT_BUF_SIZE     128
3652 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3653
3654 static char *trace_iter_expand_format(struct trace_iterator *iter)
3655 {
3656         char *tmp;
3657
3658         /*
3659          * iter->tr is NULL when used with tp_printk, which means this can
3660          * get called in a context where it is not safe to call krealloc().
3661          */
3662         if (!iter->tr || iter->fmt == static_fmt_buf)
3663                 return NULL;
3664
3665         tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3666                        GFP_KERNEL);
3667         if (tmp) {
3668                 iter->fmt_size += STATIC_FMT_BUF_SIZE;
3669                 iter->fmt = tmp;
3670         }
3671
3672         return tmp;
3673 }
3674
3675 /* Returns true if the string is safe to dereference from an event */
3676 static bool trace_safe_str(struct trace_iterator *iter, const char *str,
3677                            bool star, int len)
3678 {
3679         unsigned long addr = (unsigned long)str;
3680         struct trace_event *trace_event;
3681         struct trace_event_call *event;
3682
3683         /* Ignore strings with no length */
3684         if (star && !len)
3685                 return true;
3686
3687         /* OK if part of the event data */
3688         if ((addr >= (unsigned long)iter->ent) &&
3689             (addr < (unsigned long)iter->ent + iter->ent_size))
3690                 return true;
3691
3692         /* OK if part of the temp seq buffer */
3693         if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3694             (addr < (unsigned long)iter->tmp_seq.buffer + PAGE_SIZE))
3695                 return true;
3696
3697         /* Core rodata can not be freed */
3698         if (is_kernel_rodata(addr))
3699                 return true;
3700
3701         if (trace_is_tracepoint_string(str))
3702                 return true;
3703
3704         /*
3705          * Now this could be a module event, referencing core module
3706          * data, which is OK.
3707          */
3708         if (!iter->ent)
3709                 return false;
3710
3711         trace_event = ftrace_find_event(iter->ent->type);
3712         if (!trace_event)
3713                 return false;
3714
3715         event = container_of(trace_event, struct trace_event_call, event);
3716         if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3717                 return false;
3718
3719         /* Would rather have rodata, but this will suffice */
3720         if (within_module_core(addr, event->module))
3721                 return true;
3722
3723         return false;
3724 }
3725
3726 static const char *show_buffer(struct trace_seq *s)
3727 {
3728         struct seq_buf *seq = &s->seq;
3729
3730         seq_buf_terminate(seq);
3731
3732         return seq->buffer;
3733 }
3734
3735 static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
3736
3737 static int test_can_verify_check(const char *fmt, ...)
3738 {
3739         char buf[16];
3740         va_list ap;
3741         int ret;
3742
3743         /*
3744          * The verifier depends on vsnprintf() modifying the va_list that is
3745          * passed to it, i.e. on the va_list being passed by reference. Some
3746          * architectures (like x86_32) pass it by value, which means that
3747          * vsnprintf() does not advance the caller's va_list, and the verifier
3748          * would then need to understand every value that vsnprintf() can
3749          * consume. If the va_list is passed by value, the verifier is
3750          * disabled.
3751          */
3752         va_start(ap, fmt);
3753         vsnprintf(buf, 16, "%d", ap);
3754         ret = va_arg(ap, int);
3755         va_end(ap);
3756
3757         return ret;
3758 }
3759
3760 static void test_can_verify(void)
3761 {
3762         if (!test_can_verify_check("%d %d", 0, 1)) {
3763                 pr_info("trace event string verifier disabled\n");
3764                 static_branch_inc(&trace_no_verify);
3765         }
3766 }
3767
3768 /**
3769  * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
3770  * @iter: The iterator that holds the seq buffer and the event being printed
3771  * @fmt: The format used to print the event
3772  * @ap: The va_list holding the data to print from @fmt.
3773  *
3774  * This writes the data into the @iter->seq buffer using the data from
3775  * @fmt and @ap. If the format has a %s, then the source of the string
3776  * is examined to make sure it is safe to print, otherwise it will
3777  * warn and print "[UNSAFE-MEMORY]" in place of the dereferenced string
3778  * pointer.
3779  */
3780 void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
3781                          va_list ap)
3782 {
3783         const char *p = fmt;
3784         const char *str;
3785         int i, j;
3786
3787         if (WARN_ON_ONCE(!fmt))
3788                 return;
3789
3790         if (static_branch_unlikely(&trace_no_verify))
3791                 goto print;
3792
3793         /* Don't bother checking when doing a ftrace_dump() */
3794         if (iter->fmt == static_fmt_buf)
3795                 goto print;
3796
3797         while (*p) {
3798                 bool star = false;
3799                 int len = 0;
3800
3801                 j = 0;
3802
3803                 /* We only care about %s and variants */
3804                 for (i = 0; p[i]; i++) {
3805                         if (i + 1 >= iter->fmt_size) {
3806                                 /*
3807                                  * If we can't expand the copy buffer,
3808                                  * just print it.
3809                                  */
3810                                 if (!trace_iter_expand_format(iter))
3811                                         goto print;
3812                         }
3813
3814                         if (p[i] == '\\' && p[i+1]) {
3815                                 i++;
3816                                 continue;
3817                         }
3818                         if (p[i] == '%') {
3819                                 /* Need to test cases like %08.*s */
3820                                 for (j = 1; p[i+j]; j++) {
3821                                         if (isdigit(p[i+j]) ||
3822                                             p[i+j] == '.')
3823                                                 continue;
3824                                         if (p[i+j] == '*') {
3825                                                 star = true;
3826                                                 continue;
3827                                         }
3828                                         break;
3829                                 }
3830                                 if (p[i+j] == 's')
3831                                         break;
3832                                 star = false;
3833                         }
3834                         j = 0;
3835                 }
3836                 /* If no %s found then just print normally */
3837                 if (!p[i])
3838                         break;
3839
3840                 /* Copy up to the %s, and print that */
3841                 strncpy(iter->fmt, p, i);
3842                 iter->fmt[i] = '\0';
3843                 trace_seq_vprintf(&iter->seq, iter->fmt, ap);
3844
3845                 /*
3846                  * If iter->seq is full, the above call no longer guarantees
3847                  * that ap is in sync with fmt processing, and further calls
3848                  * to va_arg() can return wrong positional arguments.
3849                  *
3850                  * Ensure that ap is no longer used in this case.
3851                  */
3852                 if (iter->seq.full) {
3853                         p = "";
3854                         break;
3855                 }
3856
3857                 if (star)
3858                         len = va_arg(ap, int);
3859
3860                 /* The ap now points to the string data of the %s */
3861                 str = va_arg(ap, const char *);
3862
3863                 /*
3864                  * If you hit this warning, it is likely that the
3865                  * trace event in question used %s on a string that
3866                  * was saved at the time of the event, but may not be
3867                  * around when the trace is read. Use the __string(),
3868                  * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3869                  * macro instead. See samples/trace_events/trace-events-sample.h
3870                  * for reference.
3871                  */
3872                 if (WARN_ONCE(!trace_safe_str(iter, str, star, len),
3873                               "fmt: '%s' current_buffer: '%s'",
3874                               fmt, show_buffer(&iter->seq))) {
3875                         int ret;
3876
3877                         /* Try to safely read the string */
3878                         if (star) {
3879                                 if (len + 1 > iter->fmt_size)
3880                                         len = iter->fmt_size - 1;
3881                                 if (len < 0)
3882                                         len = 0;
3883                                 ret = copy_from_kernel_nofault(iter->fmt, str, len);
3884                                 iter->fmt[len] = 0;
3885                                 star = false;
3886                         } else {
3887                                 ret = strncpy_from_kernel_nofault(iter->fmt, str,
3888                                                                   iter->fmt_size);
3889                         }
3890                         if (ret < 0)
3891                                 trace_seq_printf(&iter->seq, "(0x%px)", str);
3892                         else
3893                                 trace_seq_printf(&iter->seq, "(0x%px:%s)",
3894                                                  str, iter->fmt);
3895                         str = "[UNSAFE-MEMORY]";
3896                         strcpy(iter->fmt, "%s");
3897                 } else {
3898                         strncpy(iter->fmt, p + i, j + 1);
3899                         iter->fmt[j+1] = '\0';
3900                 }
3901                 if (star)
3902                         trace_seq_printf(&iter->seq, iter->fmt, len, str);
3903                 else
3904                         trace_seq_printf(&iter->seq, iter->fmt, str);
3905
3906                 p += i + j + 1;
3907         }
3908  print:
3909         if (*p)
3910                 trace_seq_vprintf(&iter->seq, p, ap);
3911 }
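
/*
 * Illustrative note (not from the original source): when the check above
 * trips, an event line that would normally read
 *
 *	some_event: name=eth0
 *
 * instead comes out roughly as
 *
 *	some_event: name=(0xffff888004cd2000:eth0)[UNSAFE-MEMORY]
 *
 * showing the raw pointer and a best-effort copy of the string before the
 * "[UNSAFE-MEMORY]" marker that replaces the %s argument. The event name,
 * field and address are made up for the example.
 */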
3912
3913 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3914 {
3915         const char *p, *new_fmt;
3916         char *q;
3917
3918         if (WARN_ON_ONCE(!fmt))
3919                 return fmt;
3920
3921         if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3922                 return fmt;
3923
3924         p = fmt;
3925         new_fmt = q = iter->fmt;
3926         while (*p) {
3927                 if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3928                         if (!trace_iter_expand_format(iter))
3929                                 return fmt;
3930
3931                         q += iter->fmt - new_fmt;
3932                         new_fmt = iter->fmt;
3933                 }
3934
3935                 *q++ = *p++;
3936
3937                 /* Replace %p with %px */
3938                 if (p[-1] == '%') {
3939                         if (p[0] == '%') {
3940                                 *q++ = *p++;
3941                         } else if (p[0] == 'p' && !isalnum(p[1])) {
3942                                 *q++ = *p++;
3943                                 *q++ = 'x';
3944                         }
3945                 }
3946         }
3947         *q = '\0';
3948
3949         return new_fmt;
3950 }
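
/*
 * Illustrative example (not from the original source): with the hash-ptr
 * trace option cleared, a format such as
 *
 *	"ptr=%p flags=%pS count=%d"
 *
 * is rewritten above into
 *
 *	"ptr=%px flags=%pS count=%d"
 *
 * i.e. only a bare %p becomes %px; "%%" and pointer extensions like %pS
 * are passed through unchanged.
 */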
3951
3952 #define STATIC_TEMP_BUF_SIZE    128
3953 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3954
3955 /* Find the next real entry, without updating the iterator itself */
3956 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3957                                           int *ent_cpu, u64 *ent_ts)
3958 {
3959         /* __find_next_entry will reset ent_size */
3960         int ent_size = iter->ent_size;
3961         struct trace_entry *entry;
3962
3963         /*
3964          * If called from ftrace_dump(), then the iter->temp buffer
3965          * will be the static_temp_buf and not created from kmalloc.
3966          * If the entry size is greater than the buffer, we cannot
3967          * save it. Just return NULL in that case. This is only
3968          * used to add markers when two consecutive events' time
3969          * stamps have a large delta. See trace_print_lat_context().
3970          */
3971         if (iter->temp == static_temp_buf &&
3972             STATIC_TEMP_BUF_SIZE < ent_size)
3973                 return NULL;
3974
3975         /*
3976          * __find_next_entry() may call peek_next_entry(), which may
3977          * call ring_buffer_peek(), which can make the contents of iter->ent
3978          * undefined. iter->ent needs to be copied now.
3979          */
3980         if (iter->ent && iter->ent != iter->temp) {
3981                 if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3982                     !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3983                         void *temp;
3984                         temp = kmalloc(iter->ent_size, GFP_KERNEL);
3985                         if (!temp)
3986                                 return NULL;
3987                         kfree(iter->temp);
3988                         iter->temp = temp;
3989                         iter->temp_size = iter->ent_size;
3990                 }
3991                 memcpy(iter->temp, iter->ent, iter->ent_size);
3992                 iter->ent = iter->temp;
3993         }
3994         entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3995         /* Put back the original ent_size */
3996         iter->ent_size = ent_size;
3997
3998         return entry;
3999 }
4000
4001 /* Find the next real entry, and increment the iterator to the next entry */
4002 void *trace_find_next_entry_inc(struct trace_iterator *iter)
4003 {
4004         iter->ent = __find_next_entry(iter, &iter->cpu,
4005                                       &iter->lost_events, &iter->ts);
4006
4007         if (iter->ent)
4008                 trace_iterator_increment(iter);
4009
4010         return iter->ent ? iter : NULL;
4011 }
4012
4013 static void trace_consume(struct trace_iterator *iter)
4014 {
4015         ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
4016                             &iter->lost_events);
4017 }
4018
4019 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
4020 {
4021         struct trace_iterator *iter = m->private;
4022         int i = (int)*pos;
4023         void *ent;
4024
4025         WARN_ON_ONCE(iter->leftover);
4026
4027         (*pos)++;
4028
4029         /* can't go backwards */
4030         if (iter->idx > i)
4031                 return NULL;
4032
4033         if (iter->idx < 0)
4034                 ent = trace_find_next_entry_inc(iter);
4035         else
4036                 ent = iter;
4037
4038         while (ent && iter->idx < i)
4039                 ent = trace_find_next_entry_inc(iter);
4040
4041         iter->pos = *pos;
4042
4043         return ent;
4044 }
4045
4046 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
4047 {
4048         struct ring_buffer_iter *buf_iter;
4049         unsigned long entries = 0;
4050         u64 ts;
4051
4052         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
4053
4054         buf_iter = trace_buffer_iter(iter, cpu);
4055         if (!buf_iter)
4056                 return;
4057
4058         ring_buffer_iter_reset(buf_iter);
4059
4060         /*
4061          * With the max latency tracers, it is possible that a reset never
4062          * took place on a CPU. This is evident from timestamps that are
4063          * before the start of the buffer.
4064          */
4065         while (ring_buffer_iter_peek(buf_iter, &ts)) {
4066                 if (ts >= iter->array_buffer->time_start)
4067                         break;
4068                 entries++;
4069                 ring_buffer_iter_advance(buf_iter);
4070         }
4071
4072         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
4073 }
4074
4075 /*
4076  * The current tracer is copied to avoid holding a global lock
4077  * all around.
4078  */
4079 static void *s_start(struct seq_file *m, loff_t *pos)
4080 {
4081         struct trace_iterator *iter = m->private;
4082         struct trace_array *tr = iter->tr;
4083         int cpu_file = iter->cpu_file;
4084         void *p = NULL;
4085         loff_t l = 0;
4086         int cpu;
4087
4088         /*
4089          * Copy the tracer to avoid using a global lock all around.
4090          * iter->trace is a copy of current_trace, so the name pointer
4091          * may be compared instead of using strcmp(), as iter->trace->name
4092          * will point to the same string as current_trace->name.
4093          */
4094         mutex_lock(&trace_types_lock);
4095         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
4096                 *iter->trace = *tr->current_trace;
4097         mutex_unlock(&trace_types_lock);
4098
4099 #ifdef CONFIG_TRACER_MAX_TRACE
4100         if (iter->snapshot && iter->trace->use_max_tr)
4101                 return ERR_PTR(-EBUSY);
4102 #endif
4103
4104         if (*pos != iter->pos) {
4105                 iter->ent = NULL;
4106                 iter->cpu = 0;
4107                 iter->idx = -1;
4108
4109                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
4110                         for_each_tracing_cpu(cpu)
4111                                 tracing_iter_reset(iter, cpu);
4112                 } else
4113                         tracing_iter_reset(iter, cpu_file);
4114
4115                 iter->leftover = 0;
4116                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4117                         ;
4118
4119         } else {
4120                 /*
4121                  * If we overflowed the seq_file before, then we want
4122                  * to just reuse the trace_seq buffer again.
4123                  */
4124                 if (iter->leftover)
4125                         p = iter;
4126                 else {
4127                         l = *pos - 1;
4128                         p = s_next(m, p, &l);
4129                 }
4130         }
4131
4132         trace_event_read_lock();
4133         trace_access_lock(cpu_file);
4134         return p;
4135 }
4136
4137 static void s_stop(struct seq_file *m, void *p)
4138 {
4139         struct trace_iterator *iter = m->private;
4140
4141 #ifdef CONFIG_TRACER_MAX_TRACE
4142         if (iter->snapshot && iter->trace->use_max_tr)
4143                 return;
4144 #endif
4145
4146         trace_access_unlock(iter->cpu_file);
4147         trace_event_read_unlock();
4148 }
4149
4150 static void
4151 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4152                       unsigned long *entries, int cpu)
4153 {
4154         unsigned long count;
4155
4156         count = ring_buffer_entries_cpu(buf->buffer, cpu);
4157         /*
4158          * If this buffer has skipped entries, then we hold all
4159          * entries for the trace and we need to ignore the
4160          * ones before the time stamp.
4161          */
4162         if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4163                 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4164                 /* total is the same as the entries */
4165                 *total = count;
4166         } else
4167                 *total = count +
4168                         ring_buffer_overrun_cpu(buf->buffer, cpu);
4169         *entries = count;
4170 }
4171
4172 static void
4173 get_total_entries(struct array_buffer *buf,
4174                   unsigned long *total, unsigned long *entries)
4175 {
4176         unsigned long t, e;
4177         int cpu;
4178
4179         *total = 0;
4180         *entries = 0;
4181
4182         for_each_tracing_cpu(cpu) {
4183                 get_total_entries_cpu(buf, &t, &e, cpu);
4184                 *total += t;
4185                 *entries += e;
4186         }
4187 }
4188
4189 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4190 {
4191         unsigned long total, entries;
4192
4193         if (!tr)
4194                 tr = &global_trace;
4195
4196         get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4197
4198         return entries;
4199 }
4200
4201 unsigned long trace_total_entries(struct trace_array *tr)
4202 {
4203         unsigned long total, entries;
4204
4205         if (!tr)
4206                 tr = &global_trace;
4207
4208         get_total_entries(&tr->array_buffer, &total, &entries);
4209
4210         return entries;
4211 }
4212
4213 static void print_lat_help_header(struct seq_file *m)
4214 {
4215         seq_puts(m, "#                    _------=> CPU#            \n"
4216                     "#                   / _-----=> irqs-off/BH-disabled\n"
4217                     "#                  | / _----=> need-resched    \n"
4218                     "#                  || / _---=> hardirq/softirq \n"
4219                     "#                  ||| / _--=> preempt-depth   \n"
4220                     "#                  |||| / _-=> migrate-disable \n"
4221                     "#                  ||||| /     delay           \n"
4222                     "#  cmd     pid     |||||| time  |   caller     \n"
4223                     "#     \\   /        ||||||  \\    |    /       \n");
4224 }
4225
4226 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4227 {
4228         unsigned long total;
4229         unsigned long entries;
4230
4231         get_total_entries(buf, &total, &entries);
4232         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
4233                    entries, total, num_online_cpus());
4234         seq_puts(m, "#\n");
4235 }
4236
4237 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4238                                    unsigned int flags)
4239 {
4240         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4241
4242         print_event_info(buf, m);
4243
4244         seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4245         seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4246 }
4247
4248 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4249                                        unsigned int flags)
4250 {
4251         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4252         const char *space = "            ";
4253         int prec = tgid ? 12 : 2;
4254
4255         print_event_info(buf, m);
4256
4257         seq_printf(m, "#                            %.*s  _-----=> irqs-off/BH-disabled\n", prec, space);
4258         seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4259         seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4260         seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4261         seq_printf(m, "#                            %.*s||| / _-=> migrate-disable\n", prec, space);
4262         seq_printf(m, "#                            %.*s|||| /     delay\n", prec, space);
4263         seq_printf(m, "#           TASK-PID  %.*s CPU#  |||||  TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4264         seq_printf(m, "#              | |    %.*s   |   |||||     |         |\n", prec, "       |    ");
4265 }
4266
4267 void
4268 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4269 {
4270         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4271         struct array_buffer *buf = iter->array_buffer;
4272         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4273         struct tracer *type = iter->trace;
4274         unsigned long entries;
4275         unsigned long total;
4276         const char *name = "preemption";
4277
4278         name = type->name;
4279
4280         get_total_entries(buf, &total, &entries);
4281
4282         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4283                    name, UTS_RELEASE);
4284         seq_puts(m, "# -----------------------------------"
4285                  "---------------------------------\n");
4286         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4287                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4288                    nsecs_to_usecs(data->saved_latency),
4289                    entries,
4290                    total,
4291                    buf->cpu,
4292 #if defined(CONFIG_PREEMPT_NONE)
4293                    "server",
4294 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
4295                    "desktop",
4296 #elif defined(CONFIG_PREEMPT)
4297                    "preempt",
4298 #elif defined(CONFIG_PREEMPT_RT)
4299                    "preempt_rt",
4300 #else
4301                    "unknown",
4302 #endif
4303                    /* These are reserved for later use */
4304                    0, 0, 0, 0);
4305 #ifdef CONFIG_SMP
4306         seq_printf(m, " #P:%d)\n", num_online_cpus());
4307 #else
4308         seq_puts(m, ")\n");
4309 #endif
4310         seq_puts(m, "#    -----------------\n");
4311         seq_printf(m, "#    | task: %.16s-%d "
4312                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4313                    data->comm, data->pid,
4314                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4315                    data->policy, data->rt_priority);
4316         seq_puts(m, "#    -----------------\n");
4317
4318         if (data->critical_start) {
4319                 seq_puts(m, "#  => started at: ");
4320                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4321                 trace_print_seq(m, &iter->seq);
4322                 seq_puts(m, "\n#  => ended at:   ");
4323                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4324                 trace_print_seq(m, &iter->seq);
4325                 seq_puts(m, "\n#\n");
4326         }
4327
4328         seq_puts(m, "#\n");
4329 }
4330
4331 static void test_cpu_buff_start(struct trace_iterator *iter)
4332 {
4333         struct trace_seq *s = &iter->seq;
4334         struct trace_array *tr = iter->tr;
4335
4336         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4337                 return;
4338
4339         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4340                 return;
4341
4342         if (cpumask_available(iter->started) &&
4343             cpumask_test_cpu(iter->cpu, iter->started))
4344                 return;
4345
4346         if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4347                 return;
4348
4349         if (cpumask_available(iter->started))
4350                 cpumask_set_cpu(iter->cpu, iter->started);
4351
4352         /* Don't print started cpu buffer for the first entry of the trace */
4353         if (iter->idx > 1)
4354                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4355                                 iter->cpu);
4356 }
4357
4358 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4359 {
4360         struct trace_array *tr = iter->tr;
4361         struct trace_seq *s = &iter->seq;
4362         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4363         struct trace_entry *entry;
4364         struct trace_event *event;
4365
4366         entry = iter->ent;
4367
4368         test_cpu_buff_start(iter);
4369
4370         event = ftrace_find_event(entry->type);
4371
4372         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4373                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4374                         trace_print_lat_context(iter);
4375                 else
4376                         trace_print_context(iter);
4377         }
4378
4379         if (trace_seq_has_overflowed(s))
4380                 return TRACE_TYPE_PARTIAL_LINE;
4381
4382         if (event)
4383                 return event->funcs->trace(iter, sym_flags, event);
4384
4385         trace_seq_printf(s, "Unknown type %d\n", entry->type);
4386
4387         return trace_handle_return(s);
4388 }
4389
4390 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4391 {
4392         struct trace_array *tr = iter->tr;
4393         struct trace_seq *s = &iter->seq;
4394         struct trace_entry *entry;
4395         struct trace_event *event;
4396
4397         entry = iter->ent;
4398
4399         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4400                 trace_seq_printf(s, "%d %d %llu ",
4401                                  entry->pid, iter->cpu, iter->ts);
4402
4403         if (trace_seq_has_overflowed(s))
4404                 return TRACE_TYPE_PARTIAL_LINE;
4405
4406         event = ftrace_find_event(entry->type);
4407         if (event)
4408                 return event->funcs->raw(iter, 0, event);
4409
4410         trace_seq_printf(s, "%d ?\n", entry->type);
4411
4412         return trace_handle_return(s);
4413 }
4414
4415 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4416 {
4417         struct trace_array *tr = iter->tr;
4418         struct trace_seq *s = &iter->seq;
4419         unsigned char newline = '\n';
4420         struct trace_entry *entry;
4421         struct trace_event *event;
4422
4423         entry = iter->ent;
4424
4425         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4426                 SEQ_PUT_HEX_FIELD(s, entry->pid);
4427                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
4428                 SEQ_PUT_HEX_FIELD(s, iter->ts);
4429                 if (trace_seq_has_overflowed(s))
4430                         return TRACE_TYPE_PARTIAL_LINE;
4431         }
4432
4433         event = ftrace_find_event(entry->type);
4434         if (event) {
4435                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
4436                 if (ret != TRACE_TYPE_HANDLED)
4437                         return ret;
4438         }
4439
4440         SEQ_PUT_FIELD(s, newline);
4441
4442         return trace_handle_return(s);
4443 }
4444
4445 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4446 {
4447         struct trace_array *tr = iter->tr;
4448         struct trace_seq *s = &iter->seq;
4449         struct trace_entry *entry;
4450         struct trace_event *event;
4451
4452         entry = iter->ent;
4453
4454         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4455                 SEQ_PUT_FIELD(s, entry->pid);
4456                 SEQ_PUT_FIELD(s, iter->cpu);
4457                 SEQ_PUT_FIELD(s, iter->ts);
4458                 if (trace_seq_has_overflowed(s))
4459                         return TRACE_TYPE_PARTIAL_LINE;
4460         }
4461
4462         event = ftrace_find_event(entry->type);
4463         return event ? event->funcs->binary(iter, 0, event) :
4464                 TRACE_TYPE_HANDLED;
4465 }
4466
4467 int trace_empty(struct trace_iterator *iter)
4468 {
4469         struct ring_buffer_iter *buf_iter;
4470         int cpu;
4471
4472         /* If we are looking at one CPU buffer, only check that one */
4473         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4474                 cpu = iter->cpu_file;
4475                 buf_iter = trace_buffer_iter(iter, cpu);
4476                 if (buf_iter) {
4477                         if (!ring_buffer_iter_empty(buf_iter))
4478                                 return 0;
4479                 } else {
4480                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4481                                 return 0;
4482                 }
4483                 return 1;
4484         }
4485
4486         for_each_tracing_cpu(cpu) {
4487                 buf_iter = trace_buffer_iter(iter, cpu);
4488                 if (buf_iter) {
4489                         if (!ring_buffer_iter_empty(buf_iter))
4490                                 return 0;
4491                 } else {
4492                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4493                                 return 0;
4494                 }
4495         }
4496
4497         return 1;
4498 }
4499
4500 /*  Called with trace_event_read_lock() held. */
4501 enum print_line_t print_trace_line(struct trace_iterator *iter)
4502 {
4503         struct trace_array *tr = iter->tr;
4504         unsigned long trace_flags = tr->trace_flags;
4505         enum print_line_t ret;
4506
4507         if (iter->lost_events) {
4508                 if (iter->lost_events == (unsigned long)-1)
4509                         trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4510                                          iter->cpu);
4511                 else
4512                         trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4513                                          iter->cpu, iter->lost_events);
4514                 if (trace_seq_has_overflowed(&iter->seq))
4515                         return TRACE_TYPE_PARTIAL_LINE;
4516         }
4517
4518         if (iter->trace && iter->trace->print_line) {
4519                 ret = iter->trace->print_line(iter);
4520                 if (ret != TRACE_TYPE_UNHANDLED)
4521                         return ret;
4522         }
4523
4524         if (iter->ent->type == TRACE_BPUTS &&
4525                         trace_flags & TRACE_ITER_PRINTK &&
4526                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4527                 return trace_print_bputs_msg_only(iter);
4528
4529         if (iter->ent->type == TRACE_BPRINT &&
4530                         trace_flags & TRACE_ITER_PRINTK &&
4531                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4532                 return trace_print_bprintk_msg_only(iter);
4533
4534         if (iter->ent->type == TRACE_PRINT &&
4535                         trace_flags & TRACE_ITER_PRINTK &&
4536                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4537                 return trace_print_printk_msg_only(iter);
4538
4539         if (trace_flags & TRACE_ITER_BIN)
4540                 return print_bin_fmt(iter);
4541
4542         if (trace_flags & TRACE_ITER_HEX)
4543                 return print_hex_fmt(iter);
4544
4545         if (trace_flags & TRACE_ITER_RAW)
4546                 return print_raw_fmt(iter);
4547
4548         return print_trace_fmt(iter);
4549 }
4550
4551 void trace_latency_header(struct seq_file *m)
4552 {
4553         struct trace_iterator *iter = m->private;
4554         struct trace_array *tr = iter->tr;
4555
4556         /* print nothing if the buffers are empty */
4557         if (trace_empty(iter))
4558                 return;
4559
4560         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4561                 print_trace_header(m, iter);
4562
4563         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4564                 print_lat_help_header(m);
4565 }
4566
4567 void trace_default_header(struct seq_file *m)
4568 {
4569         struct trace_iterator *iter = m->private;
4570         struct trace_array *tr = iter->tr;
4571         unsigned long trace_flags = tr->trace_flags;
4572
4573         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4574                 return;
4575
4576         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4577                 /* print nothing if the buffers are empty */
4578                 if (trace_empty(iter))
4579                         return;
4580                 print_trace_header(m, iter);
4581                 if (!(trace_flags & TRACE_ITER_VERBOSE))
4582                         print_lat_help_header(m);
4583         } else {
4584                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4585                         if (trace_flags & TRACE_ITER_IRQ_INFO)
4586                                 print_func_help_header_irq(iter->array_buffer,
4587                                                            m, trace_flags);
4588                         else
4589                                 print_func_help_header(iter->array_buffer, m,
4590                                                        trace_flags);
4591                 }
4592         }
4593 }
4594
4595 static void test_ftrace_alive(struct seq_file *m)
4596 {
4597         if (!ftrace_is_dead())
4598                 return;
4599         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4600                     "#          MAY BE MISSING FUNCTION EVENTS\n");
4601 }
4602
4603 #ifdef CONFIG_TRACER_MAX_TRACE
4604 static void show_snapshot_main_help(struct seq_file *m)
4605 {
4606         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4607                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4608                     "#                      Takes a snapshot of the main buffer.\n"
4609                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4610                     "#                      (Doesn't have to be '2'; works with any number that\n"
4611                     "#                       is not a '0' or '1')\n");
4612 }
4613
4614 static void show_snapshot_percpu_help(struct seq_file *m)
4615 {
4616         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4617 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4618         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4619                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
4620 #else
4621         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4622                     "#                     Must use main snapshot file to allocate.\n");
4623 #endif
4624         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4625                     "#                      (Doesn't have to be '2'; works with any number that\n"
4626                     "#                       is not a '0' or '1')\n");
4627 }
4628
4629 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4630 {
4631         if (iter->tr->allocated_snapshot)
4632                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4633         else
4634                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4635
4636         seq_puts(m, "# Snapshot commands:\n");
4637         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4638                 show_snapshot_main_help(m);
4639         else
4640                 show_snapshot_percpu_help(m);
4641 }
4642 #else
4643 /* Should never be called */
4644 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4645 #endif
4646
4647 static int s_show(struct seq_file *m, void *v)
4648 {
4649         struct trace_iterator *iter = v;
4650         int ret;
4651
4652         if (iter->ent == NULL) {
4653                 if (iter->tr) {
4654                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
4655                         seq_puts(m, "#\n");
4656                         test_ftrace_alive(m);
4657                 }
4658                 if (iter->snapshot && trace_empty(iter))
4659                         print_snapshot_help(m, iter);
4660                 else if (iter->trace && iter->trace->print_header)
4661                         iter->trace->print_header(m);
4662                 else
4663                         trace_default_header(m);
4664
4665         } else if (iter->leftover) {
4666                 /*
4667                  * If we filled the seq_file buffer earlier, we
4668                  * want to just show it now.
4669                  */
4670                 ret = trace_print_seq(m, &iter->seq);
4671
4672                 /* ret should this time be zero, but you never know */
4673                 iter->leftover = ret;
4674
4675         } else {
4676                 print_trace_line(iter);
4677                 ret = trace_print_seq(m, &iter->seq);
4678                 /*
4679                  * If we overflow the seq_file buffer, then it will
4680                  * ask us for this data again at start up.
4681                  * Use that instead.
4682                  *  ret is 0 if seq_file write succeeded.
4683                  *        -1 otherwise.
4684                  */
4685                 iter->leftover = ret;
4686         }
4687
4688         return 0;
4689 }
4690
4691 /*
4692  * Should be used after trace_array_get(), trace_types_lock
4693  * ensures that i_cdev was already initialized.
4694  */
4695 static inline int tracing_get_cpu(struct inode *inode)
4696 {
4697         if (inode->i_cdev) /* See trace_create_cpu_file() */
4698                 return (long)inode->i_cdev - 1;
4699         return RING_BUFFER_ALL_CPUS;
4700 }
4701
4702 static const struct seq_operations tracer_seq_ops = {
4703         .start          = s_start,
4704         .next           = s_next,
4705         .stop           = s_stop,
4706         .show           = s_show,
4707 };
4708
4709 static struct trace_iterator *
4710 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4711 {
4712         struct trace_array *tr = inode->i_private;
4713         struct trace_iterator *iter;
4714         int cpu;
4715
4716         if (tracing_disabled)
4717                 return ERR_PTR(-ENODEV);
4718
4719         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4720         if (!iter)
4721                 return ERR_PTR(-ENOMEM);
4722
4723         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4724                                     GFP_KERNEL);
4725         if (!iter->buffer_iter)
4726                 goto release;
4727
4728         /*
4729          * trace_find_next_entry() may need to save off iter->ent.
4730          * It will place it into the iter->temp buffer. As most
4731          * events are less than 128 bytes, allocate a buffer of that size.
4732          * If one is greater, then trace_find_next_entry() will
4733          * allocate a new buffer to adjust for the bigger iter->ent.
4734          * It's not critical if it fails to get allocated here.
4735          */
4736         iter->temp = kmalloc(128, GFP_KERNEL);
4737         if (iter->temp)
4738                 iter->temp_size = 128;
4739
4740         /*
4741          * trace_event_printf() may need to modify given format
4742          * string to replace %p with %px so that it shows real address
4743          * instead of a hash value. However, that is only needed for event
4744          * tracing; other tracers may not need it. Defer the allocation
4745          * until it is needed.
4746          */
4747         iter->fmt = NULL;
4748         iter->fmt_size = 0;
4749
4750         /*
4751          * We make a copy of the current tracer to avoid concurrent
4752          * changes on it while we are reading.
4753          */
4754         mutex_lock(&trace_types_lock);
4755         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4756         if (!iter->trace)
4757                 goto fail;
4758
4759         *iter->trace = *tr->current_trace;
4760
4761         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4762                 goto fail;
4763
4764         iter->tr = tr;
4765
4766 #ifdef CONFIG_TRACER_MAX_TRACE
4767         /* Currently only the top directory has a snapshot */
4768         if (tr->current_trace->print_max || snapshot)
4769                 iter->array_buffer = &tr->max_buffer;
4770         else
4771 #endif
4772                 iter->array_buffer = &tr->array_buffer;
4773         iter->snapshot = snapshot;
4774         iter->pos = -1;
4775         iter->cpu_file = tracing_get_cpu(inode);
4776         mutex_init(&iter->mutex);
4777
4778         /* Notify the tracer early; before we stop tracing. */
4779         if (iter->trace->open)
4780                 iter->trace->open(iter);
4781
4782         /* Annotate start of buffers if we had overruns */
4783         if (ring_buffer_overruns(iter->array_buffer->buffer))
4784                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4785
4786         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4787         if (trace_clocks[tr->clock_id].in_ns)
4788                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4789
4790         /*
4791          * If pause-on-trace is enabled, then stop the trace while
4792          * dumping, unless this is the "snapshot" file
4793          */
4794         if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4795                 tracing_stop_tr(tr);
4796
4797         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4798                 for_each_tracing_cpu(cpu) {
4799                         iter->buffer_iter[cpu] =
4800                                 ring_buffer_read_prepare(iter->array_buffer->buffer,
4801                                                          cpu, GFP_KERNEL);
4802                 }
4803                 ring_buffer_read_prepare_sync();
4804                 for_each_tracing_cpu(cpu) {
4805                         ring_buffer_read_start(iter->buffer_iter[cpu]);
4806                         tracing_iter_reset(iter, cpu);
4807                 }
4808         } else {
4809                 cpu = iter->cpu_file;
4810                 iter->buffer_iter[cpu] =
4811                         ring_buffer_read_prepare(iter->array_buffer->buffer,
4812                                                  cpu, GFP_KERNEL);
4813                 ring_buffer_read_prepare_sync();
4814                 ring_buffer_read_start(iter->buffer_iter[cpu]);
4815                 tracing_iter_reset(iter, cpu);
4816         }
4817
4818         mutex_unlock(&trace_types_lock);
4819
4820         return iter;
4821
4822  fail:
4823         mutex_unlock(&trace_types_lock);
4824         kfree(iter->trace);
4825         kfree(iter->temp);
4826         kfree(iter->buffer_iter);
4827 release:
4828         seq_release_private(inode, file);
4829         return ERR_PTR(-ENOMEM);
4830 }
4831
4832 int tracing_open_generic(struct inode *inode, struct file *filp)
4833 {
4834         int ret;
4835
4836         ret = tracing_check_open_get_tr(NULL);
4837         if (ret)
4838                 return ret;
4839
4840         filp->private_data = inode->i_private;
4841         return 0;
4842 }
4843
4844 bool tracing_is_disabled(void)
4845 {
4846         return (tracing_disabled) ? true : false;
4847 }
4848
4849 /*
4850  * Open and update trace_array ref count.
4851  * Must have the current trace_array passed to it.
4852  */
4853 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4854 {
4855         struct trace_array *tr = inode->i_private;
4856         int ret;
4857
4858         ret = tracing_check_open_get_tr(tr);
4859         if (ret)
4860                 return ret;
4861
4862         filp->private_data = inode->i_private;
4863
4864         return 0;
4865 }
4866
4867 static int tracing_mark_open(struct inode *inode, struct file *filp)
4868 {
4869         stream_open(inode, filp);
4870         return tracing_open_generic_tr(inode, filp);
4871 }
4872
4873 static int tracing_release(struct inode *inode, struct file *file)
4874 {
4875         struct trace_array *tr = inode->i_private;
4876         struct seq_file *m = file->private_data;
4877         struct trace_iterator *iter;
4878         int cpu;
4879
4880         if (!(file->f_mode & FMODE_READ)) {
4881                 trace_array_put(tr);
4882                 return 0;
4883         }
4884
4885         /* Writes do not use seq_file */
4886         iter = m->private;
4887         mutex_lock(&trace_types_lock);
4888
4889         for_each_tracing_cpu(cpu) {
4890                 if (iter->buffer_iter[cpu])
4891                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
4892         }
4893
4894         if (iter->trace && iter->trace->close)
4895                 iter->trace->close(iter);
4896
4897         if (!iter->snapshot && tr->stop_count)
4898                 /* reenable tracing if it was previously enabled */
4899                 tracing_start_tr(tr);
4900
4901         __trace_array_put(tr);
4902
4903         mutex_unlock(&trace_types_lock);
4904
4905         mutex_destroy(&iter->mutex);
4906         free_cpumask_var(iter->started);
4907         kfree(iter->fmt);
4908         kfree(iter->temp);
4909         kfree(iter->trace);
4910         kfree(iter->buffer_iter);
4911         seq_release_private(inode, file);
4912
4913         return 0;
4914 }
4915
4916 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4917 {
4918         struct trace_array *tr = inode->i_private;
4919
4920         trace_array_put(tr);
4921         return 0;
4922 }
4923
4924 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4925 {
4926         struct trace_array *tr = inode->i_private;
4927
4928         trace_array_put(tr);
4929
4930         return single_release(inode, file);
4931 }
4932
4933 static int tracing_open(struct inode *inode, struct file *file)
4934 {
4935         struct trace_array *tr = inode->i_private;
4936         struct trace_iterator *iter;
4937         int ret;
4938
4939         ret = tracing_check_open_get_tr(tr);
4940         if (ret)
4941                 return ret;
4942
4943         /* If this file was open for write, then erase contents */
4944         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4945                 int cpu = tracing_get_cpu(inode);
4946                 struct array_buffer *trace_buf = &tr->array_buffer;
4947
4948 #ifdef CONFIG_TRACER_MAX_TRACE
4949                 if (tr->current_trace->print_max)
4950                         trace_buf = &tr->max_buffer;
4951 #endif
4952
4953                 if (cpu == RING_BUFFER_ALL_CPUS)
4954                         tracing_reset_online_cpus(trace_buf);
4955                 else
4956                         tracing_reset_cpu(trace_buf, cpu);
4957         }
4958
4959         if (file->f_mode & FMODE_READ) {
4960                 iter = __tracing_open(inode, file, false);
4961                 if (IS_ERR(iter))
4962                         ret = PTR_ERR(iter);
4963                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4964                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
4965         }
4966
4967         if (ret < 0)
4968                 trace_array_put(tr);
4969
4970         return ret;
4971 }
4972
4973 /*
4974  * Some tracers are not suitable for instance buffers.
4975  * A tracer is always available for the global array (toplevel)
4976  * or if it explicitly states that it is.
4977  */
4978 static bool
4979 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4980 {
4981         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4982 }
4983
4984 /* Find the next tracer that this trace array may use */
4985 static struct tracer *
4986 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4987 {
4988         while (t && !trace_ok_for_array(t, tr))
4989                 t = t->next;
4990
4991         return t;
4992 }
4993
4994 static void *
4995 t_next(struct seq_file *m, void *v, loff_t *pos)
4996 {
4997         struct trace_array *tr = m->private;
4998         struct tracer *t = v;
4999
5000         (*pos)++;
5001
5002         if (t)
5003                 t = get_tracer_for_array(tr, t->next);
5004
5005         return t;
5006 }
5007
5008 static void *t_start(struct seq_file *m, loff_t *pos)
5009 {
5010         struct trace_array *tr = m->private;
5011         struct tracer *t;
5012         loff_t l = 0;
5013
5014         mutex_lock(&trace_types_lock);
5015
5016         t = get_tracer_for_array(tr, trace_types);
5017         for (; t && l < *pos; t = t_next(m, t, &l))
5018                 ;
5019
5020         return t;
5021 }
5022
5023 static void t_stop(struct seq_file *m, void *p)
5024 {
5025         mutex_unlock(&trace_types_lock);
5026 }
5027
5028 static int t_show(struct seq_file *m, void *v)
5029 {
5030         struct tracer *t = v;
5031
5032         if (!t)
5033                 return 0;
5034
5035         seq_puts(m, t->name);
5036         if (t->next)
5037                 seq_putc(m, ' ');
5038         else
5039                 seq_putc(m, '\n');
5040
5041         return 0;
5042 }
5043
5044 static const struct seq_operations show_traces_seq_ops = {
5045         .start          = t_start,
5046         .next           = t_next,
5047         .stop           = t_stop,
5048         .show           = t_show,
5049 };
5050
5051 static int show_traces_open(struct inode *inode, struct file *file)
5052 {
5053         struct trace_array *tr = inode->i_private;
5054         struct seq_file *m;
5055         int ret;
5056
5057         ret = tracing_check_open_get_tr(tr);
5058         if (ret)
5059                 return ret;
5060
5061         ret = seq_open(file, &show_traces_seq_ops);
5062         if (ret) {
5063                 trace_array_put(tr);
5064                 return ret;
5065         }
5066
5067         m = file->private_data;
5068         m->private = tr;
5069
5070         return 0;
5071 }
5072
5073 static int show_traces_release(struct inode *inode, struct file *file)
5074 {
5075         struct trace_array *tr = inode->i_private;
5076
5077         trace_array_put(tr);
5078         return seq_release(inode, file);
5079 }
5080
5081 static ssize_t
5082 tracing_write_stub(struct file *filp, const char __user *ubuf,
5083                    size_t count, loff_t *ppos)
5084 {
5085         return count;
5086 }
5087
5088 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5089 {
5090         int ret;
5091
5092         if (file->f_mode & FMODE_READ)
5093                 ret = seq_lseek(file, offset, whence);
5094         else
5095                 file->f_pos = ret = 0;
5096
5097         return ret;
5098 }
5099
5100 static const struct file_operations tracing_fops = {
5101         .open           = tracing_open,
5102         .read           = seq_read,
5103         .write          = tracing_write_stub,
5104         .llseek         = tracing_lseek,
5105         .release        = tracing_release,
5106 };
5107
5108 static const struct file_operations show_traces_fops = {
5109         .open           = show_traces_open,
5110         .read           = seq_read,
5111         .llseek         = seq_lseek,
5112         .release        = show_traces_release,
5113 };
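/*
 * show_traces_fops backs the "available_tracers" file.  Reading it yields a
 * single space-separated line; the exact contents depend on which tracers
 * are configured in, e.g.:
 *
 *	# cat /sys/kernel/tracing/available_tracers
 *	function_graph function nop
 */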
5114
5115 static ssize_t
5116 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5117                      size_t count, loff_t *ppos)
5118 {
5119         struct trace_array *tr = file_inode(filp)->i_private;
5120         char *mask_str;
5121         int len;
5122
5123         len = snprintf(NULL, 0, "%*pb\n",
5124                        cpumask_pr_args(tr->tracing_cpumask)) + 1;
5125         mask_str = kmalloc(len, GFP_KERNEL);
5126         if (!mask_str)
5127                 return -ENOMEM;
5128
5129         len = snprintf(mask_str, len, "%*pb\n",
5130                        cpumask_pr_args(tr->tracing_cpumask));
5131         if (len >= count) {
5132                 count = -EINVAL;
5133                 goto out_err;
5134         }
5135         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5136
5137 out_err:
5138         kfree(mask_str);
5139
5140         return count;
5141 }
5142
5143 int tracing_set_cpumask(struct trace_array *tr,
5144                         cpumask_var_t tracing_cpumask_new)
5145 {
5146         int cpu;
5147
5148         if (!tr)
5149                 return -EINVAL;
5150
5151         local_irq_disable();
5152         arch_spin_lock(&tr->max_lock);
5153         for_each_tracing_cpu(cpu) {
5154                 /*
5155                  * Increase/decrease the disabled counter if we are
5156                  * about to flip a bit in the cpumask:
5157                  */
5158                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5159                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5160                         atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5161                         ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5162                 }
5163                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5164                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5165                         atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5166                         ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5167                 }
5168         }
5169         arch_spin_unlock(&tr->max_lock);
5170         local_irq_enable();
5171
5172         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5173
5174         return 0;
5175 }
5176
5177 static ssize_t
5178 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5179                       size_t count, loff_t *ppos)
5180 {
5181         struct trace_array *tr = file_inode(filp)->i_private;
5182         cpumask_var_t tracing_cpumask_new;
5183         int err;
5184
5185         if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5186                 return -ENOMEM;
5187
5188         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5189         if (err)
5190                 goto err_free;
5191
5192         err = tracing_set_cpumask(tr, tracing_cpumask_new);
5193         if (err)
5194                 goto err_free;
5195
5196         free_cpumask_var(tracing_cpumask_new);
5197
5198         return count;
5199
5200 err_free:
5201         free_cpumask_var(tracing_cpumask_new);
5202
5203         return err;
5204 }
5205
5206 static const struct file_operations tracing_cpumask_fops = {
5207         .open           = tracing_open_generic_tr,
5208         .read           = tracing_cpumask_read,
5209         .write          = tracing_cpumask_write,
5210         .release        = tracing_release_generic_tr,
5211         .llseek         = generic_file_llseek,
5212 };
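/*
 * Example use of the "tracing_cpumask" file served by the fops above (the
 * value is a hex cpumask; this sketch assumes at least two CPUs):
 *
 *	# echo 3 > /sys/kernel/tracing/tracing_cpumask	# trace CPUs 0-1 only
 *	# cat /sys/kernel/tracing/tracing_cpumask	# read the mask back
 */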
5213
5214 static int tracing_trace_options_show(struct seq_file *m, void *v)
5215 {
5216         struct tracer_opt *trace_opts;
5217         struct trace_array *tr = m->private;
5218         u32 tracer_flags;
5219         int i;
5220
5221         mutex_lock(&trace_types_lock);
5222         tracer_flags = tr->current_trace->flags->val;
5223         trace_opts = tr->current_trace->flags->opts;
5224
5225         for (i = 0; trace_options[i]; i++) {
5226                 if (tr->trace_flags & (1 << i))
5227                         seq_printf(m, "%s\n", trace_options[i]);
5228                 else
5229                         seq_printf(m, "no%s\n", trace_options[i]);
5230         }
5231
5232         for (i = 0; trace_opts[i].name; i++) {
5233                 if (tracer_flags & trace_opts[i].bit)
5234                         seq_printf(m, "%s\n", trace_opts[i].name);
5235                 else
5236                         seq_printf(m, "no%s\n", trace_opts[i].name);
5237         }
5238         mutex_unlock(&trace_types_lock);
5239
5240         return 0;
5241 }
5242
5243 static int __set_tracer_option(struct trace_array *tr,
5244                                struct tracer_flags *tracer_flags,
5245                                struct tracer_opt *opts, int neg)
5246 {
5247         struct tracer *trace = tracer_flags->trace;
5248         int ret;
5249
5250         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5251         if (ret)
5252                 return ret;
5253
5254         if (neg)
5255                 tracer_flags->val &= ~opts->bit;
5256         else
5257                 tracer_flags->val |= opts->bit;
5258         return 0;
5259 }
5260
5261 /* Try to assign a tracer specific option */
5262 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5263 {
5264         struct tracer *trace = tr->current_trace;
5265         struct tracer_flags *tracer_flags = trace->flags;
5266         struct tracer_opt *opts = NULL;
5267         int i;
5268
5269         for (i = 0; tracer_flags->opts[i].name; i++) {
5270                 opts = &tracer_flags->opts[i];
5271
5272                 if (strcmp(cmp, opts->name) == 0)
5273                         return __set_tracer_option(tr, trace->flags, opts, neg);
5274         }
5275
5276         return -EINVAL;
5277 }
5278
5279 /* Some tracers require overwrite to stay enabled */
5280 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5281 {
5282         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5283                 return -1;
5284
5285         return 0;
5286 }
5287
5288 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5289 {
5290         int *map;
5291
5292         if ((mask == TRACE_ITER_RECORD_TGID) ||
5293             (mask == TRACE_ITER_RECORD_CMD))
5294                 lockdep_assert_held(&event_mutex);
5295
5296         /* do nothing if flag is already set */
5297         if (!!(tr->trace_flags & mask) == !!enabled)
5298                 return 0;
5299
5300         /* Give the tracer a chance to approve the change */
5301         if (tr->current_trace->flag_changed)
5302                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5303                         return -EINVAL;
5304
5305         if (enabled)
5306                 tr->trace_flags |= mask;
5307         else
5308                 tr->trace_flags &= ~mask;
5309
5310         if (mask == TRACE_ITER_RECORD_CMD)
5311                 trace_event_enable_cmd_record(enabled);
5312
5313         if (mask == TRACE_ITER_RECORD_TGID) {
5314                 if (!tgid_map) {
5315                         tgid_map_max = pid_max;
5316                         map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
5317                                        GFP_KERNEL);
5318
5319                         /*
5320                          * Pairs with smp_load_acquire() in
5321                          * trace_find_tgid_ptr() to ensure that if it observes
5322                          * the tgid_map we just allocated then it also observes
5323                          * the corresponding tgid_map_max value.
5324                          */
5325                         smp_store_release(&tgid_map, map);
5326                 }
5327                 if (!tgid_map) {
5328                         tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5329                         return -ENOMEM;
5330                 }
5331
5332                 trace_event_enable_tgid_record(enabled);
5333         }
5334
5335         if (mask == TRACE_ITER_EVENT_FORK)
5336                 trace_event_follow_fork(tr, enabled);
5337
5338         if (mask == TRACE_ITER_FUNC_FORK)
5339                 ftrace_pid_follow_fork(tr, enabled);
5340
5341         if (mask == TRACE_ITER_OVERWRITE) {
5342                 ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5343 #ifdef CONFIG_TRACER_MAX_TRACE
5344                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5345 #endif
5346         }
5347
5348         if (mask == TRACE_ITER_PRINTK) {
5349                 trace_printk_start_stop_comm(enabled);
5350                 trace_printk_control(enabled);
5351         }
5352
5353         return 0;
5354 }
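/*
 * For reference, the smp_load_acquire() counterpart in trace_find_tgid_ptr()
 * (earlier in this file) is expected to look roughly like:
 *
 *	int *map = smp_load_acquire(&tgid_map);
 *
 *	if (unlikely(!map || pid > tgid_map_max))
 *		return NULL;
 *	return &map[pid];
 *
 * so any reader that observes the newly published tgid_map pointer is also
 * guaranteed to observe the tgid_map_max value stored just before the
 * release above.
 */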
5355
5356 int trace_set_options(struct trace_array *tr, char *option)
5357 {
5358         char *cmp;
5359         int neg = 0;
5360         int ret;
5361         size_t orig_len = strlen(option);
5362         int len;
5363
5364         cmp = strstrip(option);
5365
5366         len = str_has_prefix(cmp, "no");
5367         if (len)
5368                 neg = 1;
5369
5370         cmp += len;
5371
5372         mutex_lock(&event_mutex);
5373         mutex_lock(&trace_types_lock);
5374
5375         ret = match_string(trace_options, -1, cmp);
5376         /* If no option could be set, test the specific tracer options */
5377         if (ret < 0)
5378                 ret = set_tracer_option(tr, cmp, neg);
5379         else
5380                 ret = set_tracer_flag(tr, 1 << ret, !neg);
5381
5382         mutex_unlock(&trace_types_lock);
5383         mutex_unlock(&event_mutex);
5384
5385         /*
5386          * If the first trailing whitespace is replaced with '\0' by strstrip,
5387          * turn it back into a space.
5388          */
5389         if (orig_len > strlen(option))
5390                 option[strlen(option)] = ' ';
5391
5392         return ret;
5393 }
5394
5395 static void __init apply_trace_boot_options(void)
5396 {
5397         char *buf = trace_boot_options_buf;
5398         char *option;
5399
5400         while (true) {
5401                 option = strsep(&buf, ",");
5402
5403                 if (!option)
5404                         break;
5405
5406                 if (*option)
5407                         trace_set_options(&global_trace, option);
5408
5409                 /* Put back the comma to allow this to be called again */
5410                 if (buf)
5411                         *(buf - 1) = ',';
5412         }
5413 }
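/*
 * The buffer walked above is filled from the "trace_options=" kernel command
 * line parameter (parsed earlier in this file), e.g. booting with:
 *
 *	trace_options=sym-offset,noprint-parent
 *
 * applies those options before user space comes up.  The option names here
 * are only illustrative; the valid set is whatever trace_options[] and the
 * current tracer expose.
 */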
5414
5415 static ssize_t
5416 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5417                         size_t cnt, loff_t *ppos)
5418 {
5419         struct seq_file *m = filp->private_data;
5420         struct trace_array *tr = m->private;
5421         char buf[64];
5422         int ret;
5423
5424         if (cnt >= sizeof(buf))
5425                 return -EINVAL;
5426
5427         if (copy_from_user(buf, ubuf, cnt))
5428                 return -EFAULT;
5429
5430         buf[cnt] = 0;
5431
5432         ret = trace_set_options(tr, buf);
5433         if (ret < 0)
5434                 return ret;
5435
5436         *ppos += cnt;
5437
5438         return cnt;
5439 }
5440
5441 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5442 {
5443         struct trace_array *tr = inode->i_private;
5444         int ret;
5445
5446         ret = tracing_check_open_get_tr(tr);
5447         if (ret)
5448                 return ret;
5449
5450         ret = single_open(file, tracing_trace_options_show, inode->i_private);
5451         if (ret < 0)
5452                 trace_array_put(tr);
5453
5454         return ret;
5455 }
5456
5457 static const struct file_operations tracing_iter_fops = {
5458         .open           = tracing_trace_options_open,
5459         .read           = seq_read,
5460         .llseek         = seq_lseek,
5461         .release        = tracing_single_release_tr,
5462         .write          = tracing_trace_options_write,
5463 };
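/*
 * Example interaction with the "trace_options" file that tracing_iter_fops
 * serves (option names are illustrative):
 *
 *	# cat /sys/kernel/tracing/trace_options		# one "opt" or "noopt" per line
 *	# echo sym-offset > /sys/kernel/tracing/trace_options
 *	# echo noprint-parent > /sys/kernel/tracing/trace_options
 */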
5464
5465 static const char readme_msg[] =
5466         "tracing mini-HOWTO:\n\n"
5467         "# echo 0 > tracing_on : quick way to disable tracing\n"
5468         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5469         " Important files:\n"
5470         "  trace\t\t\t- The static contents of the buffer\n"
5471         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
5472         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5473         "  current_tracer\t- function and latency tracers\n"
5474         "  available_tracers\t- list of configured tracers for current_tracer\n"
5475         "  error_log\t- error log for failed commands (that support it)\n"
5476         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5477         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5478         "  trace_clock\t\t- change the clock used to order events\n"
5479         "       local:   Per cpu clock but may not be synced across CPUs\n"
5480         "      global:   Synced across CPUs but slows tracing down.\n"
5481         "     counter:   Not a clock, but just an increment\n"
5482         "      uptime:   Jiffy counter from time of boot\n"
5483         "        perf:   Same clock that perf events use\n"
5484 #ifdef CONFIG_X86_64
5485         "     x86-tsc:   TSC cycle counter\n"
5486 #endif
5487         "\n  timestamp_mode\t- view the mode used to timestamp events\n"
5488         "       delta:   Delta difference against a buffer-wide timestamp\n"
5489         "    absolute:   Absolute (standalone) timestamp\n"
5490         "\n  trace_marker\t\t- Writes into this file are written into the kernel buffer\n"
5491         "\n  trace_marker_raw\t\t- Writes into this file are written as binary data into the kernel buffer\n"
5492         "  tracing_cpumask\t- Limit which CPUs to trace\n"
5493         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5494         "\t\t\t  Remove sub-buffer with rmdir\n"
5495         "  trace_options\t\t- Set format or modify how tracing happens\n"
5496         "\t\t\t  Disable an option by prefixing 'no' to the\n"
5497         "\t\t\t  option name\n"
5498         "  saved_cmdlines_size\t- echo the number of comm-pid entries to cache in here\n"
5499 #ifdef CONFIG_DYNAMIC_FTRACE
5500         "\n  available_filter_functions - list of functions that can be filtered on\n"
5501         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
5502         "\t\t\t  functions\n"
5503         "\t     accepts: func_full_name or glob-matching-pattern\n"
5504         "\t     modules: Can select a group via module\n"
5505         "\t      Format: :mod:<module-name>\n"
5506         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5507         "\t    triggers: a command to perform when function is hit\n"
5508         "\t      Format: <function>:<trigger>[:count]\n"
5509         "\t     trigger: traceon, traceoff\n"
5510         "\t\t      enable_event:<system>:<event>\n"
5511         "\t\t      disable_event:<system>:<event>\n"
5512 #ifdef CONFIG_STACKTRACE
5513         "\t\t      stacktrace\n"
5514 #endif
5515 #ifdef CONFIG_TRACER_SNAPSHOT
5516         "\t\t      snapshot\n"
5517 #endif
5518         "\t\t      dump\n"
5519         "\t\t      cpudump\n"
5520         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5521         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5522         "\t     The first one will disable tracing every time do_fault is hit\n"
5523         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5524         "\t       The first time do_trap is hit and it disables tracing, the\n"
5525         "\t       counter will decrement to 2. If tracing is already disabled,\n"
5526         "\t       the counter will not decrement. It only decrements when the\n"
5527         "\t       trigger did work\n"
5528         "\t     To remove trigger without count:\n"
5529         "\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
5530         "\t     To remove trigger with a count:\n"
5531         "\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
5532         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5533         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5534         "\t    modules: Can select a group via module command :mod:\n"
5535         "\t    Does not accept triggers\n"
5536 #endif /* CONFIG_DYNAMIC_FTRACE */
5537 #ifdef CONFIG_FUNCTION_TRACER
5538         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5539         "\t\t    (function)\n"
5540         "  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5541         "\t\t    (function)\n"
5542 #endif
5543 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5544         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5545         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5546         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5547 #endif
5548 #ifdef CONFIG_TRACER_SNAPSHOT
5549         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5550         "\t\t\t  snapshot buffer. Read the contents for more\n"
5551         "\t\t\t  information\n"
5552 #endif
5553 #ifdef CONFIG_STACK_TRACER
5554         "  stack_trace\t\t- Shows the max stack trace when active\n"
5555         "  stack_max_size\t- Shows current max stack size that was traced\n"
5556         "\t\t\t  Write into this file to reset the max size (trigger a\n"
5557         "\t\t\t  new trace)\n"
5558 #ifdef CONFIG_DYNAMIC_FTRACE
5559         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5560         "\t\t\t  traces\n"
5561 #endif
5562 #endif /* CONFIG_STACK_TRACER */
5563 #ifdef CONFIG_DYNAMIC_EVENTS
5564         "  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5565         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5566 #endif
5567 #ifdef CONFIG_KPROBE_EVENTS
5568         "  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5569         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5570 #endif
5571 #ifdef CONFIG_UPROBE_EVENTS
5572         "  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5573         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5574 #endif
5575 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5576         "\t  accepts: event-definitions (one definition per line)\n"
5577         "\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
5578         "\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
5579 #ifdef CONFIG_HIST_TRIGGERS
5580         "\t           s:[synthetic/]<event> <field> [<field>]\n"
5581 #endif
5582         "\t           e[:[<group>/]<event>] <attached-group>.<attached-event> [<args>]\n"
5583         "\t           -:[<group>/]<event>\n"
5584 #ifdef CONFIG_KPROBE_EVENTS
5585         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5586   "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5587 #endif
5588 #ifdef CONFIG_UPROBE_EVENTS
5589   "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5590 #endif
5591         "\t     args: <name>=fetcharg[:type]\n"
5592         "\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
5593 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5594         "\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5595 #else
5596         "\t           $stack<index>, $stack, $retval, $comm,\n"
5597 #endif
5598         "\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5599         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
5600         "\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5601         "\t           <type>\\[<array-size>\\]\n"
5602 #ifdef CONFIG_HIST_TRIGGERS
5603         "\t    field: <stype> <name>;\n"
5604         "\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5605         "\t           [unsigned] char/int/long\n"
5606 #endif
5607         "\t    efield: For event probes ('e' types), the field is one of the fields\n"
5608         "\t            of the <attached-group>/<attached-event>.\n"
5609 #endif
5610         "  events/\t\t- Directory containing all trace event subsystems:\n"
5611         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5612         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
5613         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5614         "\t\t\t  events\n"
5615         "      filter\t\t- If set, only events passing filter are traced\n"
5616         "  events/<system>/<event>/\t- Directory containing control files for\n"
5617         "\t\t\t  <event>:\n"
5618         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5619         "      filter\t\t- If set, only events passing filter are traced\n"
5620         "      trigger\t\t- If set, a command to perform when event is hit\n"
5621         "\t    Format: <trigger>[:count][if <filter>]\n"
5622         "\t   trigger: traceon, traceoff\n"
5623         "\t            enable_event:<system>:<event>\n"
5624         "\t            disable_event:<system>:<event>\n"
5625 #ifdef CONFIG_HIST_TRIGGERS
5626         "\t            enable_hist:<system>:<event>\n"
5627         "\t            disable_hist:<system>:<event>\n"
5628 #endif
5629 #ifdef CONFIG_STACKTRACE
5630         "\t\t    stacktrace\n"
5631 #endif
5632 #ifdef CONFIG_TRACER_SNAPSHOT
5633         "\t\t    snapshot\n"
5634 #endif
5635 #ifdef CONFIG_HIST_TRIGGERS
5636         "\t\t    hist (see below)\n"
5637 #endif
5638         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5639         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5640         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5641         "\t                  events/block/block_unplug/trigger\n"
5642         "\t   The first disables tracing every time block_unplug is hit.\n"
5643         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5644         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5645         "\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5646         "\t   Like function triggers, the counter is only decremented if it\n"
5647         "\t    enabled or disabled tracing.\n"
5648         "\t   To remove a trigger without a count:\n"
5649         "\t     echo '!<trigger>' > <system>/<event>/trigger\n"
5650         "\t   To remove a trigger with a count:\n"
5651         "\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
5652         "\t   Filters can be ignored when removing a trigger.\n"
5653 #ifdef CONFIG_HIST_TRIGGERS
5654         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5655         "\t    Format: hist:keys=<field1[,field2,...]>\n"
5656         "\t            [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
5657         "\t            [:values=<field1[,field2,...]>]\n"
5658         "\t            [:sort=<field1[,field2,...]>]\n"
5659         "\t            [:size=#entries]\n"
5660         "\t            [:pause][:continue][:clear]\n"
5661         "\t            [:name=histname1]\n"
5662         "\t            [:<handler>.<action>]\n"
5663         "\t            [if <filter>]\n\n"
5664         "\t    Note, special fields can be used as well:\n"
5665         "\t            common_timestamp - to record current timestamp\n"
5666         "\t            common_cpu - to record the CPU the event happened on\n"
5667         "\n"
5668         "\t    A hist trigger variable can be:\n"
5669         "\t        - a reference to a field e.g. x=current_timestamp,\n"
5670         "\t        - a reference to another variable e.g. y=$x,\n"
5671         "\t        - a numeric literal: e.g. ms_per_sec=1000,\n"
5672         "\t        - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n"
5673         "\n"
5674         "\t    hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
5675         "\t    multiplication(*) and division(/) operators. An operand can be either a\n"
5676         "\t    variable reference, field or numeric literal.\n"
5677         "\n"
5678         "\t    When a matching event is hit, an entry is added to a hash\n"
5679         "\t    table using the key(s) and value(s) named, and the value of a\n"
5680         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
5681         "\t    correspond to fields in the event's format description.  Keys\n"
5682         "\t    can be any field, or the special string 'stacktrace'.\n"
5683         "\t    Compound keys consisting of up to two fields can be specified\n"
5684         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5685         "\t    fields.  Sort keys consisting of up to two fields can be\n"
5686         "\t    specified using the 'sort' keyword.  The sort direction can\n"
5687         "\t    be modified by appending '.descending' or '.ascending' to a\n"
5688         "\t    sort field.  The 'size' parameter can be used to specify more\n"
5689         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
5690         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
5691         "\t    its histogram data will be shared with other triggers of the\n"
5692         "\t    same name, and trigger hits will update this common data.\n\n"
5693         "\t    Reading the 'hist' file for the event will dump the hash\n"
5694         "\t    table in its entirety to stdout.  If there are multiple hist\n"
5695         "\t    triggers attached to an event, there will be a table for each\n"
5696         "\t    trigger in the output.  The table displayed for a named\n"
5697         "\t    trigger will be the same as any other instance having the\n"
5698         "\t    same name.  The default format used to display a given field\n"
5699         "\t    can be modified by appending any of the following modifiers\n"
5700         "\t    to the field name, as applicable:\n\n"
5701         "\t            .hex        display a number as a hex value\n"
5702         "\t            .sym        display an address as a symbol\n"
5703         "\t            .sym-offset display an address as a symbol and offset\n"
5704         "\t            .execname   display a common_pid as a program name\n"
5705         "\t            .syscall    display a syscall id as a syscall name\n"
5706         "\t            .log2       display log2 value rather than raw number\n"
5707         "\t            .buckets=size  display values in groups of size rather than raw number\n"
5708         "\t            .usecs      display a common_timestamp in microseconds\n\n"
5709         "\t    The 'pause' parameter can be used to pause an existing hist\n"
5710         "\t    trigger or to start a hist trigger but not log any events\n"
5711         "\t    until told to do so.  'continue' can be used to start or\n"
5712         "\t    restart a paused hist trigger.\n\n"
5713         "\t    The 'clear' parameter will clear the contents of a running\n"
5714         "\t    hist trigger and leave its current paused/active state\n"
5715         "\t    unchanged.\n\n"
5716         "\t    The enable_hist and disable_hist triggers can be used to\n"
5717         "\t    have one event conditionally start and stop another event's\n"
5718         "\t    already-attached hist trigger.  The syntax is analogous to\n"
5719         "\t    the enable_event and disable_event triggers.\n\n"
5720         "\t    Hist trigger handlers and actions are executed whenever a\n"
5721         "\t    histogram entry is added or updated.  They take the form:\n\n"
5722         "\t        <handler>.<action>\n\n"
5723         "\t    The available handlers are:\n\n"
5724         "\t        onmatch(matching.event)  - invoke on addition or update\n"
5725         "\t        onmax(var)               - invoke if var exceeds current max\n"
5726         "\t        onchange(var)            - invoke action if var changes\n\n"
5727         "\t    The available actions are:\n\n"
5728         "\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5729         "\t        save(field,...)                      - save current event fields\n"
5730 #ifdef CONFIG_TRACER_SNAPSHOT
5731         "\t        snapshot()                           - snapshot the trace buffer\n\n"
5732 #endif
5733 #ifdef CONFIG_SYNTH_EVENTS
5734         "  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5735         "\t  Write into this file to define/undefine new synthetic events.\n"
5736         "\t     example: echo 'myevent u64 lat; char name[]' >> synthetic_events\n"
5737 #endif
5738 #endif
5739 ;
5740
5741 static ssize_t
5742 tracing_readme_read(struct file *filp, char __user *ubuf,
5743                        size_t cnt, loff_t *ppos)
5744 {
5745         return simple_read_from_buffer(ubuf, cnt, ppos,
5746                                         readme_msg, strlen(readme_msg));
5747 }
5748
5749 static const struct file_operations tracing_readme_fops = {
5750         .open           = tracing_open_generic,
5751         .read           = tracing_readme_read,
5752         .llseek         = generic_file_llseek,
5753 };
5754
5755 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5756 {
5757         int pid = ++(*pos);
5758
5759         return trace_find_tgid_ptr(pid);
5760 }
5761
5762 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5763 {
5764         int pid = *pos;
5765
5766         return trace_find_tgid_ptr(pid);
5767 }
5768
5769 static void saved_tgids_stop(struct seq_file *m, void *v)
5770 {
5771 }
5772
5773 static int saved_tgids_show(struct seq_file *m, void *v)
5774 {
5775         int *entry = (int *)v;
5776         int pid = entry - tgid_map;
5777         int tgid = *entry;
5778
5779         if (tgid == 0)
5780                 return SEQ_SKIP;
5781
5782         seq_printf(m, "%d %d\n", pid, tgid);
5783         return 0;
5784 }
5785
5786 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5787         .start          = saved_tgids_start,
5788         .stop           = saved_tgids_stop,
5789         .next           = saved_tgids_next,
5790         .show           = saved_tgids_show,
5791 };
5792
5793 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5794 {
5795         int ret;
5796
5797         ret = tracing_check_open_get_tr(NULL);
5798         if (ret)
5799                 return ret;
5800
5801         return seq_open(filp, &tracing_saved_tgids_seq_ops);
5802 }
5803
5804
5805 static const struct file_operations tracing_saved_tgids_fops = {
5806         .open           = tracing_saved_tgids_open,
5807         .read           = seq_read,
5808         .llseek         = seq_lseek,
5809         .release        = seq_release,
5810 };
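/*
 * Example of the "saved_tgids" file backed by the fops above.  It is only
 * populated while the record-tgid option is set; the pid/tgid pairs below
 * are made up:
 *
 *	# echo 1 > /sys/kernel/tracing/options/record-tgid
 *	# cat /sys/kernel/tracing/saved_tgids
 *	1234 1234
 *	1235 1234
 */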
5811
5812 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5813 {
5814         unsigned int *ptr = v;
5815
5816         if (*pos || m->count)
5817                 ptr++;
5818
5819         (*pos)++;
5820
5821         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5822              ptr++) {
5823                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5824                         continue;
5825
5826                 return ptr;
5827         }
5828
5829         return NULL;
5830 }
5831
5832 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5833 {
5834         void *v;
5835         loff_t l = 0;
5836
5837         preempt_disable();
5838         arch_spin_lock(&trace_cmdline_lock);
5839
5840         v = &savedcmd->map_cmdline_to_pid[0];
5841         while (l <= *pos) {
5842                 v = saved_cmdlines_next(m, v, &l);
5843                 if (!v)
5844                         return NULL;
5845         }
5846
5847         return v;
5848 }
5849
5850 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5851 {
5852         arch_spin_unlock(&trace_cmdline_lock);
5853         preempt_enable();
5854 }
5855
5856 static int saved_cmdlines_show(struct seq_file *m, void *v)
5857 {
5858         char buf[TASK_COMM_LEN];
5859         unsigned int *pid = v;
5860
5861         __trace_find_cmdline(*pid, buf);
5862         seq_printf(m, "%d %s\n", *pid, buf);
5863         return 0;
5864 }
5865
5866 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5867         .start          = saved_cmdlines_start,
5868         .next           = saved_cmdlines_next,
5869         .stop           = saved_cmdlines_stop,
5870         .show           = saved_cmdlines_show,
5871 };
5872
5873 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5874 {
5875         int ret;
5876
5877         ret = tracing_check_open_get_tr(NULL);
5878         if (ret)
5879                 return ret;
5880
5881         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5882 }
5883
5884 static const struct file_operations tracing_saved_cmdlines_fops = {
5885         .open           = tracing_saved_cmdlines_open,
5886         .read           = seq_read,
5887         .llseek         = seq_lseek,
5888         .release        = seq_release,
5889 };
5890
5891 static ssize_t
5892 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5893                                  size_t cnt, loff_t *ppos)
5894 {
5895         char buf[64];
5896         int r;
5897
5898         arch_spin_lock(&trace_cmdline_lock);
5899         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5900         arch_spin_unlock(&trace_cmdline_lock);
5901
5902         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5903 }
5904
5905 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5906 {
5907         kfree(s->saved_cmdlines);
5908         kfree(s->map_cmdline_to_pid);
5909         kfree(s);
5910 }
5911
5912 static int tracing_resize_saved_cmdlines(unsigned int val)
5913 {
5914         struct saved_cmdlines_buffer *s, *savedcmd_temp;
5915
5916         s = kmalloc(sizeof(*s), GFP_KERNEL);
5917         if (!s)
5918                 return -ENOMEM;
5919
5920         if (allocate_cmdlines_buffer(val, s) < 0) {
5921                 kfree(s);
5922                 return -ENOMEM;
5923         }
5924
5925         arch_spin_lock(&trace_cmdline_lock);
5926         savedcmd_temp = savedcmd;
5927         savedcmd = s;
5928         arch_spin_unlock(&trace_cmdline_lock);
5929         free_saved_cmdlines_buffer(savedcmd_temp);
5930
5931         return 0;
5932 }
5933
5934 static ssize_t
5935 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5936                                   size_t cnt, loff_t *ppos)
5937 {
5938         unsigned long val;
5939         int ret;
5940
5941         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5942         if (ret)
5943                 return ret;
5944
5945         /* must have at least 1 entry and no more than PID_MAX_DEFAULT entries */
5946         if (!val || val > PID_MAX_DEFAULT)
5947                 return -EINVAL;
5948
5949         ret = tracing_resize_saved_cmdlines((unsigned int)val);
5950         if (ret < 0)
5951                 return ret;
5952
5953         *ppos += cnt;
5954
5955         return cnt;
5956 }
5957
5958 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5959         .open           = tracing_open_generic,
5960         .read           = tracing_saved_cmdlines_size_read,
5961         .write          = tracing_saved_cmdlines_size_write,
5962 };
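/*
 * Example of resizing the saved cmdlines cache through the file above (the
 * new size must be between 1 and PID_MAX_DEFAULT, as checked in the write
 * handler):
 *
 *	# echo 1024 > /sys/kernel/tracing/saved_cmdlines_size
 *	# cat /sys/kernel/tracing/saved_cmdlines_size
 *	1024
 */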
5963
5964 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5965 static union trace_eval_map_item *
5966 update_eval_map(union trace_eval_map_item *ptr)
5967 {
5968         if (!ptr->map.eval_string) {
5969                 if (ptr->tail.next) {
5970                         ptr = ptr->tail.next;
5971                         /* Set ptr to the next real item (skip head) */
5972                         ptr++;
5973                 } else
5974                         return NULL;
5975         }
5976         return ptr;
5977 }
5978
5979 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5980 {
5981         union trace_eval_map_item *ptr = v;
5982
5983         /*
5984          * Paranoid! If ptr points to end, we don't want to increment past it.
5985          * This really should never happen.
5986          */
5987         (*pos)++;
5988         ptr = update_eval_map(ptr);
5989         if (WARN_ON_ONCE(!ptr))
5990                 return NULL;
5991
5992         ptr++;
5993         ptr = update_eval_map(ptr);
5994
5995         return ptr;
5996 }
5997
5998 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5999 {
6000         union trace_eval_map_item *v;
6001         loff_t l = 0;
6002
6003         mutex_lock(&trace_eval_mutex);
6004
6005         v = trace_eval_maps;
6006         if (v)
6007                 v++;
6008
6009         while (v && l < *pos) {
6010                 v = eval_map_next(m, v, &l);
6011         }
6012
6013         return v;
6014 }
6015
6016 static void eval_map_stop(struct seq_file *m, void *v)
6017 {
6018         mutex_unlock(&trace_eval_mutex);
6019 }
6020
6021 static int eval_map_show(struct seq_file *m, void *v)
6022 {
6023         union trace_eval_map_item *ptr = v;
6024
6025         seq_printf(m, "%s %ld (%s)\n",
6026                    ptr->map.eval_string, ptr->map.eval_value,
6027                    ptr->map.system);
6028
6029         return 0;
6030 }
6031
6032 static const struct seq_operations tracing_eval_map_seq_ops = {
6033         .start          = eval_map_start,
6034         .next           = eval_map_next,
6035         .stop           = eval_map_stop,
6036         .show           = eval_map_show,
6037 };
6038
6039 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
6040 {
6041         int ret;
6042
6043         ret = tracing_check_open_get_tr(NULL);
6044         if (ret)
6045                 return ret;
6046
6047         return seq_open(filp, &tracing_eval_map_seq_ops);
6048 }
6049
6050 static const struct file_operations tracing_eval_map_fops = {
6051         .open           = tracing_eval_map_open,
6052         .read           = seq_read,
6053         .llseek         = seq_lseek,
6054         .release        = seq_release,
6055 };
6056
6057 static inline union trace_eval_map_item *
6058 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
6059 {
6060         /* Return tail of array given the head */
6061         return ptr + ptr->head.length + 1;
6062 }
6063
6064 static void
6065 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
6066                            int len)
6067 {
6068         struct trace_eval_map **stop;
6069         struct trace_eval_map **map;
6070         union trace_eval_map_item *map_array;
6071         union trace_eval_map_item *ptr;
6072
6073         stop = start + len;
6074
6075         /*
6076          * The trace_eval_maps contains the map plus a head and tail item,
6077          * where the head holds the module and length of array, and the
6078          * tail holds a pointer to the next list.
6079          */
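        /*
         * For illustration, a block contributed by a module with N maps is
         * laid out as:
         *
         *      [ head: mod, length=N ][ map 0 ] ... [ map N-1 ][ tail: next ]
         *
         * which is why len + 2 items are allocated below and why
         * trace_eval_jmp_to_tail() steps over "length + 1" entries.
         */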
6080         map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
6081         if (!map_array) {
6082                 pr_warn("Unable to allocate trace eval mapping\n");
6083                 return;
6084         }
6085
6086         mutex_lock(&trace_eval_mutex);
6087
6088         if (!trace_eval_maps)
6089                 trace_eval_maps = map_array;
6090         else {
6091                 ptr = trace_eval_maps;
6092                 for (;;) {
6093                         ptr = trace_eval_jmp_to_tail(ptr);
6094                         if (!ptr->tail.next)
6095                                 break;
6096                         ptr = ptr->tail.next;
6097
6098                 }
6099                 ptr->tail.next = map_array;
6100         }
6101         map_array->head.mod = mod;
6102         map_array->head.length = len;
6103         map_array++;
6104
6105         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
6106                 map_array->map = **map;
6107                 map_array++;
6108         }
6109         memset(map_array, 0, sizeof(*map_array));
6110
6111         mutex_unlock(&trace_eval_mutex);
6112 }
6113
6114 static void trace_create_eval_file(struct dentry *d_tracer)
6115 {
6116         trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
6117                           NULL, &tracing_eval_map_fops);
6118 }
6119
6120 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
6121 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
6122 static inline void trace_insert_eval_map_file(struct module *mod,
6123                               struct trace_eval_map **start, int len) { }
6124 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
6125
6126 static void trace_insert_eval_map(struct module *mod,
6127                                   struct trace_eval_map **start, int len)
6128 {
6129         struct trace_eval_map **map;
6130
6131         if (len <= 0)
6132                 return;
6133
6134         map = start;
6135
6136         trace_event_eval_update(map, len);
6137
6138         trace_insert_eval_map_file(mod, start, len);
6139 }
6140
6141 static ssize_t
6142 tracing_set_trace_read(struct file *filp, char __user *ubuf,
6143                        size_t cnt, loff_t *ppos)
6144 {
6145         struct trace_array *tr = filp->private_data;
6146         char buf[MAX_TRACER_SIZE+2];
6147         int r;
6148
6149         mutex_lock(&trace_types_lock);
6150         r = sprintf(buf, "%s\n", tr->current_trace->name);
6151         mutex_unlock(&trace_types_lock);
6152
6153         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6154 }
6155
6156 int tracer_init(struct tracer *t, struct trace_array *tr)
6157 {
6158         tracing_reset_online_cpus(&tr->array_buffer);
6159         return t->init(tr);
6160 }
6161
6162 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
6163 {
6164         int cpu;
6165
6166         for_each_tracing_cpu(cpu)
6167                 per_cpu_ptr(buf->data, cpu)->entries = val;
6168 }
6169
6170 #ifdef CONFIG_TRACER_MAX_TRACE
6171 /* resize @trace_buf's buffer to the size of @size_buf's entries */
6172 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
6173                                         struct array_buffer *size_buf, int cpu_id)
6174 {
6175         int cpu, ret = 0;
6176
6177         if (cpu_id == RING_BUFFER_ALL_CPUS) {
6178                 for_each_tracing_cpu(cpu) {
6179                         ret = ring_buffer_resize(trace_buf->buffer,
6180                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
6181                         if (ret < 0)
6182                                 break;
6183                         per_cpu_ptr(trace_buf->data, cpu)->entries =
6184                                 per_cpu_ptr(size_buf->data, cpu)->entries;
6185                 }
6186         } else {
6187                 ret = ring_buffer_resize(trace_buf->buffer,
6188                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6189                 if (ret == 0)
6190                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6191                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
6192         }
6193
6194         return ret;
6195 }
6196 #endif /* CONFIG_TRACER_MAX_TRACE */
6197
6198 static int __tracing_resize_ring_buffer(struct trace_array *tr,
6199                                         unsigned long size, int cpu)
6200 {
6201         int ret;
6202
6203         /*
6204          * If kernel or user changes the size of the ring buffer
6205          * we use the size that was given, and we can forget about
6206          * expanding it later.
6207          */
6208         ring_buffer_expanded = true;
6209
6210         /* May be called before buffers are initialized */
6211         if (!tr->array_buffer.buffer)
6212                 return 0;
6213
6214         ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6215         if (ret < 0)
6216                 return ret;
6217
6218 #ifdef CONFIG_TRACER_MAX_TRACE
6219         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
6220             !tr->current_trace->use_max_tr)
6221                 goto out;
6222
6223         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6224         if (ret < 0) {
6225                 int r = resize_buffer_duplicate_size(&tr->array_buffer,
6226                                                      &tr->array_buffer, cpu);
6227                 if (r < 0) {
6228                         /*
6229                          * AARGH! We are left with different
6230                          * size max buffer!!!!
6231                          * The max buffer is our "snapshot" buffer.
6232                          * When a tracer needs a snapshot (one of the
6233                          * latency tracers), it swaps the max buffer
6234                          * with the saved snapshot. We succeeded in
6235                          * updating the size of the main buffer, but failed
6236                          * to update the size of the max buffer. Worse, when
6237                          * we tried to reset the main buffer to its original
6238                          * size, we failed there too. This is very unlikely to
6239                          * happen, but if it does, warn and kill all
6240                          * tracing.
6241                          */
6242                         WARN_ON(1);
6243                         tracing_disabled = 1;
6244                 }
6245                 return ret;
6246         }
6247
6248         if (cpu == RING_BUFFER_ALL_CPUS)
6249                 set_buffer_entries(&tr->max_buffer, size);
6250         else
6251                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
6252
6253  out:
6254 #endif /* CONFIG_TRACER_MAX_TRACE */
6255
6256         if (cpu == RING_BUFFER_ALL_CPUS)
6257                 set_buffer_entries(&tr->array_buffer, size);
6258         else
6259                 per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size;
6260
6261         return ret;
6262 }
6263
6264 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6265                                   unsigned long size, int cpu_id)
6266 {
6267         int ret;
6268
6269         mutex_lock(&trace_types_lock);
6270
6271         if (cpu_id != RING_BUFFER_ALL_CPUS) {
6272                 /* make sure this cpu is enabled in the mask */
6273                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
6274                         ret = -EINVAL;
6275                         goto out;
6276                 }
6277         }
6278
6279         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
6280         if (ret < 0)
6281                 ret = -ENOMEM;
6282
6283 out:
6284         mutex_unlock(&trace_types_lock);
6285
6286         return ret;
6287 }
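
/*
 * Illustrative call patterns (both taken from callers later in this
 * file): @size is a byte count and @cpu_id selects one CPU or all of
 * them, e.g.
 *
 *	tracing_resize_ring_buffer(tr, val << 10, tracing_get_cpu(inode));
 *	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
 *
 * as done by tracing_entries_write() (KB from userspace converted to
 * bytes) and tracing_free_buffer_release() (shrink to nothing).
 */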
6288
6289
6290 /**
6291  * tracing_update_buffers - used by tracing facility to expand ring buffers
6292  *
6293  * To save memory when tracing is never used on a system that has it
6294  * configured in, the ring buffers are set to a minimum size. But once
6295  * a user starts to use the tracing facility, they need to grow
6296  * to their default size.
6297  *
6298  * This function is to be called when a tracer is about to be used.
6299  */
6300 int tracing_update_buffers(void)
6301 {
6302         int ret = 0;
6303
6304         mutex_lock(&trace_types_lock);
6305         if (!ring_buffer_expanded)
6306                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
6307                                                 RING_BUFFER_ALL_CPUS);
6308         mutex_unlock(&trace_types_lock);
6309
6310         return ret;
6311 }
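
/*
 * Illustrative usage (see tracing_snapshot_write() later in this file):
 * a caller that is about to start using the buffers expands them first
 * and bails out on failure:
 *
 *	ret = tracing_update_buffers();
 *	if (ret < 0)
 *		return ret;
 */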
6312
6313 struct trace_option_dentry;
6314
6315 static void
6316 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6317
6318 /*
6319  * Used to clear out the tracer before deletion of an instance.
6320  * Must have trace_types_lock held.
6321  */
6322 static void tracing_set_nop(struct trace_array *tr)
6323 {
6324         if (tr->current_trace == &nop_trace)
6325                 return;
6326
6327         tr->current_trace->enabled--;
6328
6329         if (tr->current_trace->reset)
6330                 tr->current_trace->reset(tr);
6331
6332         tr->current_trace = &nop_trace;
6333 }
6334
6335 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6336 {
6337         /* Only enable if the directory has been created already. */
6338         if (!tr->dir)
6339                 return;
6340
6341         create_trace_option_files(tr, t);
6342 }
6343
6344 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6345 {
6346         struct tracer *t;
6347 #ifdef CONFIG_TRACER_MAX_TRACE
6348         bool had_max_tr;
6349 #endif
6350         int ret = 0;
6351
6352         mutex_lock(&trace_types_lock);
6353
6354         if (!ring_buffer_expanded) {
6355                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6356                                                 RING_BUFFER_ALL_CPUS);
6357                 if (ret < 0)
6358                         goto out;
6359                 ret = 0;
6360         }
6361
6362         for (t = trace_types; t; t = t->next) {
6363                 if (strcmp(t->name, buf) == 0)
6364                         break;
6365         }
6366         if (!t) {
6367                 ret = -EINVAL;
6368                 goto out;
6369         }
6370         if (t == tr->current_trace)
6371                 goto out;
6372
6373 #ifdef CONFIG_TRACER_SNAPSHOT
6374         if (t->use_max_tr) {
6375                 arch_spin_lock(&tr->max_lock);
6376                 if (tr->cond_snapshot)
6377                         ret = -EBUSY;
6378                 arch_spin_unlock(&tr->max_lock);
6379                 if (ret)
6380                         goto out;
6381         }
6382 #endif
6383         /* Some tracers won't work when set on the kernel command line */
6384         if (system_state < SYSTEM_RUNNING && t->noboot) {
6385                 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6386                         t->name);
6387                 goto out;
6388         }
6389
6390         /* Some tracers are only allowed for the top level buffer */
6391         if (!trace_ok_for_array(t, tr)) {
6392                 ret = -EINVAL;
6393                 goto out;
6394         }
6395
6396         /* If trace pipe files are being read, we can't change the tracer */
6397         if (tr->trace_ref) {
6398                 ret = -EBUSY;
6399                 goto out;
6400         }
6401
6402         trace_branch_disable();
6403
6404         tr->current_trace->enabled--;
6405
6406         if (tr->current_trace->reset)
6407                 tr->current_trace->reset(tr);
6408
6409         /* Current trace needs to be nop_trace before synchronize_rcu */
6410         tr->current_trace = &nop_trace;
6411
6412 #ifdef CONFIG_TRACER_MAX_TRACE
6413         had_max_tr = tr->allocated_snapshot;
6414
6415         if (had_max_tr && !t->use_max_tr) {
6416                 /*
6417                  * We need to make sure that the update_max_tr sees that
6418                  * current_trace changed to nop_trace to keep it from
6419                  * swapping the buffers after we resize it.
6420                  * The update_max_tr is called with interrupts disabled,
6421                  * so a synchronize_rcu() is sufficient.
6422                  */
6423                 synchronize_rcu();
6424                 free_snapshot(tr);
6425         }
6426 #endif
6427
6428 #ifdef CONFIG_TRACER_MAX_TRACE
6429         if (t->use_max_tr && !had_max_tr) {
6430                 ret = tracing_alloc_snapshot_instance(tr);
6431                 if (ret < 0)
6432                         goto out;
6433         }
6434 #endif
6435
6436         if (t->init) {
6437                 ret = tracer_init(t, tr);
6438                 if (ret)
6439                         goto out;
6440         }
6441
6442         tr->current_trace = t;
6443         tr->current_trace->enabled++;
6444         trace_branch_enable(tr);
6445  out:
6446         mutex_unlock(&trace_types_lock);
6447
6448         return ret;
6449 }
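
/*
 * Userspace reaches tracing_set_tracer() through tracing_set_trace_write()
 * below, which is typically wired up as the "current_tracer" file in
 * tracefs (the file itself is created elsewhere in this file), e.g.
 *
 *	echo nop > current_tracer
 *
 * switches the instance back to the nop tracer.
 */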
6450
6451 static ssize_t
6452 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6453                         size_t cnt, loff_t *ppos)
6454 {
6455         struct trace_array *tr = filp->private_data;
6456         char buf[MAX_TRACER_SIZE+1];
6457         int i;
6458         size_t ret;
6459         int err;
6460
6461         ret = cnt;
6462
6463         if (cnt > MAX_TRACER_SIZE)
6464                 cnt = MAX_TRACER_SIZE;
6465
6466         if (copy_from_user(buf, ubuf, cnt))
6467                 return -EFAULT;
6468
6469         buf[cnt] = 0;
6470
6471         /* strip trailing whitespace */
6472         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
6473                 buf[i] = 0;
6474
6475         err = tracing_set_tracer(tr, buf);
6476         if (err)
6477                 return err;
6478
6479         *ppos += ret;
6480
6481         return ret;
6482 }
6483
6484 static ssize_t
6485 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6486                    size_t cnt, loff_t *ppos)
6487 {
6488         char buf[64];
6489         int r;
6490
6491         r = snprintf(buf, sizeof(buf), "%ld\n",
6492                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6493         if (r > sizeof(buf))
6494                 r = sizeof(buf);
6495         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6496 }
6497
6498 static ssize_t
6499 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6500                     size_t cnt, loff_t *ppos)
6501 {
6502         unsigned long val;
6503         int ret;
6504
6505         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6506         if (ret)
6507                 return ret;
6508
6509         *ptr = val * 1000;
6510
6511         return cnt;
6512 }
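
/*
 * Note on units: the value behind @ptr is kept in nanoseconds, but the
 * read side prints it in microseconds (nsecs_to_usecs()) and the write
 * side multiplies the user value by 1000. Writing "100" to a file backed
 * by these helpers (such as tracing_thresh below) therefore stores
 * 100000 ns and reads back as "100".
 */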
6513
6514 static ssize_t
6515 tracing_thresh_read(struct file *filp, char __user *ubuf,
6516                     size_t cnt, loff_t *ppos)
6517 {
6518         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6519 }
6520
6521 static ssize_t
6522 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6523                      size_t cnt, loff_t *ppos)
6524 {
6525         struct trace_array *tr = filp->private_data;
6526         int ret;
6527
6528         mutex_lock(&trace_types_lock);
6529         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6530         if (ret < 0)
6531                 goto out;
6532
6533         if (tr->current_trace->update_thresh) {
6534                 ret = tr->current_trace->update_thresh(tr);
6535                 if (ret < 0)
6536                         goto out;
6537         }
6538
6539         ret = cnt;
6540 out:
6541         mutex_unlock(&trace_types_lock);
6542
6543         return ret;
6544 }
6545
6546 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6547
6548 static ssize_t
6549 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6550                      size_t cnt, loff_t *ppos)
6551 {
6552         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
6553 }
6554
6555 static ssize_t
6556 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6557                       size_t cnt, loff_t *ppos)
6558 {
6559         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
6560 }
6561
6562 #endif
6563
6564 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6565 {
6566         struct trace_array *tr = inode->i_private;
6567         struct trace_iterator *iter;
6568         int ret;
6569
6570         ret = tracing_check_open_get_tr(tr);
6571         if (ret)
6572                 return ret;
6573
6574         mutex_lock(&trace_types_lock);
6575
6576         /* create a buffer to store the information to pass to userspace */
6577         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6578         if (!iter) {
6579                 ret = -ENOMEM;
6580                 __trace_array_put(tr);
6581                 goto out;
6582         }
6583
6584         trace_seq_init(&iter->seq);
6585         iter->trace = tr->current_trace;
6586
6587         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6588                 ret = -ENOMEM;
6589                 goto fail;
6590         }
6591
6592         /* trace pipe does not show start of buffer */
6593         cpumask_setall(iter->started);
6594
6595         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6596                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
6597
6598         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6599         if (trace_clocks[tr->clock_id].in_ns)
6600                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6601
6602         iter->tr = tr;
6603         iter->array_buffer = &tr->array_buffer;
6604         iter->cpu_file = tracing_get_cpu(inode);
6605         mutex_init(&iter->mutex);
6606         filp->private_data = iter;
6607
6608         if (iter->trace->pipe_open)
6609                 iter->trace->pipe_open(iter);
6610
6611         nonseekable_open(inode, filp);
6612
6613         tr->trace_ref++;
6614 out:
6615         mutex_unlock(&trace_types_lock);
6616         return ret;
6617
6618 fail:
6619         kfree(iter);
6620         __trace_array_put(tr);
6621         mutex_unlock(&trace_types_lock);
6622         return ret;
6623 }
6624
6625 static int tracing_release_pipe(struct inode *inode, struct file *file)
6626 {
6627         struct trace_iterator *iter = file->private_data;
6628         struct trace_array *tr = inode->i_private;
6629
6630         mutex_lock(&trace_types_lock);
6631
6632         tr->trace_ref--;
6633
6634         if (iter->trace->pipe_close)
6635                 iter->trace->pipe_close(iter);
6636
6637         mutex_unlock(&trace_types_lock);
6638
6639         free_cpumask_var(iter->started);
6640         mutex_destroy(&iter->mutex);
6641         kfree(iter);
6642
6643         trace_array_put(tr);
6644
6645         return 0;
6646 }
6647
6648 static __poll_t
6649 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6650 {
6651         struct trace_array *tr = iter->tr;
6652
6653         /* Iterators are static, they should be filled or empty */
6654         if (trace_buffer_iter(iter, iter->cpu_file))
6655                 return EPOLLIN | EPOLLRDNORM;
6656
6657         if (tr->trace_flags & TRACE_ITER_BLOCK)
6658                 /*
6659                  * Always select as readable when in blocking mode
6660                  */
6661                 return EPOLLIN | EPOLLRDNORM;
6662         else
6663                 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6664                                              filp, poll_table);
6665 }
6666
6667 static __poll_t
6668 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6669 {
6670         struct trace_iterator *iter = filp->private_data;
6671
6672         return trace_poll(iter, filp, poll_table);
6673 }
6674
6675 /* Must be called with iter->mutex held. */
6676 static int tracing_wait_pipe(struct file *filp)
6677 {
6678         struct trace_iterator *iter = filp->private_data;
6679         int ret;
6680
6681         while (trace_empty(iter)) {
6682
6683                 if ((filp->f_flags & O_NONBLOCK)) {
6684                         return -EAGAIN;
6685                 }
6686
6687                 /*
6688                  * We block until there is something to read, or until tracing is
6689                  * disabled after we have already read something. If tracing is
6690                  * disabled but nothing has been read yet, we keep blocking. This
6691                  * allows a user to cat this file and then enable tracing; after
6692                  * something has been read, we give an EOF when tracing is again disabled.
6693                  *
6694                  * iter->pos will be 0 if we haven't read anything.
6695                  */
6696                 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6697                         break;
6698
6699                 mutex_unlock(&iter->mutex);
6700
6701                 ret = wait_on_pipe(iter, 0);
6702
6703                 mutex_lock(&iter->mutex);
6704
6705                 if (ret)
6706                         return ret;
6707         }
6708
6709         return 1;
6710 }
6711
6712 /*
6713  * Consumer reader.
6714  */
6715 static ssize_t
6716 tracing_read_pipe(struct file *filp, char __user *ubuf,
6717                   size_t cnt, loff_t *ppos)
6718 {
6719         struct trace_iterator *iter = filp->private_data;
6720         ssize_t sret;
6721
6722         /*
6723          * Avoid more than one consumer on a single file descriptor
6724          * This is just a matter of trace coherency; the ring buffer itself
6725          * is protected.
6726          */
6727         mutex_lock(&iter->mutex);
6728
6729         /* return any leftover data */
6730         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6731         if (sret != -EBUSY)
6732                 goto out;
6733
6734         trace_seq_init(&iter->seq);
6735
6736         if (iter->trace->read) {
6737                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6738                 if (sret)
6739                         goto out;
6740         }
6741
6742 waitagain:
6743         sret = tracing_wait_pipe(filp);
6744         if (sret <= 0)
6745                 goto out;
6746
6747         /* stop when tracing is finished */
6748         if (trace_empty(iter)) {
6749                 sret = 0;
6750                 goto out;
6751         }
6752
6753         if (cnt >= PAGE_SIZE)
6754                 cnt = PAGE_SIZE - 1;
6755
6756         /* reset all but tr, trace, and overruns */
6757         trace_iterator_reset(iter);
6758         cpumask_clear(iter->started);
6759         trace_seq_init(&iter->seq);
6760
6761         trace_event_read_lock();
6762         trace_access_lock(iter->cpu_file);
6763         while (trace_find_next_entry_inc(iter) != NULL) {
6764                 enum print_line_t ret;
6765                 int save_len = iter->seq.seq.len;
6766
6767                 ret = print_trace_line(iter);
6768                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6769                         /* don't print partial lines */
6770                         iter->seq.seq.len = save_len;
6771                         break;
6772                 }
6773                 if (ret != TRACE_TYPE_NO_CONSUME)
6774                         trace_consume(iter);
6775
6776                 if (trace_seq_used(&iter->seq) >= cnt)
6777                         break;
6778
6779                 /*
6780                  * Setting the full flag means we reached the trace_seq buffer
6781                  * size, and we should have left via the partial-line check above.
6782                  * One of the trace_seq_* functions is not used properly.
6783                  */
6784                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6785                           iter->ent->type);
6786         }
6787         trace_access_unlock(iter->cpu_file);
6788         trace_event_read_unlock();
6789
6790         /* Now copy what we have to the user */
6791         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6792         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6793                 trace_seq_init(&iter->seq);
6794
6795         /*
6796          * If there was nothing to send to user, in spite of consuming trace
6797          * entries, go back to wait for more entries.
6798          */
6799         if (sret == -EBUSY)
6800                 goto waitagain;
6801
6802 out:
6803         mutex_unlock(&iter->mutex);
6804
6805         return sret;
6806 }
6807
6808 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6809                                      unsigned int idx)
6810 {
6811         __free_page(spd->pages[idx]);
6812 }
6813
6814 static size_t
6815 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6816 {
6817         size_t count;
6818         int save_len;
6819         int ret;
6820
6821         /* Seq buffer is page-sized, exactly what we need. */
6822         for (;;) {
6823                 save_len = iter->seq.seq.len;
6824                 ret = print_trace_line(iter);
6825
6826                 if (trace_seq_has_overflowed(&iter->seq)) {
6827                         iter->seq.seq.len = save_len;
6828                         break;
6829                 }
6830
6831                 /*
6832                  * This should not be hit, because it should only
6833                  * be set if the iter->seq overflowed. But check it
6834                  * anyway to be safe.
6835                  */
6836                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6837                         iter->seq.seq.len = save_len;
6838                         break;
6839                 }
6840
6841                 count = trace_seq_used(&iter->seq) - save_len;
6842                 if (rem < count) {
6843                         rem = 0;
6844                         iter->seq.seq.len = save_len;
6845                         break;
6846                 }
6847
6848                 if (ret != TRACE_TYPE_NO_CONSUME)
6849                         trace_consume(iter);
6850                 rem -= count;
6851                 if (!trace_find_next_entry_inc(iter))   {
6852                         rem = 0;
6853                         iter->ent = NULL;
6854                         break;
6855                 }
6856         }
6857
6858         return rem;
6859 }
6860
6861 static ssize_t tracing_splice_read_pipe(struct file *filp,
6862                                         loff_t *ppos,
6863                                         struct pipe_inode_info *pipe,
6864                                         size_t len,
6865                                         unsigned int flags)
6866 {
6867         struct page *pages_def[PIPE_DEF_BUFFERS];
6868         struct partial_page partial_def[PIPE_DEF_BUFFERS];
6869         struct trace_iterator *iter = filp->private_data;
6870         struct splice_pipe_desc spd = {
6871                 .pages          = pages_def,
6872                 .partial        = partial_def,
6873                 .nr_pages       = 0, /* This gets updated below. */
6874                 .nr_pages_max   = PIPE_DEF_BUFFERS,
6875                 .ops            = &default_pipe_buf_ops,
6876                 .spd_release    = tracing_spd_release_pipe,
6877         };
6878         ssize_t ret;
6879         size_t rem;
6880         unsigned int i;
6881
6882         if (splice_grow_spd(pipe, &spd))
6883                 return -ENOMEM;
6884
6885         mutex_lock(&iter->mutex);
6886
6887         if (iter->trace->splice_read) {
6888                 ret = iter->trace->splice_read(iter, filp,
6889                                                ppos, pipe, len, flags);
6890                 if (ret)
6891                         goto out_err;
6892         }
6893
6894         ret = tracing_wait_pipe(filp);
6895         if (ret <= 0)
6896                 goto out_err;
6897
6898         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6899                 ret = -EFAULT;
6900                 goto out_err;
6901         }
6902
6903         trace_event_read_lock();
6904         trace_access_lock(iter->cpu_file);
6905
6906         /* Fill as many pages as possible. */
6907         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6908                 spd.pages[i] = alloc_page(GFP_KERNEL);
6909                 if (!spd.pages[i])
6910                         break;
6911
6912                 rem = tracing_fill_pipe_page(rem, iter);
6913
6914                 /* Copy the data into the page, so we can start over. */
6915                 ret = trace_seq_to_buffer(&iter->seq,
6916                                           page_address(spd.pages[i]),
6917                                           trace_seq_used(&iter->seq));
6918                 if (ret < 0) {
6919                         __free_page(spd.pages[i]);
6920                         break;
6921                 }
6922                 spd.partial[i].offset = 0;
6923                 spd.partial[i].len = trace_seq_used(&iter->seq);
6924
6925                 trace_seq_init(&iter->seq);
6926         }
6927
6928         trace_access_unlock(iter->cpu_file);
6929         trace_event_read_unlock();
6930         mutex_unlock(&iter->mutex);
6931
6932         spd.nr_pages = i;
6933
6934         if (i)
6935                 ret = splice_to_pipe(pipe, &spd);
6936         else
6937                 ret = 0;
6938 out:
6939         splice_shrink_spd(&spd);
6940         return ret;
6941
6942 out_err:
6943         mutex_unlock(&iter->mutex);
6944         goto out;
6945 }
6946
6947 static ssize_t
6948 tracing_entries_read(struct file *filp, char __user *ubuf,
6949                      size_t cnt, loff_t *ppos)
6950 {
6951         struct inode *inode = file_inode(filp);
6952         struct trace_array *tr = inode->i_private;
6953         int cpu = tracing_get_cpu(inode);
6954         char buf[64];
6955         int r = 0;
6956         ssize_t ret;
6957
6958         mutex_lock(&trace_types_lock);
6959
6960         if (cpu == RING_BUFFER_ALL_CPUS) {
6961                 int cpu, buf_size_same;
6962                 unsigned long size;
6963
6964                 size = 0;
6965                 buf_size_same = 1;
6966                 /* check if all cpu sizes are same */
6967                 for_each_tracing_cpu(cpu) {
6968                         /* fill in the size from first enabled cpu */
6969                         if (size == 0)
6970                                 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6971                         if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6972                                 buf_size_same = 0;
6973                                 break;
6974                         }
6975                 }
6976
6977                 if (buf_size_same) {
6978                         if (!ring_buffer_expanded)
6979                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
6980                                             size >> 10,
6981                                             trace_buf_size >> 10);
6982                         else
6983                                 r = sprintf(buf, "%lu\n", size >> 10);
6984                 } else
6985                         r = sprintf(buf, "X\n");
6986         } else
6987                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6988
6989         mutex_unlock(&trace_types_lock);
6990
6991         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6992         return ret;
6993 }
6994
6995 static ssize_t
6996 tracing_entries_write(struct file *filp, const char __user *ubuf,
6997                       size_t cnt, loff_t *ppos)
6998 {
6999         struct inode *inode = file_inode(filp);
7000         struct trace_array *tr = inode->i_private;
7001         unsigned long val;
7002         int ret;
7003
7004         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7005         if (ret)
7006                 return ret;
7007
7008         /* must have at least 1 entry */
7009         if (!val)
7010                 return -EINVAL;
7011
7012         /* value is in KB */
7013         val <<= 10;
7014         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
7015         if (ret < 0)
7016                 return ret;
7017
7018         *ppos += cnt;
7019
7020         return cnt;
7021 }
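
/*
 * Illustrative usage: this handler backs the per-instance buffer size
 * file (typically "buffer_size_kb" in tracefs). The value is taken in
 * KB and shifted into bytes before resizing, e.g.
 *
 *	echo 4096 > buffer_size_kb
 *
 * requests roughly 4 MB of ring buffer per CPU.
 */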
7022
7023 static ssize_t
7024 tracing_total_entries_read(struct file *filp, char __user *ubuf,
7025                                 size_t cnt, loff_t *ppos)
7026 {
7027         struct trace_array *tr = filp->private_data;
7028         char buf[64];
7029         int r, cpu;
7030         unsigned long size = 0, expanded_size = 0;
7031
7032         mutex_lock(&trace_types_lock);
7033         for_each_tracing_cpu(cpu) {
7034                 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
7035                 if (!ring_buffer_expanded)
7036                         expanded_size += trace_buf_size >> 10;
7037         }
7038         if (ring_buffer_expanded)
7039                 r = sprintf(buf, "%lu\n", size);
7040         else
7041                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
7042         mutex_unlock(&trace_types_lock);
7043
7044         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7045 }
7046
7047 static ssize_t
7048 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
7049                           size_t cnt, loff_t *ppos)
7050 {
7051         /*
7052          * There is no need to read what the user has written; this function
7053          * exists only to make sure that "echo" into the file does not fail.
7054          */
7055
7056         *ppos += cnt;
7057
7058         return cnt;
7059 }
7060
7061 static int
7062 tracing_free_buffer_release(struct inode *inode, struct file *filp)
7063 {
7064         struct trace_array *tr = inode->i_private;
7065
7066         /* disable tracing? */
7067         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
7068                 tracer_tracing_off(tr);
7069         /* resize the ring buffer to 0 */
7070         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
7071
7072         trace_array_put(tr);
7073
7074         return 0;
7075 }
7076
7077 static ssize_t
7078 tracing_mark_write(struct file *filp, const char __user *ubuf,
7079                                         size_t cnt, loff_t *fpos)
7080 {
7081         struct trace_array *tr = filp->private_data;
7082         struct ring_buffer_event *event;
7083         enum event_trigger_type tt = ETT_NONE;
7084         struct trace_buffer *buffer;
7085         struct print_entry *entry;
7086         ssize_t written;
7087         int size;
7088         int len;
7089
7090 /* Used in tracing_mark_raw_write() as well */
7091 #define FAULTED_STR "<faulted>"
7092 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
7093
7094         if (tracing_disabled)
7095                 return -EINVAL;
7096
7097         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7098                 return -EINVAL;
7099
7100         if (cnt > TRACE_BUF_SIZE)
7101                 cnt = TRACE_BUF_SIZE;
7102
7103         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7104
7105         size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
7106
7107         /* If less than "<faulted>", then make sure we can still add that */
7108         if (cnt < FAULTED_SIZE)
7109                 size += FAULTED_SIZE - cnt;
7110
7111         buffer = tr->array_buffer.buffer;
7112         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7113                                             tracing_gen_ctx());
7114         if (unlikely(!event))
7115                 /* Ring buffer disabled, return as if not open for write */
7116                 return -EBADF;
7117
7118         entry = ring_buffer_event_data(event);
7119         entry->ip = _THIS_IP_;
7120
7121         len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
7122         if (len) {
7123                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7124                 cnt = FAULTED_SIZE;
7125                 written = -EFAULT;
7126         } else
7127                 written = cnt;
7128
7129         if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7130                 /* do not add \n before testing triggers, but add \0 */
7131                 entry->buf[cnt] = '\0';
7132                 tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7133         }
7134
7135         if (entry->buf[cnt - 1] != '\n') {
7136                 entry->buf[cnt] = '\n';
7137                 entry->buf[cnt + 1] = '\0';
7138         } else
7139                 entry->buf[cnt] = '\0';
7140
7141         if (static_branch_unlikely(&trace_marker_exports_enabled))
7142                 ftrace_exports(event, TRACE_EXPORT_MARKER);
7143         __buffer_unlock_commit(buffer, event);
7144
7145         if (tt)
7146                 event_triggers_post_call(tr->trace_marker_file, tt);
7147
7148         return written;
7149 }
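
/*
 * Illustrative usage: tracing_mark_write() backs the user-space marker
 * file (typically "trace_marker" in tracefs, see tracing_mark_fops
 * below), so a string written there shows up as a TRACE_PRINT event in
 * the trace, e.g.
 *
 *	echo "hello from userspace" > trace_marker
 *
 * Writes larger than TRACE_BUF_SIZE are clamped, resulting in a short
 * write being reported back to the caller.
 */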
7150
7151 /* Limit it for now to 3K (including tag) */
7152 #define RAW_DATA_MAX_SIZE (1024*3)
7153
7154 static ssize_t
7155 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7156                                         size_t cnt, loff_t *fpos)
7157 {
7158         struct trace_array *tr = filp->private_data;
7159         struct ring_buffer_event *event;
7160         struct trace_buffer *buffer;
7161         struct raw_data_entry *entry;
7162         ssize_t written;
7163         int size;
7164         int len;
7165
7166 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7167
7168         if (tracing_disabled)
7169                 return -EINVAL;
7170
7171         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7172                 return -EINVAL;
7173
7174         /* The marker must at least have a tag id */
7175         if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
7176                 return -EINVAL;
7177
7178         if (cnt > TRACE_BUF_SIZE)
7179                 cnt = TRACE_BUF_SIZE;
7180
7181         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7182
7183         size = sizeof(*entry) + cnt;
7184         if (cnt < FAULT_SIZE_ID)
7185                 size += FAULT_SIZE_ID - cnt;
7186
7187         buffer = tr->array_buffer.buffer;
7188         event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7189                                             tracing_gen_ctx());
7190         if (!event)
7191                 /* Ring buffer disabled, return as if not open for write */
7192                 return -EBADF;
7193
7194         entry = ring_buffer_event_data(event);
7195
7196         len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7197         if (len) {
7198                 entry->id = -1;
7199                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7200                 written = -EFAULT;
7201         } else
7202                 written = cnt;
7203
7204         __buffer_unlock_commit(buffer, event);
7205
7206         return written;
7207 }
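
/*
 * Illustrative user-space sketch (the path is an assumption; the file is
 * typically exposed as "trace_marker_raw"): the buffer handed to this
 * handler must start with a 4-byte tag id followed by the raw payload,
 * written in a single write():
 *
 *	struct { unsigned int id; char data[8]; } rec = { 42, "payload" };
 *	int fd = open("trace_marker_raw", O_WRONLY);
 *
 *	write(fd, &rec, sizeof(rec));
 *
 * A write shorter than sizeof(unsigned int) or larger than
 * RAW_DATA_MAX_SIZE is rejected with -EINVAL.
 */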
7208
7209 static int tracing_clock_show(struct seq_file *m, void *v)
7210 {
7211         struct trace_array *tr = m->private;
7212         int i;
7213
7214         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7215                 seq_printf(m,
7216                         "%s%s%s%s", i ? " " : "",
7217                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7218                         i == tr->clock_id ? "]" : "");
7219         seq_putc(m, '\n');
7220
7221         return 0;
7222 }
7223
7224 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7225 {
7226         int i;
7227
7228         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7229                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
7230                         break;
7231         }
7232         if (i == ARRAY_SIZE(trace_clocks))
7233                 return -EINVAL;
7234
7235         mutex_lock(&trace_types_lock);
7236
7237         tr->clock_id = i;
7238
7239         ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7240
7241         /*
7242          * New clock may not be consistent with the previous clock.
7243          * Reset the buffer so that it doesn't have incomparable timestamps.
7244          */
7245         tracing_reset_online_cpus(&tr->array_buffer);
7246
7247 #ifdef CONFIG_TRACER_MAX_TRACE
7248         if (tr->max_buffer.buffer)
7249                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7250         tracing_reset_online_cpus(&tr->max_buffer);
7251 #endif
7252
7253         mutex_unlock(&trace_types_lock);
7254
7255         return 0;
7256 }
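
/*
 * Illustrative usage: tracing_set_clock() is driven from userspace via
 * tracing_clock_write() below (typically the "trace_clock" file), e.g.
 *
 *	echo mono > trace_clock
 *
 * Note that switching clocks resets the affected buffers, since old and
 * new timestamps would not be comparable.
 */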
7257
7258 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7259                                    size_t cnt, loff_t *fpos)
7260 {
7261         struct seq_file *m = filp->private_data;
7262         struct trace_array *tr = m->private;
7263         char buf[64];
7264         const char *clockstr;
7265         int ret;
7266
7267         if (cnt >= sizeof(buf))
7268                 return -EINVAL;
7269
7270         if (copy_from_user(buf, ubuf, cnt))
7271                 return -EFAULT;
7272
7273         buf[cnt] = 0;
7274
7275         clockstr = strstrip(buf);
7276
7277         ret = tracing_set_clock(tr, clockstr);
7278         if (ret)
7279                 return ret;
7280
7281         *fpos += cnt;
7282
7283         return cnt;
7284 }
7285
7286 static int tracing_clock_open(struct inode *inode, struct file *file)
7287 {
7288         struct trace_array *tr = inode->i_private;
7289         int ret;
7290
7291         ret = tracing_check_open_get_tr(tr);
7292         if (ret)
7293                 return ret;
7294
7295         ret = single_open(file, tracing_clock_show, inode->i_private);
7296         if (ret < 0)
7297                 trace_array_put(tr);
7298
7299         return ret;
7300 }
7301
7302 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7303 {
7304         struct trace_array *tr = m->private;
7305
7306         mutex_lock(&trace_types_lock);
7307
7308         if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7309                 seq_puts(m, "delta [absolute]\n");
7310         else
7311                 seq_puts(m, "[delta] absolute\n");
7312
7313         mutex_unlock(&trace_types_lock);
7314
7315         return 0;
7316 }
7317
7318 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7319 {
7320         struct trace_array *tr = inode->i_private;
7321         int ret;
7322
7323         ret = tracing_check_open_get_tr(tr);
7324         if (ret)
7325                 return ret;
7326
7327         ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7328         if (ret < 0)
7329                 trace_array_put(tr);
7330
7331         return ret;
7332 }
7333
7334 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7335 {
7336         if (rbe == this_cpu_read(trace_buffered_event))
7337                 return ring_buffer_time_stamp(buffer);
7338
7339         return ring_buffer_event_time_stamp(buffer, rbe);
7340 }
7341
7342 /*
7343  * Set or disable using the per CPU trace_buffered_event when possible.
7344  */
7345 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7346 {
7347         int ret = 0;
7348
7349         mutex_lock(&trace_types_lock);
7350
7351         if (set && tr->no_filter_buffering_ref++)
7352                 goto out;
7353
7354         if (!set) {
7355                 if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
7356                         ret = -EINVAL;
7357                         goto out;
7358                 }
7359
7360                 --tr->no_filter_buffering_ref;
7361         }
7362  out:
7363         mutex_unlock(&trace_types_lock);
7364
7365         return ret;
7366 }
7367
7368 struct ftrace_buffer_info {
7369         struct trace_iterator   iter;
7370         void                    *spare;
7371         unsigned int            spare_cpu;
7372         unsigned int            read;
7373 };
7374
7375 #ifdef CONFIG_TRACER_SNAPSHOT
7376 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7377 {
7378         struct trace_array *tr = inode->i_private;
7379         struct trace_iterator *iter;
7380         struct seq_file *m;
7381         int ret;
7382
7383         ret = tracing_check_open_get_tr(tr);
7384         if (ret)
7385                 return ret;
7386
7387         if (file->f_mode & FMODE_READ) {
7388                 iter = __tracing_open(inode, file, true);
7389                 if (IS_ERR(iter))
7390                         ret = PTR_ERR(iter);
7391         } else {
7392                 /* Writes still need the seq_file to hold the private data */
7393                 ret = -ENOMEM;
7394                 m = kzalloc(sizeof(*m), GFP_KERNEL);
7395                 if (!m)
7396                         goto out;
7397                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7398                 if (!iter) {
7399                         kfree(m);
7400                         goto out;
7401                 }
7402                 ret = 0;
7403
7404                 iter->tr = tr;
7405                 iter->array_buffer = &tr->max_buffer;
7406                 iter->cpu_file = tracing_get_cpu(inode);
7407                 m->private = iter;
7408                 file->private_data = m;
7409         }
7410 out:
7411         if (ret < 0)
7412                 trace_array_put(tr);
7413
7414         return ret;
7415 }
7416
7417 static ssize_t
7418 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7419                        loff_t *ppos)
7420 {
7421         struct seq_file *m = filp->private_data;
7422         struct trace_iterator *iter = m->private;
7423         struct trace_array *tr = iter->tr;
7424         unsigned long val;
7425         int ret;
7426
7427         ret = tracing_update_buffers();
7428         if (ret < 0)
7429                 return ret;
7430
7431         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7432         if (ret)
7433                 return ret;
7434
7435         mutex_lock(&trace_types_lock);
7436
7437         if (tr->current_trace->use_max_tr) {
7438                 ret = -EBUSY;
7439                 goto out;
7440         }
7441
7442         arch_spin_lock(&tr->max_lock);
7443         if (tr->cond_snapshot)
7444                 ret = -EBUSY;
7445         arch_spin_unlock(&tr->max_lock);
7446         if (ret)
7447                 goto out;
7448
7449         switch (val) {
7450         case 0:
7451                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7452                         ret = -EINVAL;
7453                         break;
7454                 }
7455                 if (tr->allocated_snapshot)
7456                         free_snapshot(tr);
7457                 break;
7458         case 1:
7459 /* Only allow per-cpu swap if the ring buffer supports it */
7460 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7461                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7462                         ret = -EINVAL;
7463                         break;
7464                 }
7465 #endif
7466                 if (tr->allocated_snapshot)
7467                         ret = resize_buffer_duplicate_size(&tr->max_buffer,
7468                                         &tr->array_buffer, iter->cpu_file);
7469                 else
7470                         ret = tracing_alloc_snapshot_instance(tr);
7471                 if (ret < 0)
7472                         break;
7473                 local_irq_disable();
7474                 /* Now, we're going to swap */
7475                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7476                         update_max_tr(tr, current, smp_processor_id(), NULL);
7477                 else
7478                         update_max_tr_single(tr, current, iter->cpu_file);
7479                 local_irq_enable();
7480                 break;
7481         default:
7482                 if (tr->allocated_snapshot) {
7483                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7484                                 tracing_reset_online_cpus(&tr->max_buffer);
7485                         else
7486                                 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7487                 }
7488                 break;
7489         }
7490
7491         if (ret >= 0) {
7492                 *ppos += cnt;
7493                 ret = cnt;
7494         }
7495 out:
7496         mutex_unlock(&trace_types_lock);
7497         return ret;
7498 }
7499
7500 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7501 {
7502         struct seq_file *m = file->private_data;
7503         int ret;
7504
7505         ret = tracing_release(inode, file);
7506
7507         if (file->f_mode & FMODE_READ)
7508                 return ret;
7509
7510         /* If write only, the seq_file is just a stub */
7511         if (m)
7512                 kfree(m->private);
7513         kfree(m);
7514
7515         return 0;
7516 }
7517
7518 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7519 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7520                                     size_t count, loff_t *ppos);
7521 static int tracing_buffers_release(struct inode *inode, struct file *file);
7522 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7523                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7524
7525 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7526 {
7527         struct ftrace_buffer_info *info;
7528         int ret;
7529
7530         /* The following checks for tracefs lockdown */
7531         ret = tracing_buffers_open(inode, filp);
7532         if (ret < 0)
7533                 return ret;
7534
7535         info = filp->private_data;
7536
7537         if (info->iter.trace->use_max_tr) {
7538                 tracing_buffers_release(inode, filp);
7539                 return -EBUSY;
7540         }
7541
7542         info->iter.snapshot = true;
7543         info->iter.array_buffer = &info->iter.tr->max_buffer;
7544
7545         return ret;
7546 }
7547
7548 #endif /* CONFIG_TRACER_SNAPSHOT */
7549
7550
7551 static const struct file_operations tracing_thresh_fops = {
7552         .open           = tracing_open_generic,
7553         .read           = tracing_thresh_read,
7554         .write          = tracing_thresh_write,
7555         .llseek         = generic_file_llseek,
7556 };
7557
7558 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7559 static const struct file_operations tracing_max_lat_fops = {
7560         .open           = tracing_open_generic,
7561         .read           = tracing_max_lat_read,
7562         .write          = tracing_max_lat_write,
7563         .llseek         = generic_file_llseek,
7564 };
7565 #endif
7566
7567 static const struct file_operations set_tracer_fops = {
7568         .open           = tracing_open_generic,
7569         .read           = tracing_set_trace_read,
7570         .write          = tracing_set_trace_write,
7571         .llseek         = generic_file_llseek,
7572 };
7573
7574 static const struct file_operations tracing_pipe_fops = {
7575         .open           = tracing_open_pipe,
7576         .poll           = tracing_poll_pipe,
7577         .read           = tracing_read_pipe,
7578         .splice_read    = tracing_splice_read_pipe,
7579         .release        = tracing_release_pipe,
7580         .llseek         = no_llseek,
7581 };
7582
7583 static const struct file_operations tracing_entries_fops = {
7584         .open           = tracing_open_generic_tr,
7585         .read           = tracing_entries_read,
7586         .write          = tracing_entries_write,
7587         .llseek         = generic_file_llseek,
7588         .release        = tracing_release_generic_tr,
7589 };
7590
7591 static const struct file_operations tracing_total_entries_fops = {
7592         .open           = tracing_open_generic_tr,
7593         .read           = tracing_total_entries_read,
7594         .llseek         = generic_file_llseek,
7595         .release        = tracing_release_generic_tr,
7596 };
7597
7598 static const struct file_operations tracing_free_buffer_fops = {
7599         .open           = tracing_open_generic_tr,
7600         .write          = tracing_free_buffer_write,
7601         .release        = tracing_free_buffer_release,
7602 };
7603
7604 static const struct file_operations tracing_mark_fops = {
7605         .open           = tracing_mark_open,
7606         .write          = tracing_mark_write,
7607         .release        = tracing_release_generic_tr,
7608 };
7609
7610 static const struct file_operations tracing_mark_raw_fops = {
7611         .open           = tracing_mark_open,
7612         .write          = tracing_mark_raw_write,
7613         .release        = tracing_release_generic_tr,
7614 };
7615
7616 static const struct file_operations trace_clock_fops = {
7617         .open           = tracing_clock_open,
7618         .read           = seq_read,
7619         .llseek         = seq_lseek,
7620         .release        = tracing_single_release_tr,
7621         .write          = tracing_clock_write,
7622 };
7623
7624 static const struct file_operations trace_time_stamp_mode_fops = {
7625         .open           = tracing_time_stamp_mode_open,
7626         .read           = seq_read,
7627         .llseek         = seq_lseek,
7628         .release        = tracing_single_release_tr,
7629 };
7630
7631 #ifdef CONFIG_TRACER_SNAPSHOT
7632 static const struct file_operations snapshot_fops = {
7633         .open           = tracing_snapshot_open,
7634         .read           = seq_read,
7635         .write          = tracing_snapshot_write,
7636         .llseek         = tracing_lseek,
7637         .release        = tracing_snapshot_release,
7638 };
7639
7640 static const struct file_operations snapshot_raw_fops = {
7641         .open           = snapshot_raw_open,
7642         .read           = tracing_buffers_read,
7643         .release        = tracing_buffers_release,
7644         .splice_read    = tracing_buffers_splice_read,
7645         .llseek         = no_llseek,
7646 };
7647
7648 #endif /* CONFIG_TRACER_SNAPSHOT */
7649
7650 /*
7651  * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7652  * @filp: The active open file structure
7653  * @ubuf: The userspace provided buffer holding the value to write
7654  * @cnt: The number of bytes to read from the user buffer
7655  * @ppos: The current "file" position
7656  *
7657  * This function implements the write interface for a struct trace_min_max_param.
7658  * The filp->private_data must point to a trace_min_max_param structure that
7659  * defines where to write the value, the min and the max acceptable values,
7660  * and a lock to protect the write.
7661  */
7662 static ssize_t
7663 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7664 {
7665         struct trace_min_max_param *param = filp->private_data;
7666         u64 val;
7667         int err;
7668
7669         if (!param)
7670                 return -EFAULT;
7671
7672         err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7673         if (err)
7674                 return err;
7675
7676         if (param->lock)
7677                 mutex_lock(param->lock);
7678
7679         if (param->min && val < *param->min)
7680                 err = -EINVAL;
7681
7682         if (param->max && val > *param->max)
7683                 err = -EINVAL;
7684
7685         if (!err)
7686                 *param->val = val;
7687
7688         if (param->lock)
7689                 mutex_unlock(param->lock);
7690
7691         if (err)
7692                 return err;
7693
7694         return cnt;
7695 }
7696
7697 /*
7698  * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7699  * @filp: The active open file structure
7700  * @ubuf: The userspace provided buffer to read value into
7701  * @cnt: The maximum number of bytes to read
7702  * @ppos: The current "file" position
7703  *
7704  * This function implements the read interface for a struct trace_min_max_param.
7705  * The filp->private_data must point to a trace_min_max_param struct with valid
7706  * data.
7707  */
7708 static ssize_t
7709 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7710 {
7711         struct trace_min_max_param *param = filp->private_data;
7712         char buf[U64_STR_SIZE];
7713         int len;
7714         u64 val;
7715
7716         if (!param)
7717                 return -EFAULT;
7718
7719         val = *param->val;
7720
7721         if (cnt > sizeof(buf))
7722                 cnt = sizeof(buf);
7723
7724         len = snprintf(buf, sizeof(buf), "%llu\n", val);
7725
7726         return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7727 }
7728
7729 const struct file_operations trace_min_max_fops = {
7730         .open           = tracing_open_generic,
7731         .read           = trace_min_max_read,
7732         .write          = trace_min_max_write,
7733 };
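
/*
 * Minimal sketch of how a caller might wire up trace_min_max_fops (the
 * names below are hypothetical; struct trace_min_max_param is defined in
 * trace.h, TRACE_MODE_WRITE is assumed to be defined alongside the
 * TRACE_MODE_READ used above, and trace_create_file() is used the same
 * way as for the "eval_map" file above):
 *
 *	static DEFINE_MUTEX(my_lock);
 *	static u64 my_val;
 *	static u64 my_min = 1, my_max = 100;
 *
 *	static struct trace_min_max_param my_param = {
 *		.lock	= &my_lock,
 *		.val	= &my_val,
 *		.min	= &my_min,
 *		.max	= &my_max,
 *	};
 *
 *	trace_create_file("my_knob", TRACE_MODE_WRITE, parent,
 *			  &my_param, &trace_min_max_fops);
 */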
7734
7735 #define TRACING_LOG_ERRS_MAX    8
7736 #define TRACING_LOG_LOC_MAX     128
7737
7738 #define CMD_PREFIX "  Command: "
7739
7740 struct err_info {
7741         const char      **errs; /* ptr to loc-specific array of err strings */
7742         u8              type;   /* index into errs -> specific err string */
7743         u16             pos;    /* caret position */
7744         u64             ts;
7745 };
7746
7747 struct tracing_log_err {
7748         struct list_head        list;
7749         struct err_info         info;
7750         char                    loc[TRACING_LOG_LOC_MAX]; /* err location */
7751         char                    *cmd;                     /* what caused err */
7752 };
7753
7754 static DEFINE_MUTEX(tracing_err_log_lock);
7755
7756 static struct tracing_log_err *alloc_tracing_log_err(int len)
7757 {
7758         struct tracing_log_err *err;
7759
7760         err = kzalloc(sizeof(*err), GFP_KERNEL);
7761         if (!err)
7762                 return ERR_PTR(-ENOMEM);
7763
7764         err->cmd = kzalloc(len, GFP_KERNEL);
7765         if (!err->cmd) {
7766                 kfree(err);
7767                 return ERR_PTR(-ENOMEM);
7768         }
7769
7770         return err;
7771 }
7772
7773 static void free_tracing_log_err(struct tracing_log_err *err)
7774 {
7775         kfree(err->cmd);
7776         kfree(err);
7777 }
7778
7779 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr,
7780                                                    int len)
7781 {
7782         struct tracing_log_err *err;
7783
7784         if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7785                 err = alloc_tracing_log_err(len);
7786                 if (PTR_ERR(err) != -ENOMEM)
7787                         tr->n_err_log_entries++;
7788
7789                 return err;
7790         }
7791
7792         err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7793         kfree(err->cmd);
7794         err->cmd = kzalloc(len, GFP_KERNEL);
7795         if (!err->cmd)
7796                 return ERR_PTR(-ENOMEM);
7797         list_del(&err->list);
7798
7799         return err;
7800 }
7801
7802 /**
7803  * err_pos - find the position of a string within a command for error careting
7804  * @cmd: The tracing command that caused the error
7805  * @str: The string to position the caret at within @cmd
7806  *
7807  * Finds the position of the first occurrence of @str within @cmd.  The
7808  * return value can be passed to tracing_log_err() for caret placement
7809  * within @cmd.
7810  *
7811  * Returns the index within @cmd of the first occurrence of @str or 0
7812  * if @str was not found.
7813  */
7814 unsigned int err_pos(char *cmd, const char *str)
7815 {
7816         char *found;
7817
7818         if (WARN_ON(!strlen(cmd)))
7819                 return 0;
7820
7821         found = strstr(cmd, str);
7822         if (found)
7823                 return found - cmd;
7824
7825         return 0;
7826 }
7827
7828 /**
7829  * tracing_log_err - write an error to the tracing error log
7830  * @tr: The associated trace array for the error (NULL for top level array)
7831  * @loc: A string describing where the error occurred
7832  * @cmd: The tracing command that caused the error
7833  * @errs: The array of loc-specific static error strings
7834  * @type: The index into errs[], which produces the specific static err string
7835  * @pos: The position the caret should be placed in the cmd
7836  *
7837  * Writes an error into tracing/error_log of the form:
7838  *
7839  * <loc>: error: <text>
7840  *   Command: <cmd>
7841  *              ^
7842  *
7843  * tracing/error_log is a small log file containing the last
7844  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7845  * unless there has been a tracing error, and the error log can be
7846  * cleared and its memory freed by writing the empty string to it in
7847  * truncation mode, i.e. echo > tracing/error_log.
7848  *
7849  * NOTE: the @errs array along with the @type param is used to
7850  * produce a static error string - this string is not copied when
7851  * the error is logged; only a pointer to it is saved.  See
7852  * existing callers for examples of how static strings are typically
7853  * defined for use with tracing_log_err().
7854  */
7855 void tracing_log_err(struct trace_array *tr,
7856                      const char *loc, const char *cmd,
7857                      const char **errs, u8 type, u16 pos)
7858 {
7859         struct tracing_log_err *err;
7860         int len = 0;
7861
7862         if (!tr)
7863                 tr = &global_trace;
7864
7865         len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1;
7866
7867         mutex_lock(&tracing_err_log_lock);
7868         err = get_tracing_log_err(tr, len);
7869         if (PTR_ERR(err) == -ENOMEM) {
7870                 mutex_unlock(&tracing_err_log_lock);
7871                 return;
7872         }
7873
7874         snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7875         snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd);
7876
7877         err->info.errs = errs;
7878         err->info.type = type;
7879         err->info.pos = pos;
7880         err->info.ts = local_clock();
7881
7882         list_add_tail(&err->list, &tr->err_log);
7883         mutex_unlock(&tracing_err_log_lock);
7884 }
7885
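/*
 * A minimal caller sketch for err_pos()/tracing_log_err(), following the
 * pattern the kernel-doc above describes.  The error-string array, enum
 * and "foo" parser below are illustrative; see existing callers (e.g.
 * the hist trigger code) for real definitions:
 *
 *	static const char *foo_errs[] = {
 *		"Duplicate field name",
 *		"Unknown keyword",
 *	};
 *	enum { FOO_ERR_DUP_FIELD, FOO_ERR_UNKNOWN_KEYWORD };
 *
 *	// On a parse failure, log the failing command with the caret
 *	// placed under the offending token:
 *	tracing_log_err(tr, "foo: parse", cmd, foo_errs,
 *			FOO_ERR_UNKNOWN_KEYWORD, err_pos(cmd, bad_tok));
 *
 * Only the pointer into foo_errs[] is stored, so the strings must stay
 * valid for as long as the error can appear in tracing/error_log.
 */
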
7886 static void clear_tracing_err_log(struct trace_array *tr)
7887 {
7888         struct tracing_log_err *err, *next;
7889
7890         mutex_lock(&tracing_err_log_lock);
7891         list_for_each_entry_safe(err, next, &tr->err_log, list) {
7892                 list_del(&err->list);
7893                 free_tracing_log_err(err);
7894         }
7895
7896         tr->n_err_log_entries = 0;
7897         mutex_unlock(&tracing_err_log_lock);
7898 }
7899
7900 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7901 {
7902         struct trace_array *tr = m->private;
7903
7904         mutex_lock(&tracing_err_log_lock);
7905
7906         return seq_list_start(&tr->err_log, *pos);
7907 }
7908
7909 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7910 {
7911         struct trace_array *tr = m->private;
7912
7913         return seq_list_next(v, &tr->err_log, pos);
7914 }
7915
7916 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7917 {
7918         mutex_unlock(&tracing_err_log_lock);
7919 }
7920
7921 static void tracing_err_log_show_pos(struct seq_file *m, u16 pos)
7922 {
7923         u16 i;
7924
7925         for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7926                 seq_putc(m, ' ');
7927         for (i = 0; i < pos; i++)
7928                 seq_putc(m, ' ');
7929         seq_puts(m, "^\n");
7930 }
7931
7932 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7933 {
7934         struct tracing_log_err *err = v;
7935
7936         if (err) {
7937                 const char *err_text = err->info.errs[err->info.type];
7938                 u64 sec = err->info.ts;
7939                 u32 nsec;
7940
7941                 nsec = do_div(sec, NSEC_PER_SEC);
7942                 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7943                            err->loc, err_text);
7944                 seq_printf(m, "%s", err->cmd);
7945                 tracing_err_log_show_pos(m, err->info.pos);
7946         }
7947
7948         return 0;
7949 }
7950
7951 static const struct seq_operations tracing_err_log_seq_ops = {
7952         .start  = tracing_err_log_seq_start,
7953         .next   = tracing_err_log_seq_next,
7954         .stop   = tracing_err_log_seq_stop,
7955         .show   = tracing_err_log_seq_show
7956 };
7957
7958 static int tracing_err_log_open(struct inode *inode, struct file *file)
7959 {
7960         struct trace_array *tr = inode->i_private;
7961         int ret = 0;
7962
7963         ret = tracing_check_open_get_tr(tr);
7964         if (ret)
7965                 return ret;
7966
7967         /* If this file was opened for write, then erase contents */
7968         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7969                 clear_tracing_err_log(tr);
7970
7971         if (file->f_mode & FMODE_READ) {
7972                 ret = seq_open(file, &tracing_err_log_seq_ops);
7973                 if (!ret) {
7974                         struct seq_file *m = file->private_data;
7975                         m->private = tr;
7976                 } else {
7977                         trace_array_put(tr);
7978                 }
7979         }
7980         return ret;
7981 }
7982
7983 static ssize_t tracing_err_log_write(struct file *file,
7984                                      const char __user *buffer,
7985                                      size_t count, loff_t *ppos)
7986 {
7987         return count;
7988 }
7989
7990 static int tracing_err_log_release(struct inode *inode, struct file *file)
7991 {
7992         struct trace_array *tr = inode->i_private;
7993
7994         trace_array_put(tr);
7995
7996         if (file->f_mode & FMODE_READ)
7997                 seq_release(inode, file);
7998
7999         return 0;
8000 }
8001
8002 static const struct file_operations tracing_err_log_fops = {
8003         .open           = tracing_err_log_open,
8004         .write          = tracing_err_log_write,
8005         .read           = seq_read,
8006         .llseek         = seq_lseek,
8007         .release        = tracing_err_log_release,
8008 };
8009
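/*
 * What the file looks like from user space (format assembled by
 * tracing_err_log_seq_show() and tracing_err_log_show_pos() above; the
 * location, command and caret position shown are illustrative, and the
 * paths assume the default tracefs mount):
 *
 *	# cat /sys/kernel/tracing/error_log
 *	[  123.456789] foo: parse: error: Unknown keyword
 *	  Command: keys=pid bogus_keyword
 *	                    ^
 *	# echo > /sys/kernel/tracing/error_log		# truncating write clears it
 *
 * The caret line is indented by the width of CMD_PREFIX plus the @pos
 * value passed to tracing_log_err().
 */
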
8010 static int tracing_buffers_open(struct inode *inode, struct file *filp)
8011 {
8012         struct trace_array *tr = inode->i_private;
8013         struct ftrace_buffer_info *info;
8014         int ret;
8015
8016         ret = tracing_check_open_get_tr(tr);
8017         if (ret)
8018                 return ret;
8019
8020         info = kvzalloc(sizeof(*info), GFP_KERNEL);
8021         if (!info) {
8022                 trace_array_put(tr);
8023                 return -ENOMEM;
8024         }
8025
8026         mutex_lock(&trace_types_lock);
8027
8028         info->iter.tr           = tr;
8029         info->iter.cpu_file     = tracing_get_cpu(inode);
8030         info->iter.trace        = tr->current_trace;
8031         info->iter.array_buffer = &tr->array_buffer;
8032         info->spare             = NULL;
8033         /* Force reading ring buffer for first read */
8034         info->read              = (unsigned int)-1;
8035
8036         filp->private_data = info;
8037
8038         tr->trace_ref++;
8039
8040         mutex_unlock(&trace_types_lock);
8041
8042         ret = nonseekable_open(inode, filp);
8043         if (ret < 0)
8044                 trace_array_put(tr);
8045
8046         return ret;
8047 }
8048
8049 static __poll_t
8050 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
8051 {
8052         struct ftrace_buffer_info *info = filp->private_data;
8053         struct trace_iterator *iter = &info->iter;
8054
8055         return trace_poll(iter, filp, poll_table);
8056 }
8057
8058 static ssize_t
8059 tracing_buffers_read(struct file *filp, char __user *ubuf,
8060                      size_t count, loff_t *ppos)
8061 {
8062         struct ftrace_buffer_info *info = filp->private_data;
8063         struct trace_iterator *iter = &info->iter;
8064         ssize_t ret = 0;
8065         ssize_t size;
8066
8067         if (!count)
8068                 return 0;
8069
8070 #ifdef CONFIG_TRACER_MAX_TRACE
8071         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8072                 return -EBUSY;
8073 #endif
8074
8075         if (!info->spare) {
8076                 info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
8077                                                           iter->cpu_file);
8078                 if (IS_ERR(info->spare)) {
8079                         ret = PTR_ERR(info->spare);
8080                         info->spare = NULL;
8081                 } else {
8082                         info->spare_cpu = iter->cpu_file;
8083                 }
8084         }
8085         if (!info->spare)
8086                 return ret;
8087
8088         /* Do we have previous read data to read? */
8089         if (info->read < PAGE_SIZE)
8090                 goto read;
8091
8092  again:
8093         trace_access_lock(iter->cpu_file);
8094         ret = ring_buffer_read_page(iter->array_buffer->buffer,
8095                                     &info->spare,
8096                                     count,
8097                                     iter->cpu_file, 0);
8098         trace_access_unlock(iter->cpu_file);
8099
8100         if (ret < 0) {
8101                 if (trace_empty(iter)) {
8102                         if ((filp->f_flags & O_NONBLOCK))
8103                                 return -EAGAIN;
8104
8105                         ret = wait_on_pipe(iter, 0);
8106                         if (ret)
8107                                 return ret;
8108
8109                         goto again;
8110                 }
8111                 return 0;
8112         }
8113
8114         info->read = 0;
8115  read:
8116         size = PAGE_SIZE - info->read;
8117         if (size > count)
8118                 size = count;
8119
8120         ret = copy_to_user(ubuf, info->spare + info->read, size);
8121         if (ret == size)
8122                 return -EFAULT;
8123
8124         size -= ret;
8125
8126         *ppos += size;
8127         info->read += size;
8128
8129         return size;
8130 }
8131
8132 static int tracing_buffers_release(struct inode *inode, struct file *file)
8133 {
8134         struct ftrace_buffer_info *info = file->private_data;
8135         struct trace_iterator *iter = &info->iter;
8136
8137         mutex_lock(&trace_types_lock);
8138
8139         iter->tr->trace_ref--;
8140
8141         __trace_array_put(iter->tr);
8142
8143         if (info->spare)
8144                 ring_buffer_free_read_page(iter->array_buffer->buffer,
8145                                            info->spare_cpu, info->spare);
8146         kvfree(info);
8147
8148         mutex_unlock(&trace_types_lock);
8149
8150         return 0;
8151 }
8152
8153 struct buffer_ref {
8154         struct trace_buffer     *buffer;
8155         void                    *page;
8156         int                     cpu;
8157         refcount_t              refcount;
8158 };
8159
8160 static void buffer_ref_release(struct buffer_ref *ref)
8161 {
8162         if (!refcount_dec_and_test(&ref->refcount))
8163                 return;
8164         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8165         kfree(ref);
8166 }
8167
8168 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8169                                     struct pipe_buffer *buf)
8170 {
8171         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8172
8173         buffer_ref_release(ref);
8174         buf->private = 0;
8175 }
8176
8177 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8178                                 struct pipe_buffer *buf)
8179 {
8180         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8181
8182         if (refcount_read(&ref->refcount) > INT_MAX/2)
8183                 return false;
8184
8185         refcount_inc(&ref->refcount);
8186         return true;
8187 }
8188
8189 /* Pipe buffer operations for a buffer. */
8190 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8191         .release                = buffer_pipe_buf_release,
8192         .get                    = buffer_pipe_buf_get,
8193 };
8194
8195 /*
8196  * Callback from splice_to_pipe(); releases any pages still held in
8197  * the spd if we errored out while filling the pipe.
8198  */
8199 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8200 {
8201         struct buffer_ref *ref =
8202                 (struct buffer_ref *)spd->partial[i].private;
8203
8204         buffer_ref_release(ref);
8205         spd->partial[i].private = 0;
8206 }
8207
8208 static ssize_t
8209 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8210                             struct pipe_inode_info *pipe, size_t len,
8211                             unsigned int flags)
8212 {
8213         struct ftrace_buffer_info *info = file->private_data;
8214         struct trace_iterator *iter = &info->iter;
8215         struct partial_page partial_def[PIPE_DEF_BUFFERS];
8216         struct page *pages_def[PIPE_DEF_BUFFERS];
8217         struct splice_pipe_desc spd = {
8218                 .pages          = pages_def,
8219                 .partial        = partial_def,
8220                 .nr_pages_max   = PIPE_DEF_BUFFERS,
8221                 .ops            = &buffer_pipe_buf_ops,
8222                 .spd_release    = buffer_spd_release,
8223         };
8224         struct buffer_ref *ref;
8225         int entries, i;
8226         ssize_t ret = 0;
8227
8228 #ifdef CONFIG_TRACER_MAX_TRACE
8229         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8230                 return -EBUSY;
8231 #endif
8232
8233         if (*ppos & (PAGE_SIZE - 1))
8234                 return -EINVAL;
8235
8236         if (len & (PAGE_SIZE - 1)) {
8237                 if (len < PAGE_SIZE)
8238                         return -EINVAL;
8239                 len &= PAGE_MASK;
8240         }
8241
8242         if (splice_grow_spd(pipe, &spd))
8243                 return -ENOMEM;
8244
8245  again:
8246         trace_access_lock(iter->cpu_file);
8247         entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8248
8249         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
8250                 struct page *page;
8251                 int r;
8252
8253                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8254                 if (!ref) {
8255                         ret = -ENOMEM;
8256                         break;
8257                 }
8258
8259                 refcount_set(&ref->refcount, 1);
8260                 ref->buffer = iter->array_buffer->buffer;
8261                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8262                 if (IS_ERR(ref->page)) {
8263                         ret = PTR_ERR(ref->page);
8264                         ref->page = NULL;
8265                         kfree(ref);
8266                         break;
8267                 }
8268                 ref->cpu = iter->cpu_file;
8269
8270                 r = ring_buffer_read_page(ref->buffer, &ref->page,
8271                                           len, iter->cpu_file, 1);
8272                 if (r < 0) {
8273                         ring_buffer_free_read_page(ref->buffer, ref->cpu,
8274                                                    ref->page);
8275                         kfree(ref);
8276                         break;
8277                 }
8278
8279                 page = virt_to_page(ref->page);
8280
8281                 spd.pages[i] = page;
8282                 spd.partial[i].len = PAGE_SIZE;
8283                 spd.partial[i].offset = 0;
8284                 spd.partial[i].private = (unsigned long)ref;
8285                 spd.nr_pages++;
8286                 *ppos += PAGE_SIZE;
8287
8288                 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8289         }
8290
8291         trace_access_unlock(iter->cpu_file);
8292         spd.nr_pages = i;
8293
8294         /* did we read anything? */
8295         if (!spd.nr_pages) {
8296                 if (ret)
8297                         goto out;
8298
8299                 ret = -EAGAIN;
8300                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8301                         goto out;
8302
8303                 ret = wait_on_pipe(iter, iter->tr->buffer_percent);
8304                 if (ret)
8305                         goto out;
8306
8307                 goto again;
8308         }
8309
8310         ret = splice_to_pipe(pipe, &spd);
8311 out:
8312         splice_shrink_spd(&spd);
8313
8314         return ret;
8315 }
8316
8317 static const struct file_operations tracing_buffers_fops = {
8318         .open           = tracing_buffers_open,
8319         .read           = tracing_buffers_read,
8320         .poll           = tracing_buffers_poll,
8321         .release        = tracing_buffers_release,
8322         .splice_read    = tracing_buffers_splice_read,
8323         .llseek         = no_llseek,
8324 };
8325
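/*
 * A user-space sketch of consuming per_cpu/cpuN/trace_pipe_raw through
 * the splice_read path above (raw, page-sized ring-buffer sub-buffers).
 * The tracefs path and output file name are illustrative:
 *
 *	int fd  = open("/sys/kernel/tracing/per_cpu/cpu0/trace_pipe_raw",
 *		       O_RDONLY);
 *	int out = open("cpu0.raw", O_WRONLY | O_CREAT | O_TRUNC, 0644);
 *	int p[2];
 *
 *	pipe(p);
 *	for (;;) {
 *		// Move whole pages from the ring buffer into the pipe
 *		// without copying them through user space ...
 *		ssize_t n = splice(fd, NULL, p[1], NULL, 64 * 4096,
 *				   SPLICE_F_NONBLOCK);
 *		if (n <= 0)
 *			break;
 *		// ... then land them in the output file.
 *		splice(p[0], NULL, out, NULL, n, 0);
 *	}
 *
 * Requests smaller than one page are rejected with -EINVAL and larger
 * ones are rounded down to whole pages, matching the checks at the top
 * of tracing_buffers_splice_read().
 */
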
8326 static ssize_t
8327 tracing_stats_read(struct file *filp, char __user *ubuf,
8328                    size_t count, loff_t *ppos)
8329 {
8330         struct inode *inode = file_inode(filp);
8331         struct trace_array *tr = inode->i_private;
8332         struct array_buffer *trace_buf = &tr->array_buffer;
8333         int cpu = tracing_get_cpu(inode);
8334         struct trace_seq *s;
8335         unsigned long cnt;
8336         unsigned long long t;
8337         unsigned long usec_rem;
8338
8339         s = kmalloc(sizeof(*s), GFP_KERNEL);
8340         if (!s)
8341                 return -ENOMEM;
8342
8343         trace_seq_init(s);
8344
8345         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8346         trace_seq_printf(s, "entries: %ld\n", cnt);
8347
8348         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8349         trace_seq_printf(s, "overrun: %ld\n", cnt);
8350
8351         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8352         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8353
8354         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8355         trace_seq_printf(s, "bytes: %ld\n", cnt);
8356
8357         if (trace_clocks[tr->clock_id].in_ns) {
8358                 /* local or global for trace_clock */
8359                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8360                 usec_rem = do_div(t, USEC_PER_SEC);
8361                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8362                                                                 t, usec_rem);
8363
8364                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8365                 usec_rem = do_div(t, USEC_PER_SEC);
8366                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8367         } else {
8368                 /* counter or tsc mode for trace_clock */
8369                 trace_seq_printf(s, "oldest event ts: %llu\n",
8370                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8371
8372                 trace_seq_printf(s, "now ts: %llu\n",
8373                                 ring_buffer_time_stamp(trace_buf->buffer));
8374         }
8375
8376         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8377         trace_seq_printf(s, "dropped events: %ld\n", cnt);
8378
8379         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8380         trace_seq_printf(s, "read events: %ld\n", cnt);
8381
8382         count = simple_read_from_buffer(ubuf, count, ppos,
8383                                         s->buffer, trace_seq_used(s));
8384
8385         kfree(s);
8386
8387         return count;
8388 }
8389
8390 static const struct file_operations tracing_stats_fops = {
8391         .open           = tracing_open_generic_tr,
8392         .read           = tracing_stats_read,
8393         .llseek         = generic_file_llseek,
8394         .release        = tracing_release_generic_tr,
8395 };
8396
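/*
 * Example of the per-cpu "stats" output assembled above (values are
 * illustrative; the two timestamp lines switch to raw counter values
 * when the selected trace clock is not in nanoseconds):
 *
 *	# cat per_cpu/cpu0/stats
 *	entries: 1024
 *	overrun: 0
 *	commit overrun: 0
 *	bytes: 53344
 *	oldest event ts: 12345.678901
 *	now ts: 12346.000123
 *	dropped events: 0
 *	read events: 512
 */
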
8397 #ifdef CONFIG_DYNAMIC_FTRACE
8398
8399 static ssize_t
8400 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8401                   size_t cnt, loff_t *ppos)
8402 {
8403         ssize_t ret;
8404         char *buf;
8405         int r;
8406
8407         /* 256 should be plenty to hold the amount needed */
8408         buf = kmalloc(256, GFP_KERNEL);
8409         if (!buf)
8410                 return -ENOMEM;
8411
8412         r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
8413                       ftrace_update_tot_cnt,
8414                       ftrace_number_of_pages,
8415                       ftrace_number_of_groups);
8416
8417         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8418         kfree(buf);
8419         return ret;
8420 }
8421
8422 static const struct file_operations tracing_dyn_info_fops = {
8423         .open           = tracing_open_generic,
8424         .read           = tracing_read_dyn_info,
8425         .llseek         = generic_file_llseek,
8426 };
8427 #endif /* CONFIG_DYNAMIC_FTRACE */
8428
8429 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8430 static void
8431 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8432                 struct trace_array *tr, struct ftrace_probe_ops *ops,
8433                 void *data)
8434 {
8435         tracing_snapshot_instance(tr);
8436 }
8437
8438 static void
8439 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8440                       struct trace_array *tr, struct ftrace_probe_ops *ops,
8441                       void *data)
8442 {
8443         struct ftrace_func_mapper *mapper = data;
8444         long *count = NULL;
8445
8446         if (mapper)
8447                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8448
8449         if (count) {
8450
8451                 if (*count <= 0)
8452                         return;
8453
8454                 (*count)--;
8455         }
8456
8457         tracing_snapshot_instance(tr);
8458 }
8459
8460 static int
8461 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8462                       struct ftrace_probe_ops *ops, void *data)
8463 {
8464         struct ftrace_func_mapper *mapper = data;
8465         long *count = NULL;
8466
8467         seq_printf(m, "%ps:", (void *)ip);
8468
8469         seq_puts(m, "snapshot");
8470
8471         if (mapper)
8472                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8473
8474         if (count)
8475                 seq_printf(m, ":count=%ld\n", *count);
8476         else
8477                 seq_puts(m, ":unlimited\n");
8478
8479         return 0;
8480 }
8481
8482 static int
8483 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8484                      unsigned long ip, void *init_data, void **data)
8485 {
8486         struct ftrace_func_mapper *mapper = *data;
8487
8488         if (!mapper) {
8489                 mapper = allocate_ftrace_func_mapper();
8490                 if (!mapper)
8491                         return -ENOMEM;
8492                 *data = mapper;
8493         }
8494
8495         return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8496 }
8497
8498 static void
8499 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8500                      unsigned long ip, void *data)
8501 {
8502         struct ftrace_func_mapper *mapper = data;
8503
8504         if (!ip) {
8505                 if (!mapper)
8506                         return;
8507                 free_ftrace_func_mapper(mapper, NULL);
8508                 return;
8509         }
8510
8511         ftrace_func_mapper_remove_ip(mapper, ip);
8512 }
8513
8514 static struct ftrace_probe_ops snapshot_probe_ops = {
8515         .func                   = ftrace_snapshot,
8516         .print                  = ftrace_snapshot_print,
8517 };
8518
8519 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8520         .func                   = ftrace_count_snapshot,
8521         .print                  = ftrace_snapshot_print,
8522         .init                   = ftrace_snapshot_init,
8523         .free                   = ftrace_snapshot_free,
8524 };
8525
8526 static int
8527 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8528                                char *glob, char *cmd, char *param, int enable)
8529 {
8530         struct ftrace_probe_ops *ops;
8531         void *count = (void *)-1;
8532         char *number;
8533         int ret;
8534
8535         if (!tr)
8536                 return -ENODEV;
8537
8538         /* hash funcs only work with set_ftrace_filter */
8539         if (!enable)
8540                 return -EINVAL;
8541
8542         ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
8543
8544         if (glob[0] == '!')
8545                 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8546
8547         if (!param)
8548                 goto out_reg;
8549
8550         number = strsep(&param, ":");
8551
8552         if (!strlen(number))
8553                 goto out_reg;
8554
8555         /*
8556          * We use the callback data field (which is a pointer)
8557          * as our counter.
8558          */
8559         ret = kstrtoul(number, 0, (unsigned long *)&count);
8560         if (ret)
8561                 return ret;
8562
8563  out_reg:
8564         ret = tracing_alloc_snapshot_instance(tr);
8565         if (ret < 0)
8566                 goto out;
8567
8568         ret = register_ftrace_function_probe(glob, tr, ops, count);
8569
8570  out:
8571         return ret < 0 ? ret : 0;
8572 }
8573
8574 static struct ftrace_func_command ftrace_snapshot_cmd = {
8575         .name                   = "snapshot",
8576         .func                   = ftrace_trace_snapshot_callback,
8577 };
8578
8579 static __init int register_snapshot_cmd(void)
8580 {
8581         return register_ftrace_command(&ftrace_snapshot_cmd);
8582 }
8583 #else
8584 static inline __init int register_snapshot_cmd(void) { return 0; }
8585 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8586
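/*
 * The "snapshot" command registered above is driven through
 * set_ftrace_filter as a function trigger, in the usual
 * <glob>:<command>[:<count>] form parsed by
 * ftrace_trace_snapshot_callback().  The function name below is just an
 * example:
 *
 *	# echo 'do_exit:snapshot'    > set_ftrace_filter	# every hit
 *	# echo 'do_exit:snapshot:3'  > set_ftrace_filter	# first 3 hits only
 *	# echo '!do_exit:snapshot'   > set_ftrace_filter	# remove (append :0
 *								# if it had a count)
 *
 * Registering the probe also allocates the snapshot buffer for the
 * instance via tracing_alloc_snapshot_instance().
 */
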
8587 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8588 {
8589         if (WARN_ON(!tr->dir))
8590                 return ERR_PTR(-ENODEV);
8591
8592         /* Top directory uses NULL as the parent */
8593         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8594                 return NULL;
8595
8596         /* All sub buffers have a descriptor */
8597         return tr->dir;
8598 }
8599
8600 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8601 {
8602         struct dentry *d_tracer;
8603
8604         if (tr->percpu_dir)
8605                 return tr->percpu_dir;
8606
8607         d_tracer = tracing_get_dentry(tr);
8608         if (IS_ERR(d_tracer))
8609                 return NULL;
8610
8611         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8612
8613         MEM_FAIL(!tr->percpu_dir,
8614                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8615
8616         return tr->percpu_dir;
8617 }
8618
8619 static struct dentry *
8620 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8621                       void *data, long cpu, const struct file_operations *fops)
8622 {
8623         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8624
8625         if (ret) /* See tracing_get_cpu() */
8626                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
8627         return ret;
8628 }
8629
8630 static void
8631 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8632 {
8633         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8634         struct dentry *d_cpu;
8635         char cpu_dir[30]; /* 30 characters should be more than enough */
8636
8637         if (!d_percpu)
8638                 return;
8639
8640         snprintf(cpu_dir, 30, "cpu%ld", cpu);
8641         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8642         if (!d_cpu) {
8643                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8644                 return;
8645         }
8646
8647         /* per cpu trace_pipe */
8648         trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
8649                                 tr, cpu, &tracing_pipe_fops);
8650
8651         /* per cpu trace */
8652         trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
8653                                 tr, cpu, &tracing_fops);
8654
8655         trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
8656                                 tr, cpu, &tracing_buffers_fops);
8657
8658         trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
8659                                 tr, cpu, &tracing_stats_fops);
8660
8661         trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu,
8662                                 tr, cpu, &tracing_entries_fops);
8663
8664 #ifdef CONFIG_TRACER_SNAPSHOT
8665         trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
8666                                 tr, cpu, &snapshot_fops);
8667
8668         trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
8669                                 tr, cpu, &snapshot_raw_fops);
8670 #endif
8671 }
8672
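/*
 * The per-CPU directories created above end up looking like this under
 * an instance's tracefs directory (one cpuN directory per CPU that
 * tracing covers):
 *
 *	per_cpu/cpu0/trace
 *	per_cpu/cpu0/trace_pipe
 *	per_cpu/cpu0/trace_pipe_raw
 *	per_cpu/cpu0/stats
 *	per_cpu/cpu0/buffer_size_kb
 *	per_cpu/cpu0/snapshot		(CONFIG_TRACER_SNAPSHOT only)
 *	per_cpu/cpu0/snapshot_raw	(CONFIG_TRACER_SNAPSHOT only)
 */
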
8673 #ifdef CONFIG_FTRACE_SELFTEST
8674 /* Let selftest have access to static functions in this file */
8675 #include "trace_selftest.c"
8676 #endif
8677
8678 static ssize_t
8679 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8680                         loff_t *ppos)
8681 {
8682         struct trace_option_dentry *topt = filp->private_data;
8683         char *buf;
8684
8685         if (topt->flags->val & topt->opt->bit)
8686                 buf = "1\n";
8687         else
8688                 buf = "0\n";
8689
8690         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8691 }
8692
8693 static ssize_t
8694 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8695                          loff_t *ppos)
8696 {
8697         struct trace_option_dentry *topt = filp->private_data;
8698         unsigned long val;
8699         int ret;
8700
8701         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8702         if (ret)
8703                 return ret;
8704
8705         if (val != 0 && val != 1)
8706                 return -EINVAL;
8707
8708         if (!!(topt->flags->val & topt->opt->bit) != val) {
8709                 mutex_lock(&trace_types_lock);
8710                 ret = __set_tracer_option(topt->tr, topt->flags,
8711                                           topt->opt, !val);
8712                 mutex_unlock(&trace_types_lock);
8713                 if (ret)
8714                         return ret;
8715         }
8716
8717         *ppos += cnt;
8718
8719         return cnt;
8720 }
8721
8722
8723 static const struct file_operations trace_options_fops = {
8724         .open = tracing_open_generic,
8725         .read = trace_options_read,
8726         .write = trace_options_write,
8727         .llseek = generic_file_llseek,
8728 };
8729
8730 /*
8731  * In order to pass in both the trace_array descriptor as well as the index
8732  * to the flag that the trace option file represents, the trace_array
8733  * has a character array of trace_flags_index[], which holds the index
8734  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8735  * The address of this character array is passed to the flag option file
8736  * read/write callbacks.
8737  *
8738  * In order to extract both the index and the trace_array descriptor,
8739  * get_tr_index() uses the following algorithm.
8740  *
8741  *   idx = *ptr;
8742  *
8743  * Since the byte that the pointer points at holds its own index
8744  * (remember index[1] == 1), dereferencing it yields the index directly.
8745  *
8746  * Then, to get the trace_array descriptor, subtract that index from
8747  * the pointer to reach the start of the index array:
8748  *
8749  *   ptr - idx == &index[0]
8750  *
8751  * Then a simple container_of() from that pointer gets us to the
8752  * trace_array descriptor.
8753  */
8754 static void get_tr_index(void *data, struct trace_array **ptr,
8755                          unsigned int *pindex)
8756 {
8757         *pindex = *(unsigned char *)data;
8758
8759         *ptr = container_of(data - *pindex, struct trace_array,
8760                             trace_flags_index);
8761 }
8762
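/*
 * A worked example of the arithmetic above: if an option file represents
 * flag bit 3 (the particular bit does not matter), its private data is
 * &tr->trace_flags_index[3], which holds the value 3.  Then:
 *
 *	data     == &tr->trace_flags_index[3]
 *	*data    == 3
 *	data - 3 == &tr->trace_flags_index[0]
 *	container_of(data - 3, struct trace_array, trace_flags_index) == tr
 *
 * so the single pointer recovers both the flag index and its trace_array.
 */
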
8763 static ssize_t
8764 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8765                         loff_t *ppos)
8766 {
8767         void *tr_index = filp->private_data;
8768         struct trace_array *tr;
8769         unsigned int index;
8770         char *buf;
8771
8772         get_tr_index(tr_index, &tr, &index);
8773
8774         if (tr->trace_flags & (1 << index))
8775                 buf = "1\n";
8776         else
8777                 buf = "0\n";
8778
8779         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8780 }
8781
8782 static ssize_t
8783 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8784                          loff_t *ppos)
8785 {
8786         void *tr_index = filp->private_data;
8787         struct trace_array *tr;
8788         unsigned int index;
8789         unsigned long val;
8790         int ret;
8791
8792         get_tr_index(tr_index, &tr, &index);
8793
8794         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8795         if (ret)
8796                 return ret;
8797
8798         if (val != 0 && val != 1)
8799                 return -EINVAL;
8800
8801         mutex_lock(&event_mutex);
8802         mutex_lock(&trace_types_lock);
8803         ret = set_tracer_flag(tr, 1 << index, val);
8804         mutex_unlock(&trace_types_lock);
8805         mutex_unlock(&event_mutex);
8806
8807         if (ret < 0)
8808                 return ret;
8809
8810         *ppos += cnt;
8811
8812         return cnt;
8813 }
8814
8815 static const struct file_operations trace_options_core_fops = {
8816         .open = tracing_open_generic,
8817         .read = trace_options_core_read,
8818         .write = trace_options_core_write,
8819         .llseek = generic_file_llseek,
8820 };
8821
8822 struct dentry *trace_create_file(const char *name,
8823                                  umode_t mode,
8824                                  struct dentry *parent,
8825                                  void *data,
8826                                  const struct file_operations *fops)
8827 {
8828         struct dentry *ret;
8829
8830         ret = tracefs_create_file(name, mode, parent, data, fops);
8831         if (!ret)
8832                 pr_warn("Could not create tracefs '%s' entry\n", name);
8833
8834         return ret;
8835 }
8836
8837
8838 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8839 {
8840         struct dentry *d_tracer;
8841
8842         if (tr->options)
8843                 return tr->options;
8844
8845         d_tracer = tracing_get_dentry(tr);
8846         if (IS_ERR(d_tracer))
8847                 return NULL;
8848
8849         tr->options = tracefs_create_dir("options", d_tracer);
8850         if (!tr->options) {
8851                 pr_warn("Could not create tracefs directory 'options'\n");
8852                 return NULL;
8853         }
8854
8855         return tr->options;
8856 }
8857
8858 static void
8859 create_trace_option_file(struct trace_array *tr,
8860                          struct trace_option_dentry *topt,
8861                          struct tracer_flags *flags,
8862                          struct tracer_opt *opt)
8863 {
8864         struct dentry *t_options;
8865
8866         t_options = trace_options_init_dentry(tr);
8867         if (!t_options)
8868                 return;
8869
8870         topt->flags = flags;
8871         topt->opt = opt;
8872         topt->tr = tr;
8873
8874         topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
8875                                         t_options, topt, &trace_options_fops);
8876
8877 }
8878
8879 static void
8880 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8881 {
8882         struct trace_option_dentry *topts;
8883         struct trace_options *tr_topts;
8884         struct tracer_flags *flags;
8885         struct tracer_opt *opts;
8886         int cnt;
8887         int i;
8888
8889         if (!tracer)
8890                 return;
8891
8892         flags = tracer->flags;
8893
8894         if (!flags || !flags->opts)
8895                 return;
8896
8897         /*
8898          * If this is an instance, only create flags for tracers
8899          * the instance may have.
8900          */
8901         if (!trace_ok_for_array(tracer, tr))
8902                 return;
8903
8904         for (i = 0; i < tr->nr_topts; i++) {
8905                 /* Make sure there are no duplicate flags. */
8906                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8907                         return;
8908         }
8909
8910         opts = flags->opts;
8911
8912         for (cnt = 0; opts[cnt].name; cnt++)
8913                 ;
8914
8915         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8916         if (!topts)
8917                 return;
8918
8919         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8920                             GFP_KERNEL);
8921         if (!tr_topts) {
8922                 kfree(topts);
8923                 return;
8924         }
8925
8926         tr->topts = tr_topts;
8927         tr->topts[tr->nr_topts].tracer = tracer;
8928         tr->topts[tr->nr_topts].topts = topts;
8929         tr->nr_topts++;
8930
8931         for (cnt = 0; opts[cnt].name; cnt++) {
8932                 create_trace_option_file(tr, &topts[cnt], flags,
8933                                          &opts[cnt]);
8934                 MEM_FAIL(topts[cnt].entry == NULL,
8935                           "Failed to create trace option: %s",
8936                           opts[cnt].name);
8937         }
8938 }
8939
8940 static struct dentry *
8941 create_trace_option_core_file(struct trace_array *tr,
8942                               const char *option, long index)
8943 {
8944         struct dentry *t_options;
8945
8946         t_options = trace_options_init_dentry(tr);
8947         if (!t_options)
8948                 return NULL;
8949
8950         return trace_create_file(option, TRACE_MODE_WRITE, t_options,
8951                                  (void *)&tr->trace_flags_index[index],
8952                                  &trace_options_core_fops);
8953 }
8954
8955 static void create_trace_options_dir(struct trace_array *tr)
8956 {
8957         struct dentry *t_options;
8958         bool top_level = tr == &global_trace;
8959         int i;
8960
8961         t_options = trace_options_init_dentry(tr);
8962         if (!t_options)
8963                 return;
8964
8965         for (i = 0; trace_options[i]; i++) {
8966                 if (top_level ||
8967                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
8968                         create_trace_option_core_file(tr, trace_options[i], i);
8969         }
8970 }
8971
8972 static ssize_t
8973 rb_simple_read(struct file *filp, char __user *ubuf,
8974                size_t cnt, loff_t *ppos)
8975 {
8976         struct trace_array *tr = filp->private_data;
8977         char buf[64];
8978         int r;
8979
8980         r = tracer_tracing_is_on(tr);
8981         r = sprintf(buf, "%d\n", r);
8982
8983         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8984 }
8985
8986 static ssize_t
8987 rb_simple_write(struct file *filp, const char __user *ubuf,
8988                 size_t cnt, loff_t *ppos)
8989 {
8990         struct trace_array *tr = filp->private_data;
8991         struct trace_buffer *buffer = tr->array_buffer.buffer;
8992         unsigned long val;
8993         int ret;
8994
8995         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8996         if (ret)
8997                 return ret;
8998
8999         if (buffer) {
9000                 mutex_lock(&trace_types_lock);
9001                 if (!!val == tracer_tracing_is_on(tr)) {
9002                         val = 0; /* do nothing */
9003                 } else if (val) {
9004                         tracer_tracing_on(tr);
9005                         if (tr->current_trace->start)
9006                                 tr->current_trace->start(tr);
9007                 } else {
9008                         tracer_tracing_off(tr);
9009                         if (tr->current_trace->stop)
9010                                 tr->current_trace->stop(tr);
9011                 }
9012                 mutex_unlock(&trace_types_lock);
9013         }
9014
9015         (*ppos)++;
9016
9017         return cnt;
9018 }
9019
9020 static const struct file_operations rb_simple_fops = {
9021         .open           = tracing_open_generic_tr,
9022         .read           = rb_simple_read,
9023         .write          = rb_simple_write,
9024         .release        = tracing_release_generic_tr,
9025         .llseek         = default_llseek,
9026 };
9027
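/*
 * The "tracing_on" file backed by rb_simple_fops above is the usual way
 * to pause and resume recording without tearing anything down:
 *
 *	# echo 0 > tracing_on		# stop writing to the ring buffer
 *	# echo 1 > tracing_on		# resume
 *	# cat tracing_on		# shows 0 or 1
 *
 * Writing the value that is already set is a no-op; otherwise the
 * current tracer's start()/stop() callbacks are invoked as well.
 */
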
9028 static ssize_t
9029 buffer_percent_read(struct file *filp, char __user *ubuf,
9030                     size_t cnt, loff_t *ppos)
9031 {
9032         struct trace_array *tr = filp->private_data;
9033         char buf[64];
9034         int r;
9035
9036         r = tr->buffer_percent;
9037         r = sprintf(buf, "%d\n", r);
9038
9039         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9040 }
9041
9042 static ssize_t
9043 buffer_percent_write(struct file *filp, const char __user *ubuf,
9044                      size_t cnt, loff_t *ppos)
9045 {
9046         struct trace_array *tr = filp->private_data;
9047         unsigned long val;
9048         int ret;
9049
9050         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9051         if (ret)
9052                 return ret;
9053
9054         if (val > 100)
9055                 return -EINVAL;
9056
9057         if (!val)
9058                 val = 1;
9059
9060         tr->buffer_percent = val;
9061
9062         (*ppos)++;
9063
9064         return cnt;
9065 }
9066
9067 static const struct file_operations buffer_percent_fops = {
9068         .open           = tracing_open_generic_tr,
9069         .read           = buffer_percent_read,
9070         .write          = buffer_percent_write,
9071         .release        = tracing_release_generic_tr,
9072         .llseek         = default_llseek,
9073 };
9074
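/*
 * "buffer_percent" controls how full the per-cpu buffer must be before a
 * blocked trace_pipe_raw reader is woken (see the wait_on_pipe() call in
 * tracing_buffers_splice_read()):
 *
 *	# echo 50  > buffer_percent	# wake at roughly half full (initial value)
 *	# echo 100 > buffer_percent	# wake only when the buffer is full
 *	# echo 0   > buffer_percent	# stored as 1, i.e. wake at 1% full
 *
 * Values above 100 are rejected with -EINVAL.
 */
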
9075 static struct dentry *trace_instance_dir;
9076
9077 static void
9078 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9079
9080 static int
9081 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9082 {
9083         enum ring_buffer_flags rb_flags;
9084
9085         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9086
9087         buf->tr = tr;
9088
9089         buf->buffer = ring_buffer_alloc(size, rb_flags);
9090         if (!buf->buffer)
9091                 return -ENOMEM;
9092
9093         buf->data = alloc_percpu(struct trace_array_cpu);
9094         if (!buf->data) {
9095                 ring_buffer_free(buf->buffer);
9096                 buf->buffer = NULL;
9097                 return -ENOMEM;
9098         }
9099
9100         /* Allocate the first page for all buffers */
9101         set_buffer_entries(&tr->array_buffer,
9102                            ring_buffer_size(tr->array_buffer.buffer, 0));
9103
9104         return 0;
9105 }
9106
9107 static int allocate_trace_buffers(struct trace_array *tr, int size)
9108 {
9109         int ret;
9110
9111         ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9112         if (ret)
9113                 return ret;
9114
9115 #ifdef CONFIG_TRACER_MAX_TRACE
9116         ret = allocate_trace_buffer(tr, &tr->max_buffer,
9117                                     allocate_snapshot ? size : 1);
9118         if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9119                 ring_buffer_free(tr->array_buffer.buffer);
9120                 tr->array_buffer.buffer = NULL;
9121                 free_percpu(tr->array_buffer.data);
9122                 tr->array_buffer.data = NULL;
9123                 return -ENOMEM;
9124         }
9125         tr->allocated_snapshot = allocate_snapshot;
9126
9127         /*
9128          * Only the top level trace array gets its snapshot allocated
9129          * from the kernel command line.
9130          */
9131         allocate_snapshot = false;
9132 #endif
9133
9134         return 0;
9135 }
9136
9137 static void free_trace_buffer(struct array_buffer *buf)
9138 {
9139         if (buf->buffer) {
9140                 ring_buffer_free(buf->buffer);
9141                 buf->buffer = NULL;
9142                 free_percpu(buf->data);
9143                 buf->data = NULL;
9144         }
9145 }
9146
9147 static void free_trace_buffers(struct trace_array *tr)
9148 {
9149         if (!tr)
9150                 return;
9151
9152         free_trace_buffer(&tr->array_buffer);
9153
9154 #ifdef CONFIG_TRACER_MAX_TRACE
9155         free_trace_buffer(&tr->max_buffer);
9156 #endif
9157 }
9158
9159 static void init_trace_flags_index(struct trace_array *tr)
9160 {
9161         int i;
9162
9163         /* Used by the trace options files */
9164         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9165                 tr->trace_flags_index[i] = i;
9166 }
9167
9168 static void __update_tracer_options(struct trace_array *tr)
9169 {
9170         struct tracer *t;
9171
9172         for (t = trace_types; t; t = t->next)
9173                 add_tracer_options(tr, t);
9174 }
9175
9176 static void update_tracer_options(struct trace_array *tr)
9177 {
9178         mutex_lock(&trace_types_lock);
9179         __update_tracer_options(tr);
9180         mutex_unlock(&trace_types_lock);
9181 }
9182
9183 /* Must have trace_types_lock held */
9184 struct trace_array *trace_array_find(const char *instance)
9185 {
9186         struct trace_array *tr, *found = NULL;
9187
9188         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9189                 if (tr->name && strcmp(tr->name, instance) == 0) {
9190                         found = tr;
9191                         break;
9192                 }
9193         }
9194
9195         return found;
9196 }
9197
9198 struct trace_array *trace_array_find_get(const char *instance)
9199 {
9200         struct trace_array *tr;
9201
9202         mutex_lock(&trace_types_lock);
9203         tr = trace_array_find(instance);
9204         if (tr)
9205                 tr->ref++;
9206         mutex_unlock(&trace_types_lock);
9207
9208         return tr;
9209 }
9210
9211 static int trace_array_create_dir(struct trace_array *tr)
9212 {
9213         int ret;
9214
9215         tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9216         if (!tr->dir)
9217                 return -EINVAL;
9218
9219         ret = event_trace_add_tracer(tr->dir, tr);
9220         if (ret) {
9221                 tracefs_remove(tr->dir);
9222                 return ret;
9223         }
9224
9225         init_tracer_tracefs(tr, tr->dir);
9226         __update_tracer_options(tr);
9227
9228         return ret;
9229 }
9230
9231 static struct trace_array *trace_array_create(const char *name)
9232 {
9233         struct trace_array *tr;
9234         int ret;
9235
9236         ret = -ENOMEM;
9237         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9238         if (!tr)
9239                 return ERR_PTR(ret);
9240
9241         tr->name = kstrdup(name, GFP_KERNEL);
9242         if (!tr->name)
9243                 goto out_free_tr;
9244
9245         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9246                 goto out_free_tr;
9247
9248         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9249
9250         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9251
9252         raw_spin_lock_init(&tr->start_lock);
9253
9254         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9255
9256         tr->current_trace = &nop_trace;
9257
9258         INIT_LIST_HEAD(&tr->systems);
9259         INIT_LIST_HEAD(&tr->events);
9260         INIT_LIST_HEAD(&tr->hist_vars);
9261         INIT_LIST_HEAD(&tr->err_log);
9262
9263         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9264                 goto out_free_tr;
9265
9266         if (ftrace_allocate_ftrace_ops(tr) < 0)
9267                 goto out_free_tr;
9268
9269         ftrace_init_trace_array(tr);
9270
9271         init_trace_flags_index(tr);
9272
9273         if (trace_instance_dir) {
9274                 ret = trace_array_create_dir(tr);
9275                 if (ret)
9276                         goto out_free_tr;
9277         } else
9278                 __trace_early_add_events(tr);
9279
9280         list_add(&tr->list, &ftrace_trace_arrays);
9281
9282         tr->ref++;
9283
9284         return tr;
9285
9286  out_free_tr:
9287         ftrace_free_ftrace_ops(tr);
9288         free_trace_buffers(tr);
9289         free_cpumask_var(tr->tracing_cpumask);
9290         kfree(tr->name);
9291         kfree(tr);
9292
9293         return ERR_PTR(ret);
9294 }
9295
9296 static int instance_mkdir(const char *name)
9297 {
9298         struct trace_array *tr;
9299         int ret;
9300
9301         mutex_lock(&event_mutex);
9302         mutex_lock(&trace_types_lock);
9303
9304         ret = -EEXIST;
9305         if (trace_array_find(name))
9306                 goto out_unlock;
9307
9308         tr = trace_array_create(name);
9309
9310         ret = PTR_ERR_OR_ZERO(tr);
9311
9312 out_unlock:
9313         mutex_unlock(&trace_types_lock);
9314         mutex_unlock(&event_mutex);
9315         return ret;
9316 }
9317
9318 /**
9319  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9320  * @name: The name of the trace array to be looked up/created.
9321  *
9322  * Returns a pointer to the trace array with the given name, or NULL
9323  * if it cannot be created.
9324  *
9325  * NOTE: This function increments the reference counter associated with the
9326  * trace array returned. This makes sure it cannot be freed while in use.
9327  * Use trace_array_put() once the trace array is no longer needed.
9328  * If the trace_array is to be freed, trace_array_destroy() needs to
9329  * be called after the trace_array_put(), or simply let user space delete
9330  * it from the tracefs instances directory. But until the
9331  * trace_array_put() is called, user space cannot delete it.
9332  *
9333  */
9334 struct trace_array *trace_array_get_by_name(const char *name)
9335 {
9336         struct trace_array *tr;
9337
9338         mutex_lock(&event_mutex);
9339         mutex_lock(&trace_types_lock);
9340
9341         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9342                 if (tr->name && strcmp(tr->name, name) == 0)
9343                         goto out_unlock;
9344         }
9345
9346         tr = trace_array_create(name);
9347
9348         if (IS_ERR(tr))
9349                 tr = NULL;
9350 out_unlock:
9351         if (tr)
9352                 tr->ref++;
9353
9354         mutex_unlock(&trace_types_lock);
9355         mutex_unlock(&event_mutex);
9356         return tr;
9357 }
9358 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
9359
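/*
 * A minimal in-kernel usage sketch for the API above; the instance name
 * and the work done with the instance are illustrative, only the
 * lookup/refcount calls are the point:
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("my_instance");
 *	if (!tr)
 *		return -ENOMEM;
 *
 *	// ... enable events on / write into the instance ...
 *
 *	trace_array_put(tr);		// drop our reference
 *	trace_array_destroy(tr);	// optional: remove the instance
 *
 * Without the trace_array_destroy() call the instance stays around and
 * can later be removed from user space via the instances directory.
 */
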
9360 static int __remove_instance(struct trace_array *tr)
9361 {
9362         int i;
9363
9364         /* Reference counter for a newly created trace array = 1. */
9365         if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9366                 return -EBUSY;
9367
9368         list_del(&tr->list);
9369
9370         /* Disable all the flags that were enabled coming in */
9371         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9372                 if ((1 << i) & ZEROED_TRACE_FLAGS)
9373                         set_tracer_flag(tr, 1 << i, 0);
9374         }
9375
9376         tracing_set_nop(tr);
9377         clear_ftrace_function_probes(tr);
9378         event_trace_del_tracer(tr);
9379         ftrace_clear_pids(tr);
9380         ftrace_destroy_function_files(tr);
9381         tracefs_remove(tr->dir);
9382         free_percpu(tr->last_func_repeats);
9383         free_trace_buffers(tr);
9384
9385         for (i = 0; i < tr->nr_topts; i++) {
9386                 kfree(tr->topts[i].topts);
9387         }
9388         kfree(tr->topts);
9389
9390         free_cpumask_var(tr->tracing_cpumask);
9391         kfree(tr->name);
9392         kfree(tr);
9393
9394         return 0;
9395 }
9396
9397 int trace_array_destroy(struct trace_array *this_tr)
9398 {
9399         struct trace_array *tr;
9400         int ret;
9401
9402         if (!this_tr)
9403                 return -EINVAL;
9404
9405         mutex_lock(&event_mutex);
9406         mutex_lock(&trace_types_lock);
9407
9408         ret = -ENODEV;
9409
9410         /* Make sure the trace array exists before destroying it. */
9411         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9412                 if (tr == this_tr) {
9413                         ret = __remove_instance(tr);
9414                         break;
9415                 }
9416         }
9417
9418         mutex_unlock(&trace_types_lock);
9419         mutex_unlock(&event_mutex);
9420
9421         return ret;
9422 }
9423 EXPORT_SYMBOL_GPL(trace_array_destroy);
9424
9425 static int instance_rmdir(const char *name)
9426 {
9427         struct trace_array *tr;
9428         int ret;
9429
9430         mutex_lock(&event_mutex);
9431         mutex_lock(&trace_types_lock);
9432
9433         ret = -ENODEV;
9434         tr = trace_array_find(name);
9435         if (tr)
9436                 ret = __remove_instance(tr);
9437
9438         mutex_unlock(&trace_types_lock);
9439         mutex_unlock(&event_mutex);
9440
9441         return ret;
9442 }
9443
9444 static __init void create_trace_instances(struct dentry *d_tracer)
9445 {
9446         struct trace_array *tr;
9447
9448         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9449                                                          instance_mkdir,
9450                                                          instance_rmdir);
9451         if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9452                 return;
9453
9454         mutex_lock(&event_mutex);
9455         mutex_lock(&trace_types_lock);
9456
9457         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9458                 if (!tr->name)
9459                         continue;
9460                 if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9461                              "Failed to create instance directory\n"))
9462                         break;
9463         }
9464
9465         mutex_unlock(&trace_types_lock);
9466         mutex_unlock(&event_mutex);
9467 }
9468
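/*
 * The instances directory created above is driven entirely from user
 * space with mkdir/rmdir, which end up in instance_mkdir() and
 * instance_rmdir() above.  The instance name is arbitrary:
 *
 *	# mkdir /sys/kernel/tracing/instances/foo
 *	# ls /sys/kernel/tracing/instances/foo
 *	  ... its own trace, trace_pipe, events/, per_cpu/, options/, ...
 *	# rmdir /sys/kernel/tracing/instances/foo
 *
 * The rmdir fails with -EBUSY while the instance is still referenced
 * (see __remove_instance()).
 */
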
9469 static void
9470 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9471 {
9472         struct trace_event_file *file;
9473         int cpu;
9474
9475         trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
9476                         tr, &show_traces_fops);
9477
9478         trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
9479                         tr, &set_tracer_fops);
9480
9481         trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
9482                           tr, &tracing_cpumask_fops);
9483
9484         trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
9485                           tr, &tracing_iter_fops);
9486
9487         trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
9488                           tr, &tracing_fops);
9489
9490         trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
9491                           tr, &tracing_pipe_fops);
9492
9493         trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
9494                           tr, &tracing_entries_fops);
9495
9496         trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
9497                           tr, &tracing_total_entries_fops);
9498
9499         trace_create_file("free_buffer", 0200, d_tracer,
9500                           tr, &tracing_free_buffer_fops);
9501
9502         trace_create_file("trace_marker", 0220, d_tracer,
9503                           tr, &tracing_mark_fops);
9504
9505         file = __find_event_file(tr, "ftrace", "print");
9506         if (file && file->dir)
9507                 trace_create_file("trigger", TRACE_MODE_WRITE, file->dir,
9508                                   file, &event_trigger_fops);
9509         tr->trace_marker_file = file;
9510
9511         trace_create_file("trace_marker_raw", 0220, d_tracer,
9512                           tr, &tracing_mark_raw_fops);
9513
9514         trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
9515                           &trace_clock_fops);
9516
9517         trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
9518                           tr, &rb_simple_fops);
9519
9520         trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
9521                           &trace_time_stamp_mode_fops);
9522
9523         tr->buffer_percent = 50;
9524
9525         trace_create_file("buffer_percent", TRACE_MODE_READ, d_tracer,
9526                         tr, &buffer_percent_fops);
9527
9528         create_trace_options_dir(tr);
9529
9530         trace_create_maxlat_file(tr, d_tracer);
9531
9532         if (ftrace_create_function_files(tr, d_tracer))
9533                 MEM_FAIL(1, "Could not allocate function filter files");
9534
9535 #ifdef CONFIG_TRACER_SNAPSHOT
9536         trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
9537                           tr, &snapshot_fops);
9538 #endif
9539
9540         trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
9541                           tr, &tracing_err_log_fops);
9542
9543         for_each_tracing_cpu(cpu)
9544                 tracing_init_tracefs_percpu(tr, cpu);
9545
9546         ftrace_init_tracefs(tr, d_tracer);
9547 }
9548
9549 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
9550 {
9551         struct vfsmount *mnt;
9552         struct file_system_type *type;
9553
9554         /*
9555          * To maintain backward compatibility for tools that mount
9556          * debugfs to get to the tracing facility, tracefs is automatically
9557          * mounted to the debugfs/tracing directory.
9558          */
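        /*
         * Illustrative example (paths depend on how the system mounts
         * debugfs): a legacy tool doing
         *   cat /sys/kernel/debug/tracing/trace
         * keeps working because walking into debugfs/tracing triggers
         * this automount of tracefs.
         */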
9559         type = get_fs_type("tracefs");
9560         if (!type)
9561                 return NULL;
9562         mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9563         put_filesystem(type);
9564         if (IS_ERR(mnt))
9565                 return NULL;
9566         mntget(mnt);
9567
9568         return mnt;
9569 }
9570
9571 /**
9572  * tracing_init_dentry - initialize top level trace array
9573  *
9574  * This is called when creating files or directories in the tracing
9575  * directory. It is called via fs_initcall() by any of the boot up code.
9576  * Returns 0 on success, or a negative error code on failure.
9577  */
9578 int tracing_init_dentry(void)
9579 {
9580         struct trace_array *tr = &global_trace;
9581
9582         if (security_locked_down(LOCKDOWN_TRACEFS)) {
9583                 pr_warn("Tracing disabled due to lockdown\n");
9584                 return -EPERM;
9585         }
9586
9587         /* The top level trace array uses NULL as parent */
9588         if (tr->dir)
9589                 return 0;
9590
9591         if (WARN_ON(!tracefs_initialized()))
9592                 return -ENODEV;
9593
9594         /*
9595          * As there may still be users that expect the tracing
9596          * files to exist in debugfs/tracing, we must automount
9597          * the tracefs file system there, so older tools still
9598          * work with the newer kernel.
9599          */
9600         tr->dir = debugfs_create_automount("tracing", NULL,
9601                                            trace_automount, NULL);
9602
9603         return 0;
9604 }
9605
9606 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9607 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9608
9609 static struct workqueue_struct *eval_map_wq __initdata;
9610 static struct work_struct eval_map_work __initdata;
9611
9612 static void __init eval_map_work_func(struct work_struct *work)
9613 {
9614         int len;
9615
9616         len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9617         trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9618 }
9619
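/*
 * Insert the kernel's built-in eval (enum) maps from a workqueue so
 * that boot is not stalled while the maps are processed. If the
 * workqueue cannot be allocated, fall back to doing the work
 * synchronously.
 */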
9620 static int __init trace_eval_init(void)
9621 {
9622         INIT_WORK(&eval_map_work, eval_map_work_func);
9623
9624         eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
9625         if (!eval_map_wq) {
9626                 pr_err("Unable to allocate eval_map_wq\n");
9627                 /* Fall back to doing the work synchronously */
9628                 eval_map_work_func(&eval_map_work);
9629                 return -ENOMEM;
9630         }
9631
9632         queue_work(eval_map_wq, &eval_map_work);
9633         return 0;
9634 }
9635
9636 static int __init trace_eval_sync(void)
9637 {
9638         /* Make sure the eval map updates are finished */
9639         if (eval_map_wq)
9640                 destroy_workqueue(eval_map_wq);
9641         return 0;
9642 }
9643
9644 late_initcall_sync(trace_eval_sync);
9645
9646
9647 #ifdef CONFIG_MODULES
9648 static void trace_module_add_evals(struct module *mod)
9649 {
9650         if (!mod->num_trace_evals)
9651                 return;
9652
9653         /*
9654          * Modules with bad taint do not have events created, so do
9655          * not bother with their enums (eval maps) either.
9656          */
9657         if (trace_module_has_bad_taint(mod))
9658                 return;
9659
9660         trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9661 }
9662
9663 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
9664 static void trace_module_remove_evals(struct module *mod)
9665 {
9666         union trace_eval_map_item *map;
9667         union trace_eval_map_item **last = &trace_eval_maps;
9668
9669         if (!mod->num_trace_evals)
9670                 return;
9671
9672         mutex_lock(&trace_eval_mutex);
9673
9674         map = trace_eval_maps;
9675
9676         while (map) {
9677                 if (map->head.mod == mod)
9678                         break;
9679                 map = trace_eval_jmp_to_tail(map);
9680                 last = &map->tail.next;
9681                 map = map->tail.next;
9682         }
9683         if (!map)
9684                 goto out;
9685
9686         *last = trace_eval_jmp_to_tail(map)->tail.next;
9687         kfree(map);
9688  out:
9689         mutex_unlock(&trace_eval_mutex);
9690 }
9691 #else
9692 static inline void trace_module_remove_evals(struct module *mod) { }
9693 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9694
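/*
 * Module notifier: add a loaded module's trace eval maps, and (when
 * CONFIG_TRACE_EVAL_MAP_FILE is enabled) remove them again when the
 * module is unloaded.
 */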
9695 static int trace_module_notify(struct notifier_block *self,
9696                                unsigned long val, void *data)
9697 {
9698         struct module *mod = data;
9699
9700         switch (val) {
9701         case MODULE_STATE_COMING:
9702                 trace_module_add_evals(mod);
9703                 break;
9704         case MODULE_STATE_GOING:
9705                 trace_module_remove_evals(mod);
9706                 break;
9707         }
9708
9709         return NOTIFY_OK;
9710 }
9711
9712 static struct notifier_block trace_module_nb = {
9713         .notifier_call = trace_module_notify,
9714         .priority = 0,
9715 };
9716 #endif /* CONFIG_MODULES */
9717
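/*
 * Called via fs_initcall(): create the top-level tracefs files for the
 * global trace array, plus directories for any instances created
 * earlier during boot.
 */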
9718 static __init int tracer_init_tracefs(void)
9719 {
9720         int ret;
9721
9722         trace_access_lock_init();
9723
9724         ret = tracing_init_dentry();
9725         if (ret)
9726                 return 0;
9727
9728         event_trace_init();
9729
9730         init_tracer_tracefs(&global_trace, NULL);
9731         ftrace_init_tracefs_toplevel(&global_trace, NULL);
9732
9733         trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
9734                         &global_trace, &tracing_thresh_fops);
9735
9736         trace_create_file("README", TRACE_MODE_READ, NULL,
9737                         NULL, &tracing_readme_fops);
9738
9739         trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
9740                         NULL, &tracing_saved_cmdlines_fops);
9741
9742         trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
9743                           NULL, &tracing_saved_cmdlines_size_fops);
9744
9745         trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
9746                         NULL, &tracing_saved_tgids_fops);
9747
9748         trace_eval_init();
9749
9750         trace_create_eval_file(NULL);
9751
9752 #ifdef CONFIG_MODULES
9753         register_module_notifier(&trace_module_nb);
9754 #endif
9755
9756 #ifdef CONFIG_DYNAMIC_FTRACE
9757         trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
9758                         NULL, &tracing_dyn_info_fops);
9759 #endif
9760
9761         create_trace_instances(NULL);
9762
9763         update_tracer_options(&global_trace);
9764
9765         return 0;
9766 }
9767
9768 fs_initcall(tracer_init_tracefs);
9769
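/*
 * Panic notifier: if the ftrace_dump_on_oops option is set, dump the
 * trace buffers to the console before the system goes down.
 */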
9770 static int trace_panic_handler(struct notifier_block *this,
9771                                unsigned long event, void *unused)
9772 {
9773         if (ftrace_dump_on_oops)
9774                 ftrace_dump(ftrace_dump_on_oops);
9775         return NOTIFY_OK;
9776 }
9777
9778 static struct notifier_block trace_panic_notifier = {
9779         .notifier_call  = trace_panic_handler,
9780         .next           = NULL,
9781         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
9782 };
9783
9784 static int trace_die_handler(struct notifier_block *self,
9785                              unsigned long val,
9786                              void *data)
9787 {
9788         switch (val) {
9789         case DIE_OOPS:
9790                 if (ftrace_dump_on_oops)
9791                         ftrace_dump(ftrace_dump_on_oops);
9792                 break;
9793         default:
9794                 break;
9795         }
9796         return NOTIFY_OK;
9797 }
9798
9799 static struct notifier_block trace_die_notifier = {
9800         .notifier_call = trace_die_handler,
9801         .priority = 200
9802 };
9803
9804 /*
9805  * printk is set to a max of 1024; we really don't need it that big.
9806  * Nothing should be printing 1000 characters anyway.
9807  */
9808 #define TRACE_MAX_PRINT         1000
9809
9810 /*
9811  * Define KERN_TRACE here so that we have one place to modify
9812  * it if we decide to change what log level the ftrace dump
9813  * should be at.
9814  */
9815 #define KERN_TRACE              KERN_EMERG
9816
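/*
 * Print the contents of a trace_seq to the console at KERN_TRACE level
 * and reinitialize it for reuse. Used by ftrace_dump() below.
 */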
9817 void
9818 trace_printk_seq(struct trace_seq *s)
9819 {
9820         /* Probably should print a warning here. */
9821         if (s->seq.len >= TRACE_MAX_PRINT)
9822                 s->seq.len = TRACE_MAX_PRINT;
9823
9824         /*
9825          * More paranoid code. Although the buffer size is set to
9826          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
9827          * an extra layer of protection.
9828          */
9829         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
9830                 s->seq.len = s->seq.size - 1;
9831
9832         /* Should be NUL-terminated, but we are paranoid. */
9833         s->buffer[s->seq.len] = 0;
9834
9835         printk(KERN_TRACE "%s", s->buffer);
9836
9837         trace_seq_init(s);
9838 }
9839
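/*
 * Initialize an iterator over the global trace array, covering all
 * CPUs, for emergency dump paths such as ftrace_dump() below.
 */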
9840 void trace_init_global_iter(struct trace_iterator *iter)
9841 {
9842         iter->tr = &global_trace;
9843         iter->trace = iter->tr->current_trace;
9844         iter->cpu_file = RING_BUFFER_ALL_CPUS;
9845         iter->array_buffer = &global_trace.array_buffer;
9846
9847         if (iter->trace && iter->trace->open)
9848                 iter->trace->open(iter);
9849
9850         /* Annotate start of buffers if we had overruns */
9851         if (ring_buffer_overruns(iter->array_buffer->buffer))
9852                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
9853
9854         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
9855         if (trace_clocks[iter->tr->clock_id].in_ns)
9856                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
9857 }
9858
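/*
 * Dump the trace buffers to the console. This is meant for oops/panic
 * context: tracing is turned off, interrupts are disabled, and static
 * per-boot buffers are used instead of allocating memory.
 */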
9859 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
9860 {
9861         /* use static because iter can be a bit big for the stack */
9862         static struct trace_iterator iter;
9863         static atomic_t dump_running;
9864         struct trace_array *tr = &global_trace;
9865         unsigned int old_userobj;
9866         unsigned long flags;
9867         int cnt = 0, cpu;
9868
9869         /* Only allow one dump user at a time. */
9870         if (atomic_inc_return(&dump_running) != 1) {
9871                 atomic_dec(&dump_running);
9872                 return;
9873         }
9874
9875         /*
9876          * Always turn off tracing when we dump.
9877          * We don't need to show trace output of what happens
9878          * between multiple crashes.
9879          *
9880          * If the user does a sysrq-z, then they can re-enable
9881          * tracing with echo 1 > tracing_on.
9882          */
9883         tracing_off();
9884
9885         local_irq_save(flags);
9886
9887         /* Simulate the iterator */
9888         trace_init_global_iter(&iter);
9889         /* Cannot use kmalloc for iter.temp and iter.fmt (may run in oops/panic context) */
9890         iter.temp = static_temp_buf;
9891         iter.temp_size = STATIC_TEMP_BUF_SIZE;
9892         iter.fmt = static_fmt_buf;
9893         iter.fmt_size = STATIC_FMT_BUF_SIZE;
9894
9895         for_each_tracing_cpu(cpu) {
9896                 atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9897         }
9898
9899         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
9900
9901         /* don't look at user memory in panic mode */
9902         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
9903
9904         switch (oops_dump_mode) {
9905         case DUMP_ALL:
9906                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9907                 break;
9908         case DUMP_ORIG:
9909                 iter.cpu_file = raw_smp_processor_id();
9910                 break;
9911         case DUMP_NONE:
9912                 goto out_enable;
9913         default:
9914                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
9915                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9916         }
9917
9918         printk(KERN_TRACE "Dumping ftrace buffer:\n");
9919
9920         /* Did function tracer already get disabled? */
9921         if (ftrace_is_dead()) {
9922                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
9923                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
9924         }
9925
9926         /*
9927          * We need to stop all tracing on all CPUs to read
9928          * the next buffer. This is a bit expensive, but is
9929          * not done often. We read everything we can,
9930          * and then release the locks again.
9931          */
9932
9933         while (!trace_empty(&iter)) {
9934
9935                 if (!cnt)
9936                         printk(KERN_TRACE "---------------------------------\n");
9937
9938                 cnt++;
9939
9940                 trace_iterator_reset(&iter);
9941                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
9942
9943                 if (trace_find_next_entry_inc(&iter) != NULL) {
9944                         int ret;
9945
9946                         ret = print_trace_line(&iter);
9947                         if (ret != TRACE_TYPE_NO_CONSUME)
9948                                 trace_consume(&iter);
9949                 }
9950                 touch_nmi_watchdog();
9951
9952                 trace_printk_seq(&iter.seq);
9953         }
9954
9955         if (!cnt)
9956                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
9957         else
9958                 printk(KERN_TRACE "---------------------------------\n");
9959
9960  out_enable:
9961         tr->trace_flags |= old_userobj;
9962
9963         for_each_tracing_cpu(cpu) {
9964                 atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9965         }
9966         atomic_dec(&dump_running);
9967         local_irq_restore(flags);
9968 }
9969 EXPORT_SYMBOL_GPL(ftrace_dump);
9970
9971 #define WRITE_BUFSIZE  4096
9972
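/*
 * Copy a command string from user space in WRITE_BUFSIZE chunks, split
 * it into newline-terminated lines, strip '#' comments, and pass each
 * line to @createfn. Returns the number of bytes consumed or a negative
 * error. Callers include line-oriented interfaces such as the dynamic
 * event files (e.g. kprobe_events).
 */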
9973 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
9974                                 size_t count, loff_t *ppos,
9975                                 int (*createfn)(const char *))
9976 {
9977         char *kbuf, *buf, *tmp;
9978         int ret = 0;
9979         size_t done = 0;
9980         size_t size;
9981
9982         kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
9983         if (!kbuf)
9984                 return -ENOMEM;
9985
9986         while (done < count) {
9987                 size = count - done;
9988
9989                 if (size >= WRITE_BUFSIZE)
9990                         size = WRITE_BUFSIZE - 1;
9991
9992                 if (copy_from_user(kbuf, buffer + done, size)) {
9993                         ret = -EFAULT;
9994                         goto out;
9995                 }
9996                 kbuf[size] = '\0';
9997                 buf = kbuf;
9998                 do {
9999                         tmp = strchr(buf, '\n');
10000                         if (tmp) {
10001                                 *tmp = '\0';
10002                                 size = tmp - buf + 1;
10003                         } else {
10004                                 size = strlen(buf);
10005                                 if (done + size < count) {
10006                                         if (buf != kbuf)
10007                                                 break;
10008                                         /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
10009                                         pr_warn("Line length is too long: Should be less than %d\n",
10010                                                 WRITE_BUFSIZE - 2);
10011                                         ret = -EINVAL;
10012                                         goto out;
10013                                 }
10014                         }
10015                         done += size;
10016
10017                         /* Remove comments */
10018                         tmp = strchr(buf, '#');
10019
10020                         if (tmp)
10021                                 *tmp = '\0';
10022
10023                         ret = createfn(buf);
10024                         if (ret)
10025                                 goto out;
10026                         buf += size;
10027
10028                 } while (done < count);
10029         }
10030         ret = done;
10031
10032 out:
10033         kfree(kbuf);
10034
10035         return ret;
10036 }
10037
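/*
 * Allocate the global trace buffers and set up the core tracing state.
 * Called from early_trace_init() below, very early in boot, before
 * tracefs exists.
 */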
10038 __init static int tracer_alloc_buffers(void)
10039 {
10040         int ring_buf_size;
10041         int ret = -ENOMEM;
10042
10043
10044         if (security_locked_down(LOCKDOWN_TRACEFS)) {
10045                 pr_warn("Tracing disabled due to lockdown\n");
10046                 return -EPERM;
10047         }
10048
10049         /*
10050          * Make sure we don't accidentally add more trace options
10051          * than we have bits for.
10052          */
10053         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
10054
10055         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
10056                 goto out;
10057
10058         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
10059                 goto out_free_buffer_mask;
10060
10061         /* Only allocate trace_printk buffers if a trace_printk exists */
10062         if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
10063                 /* Must be called before global_trace.buffer is allocated */
10064                 trace_printk_init_buffers();
10065
10066         /* To save memory, keep the ring buffer size to its minimum */
10067         if (ring_buffer_expanded)
10068                 ring_buf_size = trace_buf_size;
10069         else
10070                 ring_buf_size = 1;
10071
10072         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
10073         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
10074
10075         raw_spin_lock_init(&global_trace.start_lock);
10076
10077         /*
10078          * The prepare callback allocates some memory for the ring buffer. We
10079          * don't free the buffer if the CPU goes down. If we were to free
10080          * the buffer, then the user would lose any trace that was in the
10081          * buffer. The memory will be removed once the "instance" is removed.
10082          */
10083         ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
10084                                       "trace/RB:preapre", trace_rb_cpu_prepare,
10085                                       NULL);
10086         if (ret < 0)
10087                 goto out_free_cpumask;
10088         /* Used for event triggers */
10089         ret = -ENOMEM;
10090         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
10091         if (!temp_buffer)
10092                 goto out_rm_hp_state;
10093
10094         if (trace_create_savedcmd() < 0)
10095                 goto out_free_temp_buffer;
10096
10097         /* TODO: make the number of buffers hot pluggable with CPUs */
10098         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
10099                 MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
10100                 goto out_free_savedcmd;
10101         }
10102
10103         if (global_trace.buffer_disabled)
10104                 tracing_off();
10105
10106         if (trace_boot_clock) {
10107                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
10108                 if (ret < 0)
10109                         pr_warn("Trace clock %s not defined, going back to default\n",
10110                                 trace_boot_clock);
10111         }
10112
10113         /*
10114          * register_tracer() might reference current_trace, so it
10115          * needs to be set before we register anything. This is
10116          * just a bootstrap of current_trace anyway.
10117          */
10118         global_trace.current_trace = &nop_trace;
10119
10120         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
10121
10122         ftrace_init_global_array_ops(&global_trace);
10123
10124         init_trace_flags_index(&global_trace);
10125
10126         register_tracer(&nop_trace);
10127
10128         /* Function tracing may start here (via kernel command line) */
10129         init_function_trace();
10130
10131         /* All seems OK, enable tracing */
10132         tracing_disabled = 0;
10133
10134         atomic_notifier_chain_register(&panic_notifier_list,
10135                                        &trace_panic_notifier);
10136
10137         register_die_notifier(&trace_die_notifier);
10138
10139         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
10140
10141         INIT_LIST_HEAD(&global_trace.systems);
10142         INIT_LIST_HEAD(&global_trace.events);
10143         INIT_LIST_HEAD(&global_trace.hist_vars);
10144         INIT_LIST_HEAD(&global_trace.err_log);
10145         list_add(&global_trace.list, &ftrace_trace_arrays);
10146
10147         apply_trace_boot_options();
10148
10149         register_snapshot_cmd();
10150
10151         test_can_verify();
10152
10153         return 0;
10154
10155 out_free_savedcmd:
10156         free_saved_cmdlines_buffer(savedcmd);
10157 out_free_temp_buffer:
10158         ring_buffer_free(temp_buffer);
10159 out_rm_hp_state:
10160         cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
10161 out_free_cpumask:
10162         free_cpumask_var(global_trace.tracing_cpumask);
10163 out_free_buffer_mask:
10164         free_cpumask_var(tracing_buffer_mask);
10165 out:
10166         return ret;
10167 }
10168
10169 void __init ftrace_boot_snapshot(void)
10170 {
10171         if (snapshot_at_boot) {
10172                 tracing_snapshot();
10173                 internal_trace_puts("** Boot snapshot taken **\n");
10174         }
10175 }
10176
10177 void __init early_trace_init(void)
10178 {
10179         if (tracepoint_printk) {
10180                 tracepoint_print_iter =
10181                         kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
10182                 if (MEM_FAIL(!tracepoint_print_iter,
10183                              "Failed to allocate trace iterator\n"))
10184                         tracepoint_printk = 0;
10185                 else
10186                         static_key_enable(&tracepoint_printk_key.key);
10187         }
10188         tracer_alloc_buffers();
10189 }
10190
10191 void __init trace_init(void)
10192 {
10193         trace_event_init();
10194 }
10195
10196 __init static void clear_boot_tracer(void)
10197 {
10198         /*
10199          * The default bootup tracer name points into an init section.
10200          * This function is called at late_initcall time. If the boot
10201          * tracer was never registered, clear the name out to prevent
10202          * a later registration from accessing the init memory that
10203          * is about to be freed.
10204          */
10205         if (!default_bootup_tracer)
10206                 return;
10207
10208         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
10209                default_bootup_tracer);
10210         default_bootup_tracer = NULL;
10211 }
10212
10213 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
10214 __init static void tracing_set_default_clock(void)
10215 {
10216         /* sched_clock_stable() is determined in late_initcall */
10217         if (!trace_boot_clock && !sched_clock_stable()) {
10218                 if (security_locked_down(LOCKDOWN_TRACEFS)) {
10219                         pr_warn("Can not set tracing clock due to lockdown\n");
10220                         return;
10221                 }
10222
10223                 printk(KERN_WARNING
10224                        "Unstable clock detected, switching default tracing clock to \"global\"\n"
10225                        "If you want to keep using the local clock, then add:\n"
10226                        "  \"trace_clock=local\"\n"
10227                        "on the kernel command line\n");
10228                 tracing_set_clock(&global_trace, "global");
10229         }
10230 }
10231 #else
10232 static inline void tracing_set_default_clock(void) { }
10233 #endif
10234
10235 __init static int late_trace_init(void)
10236 {
10237         if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
10238                 static_key_disable(&tracepoint_printk_key.key);
10239                 tracepoint_printk = 0;
10240         }
10241
10242         tracing_set_default_clock();
10243         clear_boot_tracer();
10244         return 0;
10245 }
10246
10247 late_initcall_sync(late_trace_init);