1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/notifier.h>
23 #include <linux/irqflags.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/panic_notifier.h>
43 #include <linux/poll.h>
44 #include <linux/nmi.h>
45 #include <linux/fs.h>
46 #include <linux/trace.h>
47 #include <linux/sched/clock.h>
48 #include <linux/sched/rt.h>
49 #include <linux/fsnotify.h>
50 #include <linux/irq_work.h>
51 #include <linux/workqueue.h>
52
53 #include "trace.h"
54 #include "trace_output.h"
55
56 /*
57  * On boot up, the ring buffer is set to the minimum size, so that
58  * we do not waste memory on systems that are not using tracing.
59  */
60 bool ring_buffer_expanded;
61
62 /*
63  * We need to change this state when a selftest is running.
64  * A selftest will look into the ring buffer to count the
65  * entries inserted during the selftest, although concurrent
66  * insertions into the ring buffer, such as trace_printk, could occur
67  * at the same time, giving false positive or negative results.
68  */
69 static bool __read_mostly tracing_selftest_running;
70
71 /*
72  * If boot-time tracing including tracers/events via kernel cmdline
73  * is running, we do not want to run SELFTEST.
74  */
75 bool __read_mostly tracing_selftest_disabled;
76
77 #ifdef CONFIG_FTRACE_STARTUP_TEST
78 void __init disable_tracing_selftest(const char *reason)
79 {
80         if (!tracing_selftest_disabled) {
81                 tracing_selftest_disabled = true;
82                 pr_info("Ftrace startup test is disabled due to %s\n", reason);
83         }
84 }
85 #endif
86
87 /* Pipe tracepoints to printk */
88 struct trace_iterator *tracepoint_print_iter;
89 int tracepoint_printk;
90 static bool tracepoint_printk_stop_on_boot __initdata;
91 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
92
93 /* For tracers that don't implement custom flags */
94 static struct tracer_opt dummy_tracer_opt[] = {
95         { }
96 };
97
98 static int
99 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
100 {
101         return 0;
102 }
103
104 /*
105  * To prevent the comm cache from being overwritten when no
106  * tracing is active, only save the comm when a trace event
107  * has occurred.
108  */
109 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
110
111 /*
112  * Kill all tracing for good (never come back).
113  * It is initialized to 1 but will turn to zero if the initialization
114  * of the tracer is successful, and that is the only place that sets
115  * it back to zero.
116  */
117 static int tracing_disabled = 1;
118
119 cpumask_var_t __read_mostly     tracing_buffer_mask;
120
121 /*
122  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
123  *
124  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
125  * is set, then ftrace_dump is called. This will output the contents
126  * of the ftrace buffers to the console.  This is very useful for
127  * capturing traces that lead to crashes and outputting them to a
128  * serial console.
129  *
130  * It is off by default, but you can enable it either by specifying
131  * "ftrace_dump_on_oops" on the kernel command line, or by setting
132  * /proc/sys/kernel/ftrace_dump_on_oops
133  * Set 1 if you want to dump buffers of all CPUs
134  * Set 2 if you want to dump the buffer of the CPU that triggered oops
135  */
136
137 enum ftrace_dump_mode ftrace_dump_on_oops;
138
139 /* When set, tracing will stop when a WARN*() is hit */
140 int __disable_trace_on_warning;
141
142 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
143 /* Map of enums to their values, for "eval_map" file */
144 struct trace_eval_map_head {
145         struct module                   *mod;
146         unsigned long                   length;
147 };
148
149 union trace_eval_map_item;
150
151 struct trace_eval_map_tail {
152         /*
153          * "end" is first and points to NULL as it must be different
154          * from "mod" or "eval_string"
155          */
156         union trace_eval_map_item       *next;
157         const char                      *end;   /* points to NULL */
158 };
159
160 static DEFINE_MUTEX(trace_eval_mutex);
161
162 /*
163  * The trace_eval_maps are saved in an array with two extra elements,
164  * one at the beginning, and one at the end. The beginning item contains
165  * the count of the saved maps (head.length), and the module they
166  * belong to if not built in (head.mod). The ending item contains a
167  * pointer to the next array of saved eval_map items.
168  */
169 union trace_eval_map_item {
170         struct trace_eval_map           map;
171         struct trace_eval_map_head      head;
172         struct trace_eval_map_tail      tail;
173 };
174
175 static union trace_eval_map_item *trace_eval_maps;
176 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
177
178 int tracing_set_tracer(struct trace_array *tr, const char *buf);
179 static void ftrace_trace_userstack(struct trace_array *tr,
180                                    struct trace_buffer *buffer,
181                                    unsigned int trace_ctx);
182
183 #define MAX_TRACER_SIZE         100
184 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
185 static char *default_bootup_tracer;
186
187 static bool allocate_snapshot;
188 static bool snapshot_at_boot;
189
190 static int __init set_cmdline_ftrace(char *str)
191 {
192         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
193         default_bootup_tracer = bootup_tracer_buf;
194         /* We are using ftrace early, expand it */
195         ring_buffer_expanded = true;
196         return 1;
197 }
198 __setup("ftrace=", set_cmdline_ftrace);
199
200 static int __init set_ftrace_dump_on_oops(char *str)
201 {
202         if (*str++ != '=' || !*str || !strcmp("1", str)) {
203                 ftrace_dump_on_oops = DUMP_ALL;
204                 return 1;
205         }
206
207         if (!strcmp("orig_cpu", str) || !strcmp("2", str)) {
208                 ftrace_dump_on_oops = DUMP_ORIG;
209                 return 1;
210         }
211
212         return 0;
213 }
214 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
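/*
 * For reference, the forms the parser above accepts on the kernel
 * command line (a summary of the code above, not an addition to it):
 *
 *   ftrace_dump_on_oops            dump the buffers of all CPUs (DUMP_ALL)
 *   ftrace_dump_on_oops=1          same as above
 *   ftrace_dump_on_oops=orig_cpu   dump only the CPU that triggered the oops (DUMP_ORIG)
 *   ftrace_dump_on_oops=2          same as orig_cpu
 */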
215
216 static int __init stop_trace_on_warning(char *str)
217 {
218         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
219                 __disable_trace_on_warning = 1;
220         return 1;
221 }
222 __setup("traceoff_on_warning", stop_trace_on_warning);
223
224 static int __init boot_alloc_snapshot(char *str)
225 {
226         allocate_snapshot = true;
227         /* We also need the main ring buffer expanded */
228         ring_buffer_expanded = true;
229         return 1;
230 }
231 __setup("alloc_snapshot", boot_alloc_snapshot);
232
233
234 static int __init boot_snapshot(char *str)
235 {
236         snapshot_at_boot = true;
237         boot_alloc_snapshot(str);
238         return 1;
239 }
240 __setup("ftrace_boot_snapshot", boot_snapshot);
241
242
243 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
244
245 static int __init set_trace_boot_options(char *str)
246 {
247         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
248         return 1;
249 }
250 __setup("trace_options=", set_trace_boot_options);
251
252 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
253 static char *trace_boot_clock __initdata;
254
255 static int __init set_trace_boot_clock(char *str)
256 {
257         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
258         trace_boot_clock = trace_boot_clock_buf;
259         return 1;
260 }
261 __setup("trace_clock=", set_trace_boot_clock);
262
263 static int __init set_tracepoint_printk(char *str)
264 {
265         /* Ignore the "tp_printk_stop_on_boot" param */
266         if (*str == '_')
267                 return 0;
268
269         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
270                 tracepoint_printk = 1;
271         return 1;
272 }
273 __setup("tp_printk", set_tracepoint_printk);
274
275 static int __init set_tracepoint_printk_stop(char *str)
276 {
277         tracepoint_printk_stop_on_boot = true;
278         return 1;
279 }
280 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
281
282 unsigned long long ns2usecs(u64 nsec)
283 {
284         nsec += 500;
285         do_div(nsec, 1000);
286         return nsec;
287 }
288
289 static void
290 trace_process_export(struct trace_export *export,
291                struct ring_buffer_event *event, int flag)
292 {
293         struct trace_entry *entry;
294         unsigned int size = 0;
295
296         if (export->flags & flag) {
297                 entry = ring_buffer_event_data(event);
298                 size = ring_buffer_event_length(event);
299                 export->write(export, entry, size);
300         }
301 }
302
303 static DEFINE_MUTEX(ftrace_export_lock);
304
305 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
306
307 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
308 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
309 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
310
311 static inline void ftrace_exports_enable(struct trace_export *export)
312 {
313         if (export->flags & TRACE_EXPORT_FUNCTION)
314                 static_branch_inc(&trace_function_exports_enabled);
315
316         if (export->flags & TRACE_EXPORT_EVENT)
317                 static_branch_inc(&trace_event_exports_enabled);
318
319         if (export->flags & TRACE_EXPORT_MARKER)
320                 static_branch_inc(&trace_marker_exports_enabled);
321 }
322
323 static inline void ftrace_exports_disable(struct trace_export *export)
324 {
325         if (export->flags & TRACE_EXPORT_FUNCTION)
326                 static_branch_dec(&trace_function_exports_enabled);
327
328         if (export->flags & TRACE_EXPORT_EVENT)
329                 static_branch_dec(&trace_event_exports_enabled);
330
331         if (export->flags & TRACE_EXPORT_MARKER)
332                 static_branch_dec(&trace_marker_exports_enabled);
333 }
334
335 static void ftrace_exports(struct ring_buffer_event *event, int flag)
336 {
337         struct trace_export *export;
338
339         preempt_disable_notrace();
340
341         export = rcu_dereference_raw_check(ftrace_exports_list);
342         while (export) {
343                 trace_process_export(export, event, flag);
344                 export = rcu_dereference_raw_check(export->next);
345         }
346
347         preempt_enable_notrace();
348 }
349
350 static inline void
351 add_trace_export(struct trace_export **list, struct trace_export *export)
352 {
353         rcu_assign_pointer(export->next, *list);
354         /*
355          * We are adding the export to the list, but another
356          * CPU might be walking that list. We need to make sure
357          * the export->next pointer is valid before another CPU sees
358          * the export pointer added to the list.
359          */
360         rcu_assign_pointer(*list, export);
361 }
362
363 static inline int
364 rm_trace_export(struct trace_export **list, struct trace_export *export)
365 {
366         struct trace_export **p;
367
368         for (p = list; *p != NULL; p = &(*p)->next)
369                 if (*p == export)
370                         break;
371
372         if (*p != export)
373                 return -1;
374
375         rcu_assign_pointer(*p, (*p)->next);
376
377         return 0;
378 }
379
380 static inline void
381 add_ftrace_export(struct trace_export **list, struct trace_export *export)
382 {
383         ftrace_exports_enable(export);
384
385         add_trace_export(list, export);
386 }
387
388 static inline int
389 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
390 {
391         int ret;
392
393         ret = rm_trace_export(list, export);
394         ftrace_exports_disable(export);
395
396         return ret;
397 }
398
399 int register_ftrace_export(struct trace_export *export)
400 {
401         if (WARN_ON_ONCE(!export->write))
402                 return -1;
403
404         mutex_lock(&ftrace_export_lock);
405
406         add_ftrace_export(&ftrace_exports_list, export);
407
408         mutex_unlock(&ftrace_export_lock);
409
410         return 0;
411 }
412 EXPORT_SYMBOL_GPL(register_ftrace_export);
413
414 int unregister_ftrace_export(struct trace_export *export)
415 {
416         int ret;
417
418         mutex_lock(&ftrace_export_lock);
419
420         ret = rm_ftrace_export(&ftrace_exports_list, export);
421
422         mutex_unlock(&ftrace_export_lock);
423
424         return ret;
425 }
426 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
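/*
 * Illustrative sketch (not part of this file): a minimal consumer of the
 * register_ftrace_export()/unregister_ftrace_export() API above, assuming
 * the struct trace_export layout from <linux/trace.h>. The pr_info() sink
 * and the "my_export" names are hypothetical.
 */
static void my_export_write(struct trace_export *export, const void *entry,
			    unsigned int size)
{
	/* entry is the raw trace entry payload, size its length in bytes */
	pr_info("exported %u bytes of event data\n", size);
}

static struct trace_export my_export = {
	.write	= my_export_write,
	.flags	= TRACE_EXPORT_EVENT,
};

/*
 * A module would call register_ftrace_export(&my_export) in its init path
 * and unregister_ftrace_export(&my_export) on exit.
 */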
427
428 /* trace_flags holds trace_options default values */
429 #define TRACE_DEFAULT_FLAGS                                             \
430         (FUNCTION_DEFAULT_FLAGS |                                       \
431          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
432          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
433          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
434          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |                     \
435          TRACE_ITER_HASH_PTR)
436
437 /* trace_options that are only supported by global_trace */
438 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
439                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
440
441 /* trace_flags that are default zero for instances */
442 #define ZEROED_TRACE_FLAGS \
443         (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
444
445 /*
446  * The global_trace is the descriptor that holds the top-level tracing
447  * buffers for the live tracing.
448  */
449 static struct trace_array global_trace = {
450         .trace_flags = TRACE_DEFAULT_FLAGS,
451 };
452
453 LIST_HEAD(ftrace_trace_arrays);
454
455 int trace_array_get(struct trace_array *this_tr)
456 {
457         struct trace_array *tr;
458         int ret = -ENODEV;
459
460         mutex_lock(&trace_types_lock);
461         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
462                 if (tr == this_tr) {
463                         tr->ref++;
464                         ret = 0;
465                         break;
466                 }
467         }
468         mutex_unlock(&trace_types_lock);
469
470         return ret;
471 }
472
473 static void __trace_array_put(struct trace_array *this_tr)
474 {
475         WARN_ON(!this_tr->ref);
476         this_tr->ref--;
477 }
478
479 /**
480  * trace_array_put - Decrement the reference counter for this trace array.
481  * @this_tr : pointer to the trace array
482  *
483  * NOTE: Use this when we no longer need the trace array returned by
484  * trace_array_get_by_name(). This ensures the trace array can be later
485  * destroyed.
486  *
487  */
488 void trace_array_put(struct trace_array *this_tr)
489 {
490         if (!this_tr)
491                 return;
492
493         mutex_lock(&trace_types_lock);
494         __trace_array_put(this_tr);
495         mutex_unlock(&trace_types_lock);
496 }
497 EXPORT_SYMBOL_GPL(trace_array_put);
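/*
 * Illustrative sketch (not part of this file): the pairing the NOTE above
 * describes, using the instance API from <linux/trace.h>. The instance
 * name "my_instance" and the function name are hypothetical.
 */
static int my_driver_setup(void)
{
	struct trace_array *tr;

	/* Creates the instance if needed and takes a reference on it */
	tr = trace_array_get_by_name("my_instance");
	if (!tr)
		return -ENOMEM;

	/* ... use tr with the trace_array_*() helpers ... */

	/* Drop the reference so the instance can later be destroyed */
	trace_array_put(tr);
	return 0;
}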
498
499 int tracing_check_open_get_tr(struct trace_array *tr)
500 {
501         int ret;
502
503         ret = security_locked_down(LOCKDOWN_TRACEFS);
504         if (ret)
505                 return ret;
506
507         if (tracing_disabled)
508                 return -ENODEV;
509
510         if (tr && trace_array_get(tr) < 0)
511                 return -ENODEV;
512
513         return 0;
514 }
515
516 int call_filter_check_discard(struct trace_event_call *call, void *rec,
517                               struct trace_buffer *buffer,
518                               struct ring_buffer_event *event)
519 {
520         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
521             !filter_match_preds(call->filter, rec)) {
522                 __trace_event_discard_commit(buffer, event);
523                 return 1;
524         }
525
526         return 0;
527 }
528
529 /**
530  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
531  * @filtered_pids: The list of pids to check
532  * @search_pid: The PID to find in @filtered_pids
533  *
534  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
535  */
536 bool
537 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
538 {
539         return trace_pid_list_is_set(filtered_pids, search_pid);
540 }
541
542 /**
543  * trace_ignore_this_task - should a task be ignored for tracing
544  * @filtered_pids: The list of pids to check
545  * @filtered_no_pids: The list of pids not to be traced
546  * @task: The task that should be ignored if not filtered
547  *
548  * Checks if @task should be traced or not from @filtered_pids.
549  * Returns true if @task should *NOT* be traced.
550  * Returns false if @task should be traced.
551  */
552 bool
553 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
554                        struct trace_pid_list *filtered_no_pids,
555                        struct task_struct *task)
556 {
557         /*
558          * If filtered_no_pids is not empty, and the task's pid is listed
559          * in filtered_no_pids, then return true.
560          * Otherwise, if filtered_pids is empty, that means we can
561          * trace all tasks. If it has content, then only trace pids
562          * within filtered_pids.
563          */
564
565         return (filtered_pids &&
566                 !trace_find_filtered_pid(filtered_pids, task->pid)) ||
567                 (filtered_no_pids &&
568                  trace_find_filtered_pid(filtered_no_pids, task->pid));
569 }
570
571 /**
572  * trace_filter_add_remove_task - Add or remove a task from a pid_list
573  * @pid_list: The list to modify
574  * @self: The current task for fork or NULL for exit
575  * @task: The task to add or remove
576  *
577  * When adding a task, if @self is defined, the task is only added if @self
578  * is also included in @pid_list. This happens on fork and tasks should
579  * only be added when the parent is listed. If @self is NULL, then the
580  * @task pid will be removed from the list, which would happen on exit
581  * of a task.
582  */
583 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
584                                   struct task_struct *self,
585                                   struct task_struct *task)
586 {
587         if (!pid_list)
588                 return;
589
590         /* For forks, we only add if the forking task is listed */
591         if (self) {
592                 if (!trace_find_filtered_pid(pid_list, self->pid))
593                         return;
594         }
595
596         /* "self" is set for forks, and NULL for exits */
597         if (self)
598                 trace_pid_list_set(pid_list, task->pid);
599         else
600                 trace_pid_list_clear(pid_list, task->pid);
601 }
602
603 /**
604  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
605  * @pid_list: The pid list to show
606  * @v: The last pid that was shown (the actual pid + 1, so that zero can be displayed)
607  * @pos: The position of the file
608  *
609  * This is used by the seq_file "next" operation to iterate the pids
610  * listed in a trace_pid_list structure.
611  *
612  * Returns the pid+1 as we want to display pid of zero, but NULL would
613  * stop the iteration.
614  */
615 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
616 {
617         long pid = (unsigned long)v;
618         unsigned int next;
619
620         (*pos)++;
621
622         /* pid already is +1 of the actual previous bit */
623         if (trace_pid_list_next(pid_list, pid, &next) < 0)
624                 return NULL;
625
626         pid = next;
627
628         /* Return pid + 1 to allow zero to be represented */
629         return (void *)(pid + 1);
630 }
631
632 /**
633  * trace_pid_start - Used for seq_file to start reading pid lists
634  * @pid_list: The pid list to show
635  * @pos: The position of the file
636  *
637  * This is used by seq_file "start" operation to start the iteration
638  * of listing pids.
639  *
640  * Returns the pid+1 as we want to display pid of zero, but NULL would
641  * stop the iteration.
642  */
643 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
644 {
645         unsigned long pid;
646         unsigned int first;
647         loff_t l = 0;
648
649         if (trace_pid_list_first(pid_list, &first) < 0)
650                 return NULL;
651
652         pid = first;
653
654         /* Return pid + 1 so that zero can be the exit value */
655         for (pid++; pid && l < *pos;
656              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
657                 ;
658         return (void *)pid;
659 }
660
661 /**
662  * trace_pid_show - show the current pid in seq_file processing
663  * @m: The seq_file structure to write into
664  * @v: A void pointer of the pid (+1) value to display
665  *
666  * Can be directly used by seq_file operations to display the current
667  * pid value.
668  */
669 int trace_pid_show(struct seq_file *m, void *v)
670 {
671         unsigned long pid = (unsigned long)v - 1;
672
673         seq_printf(m, "%lu\n", pid);
674         return 0;
675 }
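/*
 * Illustrative sketch (not part of this file): how the three helpers above
 * are typically wired into seq_operations for a pid-list file. The
 * "my_pid_list" variable and the empty stop callback are hypothetical;
 * real users (e.g. the ftrace pid files) also take the locks protecting
 * their list in start/stop.
 */
static struct trace_pid_list *my_pid_list;

static void *my_pid_seq_start(struct seq_file *m, loff_t *pos)
{
	return trace_pid_start(my_pid_list, pos);
}

static void *my_pid_seq_next(struct seq_file *m, void *v, loff_t *pos)
{
	return trace_pid_next(my_pid_list, v, pos);
}

static void my_pid_seq_stop(struct seq_file *m, void *v)
{
}

static const struct seq_operations my_pid_seq_ops = {
	.start	= my_pid_seq_start,
	.next	= my_pid_seq_next,
	.stop	= my_pid_seq_stop,
	.show	= trace_pid_show,
};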
676
677 /* 128 should be much more than enough */
678 #define PID_BUF_SIZE            127
679
680 int trace_pid_write(struct trace_pid_list *filtered_pids,
681                     struct trace_pid_list **new_pid_list,
682                     const char __user *ubuf, size_t cnt)
683 {
684         struct trace_pid_list *pid_list;
685         struct trace_parser parser;
686         unsigned long val;
687         int nr_pids = 0;
688         ssize_t read = 0;
689         ssize_t ret;
690         loff_t pos;
691         pid_t pid;
692
693         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
694                 return -ENOMEM;
695
696         /*
697          * Always create a new list: the write is an all or nothing
698          * operation, so a new list is built whenever the user adds
699          * pids. If the operation fails, the current list is
700          * not modified.
701          */
702         pid_list = trace_pid_list_alloc();
703         if (!pid_list) {
704                 trace_parser_put(&parser);
705                 return -ENOMEM;
706         }
707
708         if (filtered_pids) {
709                 /* copy the current bits to the new max */
710                 ret = trace_pid_list_first(filtered_pids, &pid);
711                 while (!ret) {
712                         trace_pid_list_set(pid_list, pid);
713                         ret = trace_pid_list_next(filtered_pids, pid + 1, &pid);
714                         nr_pids++;
715                 }
716         }
717
718         ret = 0;
719         while (cnt > 0) {
720
721                 pos = 0;
722
723                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
724                 if (ret < 0 || !trace_parser_loaded(&parser))
725                         break;
726
727                 read += ret;
728                 ubuf += ret;
729                 cnt -= ret;
730
731                 ret = -EINVAL;
732                 if (kstrtoul(parser.buffer, 0, &val))
733                         break;
734
735                 pid = (pid_t)val;
736
737                 if (trace_pid_list_set(pid_list, pid) < 0) {
738                         ret = -1;
739                         break;
740                 }
741                 nr_pids++;
742
743                 trace_parser_clear(&parser);
744                 ret = 0;
745         }
746         trace_parser_put(&parser);
747
748         if (ret < 0) {
749                 trace_pid_list_free(pid_list);
750                 return ret;
751         }
752
753         if (!nr_pids) {
754                 /* Cleared the list of pids */
755                 trace_pid_list_free(pid_list);
756                 read = ret;
757                 pid_list = NULL;
758         }
759
760         *new_pid_list = pid_list;
761
762         return read;
763 }
764
765 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
766 {
767         u64 ts;
768
769         /* Early boot up does not have a buffer yet */
770         if (!buf->buffer)
771                 return trace_clock_local();
772
773         ts = ring_buffer_time_stamp(buf->buffer);
774         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
775
776         return ts;
777 }
778
779 u64 ftrace_now(int cpu)
780 {
781         return buffer_ftrace_now(&global_trace.array_buffer, cpu);
782 }
783
784 /**
785  * tracing_is_enabled - Show if global_trace has been enabled
786  *
787  * Shows if the global trace has been enabled or not. It uses the
788  * mirror flag "buffer_disabled" to be used in fast paths such as for
789  * the irqsoff tracer. But it may be inaccurate due to races. If you
790  * need to know the accurate state, use tracing_is_on() which is a little
791  * slower, but accurate.
792  */
793 int tracing_is_enabled(void)
794 {
795         /*
796          * For quick access (irqsoff uses this in fast path), just
797          * return the mirror variable of the state of the ring buffer.
798          * It's a little racy, but we don't really care.
799          */
800         smp_rmb();
801         return !global_trace.buffer_disabled;
802 }
803
804 /*
805  * trace_buf_size is the size in bytes that is allocated
806  * for a buffer. Note, the number of bytes is always rounded
807  * to page size.
808  *
809  * This number is purposely set to the low value of 16384:
810  * if a dump on oops happens, it is much appreciated not to
811  * have to wait for all that output. In any case, this is
812  * configurable at both boot time and run time.
813  */
814 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
815
816 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
817
818 /* trace_types holds a link list of available tracers. */
819 static struct tracer            *trace_types __read_mostly;
820
821 /*
822  * trace_types_lock is used to protect the trace_types list.
823  */
824 DEFINE_MUTEX(trace_types_lock);
825
826 /*
827  * serialize access to the ring buffer
828  *
829  * The ring buffer serializes readers, but that is only low-level protection.
830  * The validity of the events (returned by ring_buffer_peek() etc.)
831  * is not protected by the ring buffer.
832  *
833  * The content of events may become garbage if we allow another process to
834  * consume these events concurrently:
835  *   A) the page of the consumed events may become a normal page
836  *      (not a reader page) in the ring buffer, and this page will be
837  *      rewritten by the events producer.
838  *   B) the page of the consumed events may become a page for splice_read,
839  *      and this page will be returned to the system.
840  *
841  * These primitives allow multiple processes to access different cpu ring
842  * buffers concurrently.
843  *
844  * These primitives don't distinguish read-only from read-consume access.
845  * Multiple read-only accesses are also serialized.
846  */
847
848 #ifdef CONFIG_SMP
849 static DECLARE_RWSEM(all_cpu_access_lock);
850 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
851
852 static inline void trace_access_lock(int cpu)
853 {
854         if (cpu == RING_BUFFER_ALL_CPUS) {
855                 /* gain it for accessing the whole ring buffer. */
856                 down_write(&all_cpu_access_lock);
857         } else {
858                 /* gain it for accessing a cpu ring buffer. */
859
860                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
861                 down_read(&all_cpu_access_lock);
862
863                 /* Secondly block other access to this @cpu ring buffer. */
864                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
865         }
866 }
867
868 static inline void trace_access_unlock(int cpu)
869 {
870         if (cpu == RING_BUFFER_ALL_CPUS) {
871                 up_write(&all_cpu_access_lock);
872         } else {
873                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
874                 up_read(&all_cpu_access_lock);
875         }
876 }
877
878 static inline void trace_access_lock_init(void)
879 {
880         int cpu;
881
882         for_each_possible_cpu(cpu)
883                 mutex_init(&per_cpu(cpu_access_lock, cpu));
884 }
885
886 #else
887
888 static DEFINE_MUTEX(access_lock);
889
890 static inline void trace_access_lock(int cpu)
891 {
892         (void)cpu;
893         mutex_lock(&access_lock);
894 }
895
896 static inline void trace_access_unlock(int cpu)
897 {
898         (void)cpu;
899         mutex_unlock(&access_lock);
900 }
901
902 static inline void trace_access_lock_init(void)
903 {
904 }
905
906 #endif
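/*
 * Illustrative sketch (not part of this file): the pattern the readers in
 * this file follow when consuming events; "cpu" is either a specific CPU
 * or RING_BUFFER_ALL_CPUS. The function name is hypothetical.
 */
static void my_consume_events(int cpu)
{
	trace_access_lock(cpu);
	/* ... peek at or consume events from the selected buffer(s) ... */
	trace_access_unlock(cpu);
}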
907
908 #ifdef CONFIG_STACKTRACE
909 static void __ftrace_trace_stack(struct trace_buffer *buffer,
910                                  unsigned int trace_ctx,
911                                  int skip, struct pt_regs *regs);
912 static inline void ftrace_trace_stack(struct trace_array *tr,
913                                       struct trace_buffer *buffer,
914                                       unsigned int trace_ctx,
915                                       int skip, struct pt_regs *regs);
916
917 #else
918 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
919                                         unsigned int trace_ctx,
920                                         int skip, struct pt_regs *regs)
921 {
922 }
923 static inline void ftrace_trace_stack(struct trace_array *tr,
924                                       struct trace_buffer *buffer,
925                                       unsigned long trace_ctx,
926                                       int skip, struct pt_regs *regs)
927 {
928 }
929
930 #endif
931
932 static __always_inline void
933 trace_event_setup(struct ring_buffer_event *event,
934                   int type, unsigned int trace_ctx)
935 {
936         struct trace_entry *ent = ring_buffer_event_data(event);
937
938         tracing_generic_entry_update(ent, type, trace_ctx);
939 }
940
941 static __always_inline struct ring_buffer_event *
942 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
943                           int type,
944                           unsigned long len,
945                           unsigned int trace_ctx)
946 {
947         struct ring_buffer_event *event;
948
949         event = ring_buffer_lock_reserve(buffer, len);
950         if (event != NULL)
951                 trace_event_setup(event, type, trace_ctx);
952
953         return event;
954 }
955
956 void tracer_tracing_on(struct trace_array *tr)
957 {
958         if (tr->array_buffer.buffer)
959                 ring_buffer_record_on(tr->array_buffer.buffer);
960         /*
961          * This flag is looked at when buffers haven't been allocated
962          * yet, or by some tracers (like irqsoff), that just want to
963          * know if the ring buffer has been disabled, but it can handle
964          * races where it gets disabled but we still do a record.
965          * As the check is in the fast path of the tracers, it is more
966          * important to be fast than accurate.
967          */
968         tr->buffer_disabled = 0;
969         /* Make the flag seen by readers */
970         smp_wmb();
971 }
972
973 /**
974  * tracing_on - enable tracing buffers
975  *
976  * This function enables tracing buffers that may have been
977  * disabled with tracing_off.
978  */
979 void tracing_on(void)
980 {
981         tracer_tracing_on(&global_trace);
982 }
983 EXPORT_SYMBOL_GPL(tracing_on);
984
985
986 static __always_inline void
987 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
988 {
989         __this_cpu_write(trace_taskinfo_save, true);
990
991         /* If this is the temp buffer, we need to commit fully */
992         if (this_cpu_read(trace_buffered_event) == event) {
993                 /* Length is in event->array[0] */
994                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
995                 /* Release the temp buffer */
996                 this_cpu_dec(trace_buffered_event_cnt);
997                 /* ring_buffer_unlock_commit() enables preemption */
998                 preempt_enable_notrace();
999         } else
1000                 ring_buffer_unlock_commit(buffer, event);
1001 }
1002
1003 /**
1004  * __trace_puts - write a constant string into the trace buffer.
1005  * @ip:    The address of the caller
1006  * @str:   The constant string to write
1007  * @size:  The size of the string.
1008  */
1009 int __trace_puts(unsigned long ip, const char *str, int size)
1010 {
1011         struct ring_buffer_event *event;
1012         struct trace_buffer *buffer;
1013         struct print_entry *entry;
1014         unsigned int trace_ctx;
1015         int alloc;
1016
1017         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1018                 return 0;
1019
1020         if (unlikely(tracing_selftest_running || tracing_disabled))
1021                 return 0;
1022
1023         alloc = sizeof(*entry) + size + 2; /* possible \n added */
1024
1025         trace_ctx = tracing_gen_ctx();
1026         buffer = global_trace.array_buffer.buffer;
1027         ring_buffer_nest_start(buffer);
1028         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1029                                             trace_ctx);
1030         if (!event) {
1031                 size = 0;
1032                 goto out;
1033         }
1034
1035         entry = ring_buffer_event_data(event);
1036         entry->ip = ip;
1037
1038         memcpy(&entry->buf, str, size);
1039
1040         /* Add a newline if necessary */
1041         if (entry->buf[size - 1] != '\n') {
1042                 entry->buf[size] = '\n';
1043                 entry->buf[size + 1] = '\0';
1044         } else
1045                 entry->buf[size] = '\0';
1046
1047         __buffer_unlock_commit(buffer, event);
1048         ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1049  out:
1050         ring_buffer_nest_end(buffer);
1051         return size;
1052 }
1053 EXPORT_SYMBOL_GPL(__trace_puts);
1054
1055 /**
1056  * __trace_bputs - write the pointer to a constant string into trace buffer
1057  * @ip:    The address of the caller
1058  * @str:   The constant string to write to the buffer to
1059  */
1060 int __trace_bputs(unsigned long ip, const char *str)
1061 {
1062         struct ring_buffer_event *event;
1063         struct trace_buffer *buffer;
1064         struct bputs_entry *entry;
1065         unsigned int trace_ctx;
1066         int size = sizeof(struct bputs_entry);
1067         int ret = 0;
1068
1069         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1070                 return 0;
1071
1072         if (unlikely(tracing_selftest_running || tracing_disabled))
1073                 return 0;
1074
1075         trace_ctx = tracing_gen_ctx();
1076         buffer = global_trace.array_buffer.buffer;
1077
1078         ring_buffer_nest_start(buffer);
1079         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1080                                             trace_ctx);
1081         if (!event)
1082                 goto out;
1083
1084         entry = ring_buffer_event_data(event);
1085         entry->ip                       = ip;
1086         entry->str                      = str;
1087
1088         __buffer_unlock_commit(buffer, event);
1089         ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1090
1091         ret = 1;
1092  out:
1093         ring_buffer_nest_end(buffer);
1094         return ret;
1095 }
1096 EXPORT_SYMBOL_GPL(__trace_bputs);
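/*
 * Illustrative note (not part of this file): callers normally reach the two
 * helpers above through the trace_puts() macro, which selects
 * __trace_bputs() for compile-time-constant strings and __trace_puts()
 * otherwise. The surrounding function is hypothetical.
 */
static void my_probe_hit(void)
{
	trace_puts("probe hit\n");	/* constant string: uses __trace_bputs() */
}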
1097
1098 #ifdef CONFIG_TRACER_SNAPSHOT
1099 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1100                                            void *cond_data)
1101 {
1102         struct tracer *tracer = tr->current_trace;
1103         unsigned long flags;
1104
1105         if (in_nmi()) {
1106                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1107                 internal_trace_puts("*** snapshot is being ignored        ***\n");
1108                 return;
1109         }
1110
1111         if (!tr->allocated_snapshot) {
1112                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
1113                 internal_trace_puts("*** stopping trace here!   ***\n");
1114                 tracing_off();
1115                 return;
1116         }
1117
1118         /* Note, snapshot can not be used when the tracer uses it */
1119         if (tracer->use_max_tr) {
1120                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
1121                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
1122                 return;
1123         }
1124
1125         local_irq_save(flags);
1126         update_max_tr(tr, current, smp_processor_id(), cond_data);
1127         local_irq_restore(flags);
1128 }
1129
1130 void tracing_snapshot_instance(struct trace_array *tr)
1131 {
1132         tracing_snapshot_instance_cond(tr, NULL);
1133 }
1134
1135 /**
1136  * tracing_snapshot - take a snapshot of the current buffer.
1137  *
1138  * This causes a swap between the snapshot buffer and the current live
1139  * tracing buffer. You can use this to take snapshots of the live
1140  * trace when some condition is triggered, but continue to trace.
1141  *
1142  * Note, make sure to allocate the snapshot with either
1143  * a tracing_snapshot_alloc(), or by doing it manually
1144  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
1145  *
1146  * If the snapshot buffer is not allocated, it will stop tracing.
1147  * Basically making a permanent snapshot.
1148  */
1149 void tracing_snapshot(void)
1150 {
1151         struct trace_array *tr = &global_trace;
1152
1153         tracing_snapshot_instance(tr);
1154 }
1155 EXPORT_SYMBOL_GPL(tracing_snapshot);
1156
1157 /**
1158  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1159  * @tr:         The tracing instance to snapshot
1160  * @cond_data:  The data to be tested conditionally, and possibly saved
1161  *
1162  * This is the same as tracing_snapshot() except that the snapshot is
1163  * conditional - the snapshot will only happen if the
1164  * cond_snapshot.update() implementation receiving the cond_data
1165  * returns true, which means that the trace array's cond_snapshot
1166  * update() operation used the cond_data to determine whether the
1167  * snapshot should be taken, and if it was, presumably saved it along
1168  * with the snapshot.
1169  */
1170 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1171 {
1172         tracing_snapshot_instance_cond(tr, cond_data);
1173 }
1174 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1175
1176 /**
1177  * tracing_snapshot_cond_data - get the user data associated with a snapshot
1178  * @tr:         The tracing instance
1179  *
1180  * When the user enables a conditional snapshot using
1181  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1182  * with the snapshot.  This accessor is used to retrieve it.
1183  *
1184  * Should not be called from cond_snapshot.update(), since it takes
1185  * the tr->max_lock lock, which the code calling
1186  * cond_snapshot.update() has already done.
1187  *
1188  * Returns the cond_data associated with the trace array's snapshot.
1189  */
1190 void *tracing_cond_snapshot_data(struct trace_array *tr)
1191 {
1192         void *cond_data = NULL;
1193
1194         arch_spin_lock(&tr->max_lock);
1195
1196         if (tr->cond_snapshot)
1197                 cond_data = tr->cond_snapshot->cond_data;
1198
1199         arch_spin_unlock(&tr->max_lock);
1200
1201         return cond_data;
1202 }
1203 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1204
1205 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1206                                         struct array_buffer *size_buf, int cpu_id);
1207 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1208
1209 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1210 {
1211         int ret;
1212
1213         if (!tr->allocated_snapshot) {
1214
1215                 /* allocate spare buffer */
1216                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1217                                    &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1218                 if (ret < 0)
1219                         return ret;
1220
1221                 tr->allocated_snapshot = true;
1222         }
1223
1224         return 0;
1225 }
1226
1227 static void free_snapshot(struct trace_array *tr)
1228 {
1229         /*
1230          * We don't free the ring buffer; instead, we resize it, because
1231          * the max_tr ring buffer has some state (e.g. ring->clock) and
1232          * we want to preserve it.
1233          */
1234         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1235         set_buffer_entries(&tr->max_buffer, 1);
1236         tracing_reset_online_cpus(&tr->max_buffer);
1237         tr->allocated_snapshot = false;
1238 }
1239
1240 /**
1241  * tracing_alloc_snapshot - allocate snapshot buffer.
1242  *
1243  * This only allocates the snapshot buffer if it isn't already
1244  * allocated - it doesn't also take a snapshot.
1245  *
1246  * This is meant to be used in cases where the snapshot buffer needs
1247  * to be set up for events that can't sleep but need to be able to
1248  * trigger a snapshot.
1249  */
1250 int tracing_alloc_snapshot(void)
1251 {
1252         struct trace_array *tr = &global_trace;
1253         int ret;
1254
1255         ret = tracing_alloc_snapshot_instance(tr);
1256         WARN_ON(ret < 0);
1257
1258         return ret;
1259 }
1260 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1261
1262 /**
1263  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1264  *
1265  * This is similar to tracing_snapshot(), but it will allocate the
1266  * snapshot buffer if it isn't already allocated. Use this only
1267  * where it is safe to sleep, as the allocation may sleep.
1268  *
1269  * This causes a swap between the snapshot buffer and the current live
1270  * tracing buffer. You can use this to take snapshots of the live
1271  * trace when some condition is triggered, but continue to trace.
1272  */
1273 void tracing_snapshot_alloc(void)
1274 {
1275         int ret;
1276
1277         ret = tracing_alloc_snapshot();
1278         if (ret < 0)
1279                 return;
1280
1281         tracing_snapshot();
1282 }
1283 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
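/*
 * Illustrative sketch (not part of this file): the split described above,
 * allocating the spare buffer from a context that may sleep and taking the
 * snapshot later from a fast path. Both function names are hypothetical.
 */
static int my_setup(void)
{
	/* May sleep: make sure the spare buffer exists up front */
	return tracing_alloc_snapshot();
}

static void my_interesting_event(void)
{
	/* Swap the live buffer with the (already allocated) snapshot buffer */
	tracing_snapshot();
}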
1284
1285 /**
1286  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1287  * @tr:         The tracing instance
1288  * @cond_data:  User data to associate with the snapshot
1289  * @update:     Implementation of the cond_snapshot update function
1290  *
1291  * Check whether the conditional snapshot for the given instance has
1292  * already been enabled, or if the current tracer is already using a
1293  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1294  * save the cond_data and update function inside.
1295  *
1296  * Returns 0 if successful, error otherwise.
1297  */
1298 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1299                                  cond_update_fn_t update)
1300 {
1301         struct cond_snapshot *cond_snapshot;
1302         int ret = 0;
1303
1304         cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1305         if (!cond_snapshot)
1306                 return -ENOMEM;
1307
1308         cond_snapshot->cond_data = cond_data;
1309         cond_snapshot->update = update;
1310
1311         mutex_lock(&trace_types_lock);
1312
1313         ret = tracing_alloc_snapshot_instance(tr);
1314         if (ret)
1315                 goto fail_unlock;
1316
1317         if (tr->current_trace->use_max_tr) {
1318                 ret = -EBUSY;
1319                 goto fail_unlock;
1320         }
1321
1322         /*
1323          * The cond_snapshot can only change to NULL without the
1324          * trace_types_lock. We don't care if we race with it going
1325          * to NULL, but we want to make sure that it's not set to
1326          * something other than NULL when we get here, which we can
1327          * do safely with only holding the trace_types_lock and not
1328          * having to take the max_lock.
1329          */
1330         if (tr->cond_snapshot) {
1331                 ret = -EBUSY;
1332                 goto fail_unlock;
1333         }
1334
1335         arch_spin_lock(&tr->max_lock);
1336         tr->cond_snapshot = cond_snapshot;
1337         arch_spin_unlock(&tr->max_lock);
1338
1339         mutex_unlock(&trace_types_lock);
1340
1341         return ret;
1342
1343  fail_unlock:
1344         mutex_unlock(&trace_types_lock);
1345         kfree(cond_snapshot);
1346         return ret;
1347 }
1348 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
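/*
 * Illustrative sketch (not part of this file): a cond_snapshot update()
 * callback of the kind registered above, assuming the
 * bool (*)(struct trace_array *, void *) shape of cond_update_fn_t.
 * The my_cond structure and its threshold logic are hypothetical.
 */
struct my_cond {
	u64 threshold;
	u64 observed;
};

static bool my_cond_update(struct trace_array *tr, void *cond_data)
{
	struct my_cond *cond = cond_data;

	/* Returning true is what lets the snapshot actually be taken */
	return cond->observed > cond->threshold;
}

/*
 * Enabled with tracing_snapshot_cond_enable(tr, &my_cond_instance,
 * my_cond_update) and triggered with tracing_snapshot_cond(tr,
 * &my_cond_instance).
 */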
1349
1350 /**
1351  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1352  * @tr:         The tracing instance
1353  *
1354  * Check whether the conditional snapshot for the given instance is
1355  * enabled; if so, free the cond_snapshot associated with it,
1356  * otherwise return -EINVAL.
1357  *
1358  * Returns 0 if successful, error otherwise.
1359  */
1360 int tracing_snapshot_cond_disable(struct trace_array *tr)
1361 {
1362         int ret = 0;
1363
1364         arch_spin_lock(&tr->max_lock);
1365
1366         if (!tr->cond_snapshot)
1367                 ret = -EINVAL;
1368         else {
1369                 kfree(tr->cond_snapshot);
1370                 tr->cond_snapshot = NULL;
1371         }
1372
1373         arch_spin_unlock(&tr->max_lock);
1374
1375         return ret;
1376 }
1377 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1378 #else
1379 void tracing_snapshot(void)
1380 {
1381         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1382 }
1383 EXPORT_SYMBOL_GPL(tracing_snapshot);
1384 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1385 {
1386         WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1387 }
1388 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1389 int tracing_alloc_snapshot(void)
1390 {
1391         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1392         return -ENODEV;
1393 }
1394 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1395 void tracing_snapshot_alloc(void)
1396 {
1397         /* Give warning */
1398         tracing_snapshot();
1399 }
1400 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1401 void *tracing_cond_snapshot_data(struct trace_array *tr)
1402 {
1403         return NULL;
1404 }
1405 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1406 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1407 {
1408         return -ENODEV;
1409 }
1410 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1411 int tracing_snapshot_cond_disable(struct trace_array *tr)
1412 {
1413         return false;
1414 }
1415 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1416 #endif /* CONFIG_TRACER_SNAPSHOT */
1417
1418 void tracer_tracing_off(struct trace_array *tr)
1419 {
1420         if (tr->array_buffer.buffer)
1421                 ring_buffer_record_off(tr->array_buffer.buffer);
1422         /*
1423          * This flag is looked at when buffers haven't been allocated
1424          * yet, or by some tracers (like irqsoff), that just want to
1425          * know if the ring buffer has been disabled, but it can handle
1426          * races where it gets disabled but we still do a record.
1427          * As the check is in the fast path of the tracers, it is more
1428          * important to be fast than accurate.
1429          */
1430         tr->buffer_disabled = 1;
1431         /* Make the flag seen by readers */
1432         smp_wmb();
1433 }
1434
1435 /**
1436  * tracing_off - turn off tracing buffers
1437  *
1438  * This function stops the tracing buffers from recording data.
1439  * It does not disable any overhead the tracers themselves may
1440  * be causing. This function simply causes all recording to
1441  * the ring buffers to fail.
1442  */
1443 void tracing_off(void)
1444 {
1445         tracer_tracing_off(&global_trace);
1446 }
1447 EXPORT_SYMBOL_GPL(tracing_off);
1448
1449 void disable_trace_on_warning(void)
1450 {
1451         if (__disable_trace_on_warning) {
1452                 trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1453                         "Disabling tracing due to warning\n");
1454                 tracing_off();
1455         }
1456 }
1457
1458 /**
1459  * tracer_tracing_is_on - show real state of ring buffer enabled
1460  * @tr : the trace array to know if ring buffer is enabled
1461  *
1462  * Shows real state of the ring buffer if it is enabled or not.
1463  */
1464 bool tracer_tracing_is_on(struct trace_array *tr)
1465 {
1466         if (tr->array_buffer.buffer)
1467                 return ring_buffer_record_is_on(tr->array_buffer.buffer);
1468         return !tr->buffer_disabled;
1469 }
1470
1471 /**
1472  * tracing_is_on - show state of ring buffers enabled
1473  */
1474 int tracing_is_on(void)
1475 {
1476         return tracer_tracing_is_on(&global_trace);
1477 }
1478 EXPORT_SYMBOL_GPL(tracing_is_on);
1479
1480 static int __init set_buf_size(char *str)
1481 {
1482         unsigned long buf_size;
1483
1484         if (!str)
1485                 return 0;
1486         buf_size = memparse(str, &str);
1487         /*
1488          * nr_entries can not be zero and the startup
1489          * tests require some buffer space. Therefore
1490          * ensure we have at least 4096 bytes of buffer.
1491          */
1492         trace_buf_size = max(4096UL, buf_size);
1493         return 1;
1494 }
1495 __setup("trace_buf_size=", set_buf_size);
1496
1497 static int __init set_tracing_thresh(char *str)
1498 {
1499         unsigned long threshold;
1500         int ret;
1501
1502         if (!str)
1503                 return 0;
1504         ret = kstrtoul(str, 0, &threshold);
1505         if (ret < 0)
1506                 return 0;
1507         tracing_thresh = threshold * 1000;
1508         return 1;
1509 }
1510 __setup("tracing_thresh=", set_tracing_thresh);
1511
1512 unsigned long nsecs_to_usecs(unsigned long nsecs)
1513 {
1514         return nsecs / 1000;
1515 }
1516
1517 /*
1518  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1519  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1520  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1521  * of strings in the order that the evals (enum) were defined.
1522  */
1523 #undef C
1524 #define C(a, b) b
1525
1526 /* These must match the bit positions in trace_iterator_flags */
1527 static const char *trace_options[] = {
1528         TRACE_FLAGS
1529         NULL
1530 };
1531
1532 static struct {
1533         u64 (*func)(void);
1534         const char *name;
1535         int in_ns;              /* is this clock in nanoseconds? */
1536 } trace_clocks[] = {
1537         { trace_clock_local,            "local",        1 },
1538         { trace_clock_global,           "global",       1 },
1539         { trace_clock_counter,          "counter",      0 },
1540         { trace_clock_jiffies,          "uptime",       0 },
1541         { trace_clock,                  "perf",         1 },
1542         { ktime_get_mono_fast_ns,       "mono",         1 },
1543         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1544         { ktime_get_boot_fast_ns,       "boot",         1 },
1545         ARCH_TRACE_CLOCKS
1546 };
1547
1548 bool trace_clock_in_ns(struct trace_array *tr)
1549 {
1550         if (trace_clocks[tr->clock_id].in_ns)
1551                 return true;
1552
1553         return false;
1554 }
1555
1556 /*
1557  * trace_parser_get_init - gets the buffer for trace parser
1558  */
1559 int trace_parser_get_init(struct trace_parser *parser, int size)
1560 {
1561         memset(parser, 0, sizeof(*parser));
1562
1563         parser->buffer = kmalloc(size, GFP_KERNEL);
1564         if (!parser->buffer)
1565                 return 1;
1566
1567         parser->size = size;
1568         return 0;
1569 }
1570
1571 /*
1572  * trace_parser_put - frees the buffer for trace parser
1573  */
1574 void trace_parser_put(struct trace_parser *parser)
1575 {
1576         kfree(parser->buffer);
1577         parser->buffer = NULL;
1578 }
1579
1580 /*
1581  * trace_get_user - reads the user input string separated by  space
1582  * (matched by isspace(ch))
1583  *
1584  * For each string found the 'struct trace_parser' is updated,
1585  * and the function returns.
1586  *
1587  * Returns number of bytes read.
1588  *
1589  * See kernel/trace/trace.h for 'struct trace_parser' details.
1590  */
1591 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1592         size_t cnt, loff_t *ppos)
1593 {
1594         char ch;
1595         size_t read = 0;
1596         ssize_t ret;
1597
1598         if (!*ppos)
1599                 trace_parser_clear(parser);
1600
1601         ret = get_user(ch, ubuf++);
1602         if (ret)
1603                 goto out;
1604
1605         read++;
1606         cnt--;
1607
1608         /*
1609          * The parser is not finished with the last write,
1610          * continue reading the user input without skipping spaces.
1611          */
1612         if (!parser->cont) {
1613                 /* skip white space */
1614                 while (cnt && isspace(ch)) {
1615                         ret = get_user(ch, ubuf++);
1616                         if (ret)
1617                                 goto out;
1618                         read++;
1619                         cnt--;
1620                 }
1621
1622                 parser->idx = 0;
1623
1624                 /* only spaces were written */
1625                 if (isspace(ch) || !ch) {
1626                         *ppos += read;
1627                         ret = read;
1628                         goto out;
1629                 }
1630         }
1631
1632         /* read the non-space input */
1633         while (cnt && !isspace(ch) && ch) {
1634                 if (parser->idx < parser->size - 1)
1635                         parser->buffer[parser->idx++] = ch;
1636                 else {
1637                         ret = -EINVAL;
1638                         goto out;
1639                 }
1640                 ret = get_user(ch, ubuf++);
1641                 if (ret)
1642                         goto out;
1643                 read++;
1644                 cnt--;
1645         }
1646
1647         /* We either got finished input or we have to wait for another call. */
1648         if (isspace(ch) || !ch) {
1649                 parser->buffer[parser->idx] = 0;
1650                 parser->cont = false;
1651         } else if (parser->idx < parser->size - 1) {
1652                 parser->cont = true;
1653                 parser->buffer[parser->idx++] = ch;
1654                 /* Make sure the parsed string always terminates with '\0'. */
1655                 parser->buffer[parser->idx] = 0;
1656         } else {
1657                 ret = -EINVAL;
1658                 goto out;
1659         }
1660
1661         *ppos += read;
1662         ret = read;
1663
1664 out:
1665         return ret;
1666 }
1667
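/*
 * Illustrative usage sketch of the parser helpers above (not lifted from
 * a specific caller; the write() handlers for files such as
 * set_ftrace_filter follow roughly this shape, and process_token() is a
 * hypothetical consumer):
 *
 *      struct trace_parser parser;
 *      ssize_t read;
 *      int ret = 0;
 *
 *      if (trace_parser_get_init(&parser, PAGE_SIZE))
 *              return -ENOMEM;
 *
 *      read = trace_get_user(&parser, ubuf, cnt, ppos);
 *      if (read >= 0 && trace_parser_loaded(&parser))
 *              ret = process_token(parser.buffer);
 *
 *      trace_parser_put(&parser);
 *      return ret < 0 ? ret : read;
 */
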
1668 /* TODO add a seq_buf_to_buffer() */
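/*
 * Copy up to @cnt not-yet-read bytes from @s into @buf and advance the
 * read position. Returns the number of bytes copied, or -EBUSY if there
 * is nothing new to read.
 */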
1669 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1670 {
1671         int len;
1672
1673         if (trace_seq_used(s) <= s->seq.readpos)
1674                 return -EBUSY;
1675
1676         len = trace_seq_used(s) - s->seq.readpos;
1677         if (cnt > len)
1678                 cnt = len;
1679         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1680
1681         s->seq.readpos += cnt;
1682         return cnt;
1683 }
1684
1685 unsigned long __read_mostly     tracing_thresh;
1686 static const struct file_operations tracing_max_lat_fops;
1687
1688 #ifdef LATENCY_FS_NOTIFY
1689
1690 static struct workqueue_struct *fsnotify_wq;
1691
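/*
 * Runs in process context off fsnotify_wq and sends the actual
 * FS_MODIFY notification for the tracing_max_latency file.
 */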
1692 static void latency_fsnotify_workfn(struct work_struct *work)
1693 {
1694         struct trace_array *tr = container_of(work, struct trace_array,
1695                                               fsnotify_work);
1696         fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1697 }
1698
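/*
 * irq_work callback: latency_fsnotify() may be invoked from contexts
 * where fsnotify cannot run (see the comment there), so just punt the
 * notification to the workqueue.
 */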
1699 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1700 {
1701         struct trace_array *tr = container_of(iwork, struct trace_array,
1702                                               fsnotify_irqwork);
1703         queue_work(fsnotify_wq, &tr->fsnotify_work);
1704 }
1705
1706 static void trace_create_maxlat_file(struct trace_array *tr,
1707                                      struct dentry *d_tracer)
1708 {
1709         INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1710         init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1711         tr->d_max_latency = trace_create_file("tracing_max_latency",
1712                                               TRACE_MODE_WRITE,
1713                                               d_tracer, &tr->max_latency,
1714                                               &tracing_max_lat_fops);
1715 }
1716
1717 __init static int latency_fsnotify_init(void)
1718 {
1719         fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1720                                       WQ_UNBOUND | WQ_HIGHPRI, 0);
1721         if (!fsnotify_wq) {
1722                 pr_err("Unable to allocate tr_max_lat_wq\n");
1723                 return -ENOMEM;
1724         }
1725         return 0;
1726 }
1727
1728 late_initcall_sync(latency_fsnotify_init);
1729
1730 void latency_fsnotify(struct trace_array *tr)
1731 {
1732         if (!fsnotify_wq)
1733                 return;
1734         /*
1735          * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1736          * possible that we are called from __schedule() or do_idle(), which
1737          * could cause a deadlock.
1738          */
1739         irq_work_queue(&tr->fsnotify_irqwork);
1740 }
1741
1742 #elif defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)  \
1743         || defined(CONFIG_OSNOISE_TRACER)
1744
1745 #define trace_create_maxlat_file(tr, d_tracer)                          \
1746         trace_create_file("tracing_max_latency", TRACE_MODE_WRITE,      \
1747                           d_tracer, &tr->max_latency, &tracing_max_lat_fops)
1748
1749 #else
1750 #define trace_create_maxlat_file(tr, d_tracer)   do { } while (0)
1751 #endif
1752
1753 #ifdef CONFIG_TRACER_MAX_TRACE
1754 /*
1755  * Copy the new maximum trace into the separate maximum-trace
1756  * structure. (This way the maximum trace is permanently saved,
1757  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1758  */
1759 static void
1760 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1761 {
1762         struct array_buffer *trace_buf = &tr->array_buffer;
1763         struct array_buffer *max_buf = &tr->max_buffer;
1764         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1765         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1766
1767         max_buf->cpu = cpu;
1768         max_buf->time_start = data->preempt_timestamp;
1769
1770         max_data->saved_latency = tr->max_latency;
1771         max_data->critical_start = data->critical_start;
1772         max_data->critical_end = data->critical_end;
1773
1774         strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1775         max_data->pid = tsk->pid;
1776         /*
1777          * If tsk == current, then use current_uid(), as that does not use
1778          * RCU. The irq tracer can be called out of RCU scope.
1779          */
1780         if (tsk == current)
1781                 max_data->uid = current_uid();
1782         else
1783                 max_data->uid = task_uid(tsk);
1784
1785         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1786         max_data->policy = tsk->policy;
1787         max_data->rt_priority = tsk->rt_priority;
1788
1789         /* record this task's comm */
1790         tracing_record_cmdline(tsk);
1791         latency_fsnotify(tr);
1792 }
1793
1794 /**
1795  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1796  * @tr: tracer
1797  * @tsk: the task with the latency
1798  * @cpu: The cpu that initiated the trace.
1799  * @cond_data: User data associated with a conditional snapshot
1800  *
1801  * Flip the buffers between the @tr and the max_tr and record information
1802  * about which task was the cause of this latency.
1803  */
1804 void
1805 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1806               void *cond_data)
1807 {
1808         if (tr->stop_count)
1809                 return;
1810
1811         WARN_ON_ONCE(!irqs_disabled());
1812
1813         if (!tr->allocated_snapshot) {
1814                 /* Only the nop tracer should hit this when disabling */
1815                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1816                 return;
1817         }
1818
1819         arch_spin_lock(&tr->max_lock);
1820
1821         /* Inherit the recordable setting from array_buffer */
1822         if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1823                 ring_buffer_record_on(tr->max_buffer.buffer);
1824         else
1825                 ring_buffer_record_off(tr->max_buffer.buffer);
1826
1827 #ifdef CONFIG_TRACER_SNAPSHOT
1828         if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1829                 goto out_unlock;
1830 #endif
1831         swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1832
1833         __update_max_tr(tr, tsk, cpu);
1834
1835  out_unlock:
1836         arch_spin_unlock(&tr->max_lock);
1837 }
1838
1839 /**
1840  * update_max_tr_single - only copy one trace over, and reset the rest
1841  * @tr: tracer
1842  * @tsk: task with the latency
1843  * @cpu: the cpu of the buffer to copy.
1844  *
1845  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1846  */
1847 void
1848 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1849 {
1850         int ret;
1851
1852         if (tr->stop_count)
1853                 return;
1854
1855         WARN_ON_ONCE(!irqs_disabled());
1856         if (!tr->allocated_snapshot) {
1857                 /* Only the nop tracer should hit this when disabling */
1858                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1859                 return;
1860         }
1861
1862         arch_spin_lock(&tr->max_lock);
1863
1864         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1865
1866         if (ret == -EBUSY) {
1867                 /*
1868                  * We failed to swap the buffer due to a commit taking
1869                  * place on this CPU. We fail to record, but we reset
1870                  * the max trace buffer (no one writes directly to it)
1871                  * and flag that it failed.
1872                  */
1873                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1874                         "Failed to swap buffers due to commit in progress\n");
1875         }
1876
1877         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1878
1879         __update_max_tr(tr, tsk, cpu);
1880         arch_spin_unlock(&tr->max_lock);
1881 }
1882 #endif /* CONFIG_TRACER_MAX_TRACE */
1883
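/*
 * Block until the iterator's ring buffer has data to read. @full is
 * passed straight through to ring_buffer_wait() and selects how much
 * data must be available before the waiter is woken.
 */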
1884 static int wait_on_pipe(struct trace_iterator *iter, int full)
1885 {
1886         /* Iterators are static, they should be filled or empty */
1887         if (trace_buffer_iter(iter, iter->cpu_file))
1888                 return 0;
1889
1890         return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1891                                 full);
1892 }
1893
1894 #ifdef CONFIG_FTRACE_STARTUP_TEST
1895 static bool selftests_can_run;
1896
1897 struct trace_selftests {
1898         struct list_head                list;
1899         struct tracer                   *type;
1900 };
1901
1902 static LIST_HEAD(postponed_selftests);
1903
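/* Queue a tracer so that its selftest can be run later in boot. */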
1904 static int save_selftest(struct tracer *type)
1905 {
1906         struct trace_selftests *selftest;
1907
1908         selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1909         if (!selftest)
1910                 return -ENOMEM;
1911
1912         selftest->type = type;
1913         list_add(&selftest->list, &postponed_selftests);
1914         return 0;
1915 }
1916
1917 static int run_tracer_selftest(struct tracer *type)
1918 {
1919         struct trace_array *tr = &global_trace;
1920         struct tracer *saved_tracer = tr->current_trace;
1921         int ret;
1922
1923         if (!type->selftest || tracing_selftest_disabled)
1924                 return 0;
1925
1926         /*
1927          * If a tracer registers early in boot up (before scheduling is
1928          * initialized and such), then do not run its selftests yet.
1929          * Instead, run it a little later in the boot process.
1930          */
1931         if (!selftests_can_run)
1932                 return save_selftest(type);
1933
1934         if (!tracing_is_on()) {
1935                 pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
1936                         type->name);
1937                 return 0;
1938         }
1939
1940         /*
1941          * Run a selftest on this tracer.
1942          * Here we reset the trace buffer, and set the current
1943          * tracer to be this tracer. The tracer can then run some
1944          * internal tracing to verify that everything is in order.
1945          * If we fail, we do not register this tracer.
1946          */
1947         tracing_reset_online_cpus(&tr->array_buffer);
1948
1949         tr->current_trace = type;
1950
1951 #ifdef CONFIG_TRACER_MAX_TRACE
1952         if (type->use_max_tr) {
1953                 /* If we expanded the buffers, make sure the max is expanded too */
1954                 if (ring_buffer_expanded)
1955                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1956                                            RING_BUFFER_ALL_CPUS);
1957                 tr->allocated_snapshot = true;
1958         }
1959 #endif
1960
1961         /* the test is responsible for initializing and enabling */
1962         pr_info("Testing tracer %s: ", type->name);
1963         ret = type->selftest(type, tr);
1964         /* the test is responsible for resetting too */
1965         tr->current_trace = saved_tracer;
1966         if (ret) {
1967                 printk(KERN_CONT "FAILED!\n");
1968                 /* Add the warning after printing 'FAILED' */
1969                 WARN_ON(1);
1970                 return -1;
1971         }
1972         /* Only reset on passing, to avoid touching corrupted buffers */
1973         tracing_reset_online_cpus(&tr->array_buffer);
1974
1975 #ifdef CONFIG_TRACER_MAX_TRACE
1976         if (type->use_max_tr) {
1977                 tr->allocated_snapshot = false;
1978
1979                 /* Shrink the max buffer again */
1980                 if (ring_buffer_expanded)
1981                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1982                                            RING_BUFFER_ALL_CPUS);
1983         }
1984 #endif
1985
1986         printk(KERN_CONT "PASSED\n");
1987         return 0;
1988 }
1989
1990 static __init int init_trace_selftests(void)
1991 {
1992         struct trace_selftests *p, *n;
1993         struct tracer *t, **last;
1994         int ret;
1995
1996         selftests_can_run = true;
1997
1998         mutex_lock(&trace_types_lock);
1999
2000         if (list_empty(&postponed_selftests))
2001                 goto out;
2002
2003         pr_info("Running postponed tracer tests:\n");
2004
2005         tracing_selftest_running = true;
2006         list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2007                 /* This loop can take minutes when sanitizers are enabled, so
2008                  * let's make sure we allow RCU processing.
2009                  */
2010                 cond_resched();
2011                 ret = run_tracer_selftest(p->type);
2012                 /* If the test fails, then warn and remove from available_tracers */
2013                 if (ret < 0) {
2014                         WARN(1, "tracer: %s failed selftest, disabling\n",
2015                              p->type->name);
2016                         last = &trace_types;
2017                         for (t = trace_types; t; t = t->next) {
2018                                 if (t == p->type) {
2019                                         *last = t->next;
2020                                         break;
2021                                 }
2022                                 last = &t->next;
2023                         }
2024                 }
2025                 list_del(&p->list);
2026                 kfree(p);
2027         }
2028         tracing_selftest_running = false;
2029
2030  out:
2031         mutex_unlock(&trace_types_lock);
2032
2033         return 0;
2034 }
2035 core_initcall(init_trace_selftests);
2036 #else
2037 static inline int run_tracer_selftest(struct tracer *type)
2038 {
2039         return 0;
2040 }
2041 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2042
2043 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2044
2045 static void __init apply_trace_boot_options(void);
2046
2047 /**
2048  * register_tracer - register a tracer with the ftrace system.
2049  * @type: the plugin for the tracer
2050  *
2051  * Register a new plugin tracer.
2052  */
2053 int __init register_tracer(struct tracer *type)
2054 {
2055         struct tracer *t;
2056         int ret = 0;
2057
2058         if (!type->name) {
2059                 pr_info("Tracer must have a name\n");
2060                 return -1;
2061         }
2062
2063         if (strlen(type->name) >= MAX_TRACER_SIZE) {
2064                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2065                 return -1;
2066         }
2067
2068         if (security_locked_down(LOCKDOWN_TRACEFS)) {
2069                 pr_warn("Can not register tracer %s due to lockdown\n",
2070                            type->name);
2071                 return -EPERM;
2072         }
2073
2074         mutex_lock(&trace_types_lock);
2075
2076         tracing_selftest_running = true;
2077
2078         for (t = trace_types; t; t = t->next) {
2079                 if (strcmp(type->name, t->name) == 0) {
2080                         /* already found */
2081                         pr_info("Tracer %s already registered\n",
2082                                 type->name);
2083                         ret = -1;
2084                         goto out;
2085                 }
2086         }
2087
2088         if (!type->set_flag)
2089                 type->set_flag = &dummy_set_flag;
2090         if (!type->flags) {
2091                 /* allocate a dummy tracer_flags */
2092                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2093                 if (!type->flags) {
2094                         ret = -ENOMEM;
2095                         goto out;
2096                 }
2097                 type->flags->val = 0;
2098                 type->flags->opts = dummy_tracer_opt;
2099         } else if (!type->flags->opts) {
2100                 type->flags->opts = dummy_tracer_opt;
2101         }
2102
2103         /* store the tracer for __set_tracer_option */
2104         type->flags->trace = type;
2105
2106         ret = run_tracer_selftest(type);
2107         if (ret < 0)
2108                 goto out;
2109
2110         type->next = trace_types;
2111         trace_types = type;
2112         add_tracer_options(&global_trace, type);
2113
2114  out:
2115         tracing_selftest_running = false;
2116         mutex_unlock(&trace_types_lock);
2117
2118         if (ret || !default_bootup_tracer)
2119                 goto out_unlock;
2120
2121         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2122                 goto out_unlock;
2123
2124         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2125         /* Do we want this tracer to start on bootup? */
2126         tracing_set_tracer(&global_trace, type->name);
2127         default_bootup_tracer = NULL;
2128
2129         apply_trace_boot_options();
2130
2131         /* disable other selftests, since running them now would break this tracer. */
2132         disable_tracing_selftest("running a tracer");
2133
2134  out_unlock:
2135         return ret;
2136 }
2137
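/*
 * Empty the ring buffer of a single CPU. Recording is disabled and
 * synchronize_rcu() waits out any in-flight commits before the reset.
 */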
2138 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2139 {
2140         struct trace_buffer *buffer = buf->buffer;
2141
2142         if (!buffer)
2143                 return;
2144
2145         ring_buffer_record_disable(buffer);
2146
2147         /* Make sure all commits have finished */
2148         synchronize_rcu();
2149         ring_buffer_reset_cpu(buffer, cpu);
2150
2151         ring_buffer_record_enable(buffer);
2152 }
2153
2154 void tracing_reset_online_cpus(struct array_buffer *buf)
2155 {
2156         struct trace_buffer *buffer = buf->buffer;
2157
2158         if (!buffer)
2159                 return;
2160
2161         ring_buffer_record_disable(buffer);
2162
2163         /* Make sure all commits have finished */
2164         synchronize_rcu();
2165
2166         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2167
2168         ring_buffer_reset_online_cpus(buffer);
2169
2170         ring_buffer_record_enable(buffer);
2171 }
2172
2173 /* Must have trace_types_lock held */
2174 void tracing_reset_all_online_cpus(void)
2175 {
2176         struct trace_array *tr;
2177
2178         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2179                 if (!tr->clear_trace)
2180                         continue;
2181                 tr->clear_trace = false;
2182                 tracing_reset_online_cpus(&tr->array_buffer);
2183 #ifdef CONFIG_TRACER_MAX_TRACE
2184                 tracing_reset_online_cpus(&tr->max_buffer);
2185 #endif
2186         }
2187 }
2188
2189 /*
2190  * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
2191  * is the tgid last observed corresponding to pid=i.
2192  */
2193 static int *tgid_map;
2194
2195 /* The maximum valid index into tgid_map. */
2196 static size_t tgid_map_max;
2197
2198 #define SAVED_CMDLINES_DEFAULT 128
2199 #define NO_CMDLINE_MAP UINT_MAX
2200 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
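/*
 * Saved comms live in a small circular pool: map_pid_to_cmdline maps a
 * (masked) pid to a slot, map_cmdline_to_pid records which pid currently
 * owns that slot, and saved_cmdlines holds the comm strings themselves.
 */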
2201 struct saved_cmdlines_buffer {
2202         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2203         unsigned *map_cmdline_to_pid;
2204         unsigned cmdline_num;
2205         int cmdline_idx;
2206         char *saved_cmdlines;
2207 };
2208 static struct saved_cmdlines_buffer *savedcmd;
2209
2210 static inline char *get_saved_cmdlines(int idx)
2211 {
2212         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2213 }
2214
2215 static inline void set_cmdline(int idx, const char *cmdline)
2216 {
2217         strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2218 }
2219
2220 static int allocate_cmdlines_buffer(unsigned int val,
2221                                     struct saved_cmdlines_buffer *s)
2222 {
2223         s->map_cmdline_to_pid = kmalloc_array(val,
2224                                               sizeof(*s->map_cmdline_to_pid),
2225                                               GFP_KERNEL);
2226         if (!s->map_cmdline_to_pid)
2227                 return -ENOMEM;
2228
2229         s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2230         if (!s->saved_cmdlines) {
2231                 kfree(s->map_cmdline_to_pid);
2232                 return -ENOMEM;
2233         }
2234
2235         s->cmdline_idx = 0;
2236         s->cmdline_num = val;
2237         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2238                sizeof(s->map_pid_to_cmdline));
2239         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2240                val * sizeof(*s->map_cmdline_to_pid));
2241
2242         return 0;
2243 }
2244
2245 static int trace_create_savedcmd(void)
2246 {
2247         int ret;
2248
2249         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2250         if (!savedcmd)
2251                 return -ENOMEM;
2252
2253         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2254         if (ret < 0) {
2255                 kfree(savedcmd);
2256                 savedcmd = NULL;
2257                 return -ENOMEM;
2258         }
2259
2260         return 0;
2261 }
2262
2263 int is_tracing_stopped(void)
2264 {
2265         return global_trace.stop_count;
2266 }
2267
2268 /**
2269  * tracing_start - quick start of the tracer
2270  *
2271  * If tracing is enabled but was stopped by tracing_stop,
2272  * this will start the tracer back up.
2273  */
2274 void tracing_start(void)
2275 {
2276         struct trace_buffer *buffer;
2277         unsigned long flags;
2278
2279         if (tracing_disabled)
2280                 return;
2281
2282         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2283         if (--global_trace.stop_count) {
2284                 if (global_trace.stop_count < 0) {
2285                         /* Someone screwed up their debugging */
2286                         WARN_ON_ONCE(1);
2287                         global_trace.stop_count = 0;
2288                 }
2289                 goto out;
2290         }
2291
2292         /* Prevent the buffers from switching */
2293         arch_spin_lock(&global_trace.max_lock);
2294
2295         buffer = global_trace.array_buffer.buffer;
2296         if (buffer)
2297                 ring_buffer_record_enable(buffer);
2298
2299 #ifdef CONFIG_TRACER_MAX_TRACE
2300         buffer = global_trace.max_buffer.buffer;
2301         if (buffer)
2302                 ring_buffer_record_enable(buffer);
2303 #endif
2304
2305         arch_spin_unlock(&global_trace.max_lock);
2306
2307  out:
2308         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2309 }
2310
2311 static void tracing_start_tr(struct trace_array *tr)
2312 {
2313         struct trace_buffer *buffer;
2314         unsigned long flags;
2315
2316         if (tracing_disabled)
2317                 return;
2318
2319         /* If global, we need to also start the max tracer */
2320         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2321                 return tracing_start();
2322
2323         raw_spin_lock_irqsave(&tr->start_lock, flags);
2324
2325         if (--tr->stop_count) {
2326                 if (tr->stop_count < 0) {
2327                         /* Someone screwed up their debugging */
2328                         WARN_ON_ONCE(1);
2329                         tr->stop_count = 0;
2330                 }
2331                 goto out;
2332         }
2333
2334         buffer = tr->array_buffer.buffer;
2335         if (buffer)
2336                 ring_buffer_record_enable(buffer);
2337
2338  out:
2339         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2340 }
2341
2342 /**
2343  * tracing_stop - quick stop of the tracer
2344  *
2345  * Light weight way to stop tracing. Use in conjunction with
2346  * tracing_start.
2347  */
2348 void tracing_stop(void)
2349 {
2350         struct trace_buffer *buffer;
2351         unsigned long flags;
2352
2353         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2354         if (global_trace.stop_count++)
2355                 goto out;
2356
2357         /* Prevent the buffers from switching */
2358         arch_spin_lock(&global_trace.max_lock);
2359
2360         buffer = global_trace.array_buffer.buffer;
2361         if (buffer)
2362                 ring_buffer_record_disable(buffer);
2363
2364 #ifdef CONFIG_TRACER_MAX_TRACE
2365         buffer = global_trace.max_buffer.buffer;
2366         if (buffer)
2367                 ring_buffer_record_disable(buffer);
2368 #endif
2369
2370         arch_spin_unlock(&global_trace.max_lock);
2371
2372  out:
2373         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2374 }
2375
2376 static void tracing_stop_tr(struct trace_array *tr)
2377 {
2378         struct trace_buffer *buffer;
2379         unsigned long flags;
2380
2381         /* If global, we need to also stop the max tracer */
2382         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2383                 return tracing_stop();
2384
2385         raw_spin_lock_irqsave(&tr->start_lock, flags);
2386         if (tr->stop_count++)
2387                 goto out;
2388
2389         buffer = tr->array_buffer.buffer;
2390         if (buffer)
2391                 ring_buffer_record_disable(buffer);
2392
2393  out:
2394         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2395 }
2396
2397 static int trace_save_cmdline(struct task_struct *tsk)
2398 {
2399         unsigned tpid, idx;
2400
2401         /* treat recording of idle task as a success */
2402         if (!tsk->pid)
2403                 return 1;
2404
2405         tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
2406
2407         /*
2408          * It's not the end of the world if we don't get
2409          * the lock, but we also don't want to spin
2410          * nor do we want to disable interrupts,
2411          * so if we miss here, then better luck next time.
2412          */
2413         if (!arch_spin_trylock(&trace_cmdline_lock))
2414                 return 0;
2415
2416         idx = savedcmd->map_pid_to_cmdline[tpid];
2417         if (idx == NO_CMDLINE_MAP) {
2418                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2419
2420                 savedcmd->map_pid_to_cmdline[tpid] = idx;
2421                 savedcmd->cmdline_idx = idx;
2422         }
2423
2424         savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2425         set_cmdline(idx, tsk->comm);
2426
2427         arch_spin_unlock(&trace_cmdline_lock);
2428
2429         return 1;
2430 }
2431
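/* Look up the last comm saved for @pid; trace_cmdline_lock must be held. */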
2432 static void __trace_find_cmdline(int pid, char comm[])
2433 {
2434         unsigned map;
2435         int tpid;
2436
2437         if (!pid) {
2438                 strcpy(comm, "<idle>");
2439                 return;
2440         }
2441
2442         if (WARN_ON_ONCE(pid < 0)) {
2443                 strcpy(comm, "<XXX>");
2444                 return;
2445         }
2446
2447         tpid = pid & (PID_MAX_DEFAULT - 1);
2448         map = savedcmd->map_pid_to_cmdline[tpid];
2449         if (map != NO_CMDLINE_MAP) {
2450                 tpid = savedcmd->map_cmdline_to_pid[map];
2451                 if (tpid == pid) {
2452                         strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2453                         return;
2454                 }
2455         }
2456         strcpy(comm, "<...>");
2457 }
2458
2459 void trace_find_cmdline(int pid, char comm[])
2460 {
2461         preempt_disable();
2462         arch_spin_lock(&trace_cmdline_lock);
2463
2464         __trace_find_cmdline(pid, comm);
2465
2466         arch_spin_unlock(&trace_cmdline_lock);
2467         preempt_enable();
2468 }
2469
2470 static int *trace_find_tgid_ptr(int pid)
2471 {
2472         /*
2473          * Pairs with the smp_store_release in set_tracer_flag() to ensure that
2474          * if we observe a non-NULL tgid_map then we also observe the correct
2475          * tgid_map_max.
2476          */
2477         int *map = smp_load_acquire(&tgid_map);
2478
2479         if (unlikely(!map || pid > tgid_map_max))
2480                 return NULL;
2481
2482         return &map[pid];
2483 }
2484
2485 int trace_find_tgid(int pid)
2486 {
2487         int *ptr = trace_find_tgid_ptr(pid);
2488
2489         return ptr ? *ptr : 0;
2490 }
2491
2492 static int trace_save_tgid(struct task_struct *tsk)
2493 {
2494         int *ptr;
2495
2496         /* treat recording of idle task as a success */
2497         if (!tsk->pid)
2498                 return 1;
2499
2500         ptr = trace_find_tgid_ptr(tsk->pid);
2501         if (!ptr)
2502                 return 0;
2503
2504         *ptr = tsk->tgid;
2505         return 1;
2506 }
2507
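/*
 * Return true if there is nothing to do: neither comm nor tgid recording
 * was requested, or this CPU has not seen a trace event since the last
 * save (trace_taskinfo_save is clear).
 */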
2508 static bool tracing_record_taskinfo_skip(int flags)
2509 {
2510         if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2511                 return true;
2512         if (!__this_cpu_read(trace_taskinfo_save))
2513                 return true;
2514         return false;
2515 }
2516
2517 /**
2518  * tracing_record_taskinfo - record the task info of a task
2519  *
2520  * @task:  task to record
2521  * @flags: TRACE_RECORD_CMDLINE for recording comm
2522  *         TRACE_RECORD_TGID for recording tgid
2523  */
2524 void tracing_record_taskinfo(struct task_struct *task, int flags)
2525 {
2526         bool done;
2527
2528         if (tracing_record_taskinfo_skip(flags))
2529                 return;
2530
2531         /*
2532          * Record as much task information as possible. If some fail, continue
2533          * to try to record the others.
2534          */
2535         done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2536         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2537
2538         /* If recording any information failed, retry soon. */
2539         if (!done)
2540                 return;
2541
2542         __this_cpu_write(trace_taskinfo_save, false);
2543 }
2544
2545 /**
2546  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2547  *
2548  * @prev: previous task during sched_switch
2549  * @next: next task during sched_switch
2550  * @flags: TRACE_RECORD_CMDLINE for recording comm
2551  *         TRACE_RECORD_TGID for recording tgid
2552  */
2553 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2554                                           struct task_struct *next, int flags)
2555 {
2556         bool done;
2557
2558         if (tracing_record_taskinfo_skip(flags))
2559                 return;
2560
2561         /*
2562          * Record as much task information as possible. If some fail, continue
2563          * to try to record the others.
2564          */
2565         done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2566         done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2567         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2568         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2569
2570         /* If recording any information failed, retry soon. */
2571         if (!done)
2572                 return;
2573
2574         __this_cpu_write(trace_taskinfo_save, false);
2575 }
2576
2577 /* Helpers to record a specific task information */
2578 void tracing_record_cmdline(struct task_struct *task)
2579 {
2580         tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2581 }
2582
2583 void tracing_record_tgid(struct task_struct *task)
2584 {
2585         tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2586 }
2587
2588 /*
2589  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2590  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2591  * simplifies those functions and keeps them in sync.
2592  */
2593 enum print_line_t trace_handle_return(struct trace_seq *s)
2594 {
2595         return trace_seq_has_overflowed(s) ?
2596                 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2597 }
2598 EXPORT_SYMBOL_GPL(trace_handle_return);
2599
2600 static unsigned short migration_disable_value(void)
2601 {
2602 #if defined(CONFIG_SMP)
2603         return current->migration_disabled;
2604 #else
2605         return 0;
2606 #endif
2607 }
2608
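/*
 * Build the trace_ctx word: bits 16 and up carry the irq/softirq/NMI and
 * resched flags, bits 0-3 the (clamped) preemption count, and bits 4-7
 * the (clamped) migration-disable depth.
 */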
2609 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2610 {
2611         unsigned int trace_flags = irqs_status;
2612         unsigned int pc;
2613
2614         pc = preempt_count();
2615
2616         if (pc & NMI_MASK)
2617                 trace_flags |= TRACE_FLAG_NMI;
2618         if (pc & HARDIRQ_MASK)
2619                 trace_flags |= TRACE_FLAG_HARDIRQ;
2620         if (in_serving_softirq())
2621                 trace_flags |= TRACE_FLAG_SOFTIRQ;
2622         if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
2623                 trace_flags |= TRACE_FLAG_BH_OFF;
2624
2625         if (tif_need_resched())
2626                 trace_flags |= TRACE_FLAG_NEED_RESCHED;
2627         if (test_preempt_need_resched())
2628                 trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2629         return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2630                 (min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
2631 }
2632
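/*
 * Non-static wrapper around __trace_buffer_lock_reserve() for use by
 * other tracing code.
 */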
2633 struct ring_buffer_event *
2634 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2635                           int type,
2636                           unsigned long len,
2637                           unsigned int trace_ctx)
2638 {
2639         return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2640 }
2641
2642 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2643 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2644 static int trace_buffered_event_ref;
2645
2646 /**
2647  * trace_buffered_event_enable - enable buffering events
2648  *
2649  * When events are being filtered, it is quicker to use a temporary
2650  * buffer to write the event data into if there's a likely chance
2651  * that it will not be committed. The discard of the ring buffer
2652  * is not as fast as committing, and is much slower than copying
2653  * a commit.
2654  *
2655  * When an event is to be filtered, allocate per cpu buffers to
2656  * write the event data into. If the event is filtered and discarded,
2657  * it is simply dropped; otherwise, the entire data is committed
2658  * in one shot.
2659  */
2660 void trace_buffered_event_enable(void)
2661 {
2662         struct ring_buffer_event *event;
2663         struct page *page;
2664         int cpu;
2665
2666         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2667
2668         if (trace_buffered_event_ref++)
2669                 return;
2670
2671         for_each_tracing_cpu(cpu) {
2672                 page = alloc_pages_node(cpu_to_node(cpu),
2673                                         GFP_KERNEL | __GFP_NORETRY, 0);
2674                 if (!page)
2675                         goto failed;
2676
2677                 event = page_address(page);
2678                 memset(event, 0, sizeof(*event));
2679
2680                 per_cpu(trace_buffered_event, cpu) = event;
2681
2682                 preempt_disable();
2683                 if (cpu == smp_processor_id() &&
2684                     __this_cpu_read(trace_buffered_event) !=
2685                     per_cpu(trace_buffered_event, cpu))
2686                         WARN_ON_ONCE(1);
2687                 preempt_enable();
2688         }
2689
2690         return;
2691  failed:
2692         trace_buffered_event_disable();
2693 }
2694
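/*
 * The two helpers below run on every CPU via smp_call_function_many().
 * A non-zero trace_buffered_event_cnt makes the fast path in
 * trace_event_buffer_lock_reserve() skip the per-CPU buffer and fall
 * back to reserving directly in the ring buffer.
 */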
2695 static void enable_trace_buffered_event(void *data)
2696 {
2697         /* Probably not needed, but do it anyway */
2698         smp_rmb();
2699         this_cpu_dec(trace_buffered_event_cnt);
2700 }
2701
2702 static void disable_trace_buffered_event(void *data)
2703 {
2704         this_cpu_inc(trace_buffered_event_cnt);
2705 }
2706
2707 /**
2708  * trace_buffered_event_disable - disable buffering events
2709  *
2710  * When a filter is removed, it is faster to not use the buffered
2711  * events, and to commit directly into the ring buffer. Free up
2712  * the temp buffers when there are no more users. This requires
2713  * special synchronization with current events.
2714  */
2715 void trace_buffered_event_disable(void)
2716 {
2717         int cpu;
2718
2719         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2720
2721         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2722                 return;
2723
2724         if (--trace_buffered_event_ref)
2725                 return;
2726
2727         preempt_disable();
2728         /* For each CPU, set the buffer as used. */
2729         smp_call_function_many(tracing_buffer_mask,
2730                                disable_trace_buffered_event, NULL, 1);
2731         preempt_enable();
2732
2733         /* Wait for all current users to finish */
2734         synchronize_rcu();
2735
2736         for_each_tracing_cpu(cpu) {
2737                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2738                 per_cpu(trace_buffered_event, cpu) = NULL;
2739         }
2740         /*
2741          * Make sure trace_buffered_event is NULL before clearing
2742          * trace_buffered_event_cnt.
2743          */
2744         smp_wmb();
2745
2746         preempt_disable();
2747         /* Do the work on each cpu */
2748         smp_call_function_many(tracing_buffer_mask,
2749                                enable_trace_buffered_event, NULL, 1);
2750         preempt_enable();
2751 }
2752
2753 static struct trace_buffer *temp_buffer;
2754
2755 struct ring_buffer_event *
2756 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2757                           struct trace_event_file *trace_file,
2758                           int type, unsigned long len,
2759                           unsigned int trace_ctx)
2760 {
2761         struct ring_buffer_event *entry;
2762         struct trace_array *tr = trace_file->tr;
2763         int val;
2764
2765         *current_rb = tr->array_buffer.buffer;
2766
2767         if (!tr->no_filter_buffering_ref &&
2768             (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) {
2769                 preempt_disable_notrace();
2770                 /*
2771                  * Filtering is on, so try to use the per cpu buffer first.
2772                  * This buffer will simulate a ring_buffer_event,
2773                  * where the type_len is zero and the array[0] will
2774                  * hold the full length.
2775                  * (see include/linux/ring_buffer.h for details on
2776                  *  how the ring_buffer_event is structured).
2777                  *
2778                  * Using a temp buffer during filtering and copying it
2779                  * on a matched filter is quicker than writing directly
2780                  * into the ring buffer and then discarding it when
2781                  * it doesn't match. That is because the discard
2782                  * requires several atomic operations to get right.
2783                  * Copying on match and doing nothing on a failed match
2784                  * is still quicker than no copy on match, but having
2785                  * to discard out of the ring buffer on a failed match.
2786                  */
2787                 if ((entry = __this_cpu_read(trace_buffered_event))) {
2788                         int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2789
2790                         val = this_cpu_inc_return(trace_buffered_event_cnt);
2791
2792                         /*
2793                          * Preemption is disabled, but interrupts and NMIs
2794                          * can still come in now. If that happens after
2795                          * the above increment, then it will have to go
2796                          * back to the old method of allocating the event
2797                          * on the ring buffer, and if the filter fails, it
2798                          * will have to call ring_buffer_discard_commit()
2799                          * to remove it.
2800                          *
2801                          * Need to also check the unlikely case that the
2802                          * length is bigger than the temp buffer size.
2803                          * If that happens, then the reserve is pretty much
2804                          * guaranteed to fail, as the ring buffer currently
2805                          * only allows events less than a page. But that may
2806                          * change in the future, so let the ring buffer reserve
2807                          * handle the failure in that case.
2808                          */
2809                         if (val == 1 && likely(len <= max_len)) {
2810                                 trace_event_setup(entry, type, trace_ctx);
2811                                 entry->array[0] = len;
2812                                 /* Return with preemption disabled */
2813                                 return entry;
2814                         }
2815                         this_cpu_dec(trace_buffered_event_cnt);
2816                 }
2817                 /* __trace_buffer_lock_reserve() disables preemption */
2818                 preempt_enable_notrace();
2819         }
2820
2821         entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2822                                             trace_ctx);
2823         /*
2824          * If tracing is off, but we have triggers enabled
2825          * we still need to look at the event data. Use the temp_buffer
2826          * to store the trace event for the trigger to use. It's recursion
2827          * safe and will not be recorded anywhere.
2828          */
2829         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2830                 *current_rb = temp_buffer;
2831                 entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2832                                                     trace_ctx);
2833         }
2834         return entry;
2835 }
2836 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2837
2838 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2839 static DEFINE_MUTEX(tracepoint_printk_mutex);
2840
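/*
 * Print a single trace event through printk(). Used when the
 * tracepoint_printk ("tp_printk") option is enabled.
 */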
2841 static void output_printk(struct trace_event_buffer *fbuffer)
2842 {
2843         struct trace_event_call *event_call;
2844         struct trace_event_file *file;
2845         struct trace_event *event;
2846         unsigned long flags;
2847         struct trace_iterator *iter = tracepoint_print_iter;
2848
2849         /* We should never get here if iter is NULL */
2850         if (WARN_ON_ONCE(!iter))
2851                 return;
2852
2853         event_call = fbuffer->trace_file->event_call;
2854         if (!event_call || !event_call->event.funcs ||
2855             !event_call->event.funcs->trace)
2856                 return;
2857
2858         file = fbuffer->trace_file;
2859         if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2860             (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2861              !filter_match_preds(file->filter, fbuffer->entry)))
2862                 return;
2863
2864         event = &fbuffer->trace_file->event_call->event;
2865
2866         spin_lock_irqsave(&tracepoint_iter_lock, flags);
2867         trace_seq_init(&iter->seq);
2868         iter->ent = fbuffer->entry;
2869         event_call->event.funcs->trace(iter, 0, event);
2870         trace_seq_putc(&iter->seq, 0);
2871         printk("%s", iter->seq.buffer);
2872
2873         spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2874 }
2875
2876 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2877                              void *buffer, size_t *lenp,
2878                              loff_t *ppos)
2879 {
2880         int save_tracepoint_printk;
2881         int ret;
2882
2883         mutex_lock(&tracepoint_printk_mutex);
2884         save_tracepoint_printk = tracepoint_printk;
2885
2886         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2887
2888         /*
2889          * This will force exiting early, as tracepoint_printk
2890          * is always zero when tracepoint_print_iter is not allocated
2891          */
2892         if (!tracepoint_print_iter)
2893                 tracepoint_printk = 0;
2894
2895         if (save_tracepoint_printk == tracepoint_printk)
2896                 goto out;
2897
2898         if (tracepoint_printk)
2899                 static_key_enable(&tracepoint_printk_key.key);
2900         else
2901                 static_key_disable(&tracepoint_printk_key.key);
2902
2903  out:
2904         mutex_unlock(&tracepoint_printk_mutex);
2905
2906         return ret;
2907 }
2908
2909 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2910 {
2911         enum event_trigger_type tt = ETT_NONE;
2912         struct trace_event_file *file = fbuffer->trace_file;
2913
2914         if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
2915                         fbuffer->entry, &tt))
2916                 goto discard;
2917
2918         if (static_key_false(&tracepoint_printk_key.key))
2919                 output_printk(fbuffer);
2920
2921         if (static_branch_unlikely(&trace_event_exports_enabled))
2922                 ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2923
2924         trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
2925                         fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
2926
2927 discard:
2928         if (tt)
2929                 event_triggers_post_call(file, tt);
2930
2931 }
2932 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2933
2934 /*
2935  * Skip 3:
2936  *
2937  *   trace_buffer_unlock_commit_regs()
2938  *   trace_event_buffer_commit()
2939  *   trace_event_raw_event_xxx()
2940  */
2941 # define STACK_SKIP 3
2942
2943 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2944                                      struct trace_buffer *buffer,
2945                                      struct ring_buffer_event *event,
2946                                      unsigned int trace_ctx,
2947                                      struct pt_regs *regs)
2948 {
2949         __buffer_unlock_commit(buffer, event);
2950
2951         /*
2952          * If regs is not set, then skip the necessary functions.
2953          * Note, we can still get here via blktrace, wakeup tracer
2954          * and mmiotrace, but that's ok if they lose a function or
2955          * two. They are not that meaningful.
2956          */
2957         ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
2958         ftrace_trace_userstack(tr, buffer, trace_ctx);
2959 }
2960
2961 /*
2962  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2963  */
2964 void
2965 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2966                                    struct ring_buffer_event *event)
2967 {
2968         __buffer_unlock_commit(buffer, event);
2969 }
2970
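/*
 * Record a function call (ip and parent_ip) as a TRACE_FN entry in the
 * trace array's ring buffer.
 */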
2971 void
2972 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
2973                parent_ip, unsigned int trace_ctx)
2974 {
2975         struct trace_event_call *call = &event_function;
2976         struct trace_buffer *buffer = tr->array_buffer.buffer;
2977         struct ring_buffer_event *event;
2978         struct ftrace_entry *entry;
2979
2980         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2981                                             trace_ctx);
2982         if (!event)
2983                 return;
2984         entry   = ring_buffer_event_data(event);
2985         entry->ip                       = ip;
2986         entry->parent_ip                = parent_ip;
2987
2988         if (!call_filter_check_discard(call, entry, buffer, event)) {
2989                 if (static_branch_unlikely(&trace_function_exports_enabled))
2990                         ftrace_exports(event, TRACE_EXPORT_FUNCTION);
2991                 __buffer_unlock_commit(buffer, event);
2992         }
2993 }
2994
2995 #ifdef CONFIG_STACKTRACE
2996
2997 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2998 #define FTRACE_KSTACK_NESTING   4
2999
3000 #define FTRACE_KSTACK_ENTRIES   (PAGE_SIZE / FTRACE_KSTACK_NESTING)
3001
3002 struct ftrace_stack {
3003         unsigned long           calls[FTRACE_KSTACK_ENTRIES];
3004 };
3005
3006
3007 struct ftrace_stacks {
3008         struct ftrace_stack     stacks[FTRACE_KSTACK_NESTING];
3009 };
3010
3011 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
3012 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
3013
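/*
 * Save a kernel stack trace into one of this CPU's ftrace_stack slots
 * (one slot per nesting level: normal, softirq, irq, NMI) and record it
 * as a TRACE_STACK event.
 */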
3014 static void __ftrace_trace_stack(struct trace_buffer *buffer,
3015                                  unsigned int trace_ctx,
3016                                  int skip, struct pt_regs *regs)
3017 {
3018         struct trace_event_call *call = &event_kernel_stack;
3019         struct ring_buffer_event *event;
3020         unsigned int size, nr_entries;
3021         struct ftrace_stack *fstack;
3022         struct stack_entry *entry;
3023         int stackidx;
3024
3025         /*
3026          * Add one for this function and the call to stack_trace_save().
3027          * If regs is set, then these functions will not be in the way.
3028          */
3029 #ifndef CONFIG_UNWINDER_ORC
3030         if (!regs)
3031                 skip++;
3032 #endif
3033
3034         preempt_disable_notrace();
3035
3036         stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
3037
3038         /* This should never happen. If it does, yell once and skip */
3039         if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
3040                 goto out;
3041
3042         /*
3043          * The above __this_cpu_inc_return() is 'atomic' cpu local. An
3044          * interrupt will either see the value pre increment or post
3045          * increment. If the interrupt happens pre increment it will have
3046          * restored the counter when it returns.  We just need a barrier to
3047          * keep gcc from moving things around.
3048          */
3049         barrier();
3050
3051         fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
3052         size = ARRAY_SIZE(fstack->calls);
3053
3054         if (regs) {
3055                 nr_entries = stack_trace_save_regs(regs, fstack->calls,
3056                                                    size, skip);
3057         } else {
3058                 nr_entries = stack_trace_save(fstack->calls, size, skip);
3059         }
3060
3061         size = nr_entries * sizeof(unsigned long);
3062         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3063                                     (sizeof(*entry) - sizeof(entry->caller)) + size,
3064                                     trace_ctx);
3065         if (!event)
3066                 goto out;
3067         entry = ring_buffer_event_data(event);
3068
3069         memcpy(&entry->caller, fstack->calls, size);
3070         entry->size = nr_entries;
3071
3072         if (!call_filter_check_discard(call, entry, buffer, event))
3073                 __buffer_unlock_commit(buffer, event);
3074
3075  out:
3076         /* Again, don't let gcc optimize things here */
3077         barrier();
3078         __this_cpu_dec(ftrace_stack_reserve);
3079         preempt_enable_notrace();
3080
3081 }
3082
3083 static inline void ftrace_trace_stack(struct trace_array *tr,
3084                                       struct trace_buffer *buffer,
3085                                       unsigned int trace_ctx,
3086                                       int skip, struct pt_regs *regs)
3087 {
3088         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3089                 return;
3090
3091         __ftrace_trace_stack(buffer, trace_ctx, skip, regs);
3092 }
3093
3094 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3095                    int skip)
3096 {
3097         struct trace_buffer *buffer = tr->array_buffer.buffer;
3098
3099         if (rcu_is_watching()) {
3100                 __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3101                 return;
3102         }
3103
3104         /*
3105          * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
3106          * but if the above rcu_is_watching() failed, then the NMI
3107          * triggered someplace critical, and rcu_irq_enter() should
3108          * not be called from NMI.
3109          */
3110         if (unlikely(in_nmi()))
3111                 return;
3112
3113         rcu_irq_enter_irqson();
3114         __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3115         rcu_irq_exit_irqson();
3116 }
3117
3118 /**
3119  * trace_dump_stack - record a stack back trace in the trace buffer
3120  * @skip: Number of functions to skip (helper handlers)
3121  */
3122 void trace_dump_stack(int skip)
3123 {
3124         if (tracing_disabled || tracing_selftest_running)
3125                 return;
3126
3127 #ifndef CONFIG_UNWINDER_ORC
3128         /* Skip 1 to skip this function. */
3129         skip++;
3130 #endif
3131         __ftrace_trace_stack(global_trace.array_buffer.buffer,
3132                              tracing_gen_ctx(), skip, NULL);
3133 }
3134 EXPORT_SYMBOL_GPL(trace_dump_stack);
3135
3136 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3137 static DEFINE_PER_CPU(int, user_stack_count);
3138
3139 static void
3140 ftrace_trace_userstack(struct trace_array *tr,
3141                        struct trace_buffer *buffer, unsigned int trace_ctx)
3142 {
3143         struct trace_event_call *call = &event_user_stack;
3144         struct ring_buffer_event *event;
3145         struct userstack_entry *entry;
3146
3147         if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3148                 return;
3149
3150         /*
3151          * NMIs cannot handle page faults, even with fixups.
3152          * Saving the user stack can (and often does) fault.
3153          */
3154         if (unlikely(in_nmi()))
3155                 return;
3156
3157         /*
3158          * prevent recursion, since the user stack tracing may
3159          * trigger other kernel events.
3160          */
3161         preempt_disable();
3162         if (__this_cpu_read(user_stack_count))
3163                 goto out;
3164
3165         __this_cpu_inc(user_stack_count);
3166
3167         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3168                                             sizeof(*entry), trace_ctx);
3169         if (!event)
3170                 goto out_drop_count;
3171         entry   = ring_buffer_event_data(event);
3172
3173         entry->tgid             = current->tgid;
3174         memset(&entry->caller, 0, sizeof(entry->caller));
3175
3176         stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3177         if (!call_filter_check_discard(call, entry, buffer, event))
3178                 __buffer_unlock_commit(buffer, event);
3179
3180  out_drop_count:
3181         __this_cpu_dec(user_stack_count);
3182  out:
3183         preempt_enable();
3184 }
3185 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3186 static void ftrace_trace_userstack(struct trace_array *tr,
3187                                    struct trace_buffer *buffer,
3188                                    unsigned int trace_ctx)
3189 {
3190 }
3191 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3192
3193 #endif /* CONFIG_STACKTRACE */
3194
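/*
 * The timestamp delta of a TRACE_FUNC_REPEATS entry is stored split
 * across two 32-bit fields; the helper below does the packing.
 */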
3195 static inline void
3196 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3197                           unsigned long long delta)
3198 {
3199         entry->bottom_delta_ts = delta & U32_MAX;
3200         entry->top_delta_ts = (delta >> 32);
3201 }
3202
3203 void trace_last_func_repeats(struct trace_array *tr,
3204                              struct trace_func_repeats *last_info,
3205                              unsigned int trace_ctx)
3206 {
3207         struct trace_buffer *buffer = tr->array_buffer.buffer;
3208         struct func_repeats_entry *entry;
3209         struct ring_buffer_event *event;
3210         u64 delta;
3211
3212         event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3213                                             sizeof(*entry), trace_ctx);
3214         if (!event)
3215                 return;
3216
3217         delta = ring_buffer_event_time_stamp(buffer, event) -
3218                 last_info->ts_last_call;
3219
3220         entry = ring_buffer_event_data(event);
3221         entry->ip = last_info->ip;
3222         entry->parent_ip = last_info->parent_ip;
3223         entry->count = last_info->count;
3224         func_repeats_set_delta_ts(entry, delta);
3225
3226         __buffer_unlock_commit(buffer, event);
3227 }
3228
3229 /* created for use with alloc_percpu */
3230 struct trace_buffer_struct {
3231         int nesting;
3232         char buffer[4][TRACE_BUF_SIZE];
3233 };
3234
3235 static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3236
3237 /*
3238  * This allows for lockless recording.  If we're nested too deeply, then
3239  * this returns NULL.
3240  */
3241 static char *get_trace_buf(void)
3242 {
3243         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3244
3245         if (!trace_percpu_buffer || buffer->nesting >= 4)
3246                 return NULL;
3247
3248         buffer->nesting++;
3249
3250         /* Interrupts must see nesting incremented before we use the buffer */
3251         barrier();
3252         return &buffer->buffer[buffer->nesting - 1][0];
3253 }
3254
3255 static void put_trace_buf(void)
3256 {
3257         /* Don't let the decrement of nesting leak before this */
3258         barrier();
3259         this_cpu_dec(trace_percpu_buffer->nesting);
3260 }
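
/*
 * A brief sketch of how the nesting above is meant to be used (the exact
 * context names are an assumption, not spelled out here): the four
 * per-CPU buffers give one scratch buffer per nesting level, which in
 * practice lines up with normal, softirq, hardirq and NMI context. A
 * trace_printk() that interrupts another trace_printk() on the same CPU
 * therefore formats into its own slot instead of corrupting the
 * partially written string of the interrupted context.
 */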
3261
3262 static int alloc_percpu_trace_buffer(void)
3263 {
3264         struct trace_buffer_struct __percpu *buffers;
3265
3266         if (trace_percpu_buffer)
3267                 return 0;
3268
3269         buffers = alloc_percpu(struct trace_buffer_struct);
3270         if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3271                 return -ENOMEM;
3272
3273         trace_percpu_buffer = buffers;
3274         return 0;
3275 }
3276
3277 static int buffers_allocated;
3278
3279 void trace_printk_init_buffers(void)
3280 {
3281         if (buffers_allocated)
3282                 return;
3283
3284         if (alloc_percpu_trace_buffer())
3285                 return;
3286
3287         /* trace_printk() is for debug use only. Don't use it in production. */
3288
3289         pr_warn("\n");
3290         pr_warn("**********************************************************\n");
3291         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3292         pr_warn("**                                                      **\n");
3293         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3294         pr_warn("**                                                      **\n");
3295         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3296         pr_warn("** unsafe for production use.                           **\n");
3297         pr_warn("**                                                      **\n");
3298         pr_warn("** If you see this message and you are not debugging    **\n");
3299         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3300         pr_warn("**                                                      **\n");
3301         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3302         pr_warn("**********************************************************\n");
3303
3304         /* Expand the buffers to set size */
3305         tracing_update_buffers();
3306
3307         buffers_allocated = 1;
3308
3309         /*
3310          * trace_printk_init_buffers() can be called by modules.
3311          * If that happens, then we need to start cmdline recording
3312          * directly here. If global_trace.array_buffer.buffer is already
3313          * allocated, then this was called by module code.
3314          */
3315         if (global_trace.array_buffer.buffer)
3316                 tracing_start_cmdline_record();
3317 }
3318 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
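
/*
 * A minimal usage sketch (illustrative only; the function name is made
 * up): trace_printk() is the debug-only API these buffers back, and the
 * banner above is printed as soon as the buffers get allocated for it.
 *
 *	static int __init my_debug_init(void)
 *	{
 *		trace_printk("loaded, state=%d\n", 42);
 *		return 0;
 *	}
 *
 * The output is then read from tracefs, e.g. /sys/kernel/tracing/trace.
 */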
3319
3320 void trace_printk_start_comm(void)
3321 {
3322         /* Start tracing comms if trace printk is set */
3323         if (!buffers_allocated)
3324                 return;
3325         tracing_start_cmdline_record();
3326 }
3327
3328 static void trace_printk_start_stop_comm(int enabled)
3329 {
3330         if (!buffers_allocated)
3331                 return;
3332
3333         if (enabled)
3334                 tracing_start_cmdline_record();
3335         else
3336                 tracing_stop_cmdline_record();
3337 }
3338
3339 /**
3340  * trace_vbprintk - write binary msg to tracing buffer
3341  * @ip:    The address of the caller
3342  * @fmt:   The string format to write to the buffer
3343  * @args:  Arguments for @fmt
3344  */
3345 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3346 {
3347         struct trace_event_call *call = &event_bprint;
3348         struct ring_buffer_event *event;
3349         struct trace_buffer *buffer;
3350         struct trace_array *tr = &global_trace;
3351         struct bprint_entry *entry;
3352         unsigned int trace_ctx;
3353         char *tbuffer;
3354         int len = 0, size;
3355
3356         if (unlikely(tracing_selftest_running || tracing_disabled))
3357                 return 0;
3358
3359         /* Don't pollute graph traces with trace_vprintk internals */
3360         pause_graph_tracing();
3361
3362         trace_ctx = tracing_gen_ctx();
3363         preempt_disable_notrace();
3364
3365         tbuffer = get_trace_buf();
3366         if (!tbuffer) {
3367                 len = 0;
3368                 goto out_nobuffer;
3369         }
3370
3371         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3372
3373         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3374                 goto out_put;
3375
3376         size = sizeof(*entry) + sizeof(u32) * len;
3377         buffer = tr->array_buffer.buffer;
3378         ring_buffer_nest_start(buffer);
3379         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3380                                             trace_ctx);
3381         if (!event)
3382                 goto out;
3383         entry = ring_buffer_event_data(event);
3384         entry->ip                       = ip;
3385         entry->fmt                      = fmt;
3386
3387         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3388         if (!call_filter_check_discard(call, entry, buffer, event)) {
3389                 __buffer_unlock_commit(buffer, event);
3390                 ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3391         }
3392
3393 out:
3394         ring_buffer_nest_end(buffer);
3395 out_put:
3396         put_trace_buf();
3397
3398 out_nobuffer:
3399         preempt_enable_notrace();
3400         unpause_graph_tracing();
3401
3402         return len;
3403 }
3404 EXPORT_SYMBOL_GPL(trace_vbprintk);
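
/*
 * Note on the binary path above: a bprint entry records only the format
 * pointer and the vbin_printf() encoded arguments; the string is not
 * formatted until the buffer is read. That keeps the tracing hot path
 * cheaper than the plain text path in __trace_array_vprintk() below.
 */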
3405
3406 __printf(3, 0)
3407 static int
3408 __trace_array_vprintk(struct trace_buffer *buffer,
3409                       unsigned long ip, const char *fmt, va_list args)
3410 {
3411         struct trace_event_call *call = &event_print;
3412         struct ring_buffer_event *event;
3413         int len = 0, size;
3414         struct print_entry *entry;
3415         unsigned int trace_ctx;
3416         char *tbuffer;
3417
3418         if (tracing_disabled || tracing_selftest_running)
3419                 return 0;
3420
3421         /* Don't pollute graph traces with trace_vprintk internals */
3422         pause_graph_tracing();
3423
3424         trace_ctx = tracing_gen_ctx();
3425         preempt_disable_notrace();
3426
3427
3428         tbuffer = get_trace_buf();
3429         if (!tbuffer) {
3430                 len = 0;
3431                 goto out_nobuffer;
3432         }
3433
3434         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3435
3436         size = sizeof(*entry) + len + 1;
3437         ring_buffer_nest_start(buffer);
3438         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3439                                             trace_ctx);
3440         if (!event)
3441                 goto out;
3442         entry = ring_buffer_event_data(event);
3443         entry->ip = ip;
3444
3445         memcpy(&entry->buf, tbuffer, len + 1);
3446         if (!call_filter_check_discard(call, entry, buffer, event)) {
3447                 __buffer_unlock_commit(buffer, event);
3448                 ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
3449         }
3450
3451 out:
3452         ring_buffer_nest_end(buffer);
3453         put_trace_buf();
3454
3455 out_nobuffer:
3456         preempt_enable_notrace();
3457         unpause_graph_tracing();
3458
3459         return len;
3460 }
3461
3462 __printf(3, 0)
3463 int trace_array_vprintk(struct trace_array *tr,
3464                         unsigned long ip, const char *fmt, va_list args)
3465 {
3466         return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3467 }
3468
3469 /**
3470  * trace_array_printk - Print a message to a specific instance
3471  * @tr: The instance trace_array descriptor
3472  * @ip: The instruction pointer that this is called from.
3473  * @fmt: The format to print (printf format)
3474  *
3475  * If a subsystem sets up its own instance, it may printk strings into
3476  * its tracing instance buffer using this function. Note, this
3477  * function will not write into the top level buffer (use
3478  * trace_printk() for that), as the top level buffer should only
3479  * contain events that can be individually disabled. trace_printk()
3480  * is only meant for debugging a kernel, and should never be
3481  * incorporated into normal use.
3482  *
3483  * trace_array_printk() can be used, as it will not add noise to the
3484  * top level tracing buffer.
3485  *
3486  * Note, trace_array_init_printk() must be called on @tr before this
3487  * can be used.
3488  */
3489 __printf(3, 0)
3490 int trace_array_printk(struct trace_array *tr,
3491                        unsigned long ip, const char *fmt, ...)
3492 {
3493         int ret;
3494         va_list ap;
3495
3496         if (!tr)
3497                 return -ENOENT;
3498
3499         /* This is only allowed for created instances */
3500         if (tr == &global_trace)
3501                 return 0;
3502
3503         if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3504                 return 0;
3505
3506         va_start(ap, fmt);
3507         ret = trace_array_vprintk(tr, ip, fmt, ap);
3508         va_end(ap);
3509         return ret;
3510 }
3511 EXPORT_SYMBOL_GPL(trace_array_printk);
3512
3513 /**
3514  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3515  * @tr: The trace array to initialize the buffers for
3516  *
3517  * As trace_array_printk() only writes into instances, it is OK to
3518  * have it in the kernel (unlike trace_printk()). This needs to be called
3519  * before trace_array_printk() can be used on a trace_array.
3520  */
3521 int trace_array_init_printk(struct trace_array *tr)
3522 {
3523         if (!tr)
3524                 return -ENOENT;
3525
3526         /* This is only allowed for created instances */
3527         if (tr == &global_trace)
3528                 return -EINVAL;
3529
3530         return alloc_percpu_trace_buffer();
3531 }
3532 EXPORT_SYMBOL_GPL(trace_array_init_printk);
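
/*
 * A minimal sketch of the intended calling sequence (illustrative only;
 * the instance name "my_subsys" and the error handling are made up):
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("my_subsys");
 *	if (!tr)
 *		return -ENOMEM;
 *	if (!trace_array_init_printk(tr))
 *		trace_array_printk(tr, _THIS_IP_, "probe done: %d\n", 42);
 *	trace_array_put(tr);
 *
 * The messages then appear in the instance's own buffer, e.g.
 * /sys/kernel/tracing/instances/my_subsys/trace, not in the top level
 * buffer.
 */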
3533
3534 __printf(3, 4)
3535 int trace_array_printk_buf(struct trace_buffer *buffer,
3536                            unsigned long ip, const char *fmt, ...)
3537 {
3538         int ret;
3539         va_list ap;
3540
3541         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3542                 return 0;
3543
3544         va_start(ap, fmt);
3545         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3546         va_end(ap);
3547         return ret;
3548 }
3549
3550 __printf(2, 0)
3551 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3552 {
3553         return trace_array_vprintk(&global_trace, ip, fmt, args);
3554 }
3555 EXPORT_SYMBOL_GPL(trace_vprintk);
3556
3557 static void trace_iterator_increment(struct trace_iterator *iter)
3558 {
3559         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3560
3561         iter->idx++;
3562         if (buf_iter)
3563                 ring_buffer_iter_advance(buf_iter);
3564 }
3565
3566 static struct trace_entry *
3567 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3568                 unsigned long *lost_events)
3569 {
3570         struct ring_buffer_event *event;
3571         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3572
3573         if (buf_iter) {
3574                 event = ring_buffer_iter_peek(buf_iter, ts);
3575                 if (lost_events)
3576                         *lost_events = ring_buffer_iter_dropped(buf_iter) ?
3577                                 (unsigned long)-1 : 0;
3578         } else {
3579                 event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3580                                          lost_events);
3581         }
3582
3583         if (event) {
3584                 iter->ent_size = ring_buffer_event_length(event);
3585                 return ring_buffer_event_data(event);
3586         }
3587         iter->ent_size = 0;
3588         return NULL;
3589 }
3590
3591 static struct trace_entry *
3592 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3593                   unsigned long *missing_events, u64 *ent_ts)
3594 {
3595         struct trace_buffer *buffer = iter->array_buffer->buffer;
3596         struct trace_entry *ent, *next = NULL;
3597         unsigned long lost_events = 0, next_lost = 0;
3598         int cpu_file = iter->cpu_file;
3599         u64 next_ts = 0, ts;
3600         int next_cpu = -1;
3601         int next_size = 0;
3602         int cpu;
3603
3604         /*
3605          * If we are in a per_cpu trace file, don't bother iterating over
3606          * all CPUs; peek at that CPU directly.
3607          */
3608         if (cpu_file > RING_BUFFER_ALL_CPUS) {
3609                 if (ring_buffer_empty_cpu(buffer, cpu_file))
3610                         return NULL;
3611                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3612                 if (ent_cpu)
3613                         *ent_cpu = cpu_file;
3614
3615                 return ent;
3616         }
3617
3618         for_each_tracing_cpu(cpu) {
3619
3620                 if (ring_buffer_empty_cpu(buffer, cpu))
3621                         continue;
3622
3623                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3624
3625                 /*
3626                  * Pick the entry with the smallest timestamp:
3627                  */
3628                 if (ent && (!next || ts < next_ts)) {
3629                         next = ent;
3630                         next_cpu = cpu;
3631                         next_ts = ts;
3632                         next_lost = lost_events;
3633                         next_size = iter->ent_size;
3634                 }
3635         }
3636
3637         iter->ent_size = next_size;
3638
3639         if (ent_cpu)
3640                 *ent_cpu = next_cpu;
3641
3642         if (ent_ts)
3643                 *ent_ts = next_ts;
3644
3645         if (missing_events)
3646                 *missing_events = next_lost;
3647
3648         return next;
3649 }
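
/*
 * In the RING_BUFFER_ALL_CPUS case above, the next entry is found by a
 * simple merge: every per-CPU buffer is peeked (not consumed) and the
 * entry with the smallest timestamp wins, so the iterator produces a
 * single time-ordered stream out of the per-CPU ring buffers.
 */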
3650
3651 #define STATIC_FMT_BUF_SIZE     128
3652 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3653
3654 static char *trace_iter_expand_format(struct trace_iterator *iter)
3655 {
3656         char *tmp;
3657
3658         /*
3659          * iter->tr is NULL when used with tp_printk, which means
3660          * this can get called where it is not safe to call krealloc().
3661          */
3662         if (!iter->tr || iter->fmt == static_fmt_buf)
3663                 return NULL;
3664
3665         tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3666                        GFP_KERNEL);
3667         if (tmp) {
3668                 iter->fmt_size += STATIC_FMT_BUF_SIZE;
3669                 iter->fmt = tmp;
3670         }
3671
3672         return tmp;
3673 }
3674
3675 /* Returns true if the string is safe to dereference from an event */
3676 static bool trace_safe_str(struct trace_iterator *iter, const char *str,
3677                            bool star, int len)
3678 {
3679         unsigned long addr = (unsigned long)str;
3680         struct trace_event *trace_event;
3681         struct trace_event_call *event;
3682
3683         /* Ignore strings with no length */
3684         if (star && !len)
3685                 return true;
3686
3687         /* OK if part of the event data */
3688         if ((addr >= (unsigned long)iter->ent) &&
3689             (addr < (unsigned long)iter->ent + iter->ent_size))
3690                 return true;
3691
3692         /* OK if part of the temp seq buffer */
3693         if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3694             (addr < (unsigned long)iter->tmp_seq.buffer + PAGE_SIZE))
3695                 return true;
3696
3697         /* Core rodata can not be freed */
3698         if (is_kernel_rodata(addr))
3699                 return true;
3700
3701         if (trace_is_tracepoint_string(str))
3702                 return true;
3703
3704         /*
3705          * Now this could be a module event, referencing core module
3706          * data, which is OK.
3707          */
3708         if (!iter->ent)
3709                 return false;
3710
3711         trace_event = ftrace_find_event(iter->ent->type);
3712         if (!trace_event)
3713                 return false;
3714
3715         event = container_of(trace_event, struct trace_event_call, event);
3716         if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3717                 return false;
3718
3719         /* Would rather have rodata, but this will suffice */
3720         if (within_module_core(addr, event->module))
3721                 return true;
3722
3723         return false;
3724 }
3725
3726 static const char *show_buffer(struct trace_seq *s)
3727 {
3728         struct seq_buf *seq = &s->seq;
3729
3730         seq_buf_terminate(seq);
3731
3732         return seq->buffer;
3733 }
3734
3735 static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
3736
3737 static int test_can_verify_check(const char *fmt, ...)
3738 {
3739         char buf[16];
3740         va_list ap;
3741         int ret;
3742
3743         /*
3744          * The verifier depends on vsnprintf() modifying the va_list that is
3745          * passed to it, i.e. on the va_list being passed by reference. Some
3746          * architectures (like x86_32) pass it by value, which means that
3747          * vsnprintf() does not modify the caller's va_list, and the verifier
3748          * would then need to understand every conversion that vsnprintf()
3749          * can perform. If the va_list is passed by value, the verifier is
3750          * simply disabled.
3751          */
3752         va_start(ap, fmt);
3753         vsnprintf(buf, 16, "%d", ap);
3754         ret = va_arg(ap, int);
3755         va_end(ap);
3756
3757         return ret;
3758 }
3759
3760 static void test_can_verify(void)
3761 {
3762         if (!test_can_verify_check("%d %d", 0, 1)) {
3763                 pr_info("trace event string verifier disabled\n");
3764                 static_branch_inc(&trace_no_verify);
3765         }
3766 }
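
/*
 * How the probe above works: vsnprintf(buf, 16, "%d", ap) consumes the
 * first integer (0) only when the architecture passes va_list by
 * reference; the following va_arg() then returns the second argument (1)
 * and the verifier stays enabled. If va_list is passed by value, ap is
 * left untouched, va_arg() returns 0, and test_can_verify() flips the
 * trace_no_verify static branch to disable the verifier.
 */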
3767
3768 /**
3769  * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
3770  * @iter: The iterator that holds the seq buffer and the event being printed
3771  * @fmt: The format used to print the event
3772  * @ap: The va_list holding the data to print from @fmt.
3773  *
3774  * This writes the data into the @iter->seq buffer using the data from
3775  * @fmt and @ap. If the format has a %s, then the source of the string
3776  * is examined to make sure it is safe to print, otherwise it will
3777  * warn and print "[UNSAFE MEMORY]" in place of the dereferenced string
3778  * pointer.
3779  */
3780 void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
3781                          va_list ap)
3782 {
3783         const char *p = fmt;
3784         const char *str;
3785         int i, j;
3786
3787         if (WARN_ON_ONCE(!fmt))
3788                 return;
3789
3790         if (static_branch_unlikely(&trace_no_verify))
3791                 goto print;
3792
3793         /* Don't bother checking when doing a ftrace_dump() */
3794         if (iter->fmt == static_fmt_buf)
3795                 goto print;
3796
3797         while (*p) {
3798                 bool star = false;
3799                 int len = 0;
3800
3801                 j = 0;
3802
3803                 /* We only care about %s and variants */
3804                 for (i = 0; p[i]; i++) {
3805                         if (i + 1 >= iter->fmt_size) {
3806                                 /*
3807                                  * If we can't expand the copy buffer,
3808                                  * just print it.
3809                                  */
3810                                 if (!trace_iter_expand_format(iter))
3811                                         goto print;
3812                         }
3813
3814                         if (p[i] == '\\' && p[i+1]) {
3815                                 i++;
3816                                 continue;
3817                         }
3818                         if (p[i] == '%') {
3819                                 /* Need to test cases like %08.*s */
3820                                 for (j = 1; p[i+j]; j++) {
3821                                         if (isdigit(p[i+j]) ||
3822                                             p[i+j] == '.')
3823                                                 continue;
3824                                         if (p[i+j] == '*') {
3825                                                 star = true;
3826                                                 continue;
3827                                         }
3828                                         break;
3829                                 }
3830                                 if (p[i+j] == 's')
3831                                         break;
3832                                 star = false;
3833                         }
3834                         j = 0;
3835                 }
3836                 /* If no %s found then just print normally */
3837                 if (!p[i])
3838                         break;
3839
3840                 /* Copy up to the %s, and print that */
3841                 strncpy(iter->fmt, p, i);
3842                 iter->fmt[i] = '\0';
3843                 trace_seq_vprintf(&iter->seq, iter->fmt, ap);
3844
3845                 /*
3846                  * If iter->seq is full, the above call no longer guarantees
3847                  * that ap is in sync with fmt processing, and further calls
3848                  * to va_arg() can return wrong positional arguments.
3849                  *
3850                  * Ensure that ap is no longer used in this case.
3851                  */
3852                 if (iter->seq.full) {
3853                         p = "";
3854                         break;
3855                 }
3856
3857                 if (star)
3858                         len = va_arg(ap, int);
3859
3860                 /* The ap now points to the string data of the %s */
3861                 str = va_arg(ap, const char *);
3862
3863                 /*
3864                  * If you hit this warning, it is likely that the
3865                  * trace event in question used %s on a string that
3866                  * was saved at the time of the event, but may not be
3867                  * around when the trace is read. Use __string(),
3868                  * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3869                  * instead. See samples/trace_events/trace-events-sample.h
3870                  * for reference.
3871                  */
3872                 if (WARN_ONCE(!trace_safe_str(iter, str, star, len),
3873                               "fmt: '%s' current_buffer: '%s'",
3874                               fmt, show_buffer(&iter->seq))) {
3875                         int ret;
3876
3877                         /* Try to safely read the string */
3878                         if (star) {
3879                                 if (len + 1 > iter->fmt_size)
3880                                         len = iter->fmt_size - 1;
3881                                 if (len < 0)
3882                                         len = 0;
3883                                 ret = copy_from_kernel_nofault(iter->fmt, str, len);
3884                                 iter->fmt[len] = 0;
3885                                 star = false;
3886                         } else {
3887                                 ret = strncpy_from_kernel_nofault(iter->fmt, str,
3888                                                                   iter->fmt_size);
3889                         }
3890                         if (ret < 0)
3891                                 trace_seq_printf(&iter->seq, "(0x%px)", str);
3892                         else
3893                                 trace_seq_printf(&iter->seq, "(0x%px:%s)",
3894                                                  str, iter->fmt);
3895                         str = "[UNSAFE-MEMORY]";
3896                         strcpy(iter->fmt, "%s");
3897                 } else {
3898                         strncpy(iter->fmt, p + i, j + 1);
3899                         iter->fmt[j+1] = '\0';
3900                 }
3901                 if (star)
3902                         trace_seq_printf(&iter->seq, iter->fmt, len, str);
3903                 else
3904                         trace_seq_printf(&iter->seq, iter->fmt, str);
3905
3906                 p += i + j + 1;
3907         }
3908  print:
3909         if (*p)
3910                 trace_seq_vprintf(&iter->seq, p, ap);
3911 }
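
/*
 * For reference, the safe pattern that the WARN_ONCE() above points at
 * looks roughly like this (a sketch modelled on
 * samples/trace_events/trace-events-sample.h; the event name and field
 * are made up):
 *
 *	TRACE_EVENT(my_event,
 *		TP_PROTO(const char *name),
 *		TP_ARGS(name),
 *		TP_STRUCT__entry(
 *			__string(name, name)
 *		),
 *		TP_fast_assign(
 *			__assign_str(name, name);
 *		),
 *		TP_printk("name=%s", __get_str(name))
 *	);
 *
 * __string()/__assign_str() copy the string into the event record, so
 * trace_safe_str() finds it inside iter->ent and no warning triggers.
 */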
3912
3913 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3914 {
3915         const char *p, *new_fmt;
3916         char *q;
3917
3918         if (WARN_ON_ONCE(!fmt))
3919                 return fmt;
3920
3921         if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3922                 return fmt;
3923
3924         p = fmt;
3925         new_fmt = q = iter->fmt;
3926         while (*p) {
3927                 if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3928                         if (!trace_iter_expand_format(iter))
3929                                 return fmt;
3930
3931                         q += iter->fmt - new_fmt;
3932                         new_fmt = iter->fmt;
3933                 }
3934
3935                 *q++ = *p++;
3936
3937                 /* Replace %p with %px */
3938                 if (p[-1] == '%') {
3939                         if (p[0] == '%') {
3940                                 *q++ = *p++;
3941                         } else if (p[0] == 'p' && !isalnum(p[1])) {
3942                                 *q++ = *p++;
3943                                 *q++ = 'x';
3944                         }
3945                 }
3946         }
3947         *q = '\0';
3948
3949         return new_fmt;
3950 }
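
/*
 * Effect of the rewrite above when the "hash-ptr" trace option is clear:
 * a format such as "skb=%p len=%u" is emitted as "skb=%px len=%u", so
 * the real pointer value is shown. "%%" escapes and extended specifiers
 * like "%pS" are left alone by the !isalnum(p[1]) check.
 */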
3951
3952 #define STATIC_TEMP_BUF_SIZE    128
3953 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3954
3955 /* Find the next real entry, without updating the iterator itself */
3956 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3957                                           int *ent_cpu, u64 *ent_ts)
3958 {
3959         /* __find_next_entry will reset ent_size */
3960         int ent_size = iter->ent_size;
3961         struct trace_entry *entry;
3962
3963         /*
3964          * If called from ftrace_dump(), then the iter->temp buffer
3965          * will be the static_temp_buf and not created from kmalloc.
3966          * If the entry size is greater than the buffer, we cannot
3967          * save it. Just return NULL in that case. This is only
3968          * used to add markers when two consecutive events' time
3969          * stamps have a large delta. See trace_print_lat_context().
3970          */
3971         if (iter->temp == static_temp_buf &&
3972             STATIC_TEMP_BUF_SIZE < ent_size)
3973                 return NULL;
3974
3975         /*
3976          * __find_next_entry() may call peek_next_entry(), which may
3977          * call ring_buffer_peek() and make the contents of iter->ent
3978          * undefined. Need to copy iter->ent now.
3979          */
3980         if (iter->ent && iter->ent != iter->temp) {
3981                 if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3982                     !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3983                         void *temp;
3984                         temp = kmalloc(iter->ent_size, GFP_KERNEL);
3985                         if (!temp)
3986                                 return NULL;
3987                         kfree(iter->temp);
3988                         iter->temp = temp;
3989                         iter->temp_size = iter->ent_size;
3990                 }
3991                 memcpy(iter->temp, iter->ent, iter->ent_size);
3992                 iter->ent = iter->temp;
3993         }
3994         entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3995         /* Put back the original ent_size */
3996         iter->ent_size = ent_size;
3997
3998         return entry;
3999 }
4000
4001 /* Find the next real entry, and increment the iterator to the next entry */
4002 void *trace_find_next_entry_inc(struct trace_iterator *iter)
4003 {
4004         iter->ent = __find_next_entry(iter, &iter->cpu,
4005                                       &iter->lost_events, &iter->ts);
4006
4007         if (iter->ent)
4008                 trace_iterator_increment(iter);
4009
4010         return iter->ent ? iter : NULL;
4011 }
4012
4013 static void trace_consume(struct trace_iterator *iter)
4014 {
4015         ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
4016                             &iter->lost_events);
4017 }
4018
4019 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
4020 {
4021         struct trace_iterator *iter = m->private;
4022         int i = (int)*pos;
4023         void *ent;
4024
4025         WARN_ON_ONCE(iter->leftover);
4026
4027         (*pos)++;
4028
4029         /* can't go backwards */
4030         if (iter->idx > i)
4031                 return NULL;
4032
4033         if (iter->idx < 0)
4034                 ent = trace_find_next_entry_inc(iter);
4035         else
4036                 ent = iter;
4037
4038         while (ent && iter->idx < i)
4039                 ent = trace_find_next_entry_inc(iter);
4040
4041         iter->pos = *pos;
4042
4043         return ent;
4044 }
4045
4046 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
4047 {
4048         struct ring_buffer_iter *buf_iter;
4049         unsigned long entries = 0;
4050         u64 ts;
4051
4052         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
4053
4054         buf_iter = trace_buffer_iter(iter, cpu);
4055         if (!buf_iter)
4056                 return;
4057
4058         ring_buffer_iter_reset(buf_iter);
4059
4060         /*
4061          * With the max latency tracers, it is possible that a reset
4062          * never took place on a CPU. This shows up as timestamps that
4063          * are before the start of the buffer.
4064          */
4065         while (ring_buffer_iter_peek(buf_iter, &ts)) {
4066                 if (ts >= iter->array_buffer->time_start)
4067                         break;
4068                 entries++;
4069                 ring_buffer_iter_advance(buf_iter);
4070         }
4071
4072         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
4073 }
4074
4075 /*
4076  * The current tracer is copied to avoid taking a global lock
4077  * all around.
4078  */
4079 static void *s_start(struct seq_file *m, loff_t *pos)
4080 {
4081         struct trace_iterator *iter = m->private;
4082         struct trace_array *tr = iter->tr;
4083         int cpu_file = iter->cpu_file;
4084         void *p = NULL;
4085         loff_t l = 0;
4086         int cpu;
4087
4088         /*
4089          * Copy the tracer to avoid using a global lock all around.
4090          * iter->trace is a copy of current_trace, so the name pointer
4091          * may be compared instead of using strcmp(), as iter->trace->name
4092          * will point to the same string as current_trace->name.
4093          */
4094         mutex_lock(&trace_types_lock);
4095         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
4096                 *iter->trace = *tr->current_trace;
4097         mutex_unlock(&trace_types_lock);
4098
4099 #ifdef CONFIG_TRACER_MAX_TRACE
4100         if (iter->snapshot && iter->trace->use_max_tr)
4101                 return ERR_PTR(-EBUSY);
4102 #endif
4103
4104         if (*pos != iter->pos) {
4105                 iter->ent = NULL;
4106                 iter->cpu = 0;
4107                 iter->idx = -1;
4108
4109                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
4110                         for_each_tracing_cpu(cpu)
4111                                 tracing_iter_reset(iter, cpu);
4112                 } else
4113                         tracing_iter_reset(iter, cpu_file);
4114
4115                 iter->leftover = 0;
4116                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4117                         ;
4118
4119         } else {
4120                 /*
4121                  * If we overflowed the seq_file before, then we want
4122                  * to just reuse the trace_seq buffer again.
4123                  */
4124                 if (iter->leftover)
4125                         p = iter;
4126                 else {
4127                         l = *pos - 1;
4128                         p = s_next(m, p, &l);
4129                 }
4130         }
4131
4132         trace_event_read_lock();
4133         trace_access_lock(cpu_file);
4134         return p;
4135 }
4136
4137 static void s_stop(struct seq_file *m, void *p)
4138 {
4139         struct trace_iterator *iter = m->private;
4140
4141 #ifdef CONFIG_TRACER_MAX_TRACE
4142         if (iter->snapshot && iter->trace->use_max_tr)
4143                 return;
4144 #endif
4145
4146         trace_access_unlock(iter->cpu_file);
4147         trace_event_read_unlock();
4148 }
4149
4150 static void
4151 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4152                       unsigned long *entries, int cpu)
4153 {
4154         unsigned long count;
4155
4156         count = ring_buffer_entries_cpu(buf->buffer, cpu);
4157         /*
4158          * If this buffer has skipped entries, then we hold all
4159          * entries for the trace and we need to ignore the
4160          * ones before the time stamp.
4161          */
4162         if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4163                 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4164                 /* total is the same as the entries */
4165                 *total = count;
4166         } else
4167                 *total = count +
4168                         ring_buffer_overrun_cpu(buf->buffer, cpu);
4169         *entries = count;
4170 }
4171
4172 static void
4173 get_total_entries(struct array_buffer *buf,
4174                   unsigned long *total, unsigned long *entries)
4175 {
4176         unsigned long t, e;
4177         int cpu;
4178
4179         *total = 0;
4180         *entries = 0;
4181
4182         for_each_tracing_cpu(cpu) {
4183                 get_total_entries_cpu(buf, &t, &e, cpu);
4184                 *total += t;
4185                 *entries += e;
4186         }
4187 }
4188
4189 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4190 {
4191         unsigned long total, entries;
4192
4193         if (!tr)
4194                 tr = &global_trace;
4195
4196         get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4197
4198         return entries;
4199 }
4200
4201 unsigned long trace_total_entries(struct trace_array *tr)
4202 {
4203         unsigned long total, entries;
4204
4205         if (!tr)
4206                 tr = &global_trace;
4207
4208         get_total_entries(&tr->array_buffer, &total, &entries);
4209
4210         return entries;
4211 }
4212
4213 static void print_lat_help_header(struct seq_file *m)
4214 {
4215         seq_puts(m, "#                    _------=> CPU#            \n"
4216                     "#                   / _-----=> irqs-off/BH-disabled\n"
4217                     "#                  | / _----=> need-resched    \n"
4218                     "#                  || / _---=> hardirq/softirq \n"
4219                     "#                  ||| / _--=> preempt-depth   \n"
4220                     "#                  |||| / _-=> migrate-disable \n"
4221                     "#                  ||||| /     delay           \n"
4222                     "#  cmd     pid     |||||| time  |   caller     \n"
4223                     "#     \\   /        ||||||  \\    |    /       \n");
4224 }
4225
4226 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4227 {
4228         unsigned long total;
4229         unsigned long entries;
4230
4231         get_total_entries(buf, &total, &entries);
4232         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
4233                    entries, total, num_online_cpus());
4234         seq_puts(m, "#\n");
4235 }
4236
4237 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4238                                    unsigned int flags)
4239 {
4240         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4241
4242         print_event_info(buf, m);
4243
4244         seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4245         seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4246 }
4247
4248 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4249                                        unsigned int flags)
4250 {
4251         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4252         const char *space = "            ";
4253         int prec = tgid ? 12 : 2;
4254
4255         print_event_info(buf, m);
4256
4257         seq_printf(m, "#                            %.*s  _-----=> irqs-off/BH-disabled\n", prec, space);
4258         seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4259         seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4260         seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4261         seq_printf(m, "#                            %.*s||| / _-=> migrate-disable\n", prec, space);
4262         seq_printf(m, "#                            %.*s|||| /     delay\n", prec, space);
4263         seq_printf(m, "#           TASK-PID  %.*s CPU#  |||||  TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4264         seq_printf(m, "#              | |    %.*s   |   |||||     |         |\n", prec, "       |    ");
4265 }
4266
4267 void
4268 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4269 {
4270         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4271         struct array_buffer *buf = iter->array_buffer;
4272         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4273         struct tracer *type = iter->trace;
4274         unsigned long entries;
4275         unsigned long total;
4276         const char *name = "preemption";
4277
4278         name = type->name;
4279
4280         get_total_entries(buf, &total, &entries);
4281
4282         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4283                    name, UTS_RELEASE);
4284         seq_puts(m, "# -----------------------------------"
4285                  "---------------------------------\n");
4286         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4287                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4288                    nsecs_to_usecs(data->saved_latency),
4289                    entries,
4290                    total,
4291                    buf->cpu,
4292                    preempt_model_none()      ? "server" :
4293                    preempt_model_voluntary() ? "desktop" :
4294                    preempt_model_full()      ? "preempt" :
4295                    preempt_model_rt()        ? "preempt_rt" :
4296                    "unknown",
4297                    /* These are reserved for later use */
4298                    0, 0, 0, 0);
4299 #ifdef CONFIG_SMP
4300         seq_printf(m, " #P:%d)\n", num_online_cpus());
4301 #else
4302         seq_puts(m, ")\n");
4303 #endif
4304         seq_puts(m, "#    -----------------\n");
4305         seq_printf(m, "#    | task: %.16s-%d "
4306                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4307                    data->comm, data->pid,
4308                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4309                    data->policy, data->rt_priority);
4310         seq_puts(m, "#    -----------------\n");
4311
4312         if (data->critical_start) {
4313                 seq_puts(m, "#  => started at: ");
4314                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4315                 trace_print_seq(m, &iter->seq);
4316                 seq_puts(m, "\n#  => ended at:   ");
4317                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4318                 trace_print_seq(m, &iter->seq);
4319                 seq_puts(m, "\n#\n");
4320         }
4321
4322         seq_puts(m, "#\n");
4323 }
4324
4325 static void test_cpu_buff_start(struct trace_iterator *iter)
4326 {
4327         struct trace_seq *s = &iter->seq;
4328         struct trace_array *tr = iter->tr;
4329
4330         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4331                 return;
4332
4333         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4334                 return;
4335
4336         if (cpumask_available(iter->started) &&
4337             cpumask_test_cpu(iter->cpu, iter->started))
4338                 return;
4339
4340         if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4341                 return;
4342
4343         if (cpumask_available(iter->started))
4344                 cpumask_set_cpu(iter->cpu, iter->started);
4345
4346         /* Don't print started cpu buffer for the first entry of the trace */
4347         if (iter->idx > 1)
4348                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4349                                 iter->cpu);
4350 }
4351
4352 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4353 {
4354         struct trace_array *tr = iter->tr;
4355         struct trace_seq *s = &iter->seq;
4356         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4357         struct trace_entry *entry;
4358         struct trace_event *event;
4359
4360         entry = iter->ent;
4361
4362         test_cpu_buff_start(iter);
4363
4364         event = ftrace_find_event(entry->type);
4365
4366         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4367                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4368                         trace_print_lat_context(iter);
4369                 else
4370                         trace_print_context(iter);
4371         }
4372
4373         if (trace_seq_has_overflowed(s))
4374                 return TRACE_TYPE_PARTIAL_LINE;
4375
4376         if (event)
4377                 return event->funcs->trace(iter, sym_flags, event);
4378
4379         trace_seq_printf(s, "Unknown type %d\n", entry->type);
4380
4381         return trace_handle_return(s);
4382 }
4383
4384 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4385 {
4386         struct trace_array *tr = iter->tr;
4387         struct trace_seq *s = &iter->seq;
4388         struct trace_entry *entry;
4389         struct trace_event *event;
4390
4391         entry = iter->ent;
4392
4393         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4394                 trace_seq_printf(s, "%d %d %llu ",
4395                                  entry->pid, iter->cpu, iter->ts);
4396
4397         if (trace_seq_has_overflowed(s))
4398                 return TRACE_TYPE_PARTIAL_LINE;
4399
4400         event = ftrace_find_event(entry->type);
4401         if (event)
4402                 return event->funcs->raw(iter, 0, event);
4403
4404         trace_seq_printf(s, "%d ?\n", entry->type);
4405
4406         return trace_handle_return(s);
4407 }
4408
4409 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4410 {
4411         struct trace_array *tr = iter->tr;
4412         struct trace_seq *s = &iter->seq;
4413         unsigned char newline = '\n';
4414         struct trace_entry *entry;
4415         struct trace_event *event;
4416
4417         entry = iter->ent;
4418
4419         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4420                 SEQ_PUT_HEX_FIELD(s, entry->pid);
4421                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
4422                 SEQ_PUT_HEX_FIELD(s, iter->ts);
4423                 if (trace_seq_has_overflowed(s))
4424                         return TRACE_TYPE_PARTIAL_LINE;
4425         }
4426
4427         event = ftrace_find_event(entry->type);
4428         if (event) {
4429                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
4430                 if (ret != TRACE_TYPE_HANDLED)
4431                         return ret;
4432         }
4433
4434         SEQ_PUT_FIELD(s, newline);
4435
4436         return trace_handle_return(s);
4437 }
4438
4439 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4440 {
4441         struct trace_array *tr = iter->tr;
4442         struct trace_seq *s = &iter->seq;
4443         struct trace_entry *entry;
4444         struct trace_event *event;
4445
4446         entry = iter->ent;
4447
4448         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4449                 SEQ_PUT_FIELD(s, entry->pid);
4450                 SEQ_PUT_FIELD(s, iter->cpu);
4451                 SEQ_PUT_FIELD(s, iter->ts);
4452                 if (trace_seq_has_overflowed(s))
4453                         return TRACE_TYPE_PARTIAL_LINE;
4454         }
4455
4456         event = ftrace_find_event(entry->type);
4457         return event ? event->funcs->binary(iter, 0, event) :
4458                 TRACE_TYPE_HANDLED;
4459 }
4460
4461 int trace_empty(struct trace_iterator *iter)
4462 {
4463         struct ring_buffer_iter *buf_iter;
4464         int cpu;
4465
4466         /* If we are looking at one CPU buffer, only check that one */
4467         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4468                 cpu = iter->cpu_file;
4469                 buf_iter = trace_buffer_iter(iter, cpu);
4470                 if (buf_iter) {
4471                         if (!ring_buffer_iter_empty(buf_iter))
4472                                 return 0;
4473                 } else {
4474                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4475                                 return 0;
4476                 }
4477                 return 1;
4478         }
4479
4480         for_each_tracing_cpu(cpu) {
4481                 buf_iter = trace_buffer_iter(iter, cpu);
4482                 if (buf_iter) {
4483                         if (!ring_buffer_iter_empty(buf_iter))
4484                                 return 0;
4485                 } else {
4486                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4487                                 return 0;
4488                 }
4489         }
4490
4491         return 1;
4492 }
4493
4494 /*  Called with trace_event_read_lock() held. */
4495 enum print_line_t print_trace_line(struct trace_iterator *iter)
4496 {
4497         struct trace_array *tr = iter->tr;
4498         unsigned long trace_flags = tr->trace_flags;
4499         enum print_line_t ret;
4500
4501         if (iter->lost_events) {
4502                 if (iter->lost_events == (unsigned long)-1)
4503                         trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4504                                          iter->cpu);
4505                 else
4506                         trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4507                                          iter->cpu, iter->lost_events);
4508                 if (trace_seq_has_overflowed(&iter->seq))
4509                         return TRACE_TYPE_PARTIAL_LINE;
4510         }
4511
4512         if (iter->trace && iter->trace->print_line) {
4513                 ret = iter->trace->print_line(iter);
4514                 if (ret != TRACE_TYPE_UNHANDLED)
4515                         return ret;
4516         }
4517
4518         if (iter->ent->type == TRACE_BPUTS &&
4519                         trace_flags & TRACE_ITER_PRINTK &&
4520                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4521                 return trace_print_bputs_msg_only(iter);
4522
4523         if (iter->ent->type == TRACE_BPRINT &&
4524                         trace_flags & TRACE_ITER_PRINTK &&
4525                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4526                 return trace_print_bprintk_msg_only(iter);
4527
4528         if (iter->ent->type == TRACE_PRINT &&
4529                         trace_flags & TRACE_ITER_PRINTK &&
4530                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4531                 return trace_print_printk_msg_only(iter);
4532
4533         if (trace_flags & TRACE_ITER_BIN)
4534                 return print_bin_fmt(iter);
4535
4536         if (trace_flags & TRACE_ITER_HEX)
4537                 return print_hex_fmt(iter);
4538
4539         if (trace_flags & TRACE_ITER_RAW)
4540                 return print_raw_fmt(iter);
4541
4542         return print_trace_fmt(iter);
4543 }
4544
4545 void trace_latency_header(struct seq_file *m)
4546 {
4547         struct trace_iterator *iter = m->private;
4548         struct trace_array *tr = iter->tr;
4549
4550         /* print nothing if the buffers are empty */
4551         if (trace_empty(iter))
4552                 return;
4553
4554         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4555                 print_trace_header(m, iter);
4556
4557         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4558                 print_lat_help_header(m);
4559 }
4560
4561 void trace_default_header(struct seq_file *m)
4562 {
4563         struct trace_iterator *iter = m->private;
4564         struct trace_array *tr = iter->tr;
4565         unsigned long trace_flags = tr->trace_flags;
4566
4567         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4568                 return;
4569
4570         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4571                 /* print nothing if the buffers are empty */
4572                 if (trace_empty(iter))
4573                         return;
4574                 print_trace_header(m, iter);
4575                 if (!(trace_flags & TRACE_ITER_VERBOSE))
4576                         print_lat_help_header(m);
4577         } else {
4578                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4579                         if (trace_flags & TRACE_ITER_IRQ_INFO)
4580                                 print_func_help_header_irq(iter->array_buffer,
4581                                                            m, trace_flags);
4582                         else
4583                                 print_func_help_header(iter->array_buffer, m,
4584                                                        trace_flags);
4585                 }
4586         }
4587 }
4588
4589 static void test_ftrace_alive(struct seq_file *m)
4590 {
4591         if (!ftrace_is_dead())
4592                 return;
4593         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4594                     "#          MAY BE MISSING FUNCTION EVENTS\n");
4595 }
4596
4597 #ifdef CONFIG_TRACER_MAX_TRACE
4598 static void show_snapshot_main_help(struct seq_file *m)
4599 {
4600         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4601                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4602                     "#                      Takes a snapshot of the main buffer.\n"
4603                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4604                     "#                      (Doesn't have to be '2'; works with any number that\n"
4605                     "#                       is not a '0' or '1')\n");
4606 }
4607
4608 static void show_snapshot_percpu_help(struct seq_file *m)
4609 {
4610         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4611 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4612         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4613                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
4614 #else
4615         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4616                     "#                     Must use main snapshot file to allocate.\n");
4617 #endif
4618         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4619                     "#                      (Doesn't have to be '2'; works with any number that\n"
4620                     "#                       is not a '0' or '1')\n");
4621 }
4622
4623 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4624 {
4625         if (iter->tr->allocated_snapshot)
4626                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4627         else
4628                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4629
4630         seq_puts(m, "# Snapshot commands:\n");
4631         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4632                 show_snapshot_main_help(m);
4633         else
4634                 show_snapshot_percpu_help(m);
4635 }
4636 #else
4637 /* Should never be called */
4638 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4639 #endif
4640
4641 static int s_show(struct seq_file *m, void *v)
4642 {
4643         struct trace_iterator *iter = v;
4644         int ret;
4645
4646         if (iter->ent == NULL) {
4647                 if (iter->tr) {
4648                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
4649                         seq_puts(m, "#\n");
4650                         test_ftrace_alive(m);
4651                 }
4652                 if (iter->snapshot && trace_empty(iter))
4653                         print_snapshot_help(m, iter);
4654                 else if (iter->trace && iter->trace->print_header)
4655                         iter->trace->print_header(m);
4656                 else
4657                         trace_default_header(m);
4658
4659         } else if (iter->leftover) {
4660                 /*
4661                  * If we filled the seq_file buffer earlier, we
4662                  * want to just show it now.
4663                  */
4664                 ret = trace_print_seq(m, &iter->seq);
4665
4666                 /* ret should this time be zero, but you never know */
4667                 iter->leftover = ret;
4668
4669         } else {
4670                 print_trace_line(iter);
4671                 ret = trace_print_seq(m, &iter->seq);
4672                 /*
4673                  * If we overflow the seq_file buffer, then it will
4674                  * ask us for this data again at start up.
4675                  * Use that instead.
4676                  *  ret is 0 if seq_file write succeeded.
4677                  *        -1 otherwise.
4678                  */
4679                 iter->leftover = ret;
4680         }
4681
4682         return 0;
4683 }
4684
4685 /*
4686  * Should be used after trace_array_get(); trace_types_lock
4687  * ensures that i_cdev was already initialized.
4688  */
4689 static inline int tracing_get_cpu(struct inode *inode)
4690 {
4691         if (inode->i_cdev) /* See trace_create_cpu_file() */
4692                 return (long)inode->i_cdev - 1;
4693         return RING_BUFFER_ALL_CPUS;
4694 }
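
/*
 * The encoding relied on here (see trace_create_cpu_file()): the per-CPU
 * files store "cpu + 1" in i_cdev, so a NULL i_cdev from a non per-CPU
 * file maps to RING_BUFFER_ALL_CPUS, while e.g. per_cpu/cpu2 yields
 * (long)3 - 1 == 2.
 */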
4695
4696 static const struct seq_operations tracer_seq_ops = {
4697         .start          = s_start,
4698         .next           = s_next,
4699         .stop           = s_stop,
4700         .show           = s_show,
4701 };
4702
4703 static struct trace_iterator *
4704 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4705 {
4706         struct trace_array *tr = inode->i_private;
4707         struct trace_iterator *iter;
4708         int cpu;
4709
4710         if (tracing_disabled)
4711                 return ERR_PTR(-ENODEV);
4712
4713         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4714         if (!iter)
4715                 return ERR_PTR(-ENOMEM);
4716
4717         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4718                                     GFP_KERNEL);
4719         if (!iter->buffer_iter)
4720                 goto release;
4721
4722         /*
4723          * trace_find_next_entry() may need to save off iter->ent.
4724          * It will place it into the iter->temp buffer. As most
4725          * events are less than 128 bytes, allocate a buffer of that size.
4726          * If one is greater, then trace_find_next_entry() will
4727          * allocate a new buffer to adjust for the bigger iter->ent.
4728          * It's not critical if it fails to get allocated here.
4729          */
4730         iter->temp = kmalloc(128, GFP_KERNEL);
4731         if (iter->temp)
4732                 iter->temp_size = 128;
4733
4734         /*
4735          * trace_event_printf() may need to modify given format
4736          * string to replace %p with %px so that it shows real address
4737          * instead of a hashed value. However, that is only needed for
4738          * event tracing; other tracers may not need it. Defer the allocation
4739          * until it is needed.
4740          */
4741         iter->fmt = NULL;
4742         iter->fmt_size = 0;
4743
4744         /*
4745          * We make a copy of the current tracer to avoid concurrent
4746          * changes to it while we are reading.
4747          */
4748         mutex_lock(&trace_types_lock);
4749         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4750         if (!iter->trace)
4751                 goto fail;
4752
4753         *iter->trace = *tr->current_trace;
4754
4755         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4756                 goto fail;
4757
4758         iter->tr = tr;
4759
4760 #ifdef CONFIG_TRACER_MAX_TRACE
4761         /* Currently only the top directory has a snapshot */
4762         if (tr->current_trace->print_max || snapshot)
4763                 iter->array_buffer = &tr->max_buffer;
4764         else
4765 #endif
4766                 iter->array_buffer = &tr->array_buffer;
4767         iter->snapshot = snapshot;
4768         iter->pos = -1;
4769         iter->cpu_file = tracing_get_cpu(inode);
4770         mutex_init(&iter->mutex);
4771
4772         /* Notify the tracer early, before we stop tracing. */
4773         if (iter->trace->open)
4774                 iter->trace->open(iter);
4775
4776         /* Annotate start of buffers if we had overruns */
4777         if (ring_buffer_overruns(iter->array_buffer->buffer))
4778                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4779
4780         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4781         if (trace_clocks[tr->clock_id].in_ns)
4782                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4783
4784         /*
4785          * If pause-on-trace is enabled, then stop the trace while
4786          * dumping, unless this is the "snapshot" file
4787          */
4788         if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4789                 tracing_stop_tr(tr);
4790
4791         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4792                 for_each_tracing_cpu(cpu) {
4793                         iter->buffer_iter[cpu] =
4794                                 ring_buffer_read_prepare(iter->array_buffer->buffer,
4795                                                          cpu, GFP_KERNEL);
4796                 }
4797                 ring_buffer_read_prepare_sync();
4798                 for_each_tracing_cpu(cpu) {
4799                         ring_buffer_read_start(iter->buffer_iter[cpu]);
4800                         tracing_iter_reset(iter, cpu);
4801                 }
4802         } else {
4803                 cpu = iter->cpu_file;
4804                 iter->buffer_iter[cpu] =
4805                         ring_buffer_read_prepare(iter->array_buffer->buffer,
4806                                                  cpu, GFP_KERNEL);
4807                 ring_buffer_read_prepare_sync();
4808                 ring_buffer_read_start(iter->buffer_iter[cpu]);
4809                 tracing_iter_reset(iter, cpu);
4810         }
4811
4812         mutex_unlock(&trace_types_lock);
4813
4814         return iter;
4815
4816  fail:
4817         mutex_unlock(&trace_types_lock);
4818         kfree(iter->trace);
4819         kfree(iter->temp);
4820         kfree(iter->buffer_iter);
4821 release:
4822         seq_release_private(inode, file);
4823         return ERR_PTR(-ENOMEM);
4824 }
4825
4826 int tracing_open_generic(struct inode *inode, struct file *filp)
4827 {
4828         int ret;
4829
4830         ret = tracing_check_open_get_tr(NULL);
4831         if (ret)
4832                 return ret;
4833
4834         filp->private_data = inode->i_private;
4835         return 0;
4836 }
4837
4838 bool tracing_is_disabled(void)
4839 {
4840         return tracing_disabled;
4841 }
4842
4843 /*
4844  * Open and update trace_array ref count.
4845  * Must have the current trace_array passed to it.
4846  */
4847 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4848 {
4849         struct trace_array *tr = inode->i_private;
4850         int ret;
4851
4852         ret = tracing_check_open_get_tr(tr);
4853         if (ret)
4854                 return ret;
4855
4856         filp->private_data = inode->i_private;
4857
4858         return 0;
4859 }
4860
4861 static int tracing_mark_open(struct inode *inode, struct file *filp)
4862 {
4863         stream_open(inode, filp);
4864         return tracing_open_generic_tr(inode, filp);
4865 }
4866
4867 static int tracing_release(struct inode *inode, struct file *file)
4868 {
4869         struct trace_array *tr = inode->i_private;
4870         struct seq_file *m = file->private_data;
4871         struct trace_iterator *iter;
4872         int cpu;
4873
4874         if (!(file->f_mode & FMODE_READ)) {
4875                 trace_array_put(tr);
4876                 return 0;
4877         }
4878
4879         /* Writes do not use seq_file */
4880         iter = m->private;
4881         mutex_lock(&trace_types_lock);
4882
4883         for_each_tracing_cpu(cpu) {
4884                 if (iter->buffer_iter[cpu])
4885                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
4886         }
4887
4888         if (iter->trace && iter->trace->close)
4889                 iter->trace->close(iter);
4890
4891         if (!iter->snapshot && tr->stop_count)
4892                 /* reenable tracing if it was previously enabled */
4893                 tracing_start_tr(tr);
4894
4895         __trace_array_put(tr);
4896
4897         mutex_unlock(&trace_types_lock);
4898
4899         mutex_destroy(&iter->mutex);
4900         free_cpumask_var(iter->started);
4901         kfree(iter->fmt);
4902         kfree(iter->temp);
4903         kfree(iter->trace);
4904         kfree(iter->buffer_iter);
4905         seq_release_private(inode, file);
4906
4907         return 0;
4908 }
4909
4910 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4911 {
4912         struct trace_array *tr = inode->i_private;
4913
4914         trace_array_put(tr);
4915         return 0;
4916 }
4917
4918 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4919 {
4920         struct trace_array *tr = inode->i_private;
4921
4922         trace_array_put(tr);
4923
4924         return single_release(inode, file);
4925 }
4926
4927 static int tracing_open(struct inode *inode, struct file *file)
4928 {
4929         struct trace_array *tr = inode->i_private;
4930         struct trace_iterator *iter;
4931         int ret;
4932
4933         ret = tracing_check_open_get_tr(tr);
4934         if (ret)
4935                 return ret;
4936
4937         /* If this file was open for write, then erase contents */
4938         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4939                 int cpu = tracing_get_cpu(inode);
4940                 struct array_buffer *trace_buf = &tr->array_buffer;
4941
4942 #ifdef CONFIG_TRACER_MAX_TRACE
4943                 if (tr->current_trace->print_max)
4944                         trace_buf = &tr->max_buffer;
4945 #endif
4946
4947                 if (cpu == RING_BUFFER_ALL_CPUS)
4948                         tracing_reset_online_cpus(trace_buf);
4949                 else
4950                         tracing_reset_cpu(trace_buf, cpu);
4951         }
4952
4953         if (file->f_mode & FMODE_READ) {
4954                 iter = __tracing_open(inode, file, false);
4955                 if (IS_ERR(iter))
4956                         ret = PTR_ERR(iter);
4957                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4958                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
4959         }
4960
4961         if (ret < 0)
4962                 trace_array_put(tr);
4963
4964         return ret;
4965 }
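
/*
 * Illustrative user-space view of the open-for-write path above (assuming
 * tracefs is mounted at /sys/kernel/tracing): a shell redirection opens the
 * file with O_TRUNC and therefore clears the buffer instead of appending:
 *
 *   # echo > /sys/kernel/tracing/trace
 *
 * A plain read of the same file goes through __tracing_open() and the
 * seq_file iterator.
 */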
4966
4967 /*
4968  * Some tracers are not suitable for instance buffers.
4969  * A tracer is always available for the global array (toplevel)
4970  * or if it explicitly states that it is.
4971  */
4972 static bool
4973 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4974 {
4975         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4976 }
4977
4978 /* Find the next tracer that this trace array may use */
4979 static struct tracer *
4980 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4981 {
4982         while (t && !trace_ok_for_array(t, tr))
4983                 t = t->next;
4984
4985         return t;
4986 }
4987
4988 static void *
4989 t_next(struct seq_file *m, void *v, loff_t *pos)
4990 {
4991         struct trace_array *tr = m->private;
4992         struct tracer *t = v;
4993
4994         (*pos)++;
4995
4996         if (t)
4997                 t = get_tracer_for_array(tr, t->next);
4998
4999         return t;
5000 }
5001
5002 static void *t_start(struct seq_file *m, loff_t *pos)
5003 {
5004         struct trace_array *tr = m->private;
5005         struct tracer *t;
5006         loff_t l = 0;
5007
5008         mutex_lock(&trace_types_lock);
5009
5010         t = get_tracer_for_array(tr, trace_types);
5011         for (; t && l < *pos; t = t_next(m, t, &l))
5012                         ;
5013
5014         return t;
5015 }
5016
5017 static void t_stop(struct seq_file *m, void *p)
5018 {
5019         mutex_unlock(&trace_types_lock);
5020 }
5021
5022 static int t_show(struct seq_file *m, void *v)
5023 {
5024         struct tracer *t = v;
5025
5026         if (!t)
5027                 return 0;
5028
5029         seq_puts(m, t->name);
5030         if (t->next)
5031                 seq_putc(m, ' ');
5032         else
5033                 seq_putc(m, '\n');
5034
5035         return 0;
5036 }
5037
5038 static const struct seq_operations show_traces_seq_ops = {
5039         .start          = t_start,
5040         .next           = t_next,
5041         .stop           = t_stop,
5042         .show           = t_show,
5043 };
5044
5045 static int show_traces_open(struct inode *inode, struct file *file)
5046 {
5047         struct trace_array *tr = inode->i_private;
5048         struct seq_file *m;
5049         int ret;
5050
5051         ret = tracing_check_open_get_tr(tr);
5052         if (ret)
5053                 return ret;
5054
5055         ret = seq_open(file, &show_traces_seq_ops);
5056         if (ret) {
5057                 trace_array_put(tr);
5058                 return ret;
5059         }
5060
5061         m = file->private_data;
5062         m->private = tr;
5063
5064         return 0;
5065 }
5066
5067 static int show_traces_release(struct inode *inode, struct file *file)
5068 {
5069         struct trace_array *tr = inode->i_private;
5070
5071         trace_array_put(tr);
5072         return seq_release(inode, file);
5073 }
5074
5075 static ssize_t
5076 tracing_write_stub(struct file *filp, const char __user *ubuf,
5077                    size_t count, loff_t *ppos)
5078 {
5079         return count;
5080 }
5081
5082 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5083 {
5084         int ret;
5085
5086         if (file->f_mode & FMODE_READ)
5087                 ret = seq_lseek(file, offset, whence);
5088         else
5089                 file->f_pos = ret = 0;
5090
5091         return ret;
5092 }
5093
5094 static const struct file_operations tracing_fops = {
5095         .open           = tracing_open,
5096         .read           = seq_read,
5097         .write          = tracing_write_stub,
5098         .llseek         = tracing_lseek,
5099         .release        = tracing_release,
5100 };
5101
5102 static const struct file_operations show_traces_fops = {
5103         .open           = show_traces_open,
5104         .read           = seq_read,
5105         .llseek         = seq_lseek,
5106         .release        = show_traces_release,
5107 };
5108
5109 static ssize_t
5110 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5111                      size_t count, loff_t *ppos)
5112 {
5113         struct trace_array *tr = file_inode(filp)->i_private;
5114         char *mask_str;
5115         int len;
5116
5117         len = snprintf(NULL, 0, "%*pb\n",
5118                        cpumask_pr_args(tr->tracing_cpumask)) + 1;
5119         mask_str = kmalloc(len, GFP_KERNEL);
5120         if (!mask_str)
5121                 return -ENOMEM;
5122
5123         len = snprintf(mask_str, len, "%*pb\n",
5124                        cpumask_pr_args(tr->tracing_cpumask));
5125         if (len >= count) {
5126                 count = -EINVAL;
5127                 goto out_err;
5128         }
5129         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5130
5131 out_err:
5132         kfree(mask_str);
5133
5134         return count;
5135 }
5136
5137 int tracing_set_cpumask(struct trace_array *tr,
5138                         cpumask_var_t tracing_cpumask_new)
5139 {
5140         int cpu;
5141
5142         if (!tr)
5143                 return -EINVAL;
5144
5145         local_irq_disable();
5146         arch_spin_lock(&tr->max_lock);
5147         for_each_tracing_cpu(cpu) {
5148                 /*
5149                  * Increase/decrease the disabled counter if we are
5150                  * about to flip a bit in the cpumask:
5151                  */
5152                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5153                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5154                         atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5155                         ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5156                 }
5157                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5158                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5159                         atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5160                         ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5161                 }
5162         }
5163         arch_spin_unlock(&tr->max_lock);
5164         local_irq_enable();
5165
5166         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5167
5168         return 0;
5169 }
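
/*
 * Illustrative example of driving the code above from user space (assuming
 * tracefs is mounted at /sys/kernel/tracing): the mask is parsed as hex, so
 * this limits tracing to CPUs 0-3 and disables recording on the rest:
 *
 *   # echo f > /sys/kernel/tracing/tracing_cpumask
 *   # cat /sys/kernel/tracing/tracing_cpumask
 *
 * The read prints the mask with "%*pb", so its width depends on the number
 * of possible CPUs.
 */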
5170
5171 static ssize_t
5172 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5173                       size_t count, loff_t *ppos)
5174 {
5175         struct trace_array *tr = file_inode(filp)->i_private;
5176         cpumask_var_t tracing_cpumask_new;
5177         int err;
5178
5179         if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5180                 return -ENOMEM;
5181
5182         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5183         if (err)
5184                 goto err_free;
5185
5186         err = tracing_set_cpumask(tr, tracing_cpumask_new);
5187         if (err)
5188                 goto err_free;
5189
5190         free_cpumask_var(tracing_cpumask_new);
5191
5192         return count;
5193
5194 err_free:
5195         free_cpumask_var(tracing_cpumask_new);
5196
5197         return err;
5198 }
5199
5200 static const struct file_operations tracing_cpumask_fops = {
5201         .open           = tracing_open_generic_tr,
5202         .read           = tracing_cpumask_read,
5203         .write          = tracing_cpumask_write,
5204         .release        = tracing_release_generic_tr,
5205         .llseek         = generic_file_llseek,
5206 };
5207
5208 static int tracing_trace_options_show(struct seq_file *m, void *v)
5209 {
5210         struct tracer_opt *trace_opts;
5211         struct trace_array *tr = m->private;
5212         u32 tracer_flags;
5213         int i;
5214
5215         mutex_lock(&trace_types_lock);
5216         tracer_flags = tr->current_trace->flags->val;
5217         trace_opts = tr->current_trace->flags->opts;
5218
5219         for (i = 0; trace_options[i]; i++) {
5220                 if (tr->trace_flags & (1 << i))
5221                         seq_printf(m, "%s\n", trace_options[i]);
5222                 else
5223                         seq_printf(m, "no%s\n", trace_options[i]);
5224         }
5225
5226         for (i = 0; trace_opts[i].name; i++) {
5227                 if (tracer_flags & trace_opts[i].bit)
5228                         seq_printf(m, "%s\n", trace_opts[i].name);
5229                 else
5230                         seq_printf(m, "no%s\n", trace_opts[i].name);
5231         }
5232         mutex_unlock(&trace_types_lock);
5233
5234         return 0;
5235 }
5236
5237 static int __set_tracer_option(struct trace_array *tr,
5238                                struct tracer_flags *tracer_flags,
5239                                struct tracer_opt *opts, int neg)
5240 {
5241         struct tracer *trace = tracer_flags->trace;
5242         int ret;
5243
5244         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5245         if (ret)
5246                 return ret;
5247
5248         if (neg)
5249                 tracer_flags->val &= ~opts->bit;
5250         else
5251                 tracer_flags->val |= opts->bit;
5252         return 0;
5253 }
5254
5255 /* Try to assign a tracer specific option */
5256 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5257 {
5258         struct tracer *trace = tr->current_trace;
5259         struct tracer_flags *tracer_flags = trace->flags;
5260         struct tracer_opt *opts = NULL;
5261         int i;
5262
5263         for (i = 0; tracer_flags->opts[i].name; i++) {
5264                 opts = &tracer_flags->opts[i];
5265
5266                 if (strcmp(cmp, opts->name) == 0)
5267                         return __set_tracer_option(tr, trace->flags, opts, neg);
5268         }
5269
5270         return -EINVAL;
5271 }
5272
5273 /* Some tracers require overwrite to stay enabled */
5274 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5275 {
5276         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5277                 return -1;
5278
5279         return 0;
5280 }
5281
5282 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5283 {
5284         int *map;
5285
5286         if ((mask == TRACE_ITER_RECORD_TGID) ||
5287             (mask == TRACE_ITER_RECORD_CMD))
5288                 lockdep_assert_held(&event_mutex);
5289
5290         /* Do nothing if the flag already matches the requested state */
5291         if (!!(tr->trace_flags & mask) == !!enabled)
5292                 return 0;
5293
5294         /* Give the tracer a chance to approve the change */
5295         if (tr->current_trace->flag_changed)
5296                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5297                         return -EINVAL;
5298
5299         if (enabled)
5300                 tr->trace_flags |= mask;
5301         else
5302                 tr->trace_flags &= ~mask;
5303
5304         if (mask == TRACE_ITER_RECORD_CMD)
5305                 trace_event_enable_cmd_record(enabled);
5306
5307         if (mask == TRACE_ITER_RECORD_TGID) {
5308                 if (!tgid_map) {
5309                         tgid_map_max = pid_max;
5310                         map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
5311                                        GFP_KERNEL);
5312
5313                         /*
5314                          * Pairs with smp_load_acquire() in
5315                          * trace_find_tgid_ptr() to ensure that if it observes
5316                          * the tgid_map we just allocated then it also observes
5317                          * the corresponding tgid_map_max value.
5318                          */
5319                         smp_store_release(&tgid_map, map);
5320                 }
5321                 if (!tgid_map) {
5322                         tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5323                         return -ENOMEM;
5324                 }
5325
5326                 trace_event_enable_tgid_record(enabled);
5327         }
5328
5329         if (mask == TRACE_ITER_EVENT_FORK)
5330                 trace_event_follow_fork(tr, enabled);
5331
5332         if (mask == TRACE_ITER_FUNC_FORK)
5333                 ftrace_pid_follow_fork(tr, enabled);
5334
5335         if (mask == TRACE_ITER_OVERWRITE) {
5336                 ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5337 #ifdef CONFIG_TRACER_MAX_TRACE
5338                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5339 #endif
5340         }
5341
5342         if (mask == TRACE_ITER_PRINTK) {
5343                 trace_printk_start_stop_comm(enabled);
5344                 trace_printk_control(enabled);
5345         }
5346
5347         return 0;
5348 }
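
/*
 * Illustrative example: the first time the record-tgid option is switched on,
 * the TRACE_ITER_RECORD_TGID branch above allocates tgid_map with pid_max + 1
 * entries:
 *
 *   # echo 1 > /sys/kernel/tracing/options/record-tgid
 *
 * (Path assumes tracefs is mounted at /sys/kernel/tracing.)
 */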
5349
5350 int trace_set_options(struct trace_array *tr, char *option)
5351 {
5352         char *cmp;
5353         int neg = 0;
5354         int ret;
5355         size_t orig_len = strlen(option);
5356         int len;
5357
5358         cmp = strstrip(option);
5359
5360         len = str_has_prefix(cmp, "no");
5361         if (len)
5362                 neg = 1;
5363
5364         cmp += len;
5365
5366         mutex_lock(&event_mutex);
5367         mutex_lock(&trace_types_lock);
5368
5369         ret = match_string(trace_options, -1, cmp);
5370         /* If no option could be set, test the specific tracer options */
5371         if (ret < 0)
5372                 ret = set_tracer_option(tr, cmp, neg);
5373         else
5374                 ret = set_tracer_flag(tr, 1 << ret, !neg);
5375
5376         mutex_unlock(&trace_types_lock);
5377         mutex_unlock(&event_mutex);
5378
5379         /*
5380          * If the first trailing whitespace is replaced with '\0' by strstrip,
5381          * turn it back into a space.
5382          */
5383         if (orig_len > strlen(option))
5384                 option[strlen(option)] = ' ';
5385
5386         return ret;
5387 }
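
/*
 * Illustrative syntax accepted above: a "no" prefix clears an option, anything
 * else sets it, and names that are not core trace_options[] entries fall
 * through to set_tracer_option() for tracer-specific flags:
 *
 *   # echo sym-offset > /sys/kernel/tracing/trace_options
 *   # echo nosym-offset > /sys/kernel/tracing/trace_options
 */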
5388
5389 static void __init apply_trace_boot_options(void)
5390 {
5391         char *buf = trace_boot_options_buf;
5392         char *option;
5393
5394         while (true) {
5395                 option = strsep(&buf, ",");
5396
5397                 if (!option)
5398                         break;
5399
5400                 if (*option)
5401                         trace_set_options(&global_trace, option);
5402
5403                 /* Put back the comma to allow this to be called again */
5404                 if (buf)
5405                         *(buf - 1) = ',';
5406         }
5407 }
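
/*
 * trace_boot_options_buf is filled from the "trace_options=" kernel command
 * line parameter; an illustrative boot line would be:
 *
 *   trace_options=sym-offset,noprint-parent
 *
 * Each comma-separated token is handed to trace_set_options() for the global
 * trace array, and the commas are put back so this can run more than once.
 */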
5408
5409 static ssize_t
5410 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5411                         size_t cnt, loff_t *ppos)
5412 {
5413         struct seq_file *m = filp->private_data;
5414         struct trace_array *tr = m->private;
5415         char buf[64];
5416         int ret;
5417
5418         if (cnt >= sizeof(buf))
5419                 return -EINVAL;
5420
5421         if (copy_from_user(buf, ubuf, cnt))
5422                 return -EFAULT;
5423
5424         buf[cnt] = 0;
5425
5426         ret = trace_set_options(tr, buf);
5427         if (ret < 0)
5428                 return ret;
5429
5430         *ppos += cnt;
5431
5432         return cnt;
5433 }
5434
5435 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5436 {
5437         struct trace_array *tr = inode->i_private;
5438         int ret;
5439
5440         ret = tracing_check_open_get_tr(tr);
5441         if (ret)
5442                 return ret;
5443
5444         ret = single_open(file, tracing_trace_options_show, inode->i_private);
5445         if (ret < 0)
5446                 trace_array_put(tr);
5447
5448         return ret;
5449 }
5450
5451 static const struct file_operations tracing_iter_fops = {
5452         .open           = tracing_trace_options_open,
5453         .read           = seq_read,
5454         .llseek         = seq_lseek,
5455         .release        = tracing_single_release_tr,
5456         .write          = tracing_trace_options_write,
5457 };
5458
5459 static const char readme_msg[] =
5460         "tracing mini-HOWTO:\n\n"
5461         "# echo 0 > tracing_on : quick way to disable tracing\n"
5462         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5463         " Important files:\n"
5464         "  trace\t\t\t- The static contents of the buffer\n"
5465         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
5466         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5467         "  current_tracer\t- function and latency tracers\n"
5468         "  available_tracers\t- list of configured tracers for current_tracer\n"
5469         "  error_log\t- error log for failed commands (that support it)\n"
5470         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5471         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5472         "  trace_clock\t\t- change the clock used to order events\n"
5473         "       local:   Per cpu clock but may not be synced across CPUs\n"
5474         "      global:   Synced across CPUs but slows tracing down.\n"
5475         "     counter:   Not a clock, but just an increment\n"
5476         "      uptime:   Jiffy counter from time of boot\n"
5477         "        perf:   Same clock that perf events use\n"
5478 #ifdef CONFIG_X86_64
5479         "     x86-tsc:   TSC cycle counter\n"
5480 #endif
5481         "\n  timestamp_mode\t- view the mode used to timestamp events\n"
5482         "       delta:   Delta difference against a buffer-wide timestamp\n"
5483         "    absolute:   Absolute (standalone) timestamp\n"
5484         "\n  trace_marker\t\t- Writes to this file are inserted into the kernel buffer\n"
5485         "\n  trace_marker_raw\t\t- Writes to this file insert binary data into the kernel buffer\n"
5486         "  tracing_cpumask\t- Limit which CPUs to trace\n"
5487         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5488         "\t\t\t  Remove sub-buffer with rmdir\n"
5489         "  trace_options\t\t- Set format or modify how tracing happens\n"
5490         "\t\t\t  Disable an option by prefixing 'no' to the\n"
5491         "\t\t\t  option name\n"
5492         "  saved_cmdlines_size\t- echo the number of comm-pid entries to cache in here\n"
5493 #ifdef CONFIG_DYNAMIC_FTRACE
5494         "\n  available_filter_functions - list of functions that can be filtered on\n"
5495         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
5496         "\t\t\t  functions\n"
5497         "\t     accepts: func_full_name or glob-matching-pattern\n"
5498         "\t     modules: Can select a group via module\n"
5499         "\t      Format: :mod:<module-name>\n"
5500         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5501         "\t    triggers: a command to perform when function is hit\n"
5502         "\t      Format: <function>:<trigger>[:count]\n"
5503         "\t     trigger: traceon, traceoff\n"
5504         "\t\t      enable_event:<system>:<event>\n"
5505         "\t\t      disable_event:<system>:<event>\n"
5506 #ifdef CONFIG_STACKTRACE
5507         "\t\t      stacktrace\n"
5508 #endif
5509 #ifdef CONFIG_TRACER_SNAPSHOT
5510         "\t\t      snapshot\n"
5511 #endif
5512         "\t\t      dump\n"
5513         "\t\t      cpudump\n"
5514         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5515         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5516         "\t     The first one will disable tracing every time do_fault is hit\n"
5517         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5518         "\t       The first time do_trap is hit and it disables tracing, the\n"
5519         "\t       counter will decrement to 2. If tracing is already disabled,\n"
5520         "\t       the counter will not decrement. It only decrements when the\n"
5521         "\t       trigger did work\n"
5522         "\t     To remove a trigger without a count:\n"
5523         "\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
5524         "\t     To remove a trigger with a count:\n"
5525         "\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
5526         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5527         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5528         "\t    modules: Can select a group via module command :mod:\n"
5529         "\t    Does not accept triggers\n"
5530 #endif /* CONFIG_DYNAMIC_FTRACE */
5531 #ifdef CONFIG_FUNCTION_TRACER
5532         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5533         "\t\t    (function)\n"
5534         "  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5535         "\t\t    (function)\n"
5536 #endif
5537 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5538         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5539         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5540         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5541 #endif
5542 #ifdef CONFIG_TRACER_SNAPSHOT
5543         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5544         "\t\t\t  snapshot buffer. Read the contents for more\n"
5545         "\t\t\t  information\n"
5546 #endif
5547 #ifdef CONFIG_STACK_TRACER
5548         "  stack_trace\t\t- Shows the max stack trace when active\n"
5549         "  stack_max_size\t- Shows current max stack size that was traced\n"
5550         "\t\t\t  Write into this file to reset the max size (trigger a\n"
5551         "\t\t\t  new trace)\n"
5552 #ifdef CONFIG_DYNAMIC_FTRACE
5553         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5554         "\t\t\t  traces\n"
5555 #endif
5556 #endif /* CONFIG_STACK_TRACER */
5557 #ifdef CONFIG_DYNAMIC_EVENTS
5558         "  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5559         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5560 #endif
5561 #ifdef CONFIG_KPROBE_EVENTS
5562         "  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5563         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5564 #endif
5565 #ifdef CONFIG_UPROBE_EVENTS
5566         "  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5567         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5568 #endif
5569 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5570         "\t  accepts: event-definitions (one definition per line)\n"
5571         "\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
5572         "\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
5573 #ifdef CONFIG_HIST_TRIGGERS
5574         "\t           s:[synthetic/]<event> <field> [<field>]\n"
5575 #endif
5576         "\t           e[:[<group>/]<event>] <attached-group>.<attached-event> [<args>]\n"
5577         "\t           -:[<group>/]<event>\n"
5578 #ifdef CONFIG_KPROBE_EVENTS
5579         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5580   "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5581 #endif
5582 #ifdef CONFIG_UPROBE_EVENTS
5583   "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5584 #endif
5585         "\t     args: <name>=fetcharg[:type]\n"
5586         "\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
5587 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5588         "\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5589 #else
5590         "\t           $stack<index>, $stack, $retval, $comm,\n"
5591 #endif
5592         "\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5593         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
5594         "\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5595         "\t           <type>\\[<array-size>\\]\n"
5596 #ifdef CONFIG_HIST_TRIGGERS
5597         "\t    field: <stype> <name>;\n"
5598         "\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5599         "\t           [unsigned] char/int/long\n"
5600 #endif
5601         "\t    efield: For event probes ('e' types), the field is one of the fields\n"
5602         "\t            of the <attached-group>/<attached-event>.\n"
5603 #endif
5604         "  events/\t\t- Directory containing all trace event subsystems:\n"
5605         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5606         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
5607         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5608         "\t\t\t  events\n"
5609         "      filter\t\t- If set, only events passing filter are traced\n"
5610         "  events/<system>/<event>/\t- Directory containing control files for\n"
5611         "\t\t\t  <event>:\n"
5612         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5613         "      filter\t\t- If set, only events passing filter are traced\n"
5614         "      trigger\t\t- If set, a command to perform when event is hit\n"
5615         "\t    Format: <trigger>[:count][if <filter>]\n"
5616         "\t   trigger: traceon, traceoff\n"
5617         "\t            enable_event:<system>:<event>\n"
5618         "\t            disable_event:<system>:<event>\n"
5619 #ifdef CONFIG_HIST_TRIGGERS
5620         "\t            enable_hist:<system>:<event>\n"
5621         "\t            disable_hist:<system>:<event>\n"
5622 #endif
5623 #ifdef CONFIG_STACKTRACE
5624         "\t\t    stacktrace\n"
5625 #endif
5626 #ifdef CONFIG_TRACER_SNAPSHOT
5627         "\t\t    snapshot\n"
5628 #endif
5629 #ifdef CONFIG_HIST_TRIGGERS
5630         "\t\t    hist (see below)\n"
5631 #endif
5632         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5633         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5634         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5635         "\t                  events/block/block_unplug/trigger\n"
5636         "\t   The first disables tracing every time block_unplug is hit.\n"
5637         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5638         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5639         "\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5640         "\t   Like function triggers, the counter is only decremented if it\n"
5641         "\t    enabled or disabled tracing.\n"
5642         "\t   To remove a trigger without a count:\n"
5643         "\t     echo '!<trigger>' > <system>/<event>/trigger\n"
5644         "\t   To remove a trigger with a count:\n"
5645         "\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
5646         "\t   Filters can be ignored when removing a trigger.\n"
5647 #ifdef CONFIG_HIST_TRIGGERS
5648         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5649         "\t    Format: hist:keys=<field1[,field2,...]>\n"
5650         "\t            [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
5651         "\t            [:values=<field1[,field2,...]>]\n"
5652         "\t            [:sort=<field1[,field2,...]>]\n"
5653         "\t            [:size=#entries]\n"
5654         "\t            [:pause][:continue][:clear]\n"
5655         "\t            [:name=histname1]\n"
5656         "\t            [:<handler>.<action>]\n"
5657         "\t            [if <filter>]\n\n"
5658         "\t    Note, special fields can be used as well:\n"
5659         "\t            common_timestamp - to record current timestamp\n"
5660         "\t            common_cpu - to record the CPU the event happened on\n"
5661         "\n"
5662         "\t    A hist trigger variable can be:\n"
5663         "\t        - a reference to a field e.g. x=current_timestamp,\n"
5664         "\t        - a reference to another variable e.g. y=$x,\n"
5665         "\t        - a numeric literal: e.g. ms_per_sec=1000,\n"
5666         "\t        - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n"
5667         "\n"
5668         "\t    hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
5669         "\t    multiplication(*) and division(/) operators. An operand can be either a\n"
5670         "\t    variable reference, field or numeric literal.\n"
5671         "\n"
5672         "\t    When a matching event is hit, an entry is added to a hash\n"
5673         "\t    table using the key(s) and value(s) named, and the value of a\n"
5674         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
5675         "\t    correspond to fields in the event's format description.  Keys\n"
5676         "\t    can be any field, or the special string 'stacktrace'.\n"
5677         "\t    Compound keys consisting of up to two fields can be specified\n"
5678         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5679         "\t    fields.  Sort keys consisting of up to two fields can be\n"
5680         "\t    specified using the 'sort' keyword.  The sort direction can\n"
5681         "\t    be modified by appending '.descending' or '.ascending' to a\n"
5682         "\t    sort field.  The 'size' parameter can be used to specify more\n"
5683         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
5684         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
5685         "\t    its histogram data will be shared with other triggers of the\n"
5686         "\t    same name, and trigger hits will update this common data.\n\n"
5687         "\t    Reading the 'hist' file for the event will dump the hash\n"
5688         "\t    table in its entirety to stdout.  If there are multiple hist\n"
5689         "\t    triggers attached to an event, there will be a table for each\n"
5690         "\t    trigger in the output.  The table displayed for a named\n"
5691         "\t    trigger will be the same as any other instance having the\n"
5692         "\t    same name.  The default format used to display a given field\n"
5693         "\t    can be modified by appending any of the following modifiers\n"
5694         "\t    to the field name, as applicable:\n\n"
5695         "\t            .hex        display a number as a hex value\n"
5696         "\t            .sym        display an address as a symbol\n"
5697         "\t            .sym-offset display an address as a symbol and offset\n"
5698         "\t            .execname   display a common_pid as a program name\n"
5699         "\t            .syscall    display a syscall id as a syscall name\n"
5700         "\t            .log2       display log2 value rather than raw number\n"
5701         "\t            .buckets=size  display values in groups of size rather than raw number\n"
5702         "\t            .usecs      display a common_timestamp in microseconds\n\n"
5703         "\t    The 'pause' parameter can be used to pause an existing hist\n"
5704         "\t    trigger or to start a hist trigger but not log any events\n"
5705         "\t    until told to do so.  'continue' can be used to start or\n"
5706         "\t    restart a paused hist trigger.\n\n"
5707         "\t    The 'clear' parameter will clear the contents of a running\n"
5708         "\t    hist trigger and leave its current paused/active state\n"
5709         "\t    unchanged.\n\n"
5710         "\t    The enable_hist and disable_hist triggers can be used to\n"
5711         "\t    have one event conditionally start and stop another event's\n"
5712         "\t    already-attached hist trigger.  The syntax is analogous to\n"
5713         "\t    the enable_event and disable_event triggers.\n\n"
5714         "\t    Hist trigger handlers and actions are executed whenever a\n"
5715         "\t    histogram entry is added or updated.  They take the form:\n\n"
5716         "\t        <handler>.<action>\n\n"
5717         "\t    The available handlers are:\n\n"
5718         "\t        onmatch(matching.event)  - invoke on addition or update\n"
5719         "\t        onmax(var)               - invoke if var exceeds current max\n"
5720         "\t        onchange(var)            - invoke action if var changes\n\n"
5721         "\t    The available actions are:\n\n"
5722         "\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5723         "\t        save(field,...)                      - save current event fields\n"
5724 #ifdef CONFIG_TRACER_SNAPSHOT
5725         "\t        snapshot()                           - snapshot the trace buffer\n\n"
5726 #endif
5727 #ifdef CONFIG_SYNTH_EVENTS
5728         "  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5729         "\t  Write into this file to define/undefine new synthetic events.\n"
5730         "\t     example: echo 'myevent u64 lat; char name[]' >> synthetic_events\n"
5731 #endif
5732 #endif
5733 ;
5734
5735 static ssize_t
5736 tracing_readme_read(struct file *filp, char __user *ubuf,
5737                        size_t cnt, loff_t *ppos)
5738 {
5739         return simple_read_from_buffer(ubuf, cnt, ppos,
5740                                         readme_msg, strlen(readme_msg));
5741 }
5742
5743 static const struct file_operations tracing_readme_fops = {
5744         .open           = tracing_open_generic,
5745         .read           = tracing_readme_read,
5746         .llseek         = generic_file_llseek,
5747 };
5748
5749 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5750 {
5751         int pid = ++(*pos);
5752
5753         return trace_find_tgid_ptr(pid);
5754 }
5755
5756 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5757 {
5758         int pid = *pos;
5759
5760         return trace_find_tgid_ptr(pid);
5761 }
5762
5763 static void saved_tgids_stop(struct seq_file *m, void *v)
5764 {
5765 }
5766
5767 static int saved_tgids_show(struct seq_file *m, void *v)
5768 {
5769         int *entry = (int *)v;
5770         int pid = entry - tgid_map;
5771         int tgid = *entry;
5772
5773         if (tgid == 0)
5774                 return SEQ_SKIP;
5775
5776         seq_printf(m, "%d %d\n", pid, tgid);
5777         return 0;
5778 }
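
/*
 * Reading saved_tgids produces one "<pid> <tgid>" pair per recorded entry,
 * e.g. (illustrative values):
 *
 *   # cat /sys/kernel/tracing/saved_tgids
 *   1234 1200
 *   1235 1200
 */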
5779
5780 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5781         .start          = saved_tgids_start,
5782         .stop           = saved_tgids_stop,
5783         .next           = saved_tgids_next,
5784         .show           = saved_tgids_show,
5785 };
5786
5787 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5788 {
5789         int ret;
5790
5791         ret = tracing_check_open_get_tr(NULL);
5792         if (ret)
5793                 return ret;
5794
5795         return seq_open(filp, &tracing_saved_tgids_seq_ops);
5796 }
5797
5798
5799 static const struct file_operations tracing_saved_tgids_fops = {
5800         .open           = tracing_saved_tgids_open,
5801         .read           = seq_read,
5802         .llseek         = seq_lseek,
5803         .release        = seq_release,
5804 };
5805
5806 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5807 {
5808         unsigned int *ptr = v;
5809
5810         if (*pos || m->count)
5811                 ptr++;
5812
5813         (*pos)++;
5814
5815         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5816              ptr++) {
5817                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5818                         continue;
5819
5820                 return ptr;
5821         }
5822
5823         return NULL;
5824 }
5825
5826 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5827 {
5828         void *v;
5829         loff_t l = 0;
5830
5831         preempt_disable();
5832         arch_spin_lock(&trace_cmdline_lock);
5833
5834         v = &savedcmd->map_cmdline_to_pid[0];
5835         while (l <= *pos) {
5836                 v = saved_cmdlines_next(m, v, &l);
5837                 if (!v)
5838                         return NULL;
5839         }
5840
5841         return v;
5842 }
5843
5844 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5845 {
5846         arch_spin_unlock(&trace_cmdline_lock);
5847         preempt_enable();
5848 }
5849
5850 static int saved_cmdlines_show(struct seq_file *m, void *v)
5851 {
5852         char buf[TASK_COMM_LEN];
5853         unsigned int *pid = v;
5854
5855         __trace_find_cmdline(*pid, buf);
5856         seq_printf(m, "%d %s\n", *pid, buf);
5857         return 0;
5858 }
5859
5860 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5861         .start          = saved_cmdlines_start,
5862         .next           = saved_cmdlines_next,
5863         .stop           = saved_cmdlines_stop,
5864         .show           = saved_cmdlines_show,
5865 };
5866
5867 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5868 {
5869         int ret;
5870
5871         ret = tracing_check_open_get_tr(NULL);
5872         if (ret)
5873                 return ret;
5874
5875         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5876 }
5877
5878 static const struct file_operations tracing_saved_cmdlines_fops = {
5879         .open           = tracing_saved_cmdlines_open,
5880         .read           = seq_read,
5881         .llseek         = seq_lseek,
5882         .release        = seq_release,
5883 };
5884
5885 static ssize_t
5886 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5887                                  size_t cnt, loff_t *ppos)
5888 {
5889         char buf[64];
5890         int r;
5891
5892         arch_spin_lock(&trace_cmdline_lock);
5893         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5894         arch_spin_unlock(&trace_cmdline_lock);
5895
5896         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5897 }
5898
5899 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5900 {
5901         kfree(s->saved_cmdlines);
5902         kfree(s->map_cmdline_to_pid);
5903         kfree(s);
5904 }
5905
5906 static int tracing_resize_saved_cmdlines(unsigned int val)
5907 {
5908         struct saved_cmdlines_buffer *s, *savedcmd_temp;
5909
5910         s = kmalloc(sizeof(*s), GFP_KERNEL);
5911         if (!s)
5912                 return -ENOMEM;
5913
5914         if (allocate_cmdlines_buffer(val, s) < 0) {
5915                 kfree(s);
5916                 return -ENOMEM;
5917         }
5918
5919         arch_spin_lock(&trace_cmdline_lock);
5920         savedcmd_temp = savedcmd;
5921         savedcmd = s;
5922         arch_spin_unlock(&trace_cmdline_lock);
5923         free_saved_cmdlines_buffer(savedcmd_temp);
5924
5925         return 0;
5926 }
5927
5928 static ssize_t
5929 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5930                                   size_t cnt, loff_t *ppos)
5931 {
5932         unsigned long val;
5933         int ret;
5934
5935         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5936         if (ret)
5937                 return ret;
5938
5939         /* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
5940         if (!val || val > PID_MAX_DEFAULT)
5941                 return -EINVAL;
5942
5943         ret = tracing_resize_saved_cmdlines((unsigned int)val);
5944         if (ret < 0)
5945                 return ret;
5946
5947         *ppos += cnt;
5948
5949         return cnt;
5950 }
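
/*
 * Illustrative resize of the comm cache via the write handler above (assuming
 * tracefs is mounted at /sys/kernel/tracing); values of 0 or anything above
 * PID_MAX_DEFAULT are rejected with -EINVAL:
 *
 *   # echo 1024 > /sys/kernel/tracing/saved_cmdlines_size
 *   # cat /sys/kernel/tracing/saved_cmdlines_size
 *   1024
 */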
5951
5952 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5953         .open           = tracing_open_generic,
5954         .read           = tracing_saved_cmdlines_size_read,
5955         .write          = tracing_saved_cmdlines_size_write,
5956 };
5957
5958 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5959 static union trace_eval_map_item *
5960 update_eval_map(union trace_eval_map_item *ptr)
5961 {
5962         if (!ptr->map.eval_string) {
5963                 if (ptr->tail.next) {
5964                         ptr = ptr->tail.next;
5965                         /* Set ptr to the next real item (skip head) */
5966                         ptr++;
5967                 } else
5968                         return NULL;
5969         }
5970         return ptr;
5971 }
5972
5973 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5974 {
5975         union trace_eval_map_item *ptr = v;
5976
5977         /*
5978          * Paranoid! If ptr points to end, we don't want to increment past it.
5979          * This really should never happen.
5980          */
5981         (*pos)++;
5982         ptr = update_eval_map(ptr);
5983         if (WARN_ON_ONCE(!ptr))
5984                 return NULL;
5985
5986         ptr++;
5987         ptr = update_eval_map(ptr);
5988
5989         return ptr;
5990 }
5991
5992 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5993 {
5994         union trace_eval_map_item *v;
5995         loff_t l = 0;
5996
5997         mutex_lock(&trace_eval_mutex);
5998
5999         v = trace_eval_maps;
6000         if (v)
6001                 v++;
6002
6003         while (v && l < *pos) {
6004                 v = eval_map_next(m, v, &l);
6005         }
6006
6007         return v;
6008 }
6009
6010 static void eval_map_stop(struct seq_file *m, void *v)
6011 {
6012         mutex_unlock(&trace_eval_mutex);
6013 }
6014
6015 static int eval_map_show(struct seq_file *m, void *v)
6016 {
6017         union trace_eval_map_item *ptr = v;
6018
6019         seq_printf(m, "%s %ld (%s)\n",
6020                    ptr->map.eval_string, ptr->map.eval_value,
6021                    ptr->map.system);
6022
6023         return 0;
6024 }
6025
6026 static const struct seq_operations tracing_eval_map_seq_ops = {
6027         .start          = eval_map_start,
6028         .next           = eval_map_next,
6029         .stop           = eval_map_stop,
6030         .show           = eval_map_show,
6031 };
6032
6033 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
6034 {
6035         int ret;
6036
6037         ret = tracing_check_open_get_tr(NULL);
6038         if (ret)
6039                 return ret;
6040
6041         return seq_open(filp, &tracing_eval_map_seq_ops);
6042 }
6043
6044 static const struct file_operations tracing_eval_map_fops = {
6045         .open           = tracing_eval_map_open,
6046         .read           = seq_read,
6047         .llseek         = seq_lseek,
6048         .release        = seq_release,
6049 };
6050
6051 static inline union trace_eval_map_item *
6052 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
6053 {
6054         /* Return tail of array given the head */
6055         return ptr + ptr->head.length + 1;
6056 }
6057
6058 static void
6059 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
6060                            int len)
6061 {
6062         struct trace_eval_map **stop;
6063         struct trace_eval_map **map;
6064         union trace_eval_map_item *map_array;
6065         union trace_eval_map_item *ptr;
6066
6067         stop = start + len;
6068
6069         /*
6070          * The trace_eval_maps contains the map plus a head and tail item,
6071          * where the head holds the module and length of array, and the
6072          * tail holds a pointer to the next list.
6073          */
6074         map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
6075         if (!map_array) {
6076                 pr_warn("Unable to allocate trace eval mapping\n");
6077                 return;
6078         }
6079
6080         mutex_lock(&trace_eval_mutex);
6081
6082         if (!trace_eval_maps)
6083                 trace_eval_maps = map_array;
6084         else {
6085                 ptr = trace_eval_maps;
6086                 for (;;) {
6087                         ptr = trace_eval_jmp_to_tail(ptr);
6088                         if (!ptr->tail.next)
6089                                 break;
6090                         ptr = ptr->tail.next;
6091
6092                 }
6093                 ptr->tail.next = map_array;
6094         }
6095         map_array->head.mod = mod;
6096         map_array->head.length = len;
6097         map_array++;
6098
6099         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
6100                 map_array->map = **map;
6101                 map_array++;
6102         }
6103         memset(map_array, 0, sizeof(*map_array));
6104
6105         mutex_unlock(&trace_eval_mutex);
6106 }
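
/*
 * Resulting layout for a module that registers three eval maps (illustrative):
 *
 *   [ head: mod, length = 3 ][ map 0 ][ map 1 ][ map 2 ][ tail: next ]
 *
 * trace_eval_jmp_to_tail() steps from the head over 'length' map entries to
 * the tail, whose ->tail.next chains the per-module arrays together.
 */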
6107
6108 static void trace_create_eval_file(struct dentry *d_tracer)
6109 {
6110         trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
6111                           NULL, &tracing_eval_map_fops);
6112 }
6113
6114 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
6115 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
6116 static inline void trace_insert_eval_map_file(struct module *mod,
6117                               struct trace_eval_map **start, int len) { }
6118 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
6119
6120 static void trace_insert_eval_map(struct module *mod,
6121                                   struct trace_eval_map **start, int len)
6122 {
6123         struct trace_eval_map **map;
6124
6125         if (len <= 0)
6126                 return;
6127
6128         map = start;
6129
6130         trace_event_eval_update(map, len);
6131
6132         trace_insert_eval_map_file(mod, start, len);
6133 }
6134
6135 static ssize_t
6136 tracing_set_trace_read(struct file *filp, char __user *ubuf,
6137                        size_t cnt, loff_t *ppos)
6138 {
6139         struct trace_array *tr = filp->private_data;
6140         char buf[MAX_TRACER_SIZE+2];
6141         int r;
6142
6143         mutex_lock(&trace_types_lock);
6144         r = sprintf(buf, "%s\n", tr->current_trace->name);
6145         mutex_unlock(&trace_types_lock);
6146
6147         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6148 }
6149
6150 int tracer_init(struct tracer *t, struct trace_array *tr)
6151 {
6152         tracing_reset_online_cpus(&tr->array_buffer);
6153         return t->init(tr);
6154 }
6155
6156 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
6157 {
6158         int cpu;
6159
6160         for_each_tracing_cpu(cpu)
6161                 per_cpu_ptr(buf->data, cpu)->entries = val;
6162 }
6163
6164 #ifdef CONFIG_TRACER_MAX_TRACE
6165 /* resize @trace_buf's per-cpu entries to match @size_buf's entries */
6166 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
6167                                         struct array_buffer *size_buf, int cpu_id)
6168 {
6169         int cpu, ret = 0;
6170
6171         if (cpu_id == RING_BUFFER_ALL_CPUS) {
6172                 for_each_tracing_cpu(cpu) {
6173                         ret = ring_buffer_resize(trace_buf->buffer,
6174                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
6175                         if (ret < 0)
6176                                 break;
6177                         per_cpu_ptr(trace_buf->data, cpu)->entries =
6178                                 per_cpu_ptr(size_buf->data, cpu)->entries;
6179                 }
6180         } else {
6181                 ret = ring_buffer_resize(trace_buf->buffer,
6182                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6183                 if (ret == 0)
6184                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6185                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
6186         }
6187
6188         return ret;
6189 }
6190 #endif /* CONFIG_TRACER_MAX_TRACE */
6191
6192 static int __tracing_resize_ring_buffer(struct trace_array *tr,
6193                                         unsigned long size, int cpu)
6194 {
6195         int ret;
6196
6197         /*
6198          * If kernel or user changes the size of the ring buffer
6199          * we use the size that was given, and we can forget about
6200          * expanding it later.
6201          */
6202         ring_buffer_expanded = true;
6203
6204         /* May be called before buffers are initialized */
6205         if (!tr->array_buffer.buffer)
6206                 return 0;
6207
6208         ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6209         if (ret < 0)
6210                 return ret;
6211
6212 #ifdef CONFIG_TRACER_MAX_TRACE
6213         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
6214             !tr->current_trace->use_max_tr)
6215                 goto out;
6216
6217         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6218         if (ret < 0) {
6219                 int r = resize_buffer_duplicate_size(&tr->array_buffer,
6220                                                      &tr->array_buffer, cpu);
6221                 if (r < 0) {
6222                         /*
6223                          * AARGH! We are left with different
6224                          * size max buffer!!!!
6225                          * The max buffer is our "snapshot" buffer.
6226                          * When a tracer needs a snapshot (one of the
6227                          * latency tracers), it swaps the max buffer
6228                          * with the saved snapshot. We succeeded in
6229                          * updating the size of the main buffer, but failed to
6230                          * update the size of the max buffer. But when we tried
6231                          * to reset the main buffer to the original size, we
6232                          * failed there too. This is very unlikely to
6233                          * happen, but if it does, warn and kill all
6234                          * tracing.
6235                          */
6236                         WARN_ON(1);
6237                         tracing_disabled = 1;
6238                 }
6239                 return ret;
6240         }
6241
6242         if (cpu == RING_BUFFER_ALL_CPUS)
6243                 set_buffer_entries(&tr->max_buffer, size);
6244         else
6245                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
6246
6247  out:
6248 #endif /* CONFIG_TRACER_MAX_TRACE */
6249
6250         if (cpu == RING_BUFFER_ALL_CPUS)
6251                 set_buffer_entries(&tr->array_buffer, size);
6252         else
6253                 per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size;
6254
6255         return ret;
6256 }
6257
6258 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6259                                   unsigned long size, int cpu_id)
6260 {
6261         int ret;
6262
6263         mutex_lock(&trace_types_lock);
6264
6265         if (cpu_id != RING_BUFFER_ALL_CPUS) {
6266                 /* make sure this cpu is enabled in the mask */
6267                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
6268                         ret = -EINVAL;
6269                         goto out;
6270                 }
6271         }
6272
6273         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
6274         if (ret < 0)
6275                 ret = -ENOMEM;
6276
6277 out:
6278         mutex_unlock(&trace_types_lock);
6279
6280         return ret;
6281 }
6282
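/*
 * A minimal illustrative sketch (not part of this file): resizing the
 * top-level trace buffer to 64 KB per CPU on every CPU.  The size
 * argument is in bytes, mirroring the KB-to-bytes shift done in
 * tracing_entries_write() below.
 */
#if 0
static ssize_t example_resize_all_cpus(void)
{
        /* RING_BUFFER_ALL_CPUS applies the new size to every CPU buffer. */
        return tracing_resize_ring_buffer(&global_trace, 64 * 1024,
                                          RING_BUFFER_ALL_CPUS);
}
#endif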
6283
6284 /**
6285  * tracing_update_buffers - used by tracing facility to expand ring buffers
6286  *
6287  * To save memory when tracing is never used on a system that has it
6288  * configured in, the ring buffers are set to a minimum size.  Once a
6289  * user starts to use the tracing facility, they need to grow to their
6290  * default size.
6291  *
6292  * This function is to be called when a tracer is about to be used.
6293  */
6294 int tracing_update_buffers(void)
6295 {
6296         int ret = 0;
6297
6298         mutex_lock(&trace_types_lock);
6299         if (!ring_buffer_expanded)
6300                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
6301                                                 RING_BUFFER_ALL_CPUS);
6302         mutex_unlock(&trace_types_lock);
6303
6304         return ret;
6305 }
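
/*
 * A minimal illustrative sketch (not part of this file): a caller that
 * expands the buffers and then switches the top-level instance to a
 * tracer given by name.  example_switch_tracer() is made up for
 * illustration.
 */
#if 0
static int example_switch_tracer(const char *name)
{
        int ret;

        /* Grow the ring buffers to their default size first. */
        ret = tracing_update_buffers();
        if (ret < 0)
                return ret;

        /* Then select the tracer by name (e.g. "nop"). */
        return tracing_set_tracer(&global_trace, name);
}
#endif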
6306
6307 struct trace_option_dentry;
6308
6309 static void
6310 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6311
6312 /*
6313  * Used to clear out the tracer before deletion of an instance.
6314  * Must have trace_types_lock held.
6315  */
6316 static void tracing_set_nop(struct trace_array *tr)
6317 {
6318         if (tr->current_trace == &nop_trace)
6319                 return;
6320
6321         tr->current_trace->enabled--;
6322
6323         if (tr->current_trace->reset)
6324                 tr->current_trace->reset(tr);
6325
6326         tr->current_trace = &nop_trace;
6327 }
6328
6329 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6330 {
6331         /* Only enable if the directory has been created already. */
6332         if (!tr->dir)
6333                 return;
6334
6335         create_trace_option_files(tr, t);
6336 }
6337
6338 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6339 {
6340         struct tracer *t;
6341 #ifdef CONFIG_TRACER_MAX_TRACE
6342         bool had_max_tr;
6343 #endif
6344         int ret = 0;
6345
6346         mutex_lock(&trace_types_lock);
6347
6348         if (!ring_buffer_expanded) {
6349                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6350                                                 RING_BUFFER_ALL_CPUS);
6351                 if (ret < 0)
6352                         goto out;
6353                 ret = 0;
6354         }
6355
6356         for (t = trace_types; t; t = t->next) {
6357                 if (strcmp(t->name, buf) == 0)
6358                         break;
6359         }
6360         if (!t) {
6361                 ret = -EINVAL;
6362                 goto out;
6363         }
6364         if (t == tr->current_trace)
6365                 goto out;
6366
6367 #ifdef CONFIG_TRACER_SNAPSHOT
6368         if (t->use_max_tr) {
6369                 arch_spin_lock(&tr->max_lock);
6370                 if (tr->cond_snapshot)
6371                         ret = -EBUSY;
6372                 arch_spin_unlock(&tr->max_lock);
6373                 if (ret)
6374                         goto out;
6375         }
6376 #endif
6377         /* Some tracers won't work on the kernel command line */
6378         if (system_state < SYSTEM_RUNNING && t->noboot) {
6379                 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6380                         t->name);
6381                 goto out;
6382         }
6383
6384         /* Some tracers are only allowed for the top level buffer */
6385         if (!trace_ok_for_array(t, tr)) {
6386                 ret = -EINVAL;
6387                 goto out;
6388         }
6389
6390         /* If trace pipe files are being read, we can't change the tracer */
6391         if (tr->trace_ref) {
6392                 ret = -EBUSY;
6393                 goto out;
6394         }
6395
6396         trace_branch_disable();
6397
6398         tr->current_trace->enabled--;
6399
6400         if (tr->current_trace->reset)
6401                 tr->current_trace->reset(tr);
6402
6403         /* Current trace needs to be nop_trace before synchronize_rcu */
6404         tr->current_trace = &nop_trace;
6405
6406 #ifdef CONFIG_TRACER_MAX_TRACE
6407         had_max_tr = tr->allocated_snapshot;
6408
6409         if (had_max_tr && !t->use_max_tr) {
6410                 /*
6411                  * We need to make sure that the update_max_tr sees that
6412                  * current_trace changed to nop_trace to keep it from
6413                  * swapping the buffers after we resize it.
6414                  * update_max_tr() is called with interrupts disabled,
6415                  * so a synchronize_rcu() is sufficient.
6416                  */
6417                 synchronize_rcu();
6418                 free_snapshot(tr);
6419         }
6420 #endif
6421
6422 #ifdef CONFIG_TRACER_MAX_TRACE
6423         if (t->use_max_tr && !had_max_tr) {
6424                 ret = tracing_alloc_snapshot_instance(tr);
6425                 if (ret < 0)
6426                         goto out;
6427         }
6428 #endif
6429
6430         if (t->init) {
6431                 ret = tracer_init(t, tr);
6432                 if (ret)
6433                         goto out;
6434         }
6435
6436         tr->current_trace = t;
6437         tr->current_trace->enabled++;
6438         trace_branch_enable(tr);
6439  out:
6440         mutex_unlock(&trace_types_lock);
6441
6442         return ret;
6443 }
6444
6445 static ssize_t
6446 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6447                         size_t cnt, loff_t *ppos)
6448 {
6449         struct trace_array *tr = filp->private_data;
6450         char buf[MAX_TRACER_SIZE+1];
6451         int i;
6452         size_t ret;
6453         int err;
6454
6455         ret = cnt;
6456
6457         if (cnt > MAX_TRACER_SIZE)
6458                 cnt = MAX_TRACER_SIZE;
6459
6460         if (copy_from_user(buf, ubuf, cnt))
6461                 return -EFAULT;
6462
6463         buf[cnt] = 0;
6464
6465         /* strip trailing whitespace. */
6466         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
6467                 buf[i] = 0;
6468
6469         err = tracing_set_tracer(tr, buf);
6470         if (err)
6471                 return err;
6472
6473         *ppos += ret;
6474
6475         return ret;
6476 }
6477
6478 static ssize_t
6479 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6480                    size_t cnt, loff_t *ppos)
6481 {
6482         char buf[64];
6483         int r;
6484
6485         r = snprintf(buf, sizeof(buf), "%ld\n",
6486                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6487         if (r > sizeof(buf))
6488                 r = sizeof(buf);
6489         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6490 }
6491
6492 static ssize_t
6493 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6494                     size_t cnt, loff_t *ppos)
6495 {
6496         unsigned long val;
6497         int ret;
6498
6499         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6500         if (ret)
6501                 return ret;
6502
6503         *ptr = val * 1000;
6504
6505         return cnt;
6506 }
6507
6508 static ssize_t
6509 tracing_thresh_read(struct file *filp, char __user *ubuf,
6510                     size_t cnt, loff_t *ppos)
6511 {
6512         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6513 }
6514
6515 static ssize_t
6516 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6517                      size_t cnt, loff_t *ppos)
6518 {
6519         struct trace_array *tr = filp->private_data;
6520         int ret;
6521
6522         mutex_lock(&trace_types_lock);
6523         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6524         if (ret < 0)
6525                 goto out;
6526
6527         if (tr->current_trace->update_thresh) {
6528                 ret = tr->current_trace->update_thresh(tr);
6529                 if (ret < 0)
6530                         goto out;
6531         }
6532
6533         ret = cnt;
6534 out:
6535         mutex_unlock(&trace_types_lock);
6536
6537         return ret;
6538 }
6539
6540 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6541
6542 static ssize_t
6543 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6544                      size_t cnt, loff_t *ppos)
6545 {
6546         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
6547 }
6548
6549 static ssize_t
6550 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6551                       size_t cnt, loff_t *ppos)
6552 {
6553         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
6554 }
6555
6556 #endif
6557
6558 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6559 {
6560         struct trace_array *tr = inode->i_private;
6561         struct trace_iterator *iter;
6562         int ret;
6563
6564         ret = tracing_check_open_get_tr(tr);
6565         if (ret)
6566                 return ret;
6567
6568         mutex_lock(&trace_types_lock);
6569
6570         /* create a buffer to store the information to pass to userspace */
6571         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6572         if (!iter) {
6573                 ret = -ENOMEM;
6574                 __trace_array_put(tr);
6575                 goto out;
6576         }
6577
6578         trace_seq_init(&iter->seq);
6579         iter->trace = tr->current_trace;
6580
6581         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6582                 ret = -ENOMEM;
6583                 goto fail;
6584         }
6585
6586         /* trace pipe does not show start of buffer */
6587         cpumask_setall(iter->started);
6588
6589         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6590                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
6591
6592         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6593         if (trace_clocks[tr->clock_id].in_ns)
6594                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6595
6596         iter->tr = tr;
6597         iter->array_buffer = &tr->array_buffer;
6598         iter->cpu_file = tracing_get_cpu(inode);
6599         mutex_init(&iter->mutex);
6600         filp->private_data = iter;
6601
6602         if (iter->trace->pipe_open)
6603                 iter->trace->pipe_open(iter);
6604
6605         nonseekable_open(inode, filp);
6606
6607         tr->trace_ref++;
6608 out:
6609         mutex_unlock(&trace_types_lock);
6610         return ret;
6611
6612 fail:
6613         kfree(iter);
6614         __trace_array_put(tr);
6615         mutex_unlock(&trace_types_lock);
6616         return ret;
6617 }
6618
6619 static int tracing_release_pipe(struct inode *inode, struct file *file)
6620 {
6621         struct trace_iterator *iter = file->private_data;
6622         struct trace_array *tr = inode->i_private;
6623
6624         mutex_lock(&trace_types_lock);
6625
6626         tr->trace_ref--;
6627
6628         if (iter->trace->pipe_close)
6629                 iter->trace->pipe_close(iter);
6630
6631         mutex_unlock(&trace_types_lock);
6632
6633         free_cpumask_var(iter->started);
6634         mutex_destroy(&iter->mutex);
6635         kfree(iter);
6636
6637         trace_array_put(tr);
6638
6639         return 0;
6640 }
6641
6642 static __poll_t
6643 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6644 {
6645         struct trace_array *tr = iter->tr;
6646
6647         /* Iterators are static, they should be filled or empty */
6648         if (trace_buffer_iter(iter, iter->cpu_file))
6649                 return EPOLLIN | EPOLLRDNORM;
6650
6651         if (tr->trace_flags & TRACE_ITER_BLOCK)
6652                 /*
6653                  * Always select as readable when in blocking mode
6654                  */
6655                 return EPOLLIN | EPOLLRDNORM;
6656         else
6657                 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6658                                              filp, poll_table);
6659 }
6660
6661 static __poll_t
6662 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6663 {
6664         struct trace_iterator *iter = filp->private_data;
6665
6666         return trace_poll(iter, filp, poll_table);
6667 }
6668
6669 /* Must be called with iter->mutex held. */
6670 static int tracing_wait_pipe(struct file *filp)
6671 {
6672         struct trace_iterator *iter = filp->private_data;
6673         int ret;
6674
6675         while (trace_empty(iter)) {
6676
6677                 if ((filp->f_flags & O_NONBLOCK)) {
6678                         return -EAGAIN;
6679                 }
6680
6681                 /*
6682                  * We block until we read something and tracing is disabled.
6683                  * We still block if tracing is disabled, but we have never
6684                  * read anything. This allows a user to cat this file, and
6685                  * then enable tracing. But after we have read something,
6686                  * we give an EOF when tracing is again disabled.
6687                  *
6688                  * iter->pos will be 0 if we haven't read anything.
6689                  */
6690                 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6691                         break;
6692
6693                 mutex_unlock(&iter->mutex);
6694
6695                 ret = wait_on_pipe(iter, 0);
6696
6697                 mutex_lock(&iter->mutex);
6698
6699                 if (ret)
6700                         return ret;
6701         }
6702
6703         return 1;
6704 }
6705
6706 /*
6707  * Consumer reader.
6708  */
6709 static ssize_t
6710 tracing_read_pipe(struct file *filp, char __user *ubuf,
6711                   size_t cnt, loff_t *ppos)
6712 {
6713         struct trace_iterator *iter = filp->private_data;
6714         ssize_t sret;
6715
6716         /*
6717          * Avoid more than one consumer on a single file descriptor.
6718          * This is just a matter of trace coherency; the ring buffer itself
6719          * is protected.
6720          */
6721         mutex_lock(&iter->mutex);
6722
6723         /* return any leftover data */
6724         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6725         if (sret != -EBUSY)
6726                 goto out;
6727
6728         trace_seq_init(&iter->seq);
6729
6730         if (iter->trace->read) {
6731                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6732                 if (sret)
6733                         goto out;
6734         }
6735
6736 waitagain:
6737         sret = tracing_wait_pipe(filp);
6738         if (sret <= 0)
6739                 goto out;
6740
6741         /* stop when tracing is finished */
6742         if (trace_empty(iter)) {
6743                 sret = 0;
6744                 goto out;
6745         }
6746
6747         if (cnt >= PAGE_SIZE)
6748                 cnt = PAGE_SIZE - 1;
6749
6750         /* reset all but tr, trace, and overruns */
6751         trace_iterator_reset(iter);
6752         cpumask_clear(iter->started);
6753         trace_seq_init(&iter->seq);
6754
6755         trace_event_read_lock();
6756         trace_access_lock(iter->cpu_file);
6757         while (trace_find_next_entry_inc(iter) != NULL) {
6758                 enum print_line_t ret;
6759                 int save_len = iter->seq.seq.len;
6760
6761                 ret = print_trace_line(iter);
6762                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6763                         /* don't print partial lines */
6764                         iter->seq.seq.len = save_len;
6765                         break;
6766                 }
6767                 if (ret != TRACE_TYPE_NO_CONSUME)
6768                         trace_consume(iter);
6769
6770                 if (trace_seq_used(&iter->seq) >= cnt)
6771                         break;
6772
6773                 /*
6774                  * Setting the full flag means we reached the trace_seq buffer
6775                  * size and we should have left via the partial-line condition
6776                  * above; one of the trace_seq_* functions is not being used properly.
6777                  */
6778                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6779                           iter->ent->type);
6780         }
6781         trace_access_unlock(iter->cpu_file);
6782         trace_event_read_unlock();
6783
6784         /* Now copy what we have to the user */
6785         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6786         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6787                 trace_seq_init(&iter->seq);
6788
6789         /*
6790          * If there was nothing to send to user, in spite of consuming trace
6791          * entries, go back to wait for more entries.
6792          */
6793         if (sret == -EBUSY)
6794                 goto waitagain;
6795
6796 out:
6797         mutex_unlock(&iter->mutex);
6798
6799         return sret;
6800 }
6801
6802 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6803                                      unsigned int idx)
6804 {
6805         __free_page(spd->pages[idx]);
6806 }
6807
6808 static size_t
6809 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6810 {
6811         size_t count;
6812         int save_len;
6813         int ret;
6814
6815         /* Seq buffer is page-sized, exactly what we need. */
6816         for (;;) {
6817                 save_len = iter->seq.seq.len;
6818                 ret = print_trace_line(iter);
6819
6820                 if (trace_seq_has_overflowed(&iter->seq)) {
6821                         iter->seq.seq.len = save_len;
6822                         break;
6823                 }
6824
6825                 /*
6826                  * This should not be hit, because TRACE_TYPE_PARTIAL_LINE
6827                  * should only be returned if the iter->seq overflowed. But
6828                  * check it anyway to be safe.
6829                  */
6830                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6831                         iter->seq.seq.len = save_len;
6832                         break;
6833                 }
6834
6835                 count = trace_seq_used(&iter->seq) - save_len;
6836                 if (rem < count) {
6837                         rem = 0;
6838                         iter->seq.seq.len = save_len;
6839                         break;
6840                 }
6841
6842                 if (ret != TRACE_TYPE_NO_CONSUME)
6843                         trace_consume(iter);
6844                 rem -= count;
6845                 if (!trace_find_next_entry_inc(iter))   {
6846                         rem = 0;
6847                         iter->ent = NULL;
6848                         break;
6849                 }
6850         }
6851
6852         return rem;
6853 }
6854
6855 static ssize_t tracing_splice_read_pipe(struct file *filp,
6856                                         loff_t *ppos,
6857                                         struct pipe_inode_info *pipe,
6858                                         size_t len,
6859                                         unsigned int flags)
6860 {
6861         struct page *pages_def[PIPE_DEF_BUFFERS];
6862         struct partial_page partial_def[PIPE_DEF_BUFFERS];
6863         struct trace_iterator *iter = filp->private_data;
6864         struct splice_pipe_desc spd = {
6865                 .pages          = pages_def,
6866                 .partial        = partial_def,
6867                 .nr_pages       = 0, /* This gets updated below. */
6868                 .nr_pages_max   = PIPE_DEF_BUFFERS,
6869                 .ops            = &default_pipe_buf_ops,
6870                 .spd_release    = tracing_spd_release_pipe,
6871         };
6872         ssize_t ret;
6873         size_t rem;
6874         unsigned int i;
6875
6876         if (splice_grow_spd(pipe, &spd))
6877                 return -ENOMEM;
6878
6879         mutex_lock(&iter->mutex);
6880
6881         if (iter->trace->splice_read) {
6882                 ret = iter->trace->splice_read(iter, filp,
6883                                                ppos, pipe, len, flags);
6884                 if (ret)
6885                         goto out_err;
6886         }
6887
6888         ret = tracing_wait_pipe(filp);
6889         if (ret <= 0)
6890                 goto out_err;
6891
6892         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6893                 ret = -EFAULT;
6894                 goto out_err;
6895         }
6896
6897         trace_event_read_lock();
6898         trace_access_lock(iter->cpu_file);
6899
6900         /* Fill as many pages as possible. */
6901         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6902                 spd.pages[i] = alloc_page(GFP_KERNEL);
6903                 if (!spd.pages[i])
6904                         break;
6905
6906                 rem = tracing_fill_pipe_page(rem, iter);
6907
6908                 /* Copy the data into the page, so we can start over. */
6909                 ret = trace_seq_to_buffer(&iter->seq,
6910                                           page_address(spd.pages[i]),
6911                                           trace_seq_used(&iter->seq));
6912                 if (ret < 0) {
6913                         __free_page(spd.pages[i]);
6914                         break;
6915                 }
6916                 spd.partial[i].offset = 0;
6917                 spd.partial[i].len = trace_seq_used(&iter->seq);
6918
6919                 trace_seq_init(&iter->seq);
6920         }
6921
6922         trace_access_unlock(iter->cpu_file);
6923         trace_event_read_unlock();
6924         mutex_unlock(&iter->mutex);
6925
6926         spd.nr_pages = i;
6927
6928         if (i)
6929                 ret = splice_to_pipe(pipe, &spd);
6930         else
6931                 ret = 0;
6932 out:
6933         splice_shrink_spd(&spd);
6934         return ret;
6935
6936 out_err:
6937         mutex_unlock(&iter->mutex);
6938         goto out;
6939 }
6940
6941 static ssize_t
6942 tracing_entries_read(struct file *filp, char __user *ubuf,
6943                      size_t cnt, loff_t *ppos)
6944 {
6945         struct inode *inode = file_inode(filp);
6946         struct trace_array *tr = inode->i_private;
6947         int cpu = tracing_get_cpu(inode);
6948         char buf[64];
6949         int r = 0;
6950         ssize_t ret;
6951
6952         mutex_lock(&trace_types_lock);
6953
6954         if (cpu == RING_BUFFER_ALL_CPUS) {
6955                 int cpu, buf_size_same;
6956                 unsigned long size;
6957
6958                 size = 0;
6959                 buf_size_same = 1;
6960                 /* check if all cpu sizes are same */
6961                 for_each_tracing_cpu(cpu) {
6962                         /* fill in the size from first enabled cpu */
6963                         if (size == 0)
6964                                 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6965                         if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6966                                 buf_size_same = 0;
6967                                 break;
6968                         }
6969                 }
6970
6971                 if (buf_size_same) {
6972                         if (!ring_buffer_expanded)
6973                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
6974                                             size >> 10,
6975                                             trace_buf_size >> 10);
6976                         else
6977                                 r = sprintf(buf, "%lu\n", size >> 10);
6978                 } else
6979                         r = sprintf(buf, "X\n");
6980         } else
6981                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6982
6983         mutex_unlock(&trace_types_lock);
6984
6985         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6986         return ret;
6987 }
6988
6989 static ssize_t
6990 tracing_entries_write(struct file *filp, const char __user *ubuf,
6991                       size_t cnt, loff_t *ppos)
6992 {
6993         struct inode *inode = file_inode(filp);
6994         struct trace_array *tr = inode->i_private;
6995         unsigned long val;
6996         int ret;
6997
6998         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6999         if (ret)
7000                 return ret;
7001
7002         /* must have at least 1 entry */
7003         if (!val)
7004                 return -EINVAL;
7005
7006         /* value is in KB */
7007         val <<= 10;
7008         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
7009         if (ret < 0)
7010                 return ret;
7011
7012         *ppos += cnt;
7013
7014         return cnt;
7015 }
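
/*
 * Illustrative usage (not kernel code): the value written to
 * buffer_size_kb is taken in kilobytes (see the val <<= 10 above), so
 * "echo 4096 > /sys/kernel/tracing/buffer_size_kb" asks for 4096 KB per
 * CPU, while the per_cpu/cpu<N>/buffer_size_kb files resize one CPU.
 */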
7016
7017 static ssize_t
7018 tracing_total_entries_read(struct file *filp, char __user *ubuf,
7019                                 size_t cnt, loff_t *ppos)
7020 {
7021         struct trace_array *tr = filp->private_data;
7022         char buf[64];
7023         int r, cpu;
7024         unsigned long size = 0, expanded_size = 0;
7025
7026         mutex_lock(&trace_types_lock);
7027         for_each_tracing_cpu(cpu) {
7028                 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
7029                 if (!ring_buffer_expanded)
7030                         expanded_size += trace_buf_size >> 10;
7031         }
7032         if (ring_buffer_expanded)
7033                 r = sprintf(buf, "%lu\n", size);
7034         else
7035                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
7036         mutex_unlock(&trace_types_lock);
7037
7038         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7039 }
7040
7041 static ssize_t
7042 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
7043                           size_t cnt, loff_t *ppos)
7044 {
7045         /*
7046          * There is no need to read what the user has written; this function
7047          * just makes sure that "echo" does not return an error.
7048          */
7049
7050         *ppos += cnt;
7051
7052         return cnt;
7053 }
7054
7055 static int
7056 tracing_free_buffer_release(struct inode *inode, struct file *filp)
7057 {
7058         struct trace_array *tr = inode->i_private;
7059
7060         /* disable tracing? */
7061         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
7062                 tracer_tracing_off(tr);
7063         /* resize the ring buffer to 0 */
7064         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
7065
7066         trace_array_put(tr);
7067
7068         return 0;
7069 }
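
/*
 * Illustrative usage (not kernel code): writing to free_buffer, e.g.
 * "echo > /sys/kernel/tracing/free_buffer", does nothing by itself; it is
 * the release of the file descriptor above that shrinks the ring buffer
 * to zero (after turning tracing off when TRACE_ITER_STOP_ON_FREE is set).
 */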
7070
7071 static ssize_t
7072 tracing_mark_write(struct file *filp, const char __user *ubuf,
7073                                         size_t cnt, loff_t *fpos)
7074 {
7075         struct trace_array *tr = filp->private_data;
7076         struct ring_buffer_event *event;
7077         enum event_trigger_type tt = ETT_NONE;
7078         struct trace_buffer *buffer;
7079         struct print_entry *entry;
7080         ssize_t written;
7081         int size;
7082         int len;
7083
7084 /* Used in tracing_mark_raw_write() as well */
7085 #define FAULTED_STR "<faulted>"
7086 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
7087
7088         if (tracing_disabled)
7089                 return -EINVAL;
7090
7091         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7092                 return -EINVAL;
7093
7094         if (cnt > TRACE_BUF_SIZE)
7095                 cnt = TRACE_BUF_SIZE;
7096
7097         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7098
7099         size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
7100
7101         /* If less than "<faulted>", then make sure we can still add that */
7102         if (cnt < FAULTED_SIZE)
7103                 size += FAULTED_SIZE - cnt;
7104
7105         buffer = tr->array_buffer.buffer;
7106         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7107                                             tracing_gen_ctx());
7108         if (unlikely(!event))
7109                 /* Ring buffer disabled, return as if not open for write */
7110                 return -EBADF;
7111
7112         entry = ring_buffer_event_data(event);
7113         entry->ip = _THIS_IP_;
7114
7115         len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
7116         if (len) {
7117                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7118                 cnt = FAULTED_SIZE;
7119                 written = -EFAULT;
7120         } else
7121                 written = cnt;
7122
7123         if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7124                 /* do not add \n before testing triggers, but add \0 */
7125                 entry->buf[cnt] = '\0';
7126                 tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7127         }
7128
7129         if (entry->buf[cnt - 1] != '\n') {
7130                 entry->buf[cnt] = '\n';
7131                 entry->buf[cnt + 1] = '\0';
7132         } else
7133                 entry->buf[cnt] = '\0';
7134
7135         if (static_branch_unlikely(&trace_marker_exports_enabled))
7136                 ftrace_exports(event, TRACE_EXPORT_MARKER);
7137         __buffer_unlock_commit(buffer, event);
7138
7139         if (tt)
7140                 event_triggers_post_call(tr->trace_marker_file, tt);
7141
7142         return written;
7143 }
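
/*
 * Illustrative user-space usage (not kernel code): each write() to
 * trace_marker becomes one TRACE_PRINT entry, and the code above appends
 * a newline if the caller did not supply one.  The tracefs mount point is
 * assumed to be /sys/kernel/tracing.
 */
#if 0
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
        const char *msg = "hello from user space";
        int fd = open("/sys/kernel/tracing/trace_marker", O_WRONLY);

        if (fd < 0)
                return 1;
        write(fd, msg, strlen(msg));    /* shows up in the trace output */
        close(fd);
        return 0;
}
#endif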
7144
7145 /* Limit it for now to 3K (including tag) */
7146 #define RAW_DATA_MAX_SIZE (1024*3)
7147
7148 static ssize_t
7149 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7150                                         size_t cnt, loff_t *fpos)
7151 {
7152         struct trace_array *tr = filp->private_data;
7153         struct ring_buffer_event *event;
7154         struct trace_buffer *buffer;
7155         struct raw_data_entry *entry;
7156         ssize_t written;
7157         int size;
7158         int len;
7159
7160 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7161
7162         if (tracing_disabled)
7163                 return -EINVAL;
7164
7165         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7166                 return -EINVAL;
7167
7168         /* The marker must at least have a tag id */
7169         if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
7170                 return -EINVAL;
7171
7172         if (cnt > TRACE_BUF_SIZE)
7173                 cnt = TRACE_BUF_SIZE;
7174
7175         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7176
7177         size = sizeof(*entry) + cnt;
7178         if (cnt < FAULT_SIZE_ID)
7179                 size += FAULT_SIZE_ID - cnt;
7180
7181         buffer = tr->array_buffer.buffer;
7182         event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7183                                             tracing_gen_ctx());
7184         if (!event)
7185                 /* Ring buffer disabled, return as if not open for write */
7186                 return -EBADF;
7187
7188         entry = ring_buffer_event_data(event);
7189
7190         len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7191         if (len) {
7192                 entry->id = -1;
7193                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7194                 written = -EFAULT;
7195         } else
7196                 written = cnt;
7197
7198         __buffer_unlock_commit(buffer, event);
7199
7200         return written;
7201 }
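
/*
 * Illustrative user-space usage (not kernel code): trace_marker_raw
 * expects the payload to begin with an integer tag id, which is what is
 * copied into entry->id above; the rest of the write is opaque binary
 * data.  The record layout below is made up for illustration.
 */
#if 0
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
        struct {
                int id;                 /* consumed as entry->id */
                char data[8];           /* opaque payload */
        } rec = { .id = 42 };
        int fd = open("/sys/kernel/tracing/trace_marker_raw", O_WRONLY);

        if (fd < 0)
                return 1;
        memcpy(rec.data, "rawdata", 8);
        write(fd, &rec, sizeof(rec));
        close(fd);
        return 0;
}
#endif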
7202
7203 static int tracing_clock_show(struct seq_file *m, void *v)
7204 {
7205         struct trace_array *tr = m->private;
7206         int i;
7207
7208         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7209                 seq_printf(m,
7210                         "%s%s%s%s", i ? " " : "",
7211                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7212                         i == tr->clock_id ? "]" : "");
7213         seq_putc(m, '\n');
7214
7215         return 0;
7216 }
7217
7218 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7219 {
7220         int i;
7221
7222         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7223                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
7224                         break;
7225         }
7226         if (i == ARRAY_SIZE(trace_clocks))
7227                 return -EINVAL;
7228
7229         mutex_lock(&trace_types_lock);
7230
7231         tr->clock_id = i;
7232
7233         ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7234
7235         /*
7236          * New clock may not be consistent with the previous clock.
7237          * Reset the buffer so that it doesn't have incomparable timestamps.
7238          */
7239         tracing_reset_online_cpus(&tr->array_buffer);
7240
7241 #ifdef CONFIG_TRACER_MAX_TRACE
7242         if (tr->max_buffer.buffer)
7243                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7244         tracing_reset_online_cpus(&tr->max_buffer);
7245 #endif
7246
7247         mutex_unlock(&trace_types_lock);
7248
7249         return 0;
7250 }
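
/*
 * A minimal illustrative sketch (not part of this file): switching the
 * top-level instance to the "global" clock.  As the comment above notes,
 * changing the clock resets the buffers so timestamps stay comparable.
 */
#if 0
static int example_use_global_clock(void)
{
        return tracing_set_clock(&global_trace, "global");
}
#endif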
7251
7252 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7253                                    size_t cnt, loff_t *fpos)
7254 {
7255         struct seq_file *m = filp->private_data;
7256         struct trace_array *tr = m->private;
7257         char buf[64];
7258         const char *clockstr;
7259         int ret;
7260
7261         if (cnt >= sizeof(buf))
7262                 return -EINVAL;
7263
7264         if (copy_from_user(buf, ubuf, cnt))
7265                 return -EFAULT;
7266
7267         buf[cnt] = 0;
7268
7269         clockstr = strstrip(buf);
7270
7271         ret = tracing_set_clock(tr, clockstr);
7272         if (ret)
7273                 return ret;
7274
7275         *fpos += cnt;
7276
7277         return cnt;
7278 }
7279
7280 static int tracing_clock_open(struct inode *inode, struct file *file)
7281 {
7282         struct trace_array *tr = inode->i_private;
7283         int ret;
7284
7285         ret = tracing_check_open_get_tr(tr);
7286         if (ret)
7287                 return ret;
7288
7289         ret = single_open(file, tracing_clock_show, inode->i_private);
7290         if (ret < 0)
7291                 trace_array_put(tr);
7292
7293         return ret;
7294 }
7295
7296 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7297 {
7298         struct trace_array *tr = m->private;
7299
7300         mutex_lock(&trace_types_lock);
7301
7302         if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7303                 seq_puts(m, "delta [absolute]\n");
7304         else
7305                 seq_puts(m, "[delta] absolute\n");
7306
7307         mutex_unlock(&trace_types_lock);
7308
7309         return 0;
7310 }
7311
7312 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7313 {
7314         struct trace_array *tr = inode->i_private;
7315         int ret;
7316
7317         ret = tracing_check_open_get_tr(tr);
7318         if (ret)
7319                 return ret;
7320
7321         ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7322         if (ret < 0)
7323                 trace_array_put(tr);
7324
7325         return ret;
7326 }
7327
7328 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7329 {
7330         if (rbe == this_cpu_read(trace_buffered_event))
7331                 return ring_buffer_time_stamp(buffer);
7332
7333         return ring_buffer_event_time_stamp(buffer, rbe);
7334 }
7335
7336 /*
7337  * Set or disable using the per CPU trace_buffered_event when possible.
7338  */
7339 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7340 {
7341         int ret = 0;
7342
7343         mutex_lock(&trace_types_lock);
7344
7345         if (set && tr->no_filter_buffering_ref++)
7346                 goto out;
7347
7348         if (!set) {
7349                 if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
7350                         ret = -EINVAL;
7351                         goto out;
7352                 }
7353
7354                 --tr->no_filter_buffering_ref;
7355         }
7356  out:
7357         mutex_unlock(&trace_types_lock);
7358
7359         return ret;
7360 }
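
/*
 * A minimal illustrative sketch (not part of this file): callers pair a
 * "true" call with a later "false" call, so that the reference count
 * above stays balanced.  example_filter_buffering() is made up for
 * illustration.
 */
#if 0
static int example_filter_buffering(struct trace_array *tr)
{
        int ret;

        /* Take a reference on no_filter_buffering_ref. */
        ret = tracing_set_filter_buffering(tr, true);
        if (ret)
                return ret;

        /* ... do the work that required the reference ... */

        /* Drop the reference again. */
        return tracing_set_filter_buffering(tr, false);
}
#endif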
7361
7362 struct ftrace_buffer_info {
7363         struct trace_iterator   iter;
7364         void                    *spare;
7365         unsigned int            spare_cpu;
7366         unsigned int            read;
7367 };
7368
7369 #ifdef CONFIG_TRACER_SNAPSHOT
7370 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7371 {
7372         struct trace_array *tr = inode->i_private;
7373         struct trace_iterator *iter;
7374         struct seq_file *m;
7375         int ret;
7376
7377         ret = tracing_check_open_get_tr(tr);
7378         if (ret)
7379                 return ret;
7380
7381         if (file->f_mode & FMODE_READ) {
7382                 iter = __tracing_open(inode, file, true);
7383                 if (IS_ERR(iter))
7384                         ret = PTR_ERR(iter);
7385         } else {
7386                 /* Writes still need the seq_file to hold the private data */
7387                 ret = -ENOMEM;
7388                 m = kzalloc(sizeof(*m), GFP_KERNEL);
7389                 if (!m)
7390                         goto out;
7391                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7392                 if (!iter) {
7393                         kfree(m);
7394                         goto out;
7395                 }
7396                 ret = 0;
7397
7398                 iter->tr = tr;
7399                 iter->array_buffer = &tr->max_buffer;
7400                 iter->cpu_file = tracing_get_cpu(inode);
7401                 m->private = iter;
7402                 file->private_data = m;
7403         }
7404 out:
7405         if (ret < 0)
7406                 trace_array_put(tr);
7407
7408         return ret;
7409 }
7410
7411 static ssize_t
7412 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7413                        loff_t *ppos)
7414 {
7415         struct seq_file *m = filp->private_data;
7416         struct trace_iterator *iter = m->private;
7417         struct trace_array *tr = iter->tr;
7418         unsigned long val;
7419         int ret;
7420
7421         ret = tracing_update_buffers();
7422         if (ret < 0)
7423                 return ret;
7424
7425         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7426         if (ret)
7427                 return ret;
7428
7429         mutex_lock(&trace_types_lock);
7430
7431         if (tr->current_trace->use_max_tr) {
7432                 ret = -EBUSY;
7433                 goto out;
7434         }
7435
7436         arch_spin_lock(&tr->max_lock);
7437         if (tr->cond_snapshot)
7438                 ret = -EBUSY;
7439         arch_spin_unlock(&tr->max_lock);
7440         if (ret)
7441                 goto out;
7442
7443         switch (val) {
7444         case 0:
7445                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7446                         ret = -EINVAL;
7447                         break;
7448                 }
7449                 if (tr->allocated_snapshot)
7450                         free_snapshot(tr);
7451                 break;
7452         case 1:
7453 /* Only allow per-cpu swap if the ring buffer supports it */
7454 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7455                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7456                         ret = -EINVAL;
7457                         break;
7458                 }
7459 #endif
7460                 if (tr->allocated_snapshot)
7461                         ret = resize_buffer_duplicate_size(&tr->max_buffer,
7462                                         &tr->array_buffer, iter->cpu_file);
7463                 else
7464                         ret = tracing_alloc_snapshot_instance(tr);
7465                 if (ret < 0)
7466                         break;
7467                 local_irq_disable();
7468                 /* Now, we're going to swap */
7469                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7470                         update_max_tr(tr, current, smp_processor_id(), NULL);
7471                 else
7472                         update_max_tr_single(tr, current, iter->cpu_file);
7473                 local_irq_enable();
7474                 break;
7475         default:
7476                 if (tr->allocated_snapshot) {
7477                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7478                                 tracing_reset_online_cpus(&tr->max_buffer);
7479                         else
7480                                 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7481                 }
7482                 break;
7483         }
7484
7485         if (ret >= 0) {
7486                 *ppos += cnt;
7487                 ret = cnt;
7488         }
7489 out:
7490         mutex_unlock(&trace_types_lock);
7491         return ret;
7492 }
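
/*
 * Illustrative usage (not kernel code), matching the switch above:
 * "echo 1 > /sys/kernel/tracing/snapshot" allocates the snapshot buffer
 * if needed and swaps it with the live buffer, "echo 0" frees it, and
 * any other value just clears the snapshot buffer's contents.
 */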
7493
7494 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7495 {
7496         struct seq_file *m = file->private_data;
7497         int ret;
7498
7499         ret = tracing_release(inode, file);
7500
7501         if (file->f_mode & FMODE_READ)
7502                 return ret;
7503
7504         /* If write only, the seq_file is just a stub */
7505         if (m)
7506                 kfree(m->private);
7507         kfree(m);
7508
7509         return 0;
7510 }
7511
7512 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7513 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7514                                     size_t count, loff_t *ppos);
7515 static int tracing_buffers_release(struct inode *inode, struct file *file);
7516 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7517                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7518
7519 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7520 {
7521         struct ftrace_buffer_info *info;
7522         int ret;
7523
7524         /* The following checks for tracefs lockdown */
7525         ret = tracing_buffers_open(inode, filp);
7526         if (ret < 0)
7527                 return ret;
7528
7529         info = filp->private_data;
7530
7531         if (info->iter.trace->use_max_tr) {
7532                 tracing_buffers_release(inode, filp);
7533                 return -EBUSY;
7534         }
7535
7536         info->iter.snapshot = true;
7537         info->iter.array_buffer = &info->iter.tr->max_buffer;
7538
7539         return ret;
7540 }
7541
7542 #endif /* CONFIG_TRACER_SNAPSHOT */
7543
7544
7545 static const struct file_operations tracing_thresh_fops = {
7546         .open           = tracing_open_generic,
7547         .read           = tracing_thresh_read,
7548         .write          = tracing_thresh_write,
7549         .llseek         = generic_file_llseek,
7550 };
7551
7552 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7553 static const struct file_operations tracing_max_lat_fops = {
7554         .open           = tracing_open_generic,
7555         .read           = tracing_max_lat_read,
7556         .write          = tracing_max_lat_write,
7557         .llseek         = generic_file_llseek,
7558 };
7559 #endif
7560
7561 static const struct file_operations set_tracer_fops = {
7562         .open           = tracing_open_generic,
7563         .read           = tracing_set_trace_read,
7564         .write          = tracing_set_trace_write,
7565         .llseek         = generic_file_llseek,
7566 };
7567
7568 static const struct file_operations tracing_pipe_fops = {
7569         .open           = tracing_open_pipe,
7570         .poll           = tracing_poll_pipe,
7571         .read           = tracing_read_pipe,
7572         .splice_read    = tracing_splice_read_pipe,
7573         .release        = tracing_release_pipe,
7574         .llseek         = no_llseek,
7575 };
7576
7577 static const struct file_operations tracing_entries_fops = {
7578         .open           = tracing_open_generic_tr,
7579         .read           = tracing_entries_read,
7580         .write          = tracing_entries_write,
7581         .llseek         = generic_file_llseek,
7582         .release        = tracing_release_generic_tr,
7583 };
7584
7585 static const struct file_operations tracing_total_entries_fops = {
7586         .open           = tracing_open_generic_tr,
7587         .read           = tracing_total_entries_read,
7588         .llseek         = generic_file_llseek,
7589         .release        = tracing_release_generic_tr,
7590 };
7591
7592 static const struct file_operations tracing_free_buffer_fops = {
7593         .open           = tracing_open_generic_tr,
7594         .write          = tracing_free_buffer_write,
7595         .release        = tracing_free_buffer_release,
7596 };
7597
7598 static const struct file_operations tracing_mark_fops = {
7599         .open           = tracing_mark_open,
7600         .write          = tracing_mark_write,
7601         .release        = tracing_release_generic_tr,
7602 };
7603
7604 static const struct file_operations tracing_mark_raw_fops = {
7605         .open           = tracing_mark_open,
7606         .write          = tracing_mark_raw_write,
7607         .release        = tracing_release_generic_tr,
7608 };
7609
7610 static const struct file_operations trace_clock_fops = {
7611         .open           = tracing_clock_open,
7612         .read           = seq_read,
7613         .llseek         = seq_lseek,
7614         .release        = tracing_single_release_tr,
7615         .write          = tracing_clock_write,
7616 };
7617
7618 static const struct file_operations trace_time_stamp_mode_fops = {
7619         .open           = tracing_time_stamp_mode_open,
7620         .read           = seq_read,
7621         .llseek         = seq_lseek,
7622         .release        = tracing_single_release_tr,
7623 };
7624
7625 #ifdef CONFIG_TRACER_SNAPSHOT
7626 static const struct file_operations snapshot_fops = {
7627         .open           = tracing_snapshot_open,
7628         .read           = seq_read,
7629         .write          = tracing_snapshot_write,
7630         .llseek         = tracing_lseek,
7631         .release        = tracing_snapshot_release,
7632 };
7633
7634 static const struct file_operations snapshot_raw_fops = {
7635         .open           = snapshot_raw_open,
7636         .read           = tracing_buffers_read,
7637         .release        = tracing_buffers_release,
7638         .splice_read    = tracing_buffers_splice_read,
7639         .llseek         = no_llseek,
7640 };
7641
7642 #endif /* CONFIG_TRACER_SNAPSHOT */
7643
7644 /*
7645  * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7646  * @filp: The active open file structure
7647  * @ubuf: The user space buffer containing the value to write
7648  * @cnt: The number of bytes to write
7649  * @ppos: The current "file" position
7650  *
7651  * This function implements the write interface for a struct trace_min_max_param.
7652  * The filp->private_data must point to a trace_min_max_param structure that
7653  * defines where to write the value, the min and the max acceptable values,
7654  * and a lock to protect the write.
7655  */
7656 static ssize_t
7657 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7658 {
7659         struct trace_min_max_param *param = filp->private_data;
7660         u64 val;
7661         int err;
7662
7663         if (!param)
7664                 return -EFAULT;
7665
7666         err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7667         if (err)
7668                 return err;
7669
7670         if (param->lock)
7671                 mutex_lock(param->lock);
7672
7673         if (param->min && val < *param->min)
7674                 err = -EINVAL;
7675
7676         if (param->max && val > *param->max)
7677                 err = -EINVAL;
7678
7679         if (!err)
7680                 *param->val = val;
7681
7682         if (param->lock)
7683                 mutex_unlock(param->lock);
7684
7685         if (err)
7686                 return err;
7687
7688         return cnt;
7689 }
7690
7691 /*
7692  * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7693  * @filp: The active open file structure
7694  * @ubuf: The userspace provided buffer to read value into
7695  * @cnt: The maximum number of bytes to read
7696  * @ppos: The current "file" position
7697  *
7698  * This function implements the read interface for a struct trace_min_max_param.
7699  * The filp->private_data must point to a trace_min_max_param struct with valid
7700  * data.
7701  */
7702 static ssize_t
7703 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7704 {
7705         struct trace_min_max_param *param = filp->private_data;
7706         char buf[U64_STR_SIZE];
7707         int len;
7708         u64 val;
7709
7710         if (!param)
7711                 return -EFAULT;
7712
7713         val = *param->val;
7714
7715         if (cnt > sizeof(buf))
7716                 cnt = sizeof(buf);
7717
7718         len = snprintf(buf, sizeof(buf), "%llu\n", val);
7719
7720         return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7721 }
7722
7723 const struct file_operations trace_min_max_fops = {
7724         .open           = tracing_open_generic,
7725         .read           = trace_min_max_read,
7726         .write          = trace_min_max_write,
7727 };
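
/*
 * A minimal illustrative sketch (not part of this file): exposing a
 * clamped u64 through trace_min_max_fops.  It assumes the
 * trace_min_max_param layout implied by the accessors above (u64 *val,
 * *min and *max plus an optional lock); all names below are made up.
 */
#if 0
static u64 example_value = 50;
static u64 example_min = 1;
static u64 example_max = 100;

static struct trace_min_max_param example_param = {
        .lock   = NULL,                 /* no locking needed here */
        .val    = &example_value,
        .min    = &example_min,
        .max    = &example_max,
};

static void example_create_min_max_file(struct dentry *d_tracer)
{
        trace_create_file("example_value", TRACE_MODE_WRITE, d_tracer,
                          &example_param, &trace_min_max_fops);
}
#endif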
7728
7729 #define TRACING_LOG_ERRS_MAX    8
7730 #define TRACING_LOG_LOC_MAX     128
7731
7732 #define CMD_PREFIX "  Command: "
7733
7734 struct err_info {
7735         const char      **errs; /* ptr to loc-specific array of err strings */
7736         u8              type;   /* index into errs -> specific err string */
7737         u16             pos;    /* caret position */
7738         u64             ts;
7739 };
7740
7741 struct tracing_log_err {
7742         struct list_head        list;
7743         struct err_info         info;
7744         char                    loc[TRACING_LOG_LOC_MAX]; /* err location */
7745         char                    *cmd;                     /* what caused err */
7746 };
7747
7748 static DEFINE_MUTEX(tracing_err_log_lock);
7749
7750 static struct tracing_log_err *alloc_tracing_log_err(int len)
7751 {
7752         struct tracing_log_err *err;
7753
7754         err = kzalloc(sizeof(*err), GFP_KERNEL);
7755         if (!err)
7756                 return ERR_PTR(-ENOMEM);
7757
7758         err->cmd = kzalloc(len, GFP_KERNEL);
7759         if (!err->cmd) {
7760                 kfree(err);
7761                 return ERR_PTR(-ENOMEM);
7762         }
7763
7764         return err;
7765 }
7766
7767 static void free_tracing_log_err(struct tracing_log_err *err)
7768 {
7769         kfree(err->cmd);
7770         kfree(err);
7771 }
7772
7773 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr,
7774                                                    int len)
7775 {
7776         struct tracing_log_err *err;
7777
7778         if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7779                 err = alloc_tracing_log_err(len);
7780                 if (PTR_ERR(err) != -ENOMEM)
7781                         tr->n_err_log_entries++;
7782
7783                 return err;
7784         }
7785
7786         err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7787         kfree(err->cmd);
7788         err->cmd = kzalloc(len, GFP_KERNEL);
7789         if (!err->cmd)
7790                 return ERR_PTR(-ENOMEM);
7791         list_del(&err->list);
7792
7793         return err;
7794 }
7795
7796 /**
7797  * err_pos - find the position of a string within a command for error careting
7798  * @cmd: The tracing command that caused the error
7799  * @str: The string to position the caret at within @cmd
7800  *
7801  * Finds the position of the first occurrence of @str within @cmd.  The
7802  * return value can be passed to tracing_log_err() for caret placement
7803  * within @cmd.
7804  *
7805  * Returns the index within @cmd of the first occurrence of @str or 0
7806  * if @str was not found.
7807  */
7808 unsigned int err_pos(char *cmd, const char *str)
7809 {
7810         char *found;
7811
7812         if (WARN_ON(!strlen(cmd)))
7813                 return 0;
7814
7815         found = strstr(cmd, str);
7816         if (found)
7817                 return found - cmd;
7818
7819         return 0;
7820 }
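
/*
 * Worked example (hypothetical command, not from this file): for
 *
 *	cmd = "keys=common_pid:vals=bad_field"
 *
 * err_pos(cmd, "bad_field") returns 21, the offset of "bad_field"
 * within @cmd, which tracing_log_err() below then uses to place the
 * caret under the offending token.
 */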
7821
7822 /**
7823  * tracing_log_err - write an error to the tracing error log
7824  * @tr: The associated trace array for the error (NULL for top level array)
7825  * @loc: A string describing where the error occurred
7826  * @cmd: The tracing command that caused the error
7827  * @errs: The array of loc-specific static error strings
7828  * @type: The index into errs[], which produces the specific static err string
7829  * @pos: The position the caret should be placed in the cmd
7830  *
7831  * Writes an error into tracing/error_log of the form:
7832  *
7833  * <loc>: error: <text>
7834  *   Command: <cmd>
7835  *              ^
7836  *
7837  * tracing/error_log is a small log file containing the last
7838  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7839  * unless there has been a tracing error, and the error log can be
7840  * cleared and have its memory freed by writing the empty string in
7841  * truncation mode to it i.e. echo > tracing/error_log.
7842  *
7843  * NOTE: the @errs array along with the @type param are used to
7844  * produce a static error string - this string is not copied and saved
7845  * when the error is logged - only a pointer to it is saved.  See
7846  * existing callers for examples of how static strings are typically
7847  * defined for use with tracing_log_err().
7848  */
7849 void tracing_log_err(struct trace_array *tr,
7850                      const char *loc, const char *cmd,
7851                      const char **errs, u8 type, u16 pos)
7852 {
7853         struct tracing_log_err *err;
7854         int len = 0;
7855
7856         if (!tr)
7857                 tr = &global_trace;
7858
7859         len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1;
7860
7861         mutex_lock(&tracing_err_log_lock);
7862         err = get_tracing_log_err(tr, len);
7863         if (PTR_ERR(err) == -ENOMEM) {
7864                 mutex_unlock(&tracing_err_log_lock);
7865                 return;
7866         }
7867
7868         snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7869         snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd);
7870
7871         err->info.errs = errs;
7872         err->info.type = type;
7873         err->info.pos = pos;
7874         err->info.ts = local_clock();
7875
7876         list_add_tail(&err->list, &tr->err_log);
7877         mutex_unlock(&tracing_err_log_lock);
7878 }
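
/*
 * Illustrative caller sketch (hypothetical names, not from this file):
 * a command parser keeps a static array of error strings and logs an
 * error by index, using err_pos() to place the caret under the bad
 * token:
 *
 *	static const char *foo_errs[] = {
 *		"Field not found",
 *		"Too many arguments",
 *	};
 *
 *	// on a parse failure for field_name inside cmd:
 *	tracing_log_err(tr, "foo_cmd", cmd, foo_errs,
 *			0, err_pos(cmd, field_name));
 *
 * Only the pointer to foo_errs[0] is stored, as noted above.
 */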
7879
7880 static void clear_tracing_err_log(struct trace_array *tr)
7881 {
7882         struct tracing_log_err *err, *next;
7883
7884         mutex_lock(&tracing_err_log_lock);
7885         list_for_each_entry_safe(err, next, &tr->err_log, list) {
7886                 list_del(&err->list);
7887                 free_tracing_log_err(err);
7888         }
7889
7890         tr->n_err_log_entries = 0;
7891         mutex_unlock(&tracing_err_log_lock);
7892 }
7893
7894 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7895 {
7896         struct trace_array *tr = m->private;
7897
7898         mutex_lock(&tracing_err_log_lock);
7899
7900         return seq_list_start(&tr->err_log, *pos);
7901 }
7902
7903 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7904 {
7905         struct trace_array *tr = m->private;
7906
7907         return seq_list_next(v, &tr->err_log, pos);
7908 }
7909
7910 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7911 {
7912         mutex_unlock(&tracing_err_log_lock);
7913 }
7914
7915 static void tracing_err_log_show_pos(struct seq_file *m, u16 pos)
7916 {
7917         u16 i;
7918
7919         for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7920                 seq_putc(m, ' ');
7921         for (i = 0; i < pos; i++)
7922                 seq_putc(m, ' ');
7923         seq_puts(m, "^\n");
7924 }
7925
7926 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7927 {
7928         struct tracing_log_err *err = v;
7929
7930         if (err) {
7931                 const char *err_text = err->info.errs[err->info.type];
7932                 u64 sec = err->info.ts;
7933                 u32 nsec;
7934
7935                 nsec = do_div(sec, NSEC_PER_SEC);
7936                 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7937                            err->loc, err_text);
7938                 seq_printf(m, "%s", err->cmd);
7939                 tracing_err_log_show_pos(m, err->info.pos);
7940         }
7941
7942         return 0;
7943 }
7944
7945 static const struct seq_operations tracing_err_log_seq_ops = {
7946         .start  = tracing_err_log_seq_start,
7947         .next   = tracing_err_log_seq_next,
7948         .stop   = tracing_err_log_seq_stop,
7949         .show   = tracing_err_log_seq_show
7950 };
7951
7952 static int tracing_err_log_open(struct inode *inode, struct file *file)
7953 {
7954         struct trace_array *tr = inode->i_private;
7955         int ret = 0;
7956
7957         ret = tracing_check_open_get_tr(tr);
7958         if (ret)
7959                 return ret;
7960
7961         /* If this file was opened for write, then erase contents */
7962         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7963                 clear_tracing_err_log(tr);
7964
7965         if (file->f_mode & FMODE_READ) {
7966                 ret = seq_open(file, &tracing_err_log_seq_ops);
7967                 if (!ret) {
7968                         struct seq_file *m = file->private_data;
7969                         m->private = tr;
7970                 } else {
7971                         trace_array_put(tr);
7972                 }
7973         }
7974         return ret;
7975 }
7976
7977 static ssize_t tracing_err_log_write(struct file *file,
7978                                      const char __user *buffer,
7979                                      size_t count, loff_t *ppos)
7980 {
7981         return count;
7982 }
7983
7984 static int tracing_err_log_release(struct inode *inode, struct file *file)
7985 {
7986         struct trace_array *tr = inode->i_private;
7987
7988         trace_array_put(tr);
7989
7990         if (file->f_mode & FMODE_READ)
7991                 seq_release(inode, file);
7992
7993         return 0;
7994 }
7995
7996 static const struct file_operations tracing_err_log_fops = {
7997         .open           = tracing_err_log_open,
7998         .write          = tracing_err_log_write,
7999         .read           = seq_read,
8000         .llseek         = seq_lseek,
8001         .release        = tracing_err_log_release,
8002 };
8003
8004 static int tracing_buffers_open(struct inode *inode, struct file *filp)
8005 {
8006         struct trace_array *tr = inode->i_private;
8007         struct ftrace_buffer_info *info;
8008         int ret;
8009
8010         ret = tracing_check_open_get_tr(tr);
8011         if (ret)
8012                 return ret;
8013
8014         info = kvzalloc(sizeof(*info), GFP_KERNEL);
8015         if (!info) {
8016                 trace_array_put(tr);
8017                 return -ENOMEM;
8018         }
8019
8020         mutex_lock(&trace_types_lock);
8021
8022         info->iter.tr           = tr;
8023         info->iter.cpu_file     = tracing_get_cpu(inode);
8024         info->iter.trace        = tr->current_trace;
8025         info->iter.array_buffer = &tr->array_buffer;
8026         info->spare             = NULL;
8027         /* Force reading ring buffer for first read */
8028         info->read              = (unsigned int)-1;
8029
8030         filp->private_data = info;
8031
8032         tr->trace_ref++;
8033
8034         mutex_unlock(&trace_types_lock);
8035
8036         ret = nonseekable_open(inode, filp);
8037         if (ret < 0)
8038                 trace_array_put(tr);
8039
8040         return ret;
8041 }
8042
8043 static __poll_t
8044 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
8045 {
8046         struct ftrace_buffer_info *info = filp->private_data;
8047         struct trace_iterator *iter = &info->iter;
8048
8049         return trace_poll(iter, filp, poll_table);
8050 }
8051
8052 static ssize_t
8053 tracing_buffers_read(struct file *filp, char __user *ubuf,
8054                      size_t count, loff_t *ppos)
8055 {
8056         struct ftrace_buffer_info *info = filp->private_data;
8057         struct trace_iterator *iter = &info->iter;
8058         ssize_t ret = 0;
8059         ssize_t size;
8060
8061         if (!count)
8062                 return 0;
8063
8064 #ifdef CONFIG_TRACER_MAX_TRACE
8065         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8066                 return -EBUSY;
8067 #endif
8068
8069         if (!info->spare) {
8070                 info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
8071                                                           iter->cpu_file);
8072                 if (IS_ERR(info->spare)) {
8073                         ret = PTR_ERR(info->spare);
8074                         info->spare = NULL;
8075                 } else {
8076                         info->spare_cpu = iter->cpu_file;
8077                 }
8078         }
8079         if (!info->spare)
8080                 return ret;
8081
8082         /* Do we have previous read data to read? */
8083         if (info->read < PAGE_SIZE)
8084                 goto read;
8085
8086  again:
8087         trace_access_lock(iter->cpu_file);
8088         ret = ring_buffer_read_page(iter->array_buffer->buffer,
8089                                     &info->spare,
8090                                     count,
8091                                     iter->cpu_file, 0);
8092         trace_access_unlock(iter->cpu_file);
8093
8094         if (ret < 0) {
8095                 if (trace_empty(iter)) {
8096                         if ((filp->f_flags & O_NONBLOCK))
8097                                 return -EAGAIN;
8098
8099                         ret = wait_on_pipe(iter, 0);
8100                         if (ret)
8101                                 return ret;
8102
8103                         goto again;
8104                 }
8105                 return 0;
8106         }
8107
8108         info->read = 0;
8109  read:
8110         size = PAGE_SIZE - info->read;
8111         if (size > count)
8112                 size = count;
8113
8114         ret = copy_to_user(ubuf, info->spare + info->read, size);
8115         if (ret == size)
8116                 return -EFAULT;
8117
8118         size -= ret;
8119
8120         *ppos += size;
8121         info->read += size;
8122
8123         return size;
8124 }
8125
8126 static int tracing_buffers_release(struct inode *inode, struct file *file)
8127 {
8128         struct ftrace_buffer_info *info = file->private_data;
8129         struct trace_iterator *iter = &info->iter;
8130
8131         mutex_lock(&trace_types_lock);
8132
8133         iter->tr->trace_ref--;
8134
8135         __trace_array_put(iter->tr);
8136
8137         if (info->spare)
8138                 ring_buffer_free_read_page(iter->array_buffer->buffer,
8139                                            info->spare_cpu, info->spare);
8140         kvfree(info);
8141
8142         mutex_unlock(&trace_types_lock);
8143
8144         return 0;
8145 }
8146
8147 struct buffer_ref {
8148         struct trace_buffer     *buffer;
8149         void                    *page;
8150         int                     cpu;
8151         refcount_t              refcount;
8152 };
8153
8154 static void buffer_ref_release(struct buffer_ref *ref)
8155 {
8156         if (!refcount_dec_and_test(&ref->refcount))
8157                 return;
8158         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8159         kfree(ref);
8160 }
8161
8162 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8163                                     struct pipe_buffer *buf)
8164 {
8165         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8166
8167         buffer_ref_release(ref);
8168         buf->private = 0;
8169 }
8170
8171 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8172                                 struct pipe_buffer *buf)
8173 {
8174         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8175
8176         if (refcount_read(&ref->refcount) > INT_MAX/2)
8177                 return false;
8178
8179         refcount_inc(&ref->refcount);
8180         return true;
8181 }
8182
8183 /* Pipe buffer operations for a buffer. */
8184 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8185         .release                = buffer_pipe_buf_release,
8186         .get                    = buffer_pipe_buf_get,
8187 };
8188
8189 /*
8190  * Callback from splice_to_pipe(), if we need to release some pages
8191  * at the end of the spd in case we errored out while filling the pipe.
8192  */
8193 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8194 {
8195         struct buffer_ref *ref =
8196                 (struct buffer_ref *)spd->partial[i].private;
8197
8198         buffer_ref_release(ref);
8199         spd->partial[i].private = 0;
8200 }
8201
8202 static ssize_t
8203 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8204                             struct pipe_inode_info *pipe, size_t len,
8205                             unsigned int flags)
8206 {
8207         struct ftrace_buffer_info *info = file->private_data;
8208         struct trace_iterator *iter = &info->iter;
8209         struct partial_page partial_def[PIPE_DEF_BUFFERS];
8210         struct page *pages_def[PIPE_DEF_BUFFERS];
8211         struct splice_pipe_desc spd = {
8212                 .pages          = pages_def,
8213                 .partial        = partial_def,
8214                 .nr_pages_max   = PIPE_DEF_BUFFERS,
8215                 .ops            = &buffer_pipe_buf_ops,
8216                 .spd_release    = buffer_spd_release,
8217         };
8218         struct buffer_ref *ref;
8219         int entries, i;
8220         ssize_t ret = 0;
8221
8222 #ifdef CONFIG_TRACER_MAX_TRACE
8223         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8224                 return -EBUSY;
8225 #endif
8226
8227         if (*ppos & (PAGE_SIZE - 1))
8228                 return -EINVAL;
8229
8230         if (len & (PAGE_SIZE - 1)) {
8231                 if (len < PAGE_SIZE)
8232                         return -EINVAL;
8233                 len &= PAGE_MASK;
8234         }
8235
8236         if (splice_grow_spd(pipe, &spd))
8237                 return -ENOMEM;
8238
8239  again:
8240         trace_access_lock(iter->cpu_file);
8241         entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8242
8243         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
8244                 struct page *page;
8245                 int r;
8246
8247                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8248                 if (!ref) {
8249                         ret = -ENOMEM;
8250                         break;
8251                 }
8252
8253                 refcount_set(&ref->refcount, 1);
8254                 ref->buffer = iter->array_buffer->buffer;
8255                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8256                 if (IS_ERR(ref->page)) {
8257                         ret = PTR_ERR(ref->page);
8258                         ref->page = NULL;
8259                         kfree(ref);
8260                         break;
8261                 }
8262                 ref->cpu = iter->cpu_file;
8263
8264                 r = ring_buffer_read_page(ref->buffer, &ref->page,
8265                                           len, iter->cpu_file, 1);
8266                 if (r < 0) {
8267                         ring_buffer_free_read_page(ref->buffer, ref->cpu,
8268                                                    ref->page);
8269                         kfree(ref);
8270                         break;
8271                 }
8272
8273                 page = virt_to_page(ref->page);
8274
8275                 spd.pages[i] = page;
8276                 spd.partial[i].len = PAGE_SIZE;
8277                 spd.partial[i].offset = 0;
8278                 spd.partial[i].private = (unsigned long)ref;
8279                 spd.nr_pages++;
8280                 *ppos += PAGE_SIZE;
8281
8282                 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8283         }
8284
8285         trace_access_unlock(iter->cpu_file);
8286         spd.nr_pages = i;
8287
8288         /* did we read anything? */
8289         if (!spd.nr_pages) {
8290                 if (ret)
8291                         goto out;
8292
8293                 ret = -EAGAIN;
8294                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8295                         goto out;
8296
8297                 ret = wait_on_pipe(iter, iter->tr->buffer_percent);
8298                 if (ret)
8299                         goto out;
8300
8301                 goto again;
8302         }
8303
8304         ret = splice_to_pipe(pipe, &spd);
8305 out:
8306         splice_shrink_spd(&spd);
8307
8308         return ret;
8309 }
8310
8311 static const struct file_operations tracing_buffers_fops = {
8312         .open           = tracing_buffers_open,
8313         .read           = tracing_buffers_read,
8314         .poll           = tracing_buffers_poll,
8315         .release        = tracing_buffers_release,
8316         .splice_read    = tracing_buffers_splice_read,
8317         .llseek         = no_llseek,
8318 };
8319
8320 static ssize_t
8321 tracing_stats_read(struct file *filp, char __user *ubuf,
8322                    size_t count, loff_t *ppos)
8323 {
8324         struct inode *inode = file_inode(filp);
8325         struct trace_array *tr = inode->i_private;
8326         struct array_buffer *trace_buf = &tr->array_buffer;
8327         int cpu = tracing_get_cpu(inode);
8328         struct trace_seq *s;
8329         unsigned long cnt;
8330         unsigned long long t;
8331         unsigned long usec_rem;
8332
8333         s = kmalloc(sizeof(*s), GFP_KERNEL);
8334         if (!s)
8335                 return -ENOMEM;
8336
8337         trace_seq_init(s);
8338
8339         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8340         trace_seq_printf(s, "entries: %ld\n", cnt);
8341
8342         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8343         trace_seq_printf(s, "overrun: %ld\n", cnt);
8344
8345         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8346         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8347
8348         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8349         trace_seq_printf(s, "bytes: %ld\n", cnt);
8350
8351         if (trace_clocks[tr->clock_id].in_ns) {
8352                 /* local or global for trace_clock */
8353                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8354                 usec_rem = do_div(t, USEC_PER_SEC);
8355                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8356                                                                 t, usec_rem);
8357
8358                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8359                 usec_rem = do_div(t, USEC_PER_SEC);
8360                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8361         } else {
8362                 /* counter or tsc mode for trace_clock */
8363                 trace_seq_printf(s, "oldest event ts: %llu\n",
8364                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8365
8366                 trace_seq_printf(s, "now ts: %llu\n",
8367                                 ring_buffer_time_stamp(trace_buf->buffer));
8368         }
8369
8370         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8371         trace_seq_printf(s, "dropped events: %ld\n", cnt);
8372
8373         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8374         trace_seq_printf(s, "read events: %ld\n", cnt);
8375
8376         count = simple_read_from_buffer(ubuf, count, ppos,
8377                                         s->buffer, trace_seq_used(s));
8378
8379         kfree(s);
8380
8381         return count;
8382 }
8383
8384 static const struct file_operations tracing_stats_fops = {
8385         .open           = tracing_open_generic_tr,
8386         .read           = tracing_stats_read,
8387         .llseek         = generic_file_llseek,
8388         .release        = tracing_release_generic_tr,
8389 };
8390
8391 #ifdef CONFIG_DYNAMIC_FTRACE
8392
8393 static ssize_t
8394 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8395                   size_t cnt, loff_t *ppos)
8396 {
8397         ssize_t ret;
8398         char *buf;
8399         int r;
8400
8401         /* 256 should be plenty to hold the amount needed */
8402         buf = kmalloc(256, GFP_KERNEL);
8403         if (!buf)
8404                 return -ENOMEM;
8405
8406         r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
8407                       ftrace_update_tot_cnt,
8408                       ftrace_number_of_pages,
8409                       ftrace_number_of_groups);
8410
8411         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8412         kfree(buf);
8413         return ret;
8414 }
8415
8416 static const struct file_operations tracing_dyn_info_fops = {
8417         .open           = tracing_open_generic,
8418         .read           = tracing_read_dyn_info,
8419         .llseek         = generic_file_llseek,
8420 };
8421 #endif /* CONFIG_DYNAMIC_FTRACE */
8422
8423 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8424 static void
8425 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8426                 struct trace_array *tr, struct ftrace_probe_ops *ops,
8427                 void *data)
8428 {
8429         tracing_snapshot_instance(tr);
8430 }
8431
8432 static void
8433 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8434                       struct trace_array *tr, struct ftrace_probe_ops *ops,
8435                       void *data)
8436 {
8437         struct ftrace_func_mapper *mapper = data;
8438         long *count = NULL;
8439
8440         if (mapper)
8441                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8442
8443         if (count) {
8444
8445                 if (*count <= 0)
8446                         return;
8447
8448                 (*count)--;
8449         }
8450
8451         tracing_snapshot_instance(tr);
8452 }
8453
8454 static int
8455 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8456                       struct ftrace_probe_ops *ops, void *data)
8457 {
8458         struct ftrace_func_mapper *mapper = data;
8459         long *count = NULL;
8460
8461         seq_printf(m, "%ps:", (void *)ip);
8462
8463         seq_puts(m, "snapshot");
8464
8465         if (mapper)
8466                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8467
8468         if (count)
8469                 seq_printf(m, ":count=%ld\n", *count);
8470         else
8471                 seq_puts(m, ":unlimited\n");
8472
8473         return 0;
8474 }
8475
8476 static int
8477 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8478                      unsigned long ip, void *init_data, void **data)
8479 {
8480         struct ftrace_func_mapper *mapper = *data;
8481
8482         if (!mapper) {
8483                 mapper = allocate_ftrace_func_mapper();
8484                 if (!mapper)
8485                         return -ENOMEM;
8486                 *data = mapper;
8487         }
8488
8489         return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8490 }
8491
8492 static void
8493 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8494                      unsigned long ip, void *data)
8495 {
8496         struct ftrace_func_mapper *mapper = data;
8497
8498         if (!ip) {
8499                 if (!mapper)
8500                         return;
8501                 free_ftrace_func_mapper(mapper, NULL);
8502                 return;
8503         }
8504
8505         ftrace_func_mapper_remove_ip(mapper, ip);
8506 }
8507
8508 static struct ftrace_probe_ops snapshot_probe_ops = {
8509         .func                   = ftrace_snapshot,
8510         .print                  = ftrace_snapshot_print,
8511 };
8512
8513 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8514         .func                   = ftrace_count_snapshot,
8515         .print                  = ftrace_snapshot_print,
8516         .init                   = ftrace_snapshot_init,
8517         .free                   = ftrace_snapshot_free,
8518 };
8519
8520 static int
8521 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8522                                char *glob, char *cmd, char *param, int enable)
8523 {
8524         struct ftrace_probe_ops *ops;
8525         void *count = (void *)-1;
8526         char *number;
8527         int ret;
8528
8529         if (!tr)
8530                 return -ENODEV;
8531
8532         /* hash funcs only work with set_ftrace_filter */
8533         if (!enable)
8534                 return -EINVAL;
8535
8536         ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
8537
8538         if (glob[0] == '!')
8539                 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8540
8541         if (!param)
8542                 goto out_reg;
8543
8544         number = strsep(&param, ":");
8545
8546         if (!strlen(number))
8547                 goto out_reg;
8548
8549         /*
8550          * We use the callback data field (which is a pointer)
8551          * as our counter.
8552          */
8553         ret = kstrtoul(number, 0, (unsigned long *)&count);
8554         if (ret)
8555                 return ret;
8556
8557  out_reg:
8558         ret = tracing_alloc_snapshot_instance(tr);
8559         if (ret < 0)
8560                 goto out;
8561
8562         ret = register_ftrace_function_probe(glob, tr, ops, count);
8563
8564  out:
8565         return ret < 0 ? ret : 0;
8566 }
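
/*
 * Usage sketch: this callback implements the "snapshot" filter command
 * of set_ftrace_filter (see Documentation/trace/ftrace.rst).  For
 * example:
 *
 *	echo 'schedule:snapshot' > set_ftrace_filter
 *	echo 'schedule:snapshot:3' > set_ftrace_filter
 *	echo '!schedule:snapshot' > set_ftrace_filter
 *
 * take a snapshot every time schedule() is hit, limit that to three
 * snapshots via the count parsed above, and unregister the probe again,
 * respectively.
 */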
8567
8568 static struct ftrace_func_command ftrace_snapshot_cmd = {
8569         .name                   = "snapshot",
8570         .func                   = ftrace_trace_snapshot_callback,
8571 };
8572
8573 static __init int register_snapshot_cmd(void)
8574 {
8575         return register_ftrace_command(&ftrace_snapshot_cmd);
8576 }
8577 #else
8578 static inline __init int register_snapshot_cmd(void) { return 0; }
8579 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8580
8581 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8582 {
8583         if (WARN_ON(!tr->dir))
8584                 return ERR_PTR(-ENODEV);
8585
8586         /* Top directory uses NULL as the parent */
8587         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8588                 return NULL;
8589
8590         /* All sub buffers have a descriptor */
8591         return tr->dir;
8592 }
8593
8594 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8595 {
8596         struct dentry *d_tracer;
8597
8598         if (tr->percpu_dir)
8599                 return tr->percpu_dir;
8600
8601         d_tracer = tracing_get_dentry(tr);
8602         if (IS_ERR(d_tracer))
8603                 return NULL;
8604
8605         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8606
8607         MEM_FAIL(!tr->percpu_dir,
8608                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8609
8610         return tr->percpu_dir;
8611 }
8612
8613 static struct dentry *
8614 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8615                       void *data, long cpu, const struct file_operations *fops)
8616 {
8617         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8618
8619         if (ret) /* See tracing_get_cpu() */
8620                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
8621         return ret;
8622 }
8623
8624 static void
8625 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8626 {
8627         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8628         struct dentry *d_cpu;
8629         char cpu_dir[30]; /* 30 characters should be more than enough */
8630
8631         if (!d_percpu)
8632                 return;
8633
8634         snprintf(cpu_dir, 30, "cpu%ld", cpu);
8635         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8636         if (!d_cpu) {
8637                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8638                 return;
8639         }
8640
8641         /* per cpu trace_pipe */
8642         trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
8643                                 tr, cpu, &tracing_pipe_fops);
8644
8645         /* per cpu trace */
8646         trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
8647                                 tr, cpu, &tracing_fops);
8648
8649         trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
8650                                 tr, cpu, &tracing_buffers_fops);
8651
8652         trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
8653                                 tr, cpu, &tracing_stats_fops);
8654
8655         trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu,
8656                                 tr, cpu, &tracing_entries_fops);
8657
8658 #ifdef CONFIG_TRACER_SNAPSHOT
8659         trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
8660                                 tr, cpu, &snapshot_fops);
8661
8662         trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
8663                                 tr, cpu, &snapshot_raw_fops);
8664 #endif
8665 }
8666
8667 #ifdef CONFIG_FTRACE_SELFTEST
8668 /* Let selftest have access to static functions in this file */
8669 #include "trace_selftest.c"
8670 #endif
8671
8672 static ssize_t
8673 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8674                         loff_t *ppos)
8675 {
8676         struct trace_option_dentry *topt = filp->private_data;
8677         char *buf;
8678
8679         if (topt->flags->val & topt->opt->bit)
8680                 buf = "1\n";
8681         else
8682                 buf = "0\n";
8683
8684         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8685 }
8686
8687 static ssize_t
8688 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8689                          loff_t *ppos)
8690 {
8691         struct trace_option_dentry *topt = filp->private_data;
8692         unsigned long val;
8693         int ret;
8694
8695         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8696         if (ret)
8697                 return ret;
8698
8699         if (val != 0 && val != 1)
8700                 return -EINVAL;
8701
8702         if (!!(topt->flags->val & topt->opt->bit) != val) {
8703                 mutex_lock(&trace_types_lock);
8704                 ret = __set_tracer_option(topt->tr, topt->flags,
8705                                           topt->opt, !val);
8706                 mutex_unlock(&trace_types_lock);
8707                 if (ret)
8708                         return ret;
8709         }
8710
8711         *ppos += cnt;
8712
8713         return cnt;
8714 }
8715
8716
8717 static const struct file_operations trace_options_fops = {
8718         .open = tracing_open_generic,
8719         .read = trace_options_read,
8720         .write = trace_options_write,
8721         .llseek = generic_file_llseek,
8722 };
8723
8724 /*
8725  * In order to pass in both the trace_array descriptor and the index of
8726  * the flag that the trace option file represents, the trace_array
8727  * has a character array of trace_flags_index[], which holds the index
8728  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8729  * The address of this character array is passed to the flag option file
8730  * read/write callbacks.
8731  *
8732  * In order to extract both the index and the trace_array descriptor,
8733  * get_tr_index() uses the following algorithm.
8734  *
8735  *   idx = *ptr;
8736  *
8737  * This works because the value stored at the pointer is the index
8738  * itself (remember, index[1] == 1).
8739  *
8740  * Then, to get the trace_array descriptor, subtract that index from
8741  * the pointer to get back to the start of the index array:
8742  *
8743  *   ptr - idx == &index[0]
8744  *
8745  * A simple container_of() on that pointer then gets us to the
8746  * trace_array descriptor (see the worked example after get_tr_index()).
8747  */
8748 static void get_tr_index(void *data, struct trace_array **ptr,
8749                          unsigned int *pindex)
8750 {
8751         *pindex = *(unsigned char *)data;
8752
8753         *ptr = container_of(data - *pindex, struct trace_array,
8754                             trace_flags_index);
8755 }
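
/*
 * Worked example of the scheme described above: say the option file for
 * flag bit 3 was created with data == &tr->trace_flags_index[3].  Since
 * init_trace_flags_index() stored 3 in that slot:
 *
 *	idx  = *(unsigned char *)data;	// 3
 *	base = data - idx;		// &tr->trace_flags_index[0]
 *	tr   = container_of(base, struct trace_array, trace_flags_index);
 *
 * recovering both the flag index and its trace_array from one pointer.
 */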
8756
8757 static ssize_t
8758 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8759                         loff_t *ppos)
8760 {
8761         void *tr_index = filp->private_data;
8762         struct trace_array *tr;
8763         unsigned int index;
8764         char *buf;
8765
8766         get_tr_index(tr_index, &tr, &index);
8767
8768         if (tr->trace_flags & (1 << index))
8769                 buf = "1\n";
8770         else
8771                 buf = "0\n";
8772
8773         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8774 }
8775
8776 static ssize_t
8777 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8778                          loff_t *ppos)
8779 {
8780         void *tr_index = filp->private_data;
8781         struct trace_array *tr;
8782         unsigned int index;
8783         unsigned long val;
8784         int ret;
8785
8786         get_tr_index(tr_index, &tr, &index);
8787
8788         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8789         if (ret)
8790                 return ret;
8791
8792         if (val != 0 && val != 1)
8793                 return -EINVAL;
8794
8795         mutex_lock(&event_mutex);
8796         mutex_lock(&trace_types_lock);
8797         ret = set_tracer_flag(tr, 1 << index, val);
8798         mutex_unlock(&trace_types_lock);
8799         mutex_unlock(&event_mutex);
8800
8801         if (ret < 0)
8802                 return ret;
8803
8804         *ppos += cnt;
8805
8806         return cnt;
8807 }
8808
8809 static const struct file_operations trace_options_core_fops = {
8810         .open = tracing_open_generic,
8811         .read = trace_options_core_read,
8812         .write = trace_options_core_write,
8813         .llseek = generic_file_llseek,
8814 };
8815
8816 struct dentry *trace_create_file(const char *name,
8817                                  umode_t mode,
8818                                  struct dentry *parent,
8819                                  void *data,
8820                                  const struct file_operations *fops)
8821 {
8822         struct dentry *ret;
8823
8824         ret = tracefs_create_file(name, mode, parent, data, fops);
8825         if (!ret)
8826                 pr_warn("Could not create tracefs '%s' entry\n", name);
8827
8828         return ret;
8829 }
8830
8831
8832 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8833 {
8834         struct dentry *d_tracer;
8835
8836         if (tr->options)
8837                 return tr->options;
8838
8839         d_tracer = tracing_get_dentry(tr);
8840         if (IS_ERR(d_tracer))
8841                 return NULL;
8842
8843         tr->options = tracefs_create_dir("options", d_tracer);
8844         if (!tr->options) {
8845                 pr_warn("Could not create tracefs directory 'options'\n");
8846                 return NULL;
8847         }
8848
8849         return tr->options;
8850 }
8851
8852 static void
8853 create_trace_option_file(struct trace_array *tr,
8854                          struct trace_option_dentry *topt,
8855                          struct tracer_flags *flags,
8856                          struct tracer_opt *opt)
8857 {
8858         struct dentry *t_options;
8859
8860         t_options = trace_options_init_dentry(tr);
8861         if (!t_options)
8862                 return;
8863
8864         topt->flags = flags;
8865         topt->opt = opt;
8866         topt->tr = tr;
8867
8868         topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
8869                                         t_options, topt, &trace_options_fops);
8870
8871 }
8872
8873 static void
8874 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8875 {
8876         struct trace_option_dentry *topts;
8877         struct trace_options *tr_topts;
8878         struct tracer_flags *flags;
8879         struct tracer_opt *opts;
8880         int cnt;
8881         int i;
8882
8883         if (!tracer)
8884                 return;
8885
8886         flags = tracer->flags;
8887
8888         if (!flags || !flags->opts)
8889                 return;
8890
8891         /*
8892          * If this is an instance, only create flags for tracers
8893          * the instance may have.
8894          */
8895         if (!trace_ok_for_array(tracer, tr))
8896                 return;
8897
8898         for (i = 0; i < tr->nr_topts; i++) {
8899                 /* Make sure there are no duplicate flags. */
8900                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8901                         return;
8902         }
8903
8904         opts = flags->opts;
8905
8906         for (cnt = 0; opts[cnt].name; cnt++)
8907                 ;
8908
8909         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8910         if (!topts)
8911                 return;
8912
8913         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8914                             GFP_KERNEL);
8915         if (!tr_topts) {
8916                 kfree(topts);
8917                 return;
8918         }
8919
8920         tr->topts = tr_topts;
8921         tr->topts[tr->nr_topts].tracer = tracer;
8922         tr->topts[tr->nr_topts].topts = topts;
8923         tr->nr_topts++;
8924
8925         for (cnt = 0; opts[cnt].name; cnt++) {
8926                 create_trace_option_file(tr, &topts[cnt], flags,
8927                                          &opts[cnt]);
8928                 MEM_FAIL(topts[cnt].entry == NULL,
8929                           "Failed to create trace option: %s",
8930                           opts[cnt].name);
8931         }
8932 }
8933
8934 static struct dentry *
8935 create_trace_option_core_file(struct trace_array *tr,
8936                               const char *option, long index)
8937 {
8938         struct dentry *t_options;
8939
8940         t_options = trace_options_init_dentry(tr);
8941         if (!t_options)
8942                 return NULL;
8943
8944         return trace_create_file(option, TRACE_MODE_WRITE, t_options,
8945                                  (void *)&tr->trace_flags_index[index],
8946                                  &trace_options_core_fops);
8947 }
8948
8949 static void create_trace_options_dir(struct trace_array *tr)
8950 {
8951         struct dentry *t_options;
8952         bool top_level = tr == &global_trace;
8953         int i;
8954
8955         t_options = trace_options_init_dentry(tr);
8956         if (!t_options)
8957                 return;
8958
8959         for (i = 0; trace_options[i]; i++) {
8960                 if (top_level ||
8961                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
8962                         create_trace_option_core_file(tr, trace_options[i], i);
8963         }
8964 }
8965
8966 static ssize_t
8967 rb_simple_read(struct file *filp, char __user *ubuf,
8968                size_t cnt, loff_t *ppos)
8969 {
8970         struct trace_array *tr = filp->private_data;
8971         char buf[64];
8972         int r;
8973
8974         r = tracer_tracing_is_on(tr);
8975         r = sprintf(buf, "%d\n", r);
8976
8977         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8978 }
8979
8980 static ssize_t
8981 rb_simple_write(struct file *filp, const char __user *ubuf,
8982                 size_t cnt, loff_t *ppos)
8983 {
8984         struct trace_array *tr = filp->private_data;
8985         struct trace_buffer *buffer = tr->array_buffer.buffer;
8986         unsigned long val;
8987         int ret;
8988
8989         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8990         if (ret)
8991                 return ret;
8992
8993         if (buffer) {
8994                 mutex_lock(&trace_types_lock);
8995                 if (!!val == tracer_tracing_is_on(tr)) {
8996                         val = 0; /* do nothing */
8997                 } else if (val) {
8998                         tracer_tracing_on(tr);
8999                         if (tr->current_trace->start)
9000                                 tr->current_trace->start(tr);
9001                 } else {
9002                         tracer_tracing_off(tr);
9003                         if (tr->current_trace->stop)
9004                                 tr->current_trace->stop(tr);
9005                 }
9006                 mutex_unlock(&trace_types_lock);
9007         }
9008
9009         (*ppos)++;
9010
9011         return cnt;
9012 }
9013
9014 static const struct file_operations rb_simple_fops = {
9015         .open           = tracing_open_generic_tr,
9016         .read           = rb_simple_read,
9017         .write          = rb_simple_write,
9018         .release        = tracing_release_generic_tr,
9019         .llseek         = default_llseek,
9020 };
9021
9022 static ssize_t
9023 buffer_percent_read(struct file *filp, char __user *ubuf,
9024                     size_t cnt, loff_t *ppos)
9025 {
9026         struct trace_array *tr = filp->private_data;
9027         char buf[64];
9028         int r;
9029
9030         r = tr->buffer_percent;
9031         r = sprintf(buf, "%d\n", r);
9032
9033         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9034 }
9035
9036 static ssize_t
9037 buffer_percent_write(struct file *filp, const char __user *ubuf,
9038                      size_t cnt, loff_t *ppos)
9039 {
9040         struct trace_array *tr = filp->private_data;
9041         unsigned long val;
9042         int ret;
9043
9044         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9045         if (ret)
9046                 return ret;
9047
9048         if (val > 100)
9049                 return -EINVAL;
9050
9051         if (!val)
9052                 val = 1;
9053
9054         tr->buffer_percent = val;
9055
9056         (*ppos)++;
9057
9058         return cnt;
9059 }
9060
9061 static const struct file_operations buffer_percent_fops = {
9062         .open           = tracing_open_generic_tr,
9063         .read           = buffer_percent_read,
9064         .write          = buffer_percent_write,
9065         .release        = tracing_release_generic_tr,
9066         .llseek         = default_llseek,
9067 };
9068
9069 static struct dentry *trace_instance_dir;
9070
9071 static void
9072 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9073
9074 static int
9075 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9076 {
9077         enum ring_buffer_flags rb_flags;
9078
9079         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9080
9081         buf->tr = tr;
9082
9083         buf->buffer = ring_buffer_alloc(size, rb_flags);
9084         if (!buf->buffer)
9085                 return -ENOMEM;
9086
9087         buf->data = alloc_percpu(struct trace_array_cpu);
9088         if (!buf->data) {
9089                 ring_buffer_free(buf->buffer);
9090                 buf->buffer = NULL;
9091                 return -ENOMEM;
9092         }
9093
9094         /* Allocate the first page for all buffers */
9095         set_buffer_entries(&tr->array_buffer,
9096                            ring_buffer_size(tr->array_buffer.buffer, 0));
9097
9098         return 0;
9099 }
9100
9101 static int allocate_trace_buffers(struct trace_array *tr, int size)
9102 {
9103         int ret;
9104
9105         ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9106         if (ret)
9107                 return ret;
9108
9109 #ifdef CONFIG_TRACER_MAX_TRACE
9110         ret = allocate_trace_buffer(tr, &tr->max_buffer,
9111                                     allocate_snapshot ? size : 1);
9112         if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9113                 ring_buffer_free(tr->array_buffer.buffer);
9114                 tr->array_buffer.buffer = NULL;
9115                 free_percpu(tr->array_buffer.data);
9116                 tr->array_buffer.data = NULL;
9117                 return -ENOMEM;
9118         }
9119         tr->allocated_snapshot = allocate_snapshot;
9120
9121         /*
9122          * Only the top level trace array gets its snapshot allocated
9123          * from the kernel command line.
9124          */
9125         allocate_snapshot = false;
9126 #endif
9127
9128         return 0;
9129 }
9130
9131 static void free_trace_buffer(struct array_buffer *buf)
9132 {
9133         if (buf->buffer) {
9134                 ring_buffer_free(buf->buffer);
9135                 buf->buffer = NULL;
9136                 free_percpu(buf->data);
9137                 buf->data = NULL;
9138         }
9139 }
9140
9141 static void free_trace_buffers(struct trace_array *tr)
9142 {
9143         if (!tr)
9144                 return;
9145
9146         free_trace_buffer(&tr->array_buffer);
9147
9148 #ifdef CONFIG_TRACER_MAX_TRACE
9149         free_trace_buffer(&tr->max_buffer);
9150 #endif
9151 }
9152
9153 static void init_trace_flags_index(struct trace_array *tr)
9154 {
9155         int i;
9156
9157         /* Used by the trace options files */
9158         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9159                 tr->trace_flags_index[i] = i;
9160 }
9161
9162 static void __update_tracer_options(struct trace_array *tr)
9163 {
9164         struct tracer *t;
9165
9166         for (t = trace_types; t; t = t->next)
9167                 add_tracer_options(tr, t);
9168 }
9169
9170 static void update_tracer_options(struct trace_array *tr)
9171 {
9172         mutex_lock(&trace_types_lock);
9173         __update_tracer_options(tr);
9174         mutex_unlock(&trace_types_lock);
9175 }
9176
9177 /* Must have trace_types_lock held */
9178 struct trace_array *trace_array_find(const char *instance)
9179 {
9180         struct trace_array *tr, *found = NULL;
9181
9182         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9183                 if (tr->name && strcmp(tr->name, instance) == 0) {
9184                         found = tr;
9185                         break;
9186                 }
9187         }
9188
9189         return found;
9190 }
9191
9192 struct trace_array *trace_array_find_get(const char *instance)
9193 {
9194         struct trace_array *tr;
9195
9196         mutex_lock(&trace_types_lock);
9197         tr = trace_array_find(instance);
9198         if (tr)
9199                 tr->ref++;
9200         mutex_unlock(&trace_types_lock);
9201
9202         return tr;
9203 }
9204
9205 static int trace_array_create_dir(struct trace_array *tr)
9206 {
9207         int ret;
9208
9209         tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9210         if (!tr->dir)
9211                 return -EINVAL;
9212
9213         ret = event_trace_add_tracer(tr->dir, tr);
9214         if (ret) {
9215                 tracefs_remove(tr->dir);
9216                 return ret;
9217         }
9218
9219         init_tracer_tracefs(tr, tr->dir);
9220         __update_tracer_options(tr);
9221
9222         return ret;
9223 }
9224
9225 static struct trace_array *trace_array_create(const char *name)
9226 {
9227         struct trace_array *tr;
9228         int ret;
9229
9230         ret = -ENOMEM;
9231         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9232         if (!tr)
9233                 return ERR_PTR(ret);
9234
9235         tr->name = kstrdup(name, GFP_KERNEL);
9236         if (!tr->name)
9237                 goto out_free_tr;
9238
9239         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9240                 goto out_free_tr;
9241
9242         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9243
9244         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9245
9246         raw_spin_lock_init(&tr->start_lock);
9247
9248         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9249
9250         tr->current_trace = &nop_trace;
9251
9252         INIT_LIST_HEAD(&tr->systems);
9253         INIT_LIST_HEAD(&tr->events);
9254         INIT_LIST_HEAD(&tr->hist_vars);
9255         INIT_LIST_HEAD(&tr->err_log);
9256
9257         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9258                 goto out_free_tr;
9259
9260         if (ftrace_allocate_ftrace_ops(tr) < 0)
9261                 goto out_free_tr;
9262
9263         ftrace_init_trace_array(tr);
9264
9265         init_trace_flags_index(tr);
9266
9267         if (trace_instance_dir) {
9268                 ret = trace_array_create_dir(tr);
9269                 if (ret)
9270                         goto out_free_tr;
9271         } else
9272                 __trace_early_add_events(tr);
9273
9274         list_add(&tr->list, &ftrace_trace_arrays);
9275
9276         tr->ref++;
9277
9278         return tr;
9279
9280  out_free_tr:
9281         ftrace_free_ftrace_ops(tr);
9282         free_trace_buffers(tr);
9283         free_cpumask_var(tr->tracing_cpumask);
9284         kfree(tr->name);
9285         kfree(tr);
9286
9287         return ERR_PTR(ret);
9288 }
9289
9290 static int instance_mkdir(const char *name)
9291 {
9292         struct trace_array *tr;
9293         int ret;
9294
9295         mutex_lock(&event_mutex);
9296         mutex_lock(&trace_types_lock);
9297
9298         ret = -EEXIST;
9299         if (trace_array_find(name))
9300                 goto out_unlock;
9301
9302         tr = trace_array_create(name);
9303
9304         ret = PTR_ERR_OR_ZERO(tr);
9305
9306 out_unlock:
9307         mutex_unlock(&trace_types_lock);
9308         mutex_unlock(&event_mutex);
9309         return ret;
9310 }
9311
9312 /**
9313  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9314  * @name: The name of the trace array to be looked up/created.
9315  *
9316  * Returns a pointer to the trace array with the given name, or
9317  * NULL if it cannot be created.
9318  *
9319  * NOTE: This function increments the reference counter associated with the
9320  * trace array returned. This makes sure it cannot be freed while in use.
9321  * Use trace_array_put() once the trace array is no longer needed.
9322  * If the trace_array is to be freed, trace_array_destroy() needs to
9323  * be called after the trace_array_put(), or simply let user space delete
9324  * it from the tracefs instances directory. But until the
9325  * trace_array_put() is called, user space cannot delete it.
9326  *
9327  */
9328 struct trace_array *trace_array_get_by_name(const char *name)
9329 {
9330         struct trace_array *tr;
9331
9332         mutex_lock(&event_mutex);
9333         mutex_lock(&trace_types_lock);
9334
9335         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9336                 if (tr->name && strcmp(tr->name, name) == 0)
9337                         goto out_unlock;
9338         }
9339
9340         tr = trace_array_create(name);
9341
9342         if (IS_ERR(tr))
9343                 tr = NULL;
9344 out_unlock:
9345         if (tr)
9346                 tr->ref++;
9347
9348         mutex_unlock(&trace_types_lock);
9349         mutex_unlock(&event_mutex);
9350         return tr;
9351 }
9352 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
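
/*
 * Lifecycle sketch for the API above (hypothetical module code, not
 * from this file):
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("my_instance");	// create or look up
 *	if (!tr)
 *		return -ENOMEM;
 *	...
 *	trace_array_put(tr);		// drop the reference when done
 *	trace_array_destroy(tr);	// optional: remove the instance
 *
 * As documented above, user space cannot delete the instance while the
 * reference taken by trace_array_get_by_name() is still held, and
 * trace_array_destroy() must come after trace_array_put().
 */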
9353
9354 static int __remove_instance(struct trace_array *tr)
9355 {
9356         int i;
9357
9358         /* Reference counter for a newly created trace array = 1. */
9359         if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9360                 return -EBUSY;
9361
9362         list_del(&tr->list);
9363
9364         /* Disable all the flags that were enabled coming in */
9365         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9366                 if ((1 << i) & ZEROED_TRACE_FLAGS)
9367                         set_tracer_flag(tr, 1 << i, 0);
9368         }
9369
9370         tracing_set_nop(tr);
9371         clear_ftrace_function_probes(tr);
9372         event_trace_del_tracer(tr);
9373         ftrace_clear_pids(tr);
9374         ftrace_destroy_function_files(tr);
9375         tracefs_remove(tr->dir);
9376         free_percpu(tr->last_func_repeats);
9377         free_trace_buffers(tr);
9378
9379         for (i = 0; i < tr->nr_topts; i++) {
9380                 kfree(tr->topts[i].topts);
9381         }
9382         kfree(tr->topts);
9383
9384         free_cpumask_var(tr->tracing_cpumask);
9385         kfree(tr->name);
9386         kfree(tr);
9387
9388         return 0;
9389 }
9390
9391 int trace_array_destroy(struct trace_array *this_tr)
9392 {
9393         struct trace_array *tr;
9394         int ret;
9395
9396         if (!this_tr)
9397                 return -EINVAL;
9398
9399         mutex_lock(&event_mutex);
9400         mutex_lock(&trace_types_lock);
9401
9402         ret = -ENODEV;
9403
9404         /* Make sure the trace array exists before destroying it. */
9405         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9406                 if (tr == this_tr) {
9407                         ret = __remove_instance(tr);
9408                         break;
9409                 }
9410         }
9411
9412         mutex_unlock(&trace_types_lock);
9413         mutex_unlock(&event_mutex);
9414
9415         return ret;
9416 }
9417 EXPORT_SYMBOL_GPL(trace_array_destroy);
9418
9419 static int instance_rmdir(const char *name)
9420 {
9421         struct trace_array *tr;
9422         int ret;
9423
9424         mutex_lock(&event_mutex);
9425         mutex_lock(&trace_types_lock);
9426
9427         ret = -ENODEV;
9428         tr = trace_array_find(name);
9429         if (tr)
9430                 ret = __remove_instance(tr);
9431
9432         mutex_unlock(&trace_types_lock);
9433         mutex_unlock(&event_mutex);
9434
9435         return ret;
9436 }
9437
9438 static __init void create_trace_instances(struct dentry *d_tracer)
9439 {
9440         struct trace_array *tr;
9441
9442         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9443                                                          instance_mkdir,
9444                                                          instance_rmdir);
9445         if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9446                 return;
9447
9448         mutex_lock(&event_mutex);
9449         mutex_lock(&trace_types_lock);
9450
9451         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9452                 if (!tr->name)
9453                         continue;
9454                 if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9455                              "Failed to create instance directory\n"))
9456                         break;
9457         }
9458
9459         mutex_unlock(&trace_types_lock);
9460         mutex_unlock(&event_mutex);
9461 }
9462
9463 static void
9464 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9465 {
9466         struct trace_event_file *file;
9467         int cpu;
9468
9469         trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
9470                         tr, &show_traces_fops);
9471
9472         trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
9473                         tr, &set_tracer_fops);
9474
9475         trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
9476                           tr, &tracing_cpumask_fops);
9477
9478         trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
9479                           tr, &tracing_iter_fops);
9480
9481         trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
9482                           tr, &tracing_fops);
9483
9484         trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
9485                           tr, &tracing_pipe_fops);
9486
9487         trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
9488                           tr, &tracing_entries_fops);
9489
9490         trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
9491                           tr, &tracing_total_entries_fops);
9492
9493         trace_create_file("free_buffer", 0200, d_tracer,
9494                           tr, &tracing_free_buffer_fops);
9495
9496         trace_create_file("trace_marker", 0220, d_tracer,
9497                           tr, &tracing_mark_fops);
9498
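        /*
         * The "trigger" file of the ftrace:print event allows triggers
         * to be attached to writes into trace_marker.
         */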
9499         file = __find_event_file(tr, "ftrace", "print");
9500         if (file && file->dir)
9501                 trace_create_file("trigger", TRACE_MODE_WRITE, file->dir,
9502                                   file, &event_trigger_fops);
9503         tr->trace_marker_file = file;
9504
9505         trace_create_file("trace_marker_raw", 0220, d_tracer,
9506                           tr, &tracing_mark_raw_fops);
9507
9508         trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
9509                           &trace_clock_fops);
9510
9511         trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
9512                           tr, &rb_simple_fops);
9513
9514         trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
9515                           &trace_time_stamp_mode_fops);
9516
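        /* By default, wake up readers when the ring buffer is half full */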
9517         tr->buffer_percent = 50;
9518
9519         trace_create_file("buffer_percent", TRACE_MODE_WRITE, d_tracer,
9520                         tr, &buffer_percent_fops);
9521
9522         create_trace_options_dir(tr);
9523
9524         trace_create_maxlat_file(tr, d_tracer);
9525
9526         if (ftrace_create_function_files(tr, d_tracer))
9527                 MEM_FAIL(1, "Could not allocate function filter files");
9528
9529 #ifdef CONFIG_TRACER_SNAPSHOT
9530         trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
9531                           tr, &snapshot_fops);
9532 #endif
9533
9534         trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
9535                           tr, &tracing_err_log_fops);
9536
9537         for_each_tracing_cpu(cpu)
9538                 tracing_init_tracefs_percpu(tr, cpu);
9539
9540         ftrace_init_tracefs(tr, d_tracer);
9541 }
9542
9543 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
9544 {
9545         struct vfsmount *mnt;
9546         struct file_system_type *type;
9547
9548         /*
9549          * To maintain backward compatibility for tools that mount
9550          * debugfs to get to the tracing facility, tracefs is automatically
9551          * mounted to the debugfs/tracing directory.
9552          */
9553         type = get_fs_type("tracefs");
9554         if (!type)
9555                 return NULL;
9556         mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9557         put_filesystem(type);
9558         if (IS_ERR(mnt))
9559                 return NULL;
9560         mntget(mnt);
9561
9562         return mnt;
9563 }
9564
9565 /**
9566  * tracing_init_dentry - initialize top level trace array
9567  *
9568  * This is called when creating files or directories in the tracing
9569  * directory. It is called via fs_initcall() by any of the boot up code
9570  * and returns 0 on success or a negative error code on failure.
9571  */
9572 int tracing_init_dentry(void)
9573 {
9574         struct trace_array *tr = &global_trace;
9575
9576         if (security_locked_down(LOCKDOWN_TRACEFS)) {
9577                 pr_warn("Tracing disabled due to lockdown\n");
9578                 return -EPERM;
9579         }
9580
9581         /* The top level trace array uses NULL as parent */
9582         if (tr->dir)
9583                 return 0;
9584
9585         if (WARN_ON(!tracefs_initialized()))
9586                 return -ENODEV;
9587
9588         /*
9589          * As there may still be users that expect the tracing
9590          * files to exist in debugfs/tracing, we must automount
9591          * the tracefs file system there, so older tools still
9592          * work with the newer kernel.
9593          */
9594         tr->dir = debugfs_create_automount("tracing", NULL,
9595                                            trace_automount, NULL);
9596
9597         return 0;
9598 }
9599
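/* Linker section boundaries of the eval maps built into the kernel */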
9600 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9601 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9602
9603 static struct workqueue_struct *eval_map_wq __initdata;
9604 static struct work_struct eval_map_work __initdata;
9605
9606 static void __init eval_map_work_func(struct work_struct *work)
9607 {
9608         int len;
9609
9610         len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9611         trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9612 }
9613
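/*
 * The eval map update is run from a workqueue so that it can proceed
 * while the rest of the boot continues.  trace_eval_sync() below
 * destroys the workqueue from a late initcall, which waits for any
 * pending update to finish first.
 */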
9614 static int __init trace_eval_init(void)
9615 {
9616         INIT_WORK(&eval_map_work, eval_map_work_func);
9617
9618         eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
9619         if (!eval_map_wq) {
9620                 pr_err("Unable to allocate eval_map_wq\n");
9621                 /* Workqueue allocation failed: do the update synchronously */
9622                 eval_map_work_func(&eval_map_work);
9623                 return -ENOMEM;
9624         }
9625
9626         queue_work(eval_map_wq, &eval_map_work);
9627         return 0;
9628 }
9629
9630 static int __init trace_eval_sync(void)
9631 {
9632         /* Make sure the eval map updates are finished */
9633         if (eval_map_wq)
9634                 destroy_workqueue(eval_map_wq);
9635         return 0;
9636 }
9637
9638 late_initcall_sync(trace_eval_sync);
9639
9641 #ifdef CONFIG_MODULES
9642 static void trace_module_add_evals(struct module *mod)
9643 {
9644         if (!mod->num_trace_evals)
9645                 return;
9646
9647         /*
9648          * Modules with bad taint do not have events created, do
9649          * not bother with eval maps either.
9650          */
9651         if (trace_module_has_bad_taint(mod))
9652                 return;
9653
9654         trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9655 }
9656
9657 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
9658 static void trace_module_remove_evals(struct module *mod)
9659 {
9660         union trace_eval_map_item *map;
9661         union trace_eval_map_item **last = &trace_eval_maps;
9662
9663         if (!mod->num_trace_evals)
9664                 return;
9665
9666         mutex_lock(&trace_eval_mutex);
9667
9668         map = trace_eval_maps;
9669
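        /* Find the eval maps that were registered by this module */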
9670         while (map) {
9671                 if (map->head.mod == mod)
9672                         break;
9673                 map = trace_eval_jmp_to_tail(map);
9674                 last = &map->tail.next;
9675                 map = map->tail.next;
9676         }
9677         if (!map)
9678                 goto out;
9679
9680         *last = trace_eval_jmp_to_tail(map)->tail.next;
9681         kfree(map);
9682  out:
9683         mutex_unlock(&trace_eval_mutex);
9684 }
9685 #else
9686 static inline void trace_module_remove_evals(struct module *mod) { }
9687 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9688
9689 static int trace_module_notify(struct notifier_block *self,
9690                                unsigned long val, void *data)
9691 {
9692         struct module *mod = data;
9693
9694         switch (val) {
9695         case MODULE_STATE_COMING:
9696                 trace_module_add_evals(mod);
9697                 break;
9698         case MODULE_STATE_GOING:
9699                 trace_module_remove_evals(mod);
9700                 break;
9701         }
9702
9703         return NOTIFY_OK;
9704 }
9705
9706 static struct notifier_block trace_module_nb = {
9707         .notifier_call = trace_module_notify,
9708         .priority = 0,
9709 };
9710 #endif /* CONFIG_MODULES */
9711
9712 static __init int tracer_init_tracefs(void)
9713 {
9714         int ret;
9715
9716         trace_access_lock_init();
9717
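        /* Nothing to create if the tracefs top level directory is unavailable */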
9718         ret = tracing_init_dentry();
9719         if (ret)
9720                 return 0;
9721
9722         event_trace_init();
9723
9724         init_tracer_tracefs(&global_trace, NULL);
9725         ftrace_init_tracefs_toplevel(&global_trace, NULL);
9726
9727         trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
9728                         &global_trace, &tracing_thresh_fops);
9729
9730         trace_create_file("README", TRACE_MODE_READ, NULL,
9731                         NULL, &tracing_readme_fops);
9732
9733         trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
9734                         NULL, &tracing_saved_cmdlines_fops);
9735
9736         trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
9737                           NULL, &tracing_saved_cmdlines_size_fops);
9738
9739         trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
9740                         NULL, &tracing_saved_tgids_fops);
9741
9742         trace_eval_init();
9743
9744         trace_create_eval_file(NULL);
9745
9746 #ifdef CONFIG_MODULES
9747         register_module_notifier(&trace_module_nb);
9748 #endif
9749
9750 #ifdef CONFIG_DYNAMIC_FTRACE
9751         trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
9752                         NULL, &tracing_dyn_info_fops);
9753 #endif
9754
9755         create_trace_instances(NULL);
9756
9757         update_tracer_options(&global_trace);
9758
9759         return 0;
9760 }
9761
9762 fs_initcall(tracer_init_tracefs);
9763
9764 static int trace_panic_handler(struct notifier_block *this,
9765                                unsigned long event, void *unused)
9766 {
9767         if (ftrace_dump_on_oops)
9768                 ftrace_dump(ftrace_dump_on_oops);
9769         return NOTIFY_OK;
9770 }
9771
9772 static struct notifier_block trace_panic_notifier = {
9773         .notifier_call  = trace_panic_handler,
9774         .next           = NULL,
9775         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
9776 };
9777
9778 static int trace_die_handler(struct notifier_block *self,
9779                              unsigned long val,
9780                              void *data)
9781 {
9782         switch (val) {
9783         case DIE_OOPS:
9784                 if (ftrace_dump_on_oops)
9785                         ftrace_dump(ftrace_dump_on_oops);
9786                 break;
9787         default:
9788                 break;
9789         }
9790         return NOTIFY_OK;
9791 }
9792
9793 static struct notifier_block trace_die_notifier = {
9794         .notifier_call = trace_die_handler,
9795         .priority = 200
9796 };
9797
9798 /*
9799  * The printk buffer is limited to 1024 bytes; we really don't need it
9800  * that big, as nothing should be printing 1000 characters anyway.
9801  */
9802 #define TRACE_MAX_PRINT         1000
9803
9804 /*
9805  * Define here KERN_TRACE so that we have one place to modify
9806  * it if we decide to change what log level the ftrace dump
9807  * should be at.
9808  */
9809 #define KERN_TRACE              KERN_EMERG
9810
9811 void
9812 trace_printk_seq(struct trace_seq *s)
9813 {
9814         /* Truncate oversized output; probably should print a warning here. */
9815         if (s->seq.len >= TRACE_MAX_PRINT)
9816                 s->seq.len = TRACE_MAX_PRINT;
9817
9818         /*
9819          * More paranoid code. Although the buffer size is set to
9820          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
9821          * an extra layer of protection.
9822          */
9823         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
9824                 s->seq.len = s->seq.size - 1;
9825
9826         /* should already be NUL-terminated, but we are paranoid. */
9827         s->buffer[s->seq.len] = 0;
9828
9829         printk(KERN_TRACE "%s", s->buffer);
9830
9831         trace_seq_init(s);
9832 }
9833
9834 void trace_init_global_iter(struct trace_iterator *iter)
9835 {
9836         iter->tr = &global_trace;
9837         iter->trace = iter->tr->current_trace;
9838         iter->cpu_file = RING_BUFFER_ALL_CPUS;
9839         iter->array_buffer = &global_trace.array_buffer;
9840
9841         if (iter->trace && iter->trace->open)
9842                 iter->trace->open(iter);
9843
9844         /* Annotate start of buffers if we had overruns */
9845         if (ring_buffer_overruns(iter->array_buffer->buffer))
9846                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
9847
9848         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
9849         if (trace_clocks[iter->tr->clock_id].in_ns)
9850                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
9851 }
9852
9853 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
9854 {
9855         /* use static because iter can be a bit big for the stack */
9856         static struct trace_iterator iter;
9857         static atomic_t dump_running;
9858         struct trace_array *tr = &global_trace;
9859         unsigned int old_userobj;
9860         unsigned long flags;
9861         int cnt = 0, cpu;
9862
9863         /* Only allow one dump user at a time. */
9864         if (atomic_inc_return(&dump_running) != 1) {
9865                 atomic_dec(&dump_running);
9866                 return;
9867         }
9868
9869         /*
9870          * Always turn off tracing when we dump.
9871          * We don't need to show trace output of what happens
9872          * between multiple crashes.
9873          *
9874          * If the user does a sysrq-z, then they can re-enable
9875          * tracing with echo 1 > tracing_on.
9876          */
9877         tracing_off();
9878
9879         local_irq_save(flags);
9880
9881         /* Simulate the iterator */
9882         trace_init_global_iter(&iter);
9883         /* Cannot use kmalloc for iter.temp and iter.fmt here (may be a panic path) */
9884         iter.temp = static_temp_buf;
9885         iter.temp_size = STATIC_TEMP_BUF_SIZE;
9886         iter.fmt = static_fmt_buf;
9887         iter.fmt_size = STATIC_FMT_BUF_SIZE;
9888
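        /*
         * Disable recording on each CPU so the ring buffers do not
         * change while we read them.
         */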
9889         for_each_tracing_cpu(cpu) {
9890                 atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9891         }
9892
9893         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
9894
9895         /* don't look at user memory in panic mode */
9896         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
9897
9898         switch (oops_dump_mode) {
9899         case DUMP_ALL:
9900                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9901                 break;
9902         case DUMP_ORIG:
9903                 iter.cpu_file = raw_smp_processor_id();
9904                 break;
9905         case DUMP_NONE:
9906                 goto out_enable;
9907         default:
9908                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
9909                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9910         }
9911
9912         printk(KERN_TRACE "Dumping ftrace buffer:\n");
9913
9914         /* Did function tracer already get disabled? */
9915         if (ftrace_is_dead()) {
9916                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
9917                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
9918         }
9919
9920         /*
9921          * We need to stop all tracing on all CPUs to read
9922          * the next buffer. This is a bit expensive, but is
9923          * not done often. We print everything we can read,
9924          * and then release the locks again.
9925          */
9926
9927         while (!trace_empty(&iter)) {
9928
9929                 if (!cnt)
9930                         printk(KERN_TRACE "---------------------------------\n");
9931
9932                 cnt++;
9933
9934                 trace_iterator_reset(&iter);
9935                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
9936
9937                 if (trace_find_next_entry_inc(&iter) != NULL) {
9938                         int ret;
9939
9940                         ret = print_trace_line(&iter);
9941                         if (ret != TRACE_TYPE_NO_CONSUME)
9942                                 trace_consume(&iter);
9943                 }
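                /* Dumping can take a while; keep the NMI watchdog from firing */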
9944                 touch_nmi_watchdog();
9945
9946                 trace_printk_seq(&iter.seq);
9947         }
9948
9949         if (!cnt)
9950                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
9951         else
9952                 printk(KERN_TRACE "---------------------------------\n");
9953
9954  out_enable:
9955         tr->trace_flags |= old_userobj;
9956
9957         for_each_tracing_cpu(cpu) {
9958                 atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9959         }
9960         atomic_dec(&dump_running);
9961         local_irq_restore(flags);
9962 }
9963 EXPORT_SYMBOL_GPL(ftrace_dump);
9964
9965 #define WRITE_BUFSIZE  4096
9966
9967 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
9968                                 size_t count, loff_t *ppos,
9969                                 int (*createfn)(const char *))
9970 {
9971         char *kbuf, *buf, *tmp;
9972         int ret = 0;
9973         size_t done = 0;
9974         size_t size;
9975
9976         kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
9977         if (!kbuf)
9978                 return -ENOMEM;
9979
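        /*
         * Copy the user buffer in chunks of at most WRITE_BUFSIZE - 1
         * bytes and pass each line, with anything after a '#' stripped
         * off as a comment, to createfn().
         */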
9980         while (done < count) {
9981                 size = count - done;
9982
9983                 if (size >= WRITE_BUFSIZE)
9984                         size = WRITE_BUFSIZE - 1;
9985
9986                 if (copy_from_user(kbuf, buffer + done, size)) {
9987                         ret = -EFAULT;
9988                         goto out;
9989                 }
9990                 kbuf[size] = '\0';
9991                 buf = kbuf;
9992                 do {
9993                         tmp = strchr(buf, '\n');
9994                         if (tmp) {
9995                                 *tmp = '\0';
9996                                 size = tmp - buf + 1;
9997                         } else {
9998                                 size = strlen(buf);
9999                                 if (done + size < count) {
10000                                         if (buf != kbuf)
10001                                                 break;
10002                                         /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
10003                                         pr_warn("Line length is too long: Should be less than %d\n",
10004                                                 WRITE_BUFSIZE - 2);
10005                                         ret = -EINVAL;
10006                                         goto out;
10007                                 }
10008                         }
10009                         done += size;
10010
10011                         /* Remove comments */
10012                         tmp = strchr(buf, '#');
10013
10014                         if (tmp)
10015                                 *tmp = '\0';
10016
10017                         ret = createfn(buf);
10018                         if (ret)
10019                                 goto out;
10020                         buf += size;
10021
10022                 } while (done < count);
10023         }
10024         ret = done;
10025
10026 out:
10027         kfree(kbuf);
10028
10029         return ret;
10030 }
10031
10032 __init static int tracer_alloc_buffers(void)
10033 {
10034         int ring_buf_size;
10035         int ret = -ENOMEM;
10036
10038         if (security_locked_down(LOCKDOWN_TRACEFS)) {
10039                 pr_warn("Tracing disabled due to lockdown\n");
10040                 return -EPERM;
10041         }
10042
10043         /*
10044          * Make sure we don't accidentally add more trace options
10045          * than we have bits for.
10046          */
10047         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
10048
10049         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
10050                 goto out;
10051
10052         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
10053                 goto out_free_buffer_mask;
10054
10055         /* Only allocate trace_printk buffers if a trace_printk exists */
10056         if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
10057                 /* Must be called before global_trace.buffer is allocated */
10058                 trace_printk_init_buffers();
10059
10060         /* To save memory, keep the ring buffer size to its minimum */
10061         if (ring_buffer_expanded)
10062                 ring_buf_size = trace_buf_size;
10063         else
10064                 ring_buf_size = 1;
10065
10066         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
10067         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
10068
10069         raw_spin_lock_init(&global_trace.start_lock);
10070
10071         /*
10072          * The prepare callback allocates some memory for the ring buffer. We
10073          * don't free the buffer if the CPU goes down. If we were to free
10074          * the buffer, then the user would lose any trace that was in the
10075          * buffer. The memory will be removed once the "instance" is removed.
10076          */
10077         ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
10078                                       "trace/RB:prepare", trace_rb_cpu_prepare,
10079                                       NULL);
10080         if (ret < 0)
10081                 goto out_free_cpumask;
10082         /* Used for event triggers */
10083         ret = -ENOMEM;
10084         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
10085         if (!temp_buffer)
10086                 goto out_rm_hp_state;
10087
10088         if (trace_create_savedcmd() < 0)
10089                 goto out_free_temp_buffer;
10090
10091         /* TODO: make the number of buffers hot pluggable with CPUs */
10092         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
10093                 MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
10094                 goto out_free_savedcmd;
10095         }
10096
10097         if (global_trace.buffer_disabled)
10098                 tracing_off();
10099
10100         if (trace_boot_clock) {
10101                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
10102                 if (ret < 0)
10103                         pr_warn("Trace clock %s not defined, going back to default\n",
10104                                 trace_boot_clock);
10105         }
10106
10107         /*
10108          * register_tracer() might reference current_trace, so it
10109          * needs to be set before we register anything. This is
10110          * just a bootstrap of current_trace anyway.
10111          */
10112         global_trace.current_trace = &nop_trace;
10113
10114         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
10115
10116         ftrace_init_global_array_ops(&global_trace);
10117
10118         init_trace_flags_index(&global_trace);
10119
10120         register_tracer(&nop_trace);
10121
10122         /* Function tracing may start here (via kernel command line) */
10123         init_function_trace();
10124
10125         /* All seems OK, enable tracing */
10126         tracing_disabled = 0;
10127
10128         atomic_notifier_chain_register(&panic_notifier_list,
10129                                        &trace_panic_notifier);
10130
10131         register_die_notifier(&trace_die_notifier);
10132
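        /* global_trace is the top level (default) trace array */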
10133         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
10134
10135         INIT_LIST_HEAD(&global_trace.systems);
10136         INIT_LIST_HEAD(&global_trace.events);
10137         INIT_LIST_HEAD(&global_trace.hist_vars);
10138         INIT_LIST_HEAD(&global_trace.err_log);
10139         list_add(&global_trace.list, &ftrace_trace_arrays);
10140
10141         apply_trace_boot_options();
10142
10143         register_snapshot_cmd();
10144
10145         test_can_verify();
10146
10147         return 0;
10148
10149 out_free_savedcmd:
10150         free_saved_cmdlines_buffer(savedcmd);
10151 out_free_temp_buffer:
10152         ring_buffer_free(temp_buffer);
10153 out_rm_hp_state:
10154         cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
10155 out_free_cpumask:
10156         free_cpumask_var(global_trace.tracing_cpumask);
10157 out_free_buffer_mask:
10158         free_cpumask_var(tracing_buffer_mask);
10159 out:
10160         return ret;
10161 }
10162
10163 void __init ftrace_boot_snapshot(void)
10164 {
10165         if (snapshot_at_boot) {
10166                 tracing_snapshot();
10167                 internal_trace_puts("** Boot snapshot taken **\n");
10168         }
10169 }
10170
10171 void __init early_trace_init(void)
10172 {
10173         if (tracepoint_printk) {
10174                 tracepoint_print_iter =
10175                         kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
10176                 if (MEM_FAIL(!tracepoint_print_iter,
10177                              "Failed to allocate trace iterator\n"))
10178                         tracepoint_printk = 0;
10179                 else
10180                         static_key_enable(&tracepoint_printk_key.key);
10181         }
10182         tracer_alloc_buffers();
10183 }
10184
10185 void __init trace_init(void)
10186 {
10187         trace_event_init();
10188 }
10189
10190 __init static void clear_boot_tracer(void)
10191 {
10192         /*
10193          * The default bootup tracer name points into an init section.
10194          * This function is called from a late initcall. If we did not
10195          * find the boot tracer, then clear the pointer out, to prevent
10196          * later registrations from accessing the init memory that is
10197          * about to be freed.
10198          */
10199         if (!default_bootup_tracer)
10200                 return;
10201
10202         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
10203                default_bootup_tracer);
10204         default_bootup_tracer = NULL;
10205 }
10206
10207 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
10208 __init static void tracing_set_default_clock(void)
10209 {
10210         /* sched_clock_stable() is determined in late_initcall */
10211         if (!trace_boot_clock && !sched_clock_stable()) {
10212                 if (security_locked_down(LOCKDOWN_TRACEFS)) {
10213                         pr_warn("Can not set tracing clock due to lockdown\n");
10214                         return;
10215                 }
10216
10217                 printk(KERN_WARNING
10218                        "Unstable clock detected, switching default tracing clock to \"global\"\n"
10219                        "If you want to keep using the local clock, then add:\n"
10220                        "  \"trace_clock=local\"\n"
10221                        "on the kernel command line\n");
10222                 tracing_set_clock(&global_trace, "global");
10223         }
10224 }
10225 #else
10226 static inline void tracing_set_default_clock(void) { }
10227 #endif
10228
10229 __init static int late_trace_init(void)
10230 {
10231         if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
10232                 static_key_disable(&tracepoint_printk_key.key);
10233                 tracepoint_printk = 0;
10234         }
10235
10236         tracing_set_default_clock();
10237         clear_boot_tracer();
10238         return 0;
10239 }
10240
10241 late_initcall_sync(late_trace_init);