kernel/trace/trace.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/notifier.h>
23 #include <linux/irqflags.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/poll.h>
43 #include <linux/nmi.h>
44 #include <linux/fs.h>
45 #include <linux/trace.h>
46 #include <linux/sched/clock.h>
47 #include <linux/sched/rt.h>
48 #include <linux/fsnotify.h>
49 #include <linux/irq_work.h>
50 #include <linux/workqueue.h>
51
52 #include "trace.h"
53 #include "trace_output.h"
54
55 /*
56  * On boot up, the ring buffer is set to the minimum size, so that
57  * we do not waste memory on systems that are not using tracing.
58  */
59 bool ring_buffer_expanded;
60
61 /*
62  * We need to change this state when a selftest is running.
63  * A selftest will peek into the ring-buffer to count the
64  * entries inserted during the selftest, although concurrent
65  * insertions into the ring-buffer, such as trace_printk(), could occur
66  * at the same time, giving false positive or negative results.
67  */
68 static bool __read_mostly tracing_selftest_running;
69
70 /*
71  * If a tracer is running, we do not want to run SELFTEST.
72  */
73 bool __read_mostly tracing_selftest_disabled;
74
75 /* Pipe tracepoints to printk */
76 struct trace_iterator *tracepoint_print_iter;
77 int tracepoint_printk;
78 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
79
80 /* For tracers that don't implement custom flags */
81 static struct tracer_opt dummy_tracer_opt[] = {
82         { }
83 };
84
85 static int
86 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
87 {
88         return 0;
89 }
90
91 /*
92  * To prevent the comm cache from being overwritten when no
93  * tracing is active, only save the comm when a trace event
94  * occurred.
95  */
96 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
97
98 /*
99  * Kill all tracing for good (never come back).
100  * It is initialized to 1 but will turn to zero if the initialization
101  * of the tracer is successful. But that is the only place that sets
102  * this back to zero.
103  */
104 static int tracing_disabled = 1;
105
106 cpumask_var_t __read_mostly     tracing_buffer_mask;
107
108 /*
109  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
110  *
111  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
112  * is set, then ftrace_dump is called. This will output the contents
113  * of the ftrace buffers to the console.  This is very useful for
114  * capturing traces that lead to crashes and outputting them to a
115  * serial console.
116  *
117  * It is off by default, but you can enable it either by specifying
118  * "ftrace_dump_on_oops" in the kernel command line, or setting
119  * /proc/sys/kernel/ftrace_dump_on_oops
120  * Set 1 if you want to dump buffers of all CPUs
121  * Set 2 if you want to dump the buffer of the CPU that triggered oops
122  */
123
124 enum ftrace_dump_mode ftrace_dump_on_oops;
125
126 /* When set, tracing will stop when a WARN*() is hit */
127 int __disable_trace_on_warning;
128
129 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
130 /* Map of enums to their values, for "eval_map" file */
131 struct trace_eval_map_head {
132         struct module                   *mod;
133         unsigned long                   length;
134 };
135
136 union trace_eval_map_item;
137
138 struct trace_eval_map_tail {
139         /*
140          * "end" is first and points to NULL as it must be different
141          * from "mod" or "eval_string"
142          */
143         union trace_eval_map_item       *next;
144         const char                      *end;   /* points to NULL */
145 };
146
147 static DEFINE_MUTEX(trace_eval_mutex);
148
149 /*
150  * The trace_eval_maps are saved in an array with two extra elements,
151  * one at the beginning, and one at the end. The beginning item contains
152  * the count of the saved maps (head.length), and the module they
153  * belong to if not built in (head.mod). The ending item contains a
154  * pointer to the next array of saved eval_map items.
155  */
156 union trace_eval_map_item {
157         struct trace_eval_map           map;
158         struct trace_eval_map_head      head;
159         struct trace_eval_map_tail      tail;
160 };
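/*
 * Editor's note (not part of the original file): illustration of the layout
 * described above. Each saved array looks like
 *
 *	[ head | map 0 | map 1 | ... | map N-1 | tail ]
 *
 * where head.length holds N, head.mod the owning module (or NULL if built
 * in), and tail.next points to the next saved array (whose first element is
 * that array's head item).
 */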
161
162 static union trace_eval_map_item *trace_eval_maps;
163 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
164
165 int tracing_set_tracer(struct trace_array *tr, const char *buf);
166 static void ftrace_trace_userstack(struct trace_buffer *buffer,
167                                    unsigned long flags, int pc);
168
169 #define MAX_TRACER_SIZE         100
170 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
171 static char *default_bootup_tracer;
172
173 static bool allocate_snapshot;
174
175 static int __init set_cmdline_ftrace(char *str)
176 {
177         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
178         default_bootup_tracer = bootup_tracer_buf;
179         /* We are using ftrace early, expand it */
180         ring_buffer_expanded = true;
181         return 1;
182 }
183 __setup("ftrace=", set_cmdline_ftrace);
184
185 static int __init set_ftrace_dump_on_oops(char *str)
186 {
187         if (*str++ != '=' || !*str) {
188                 ftrace_dump_on_oops = DUMP_ALL;
189                 return 1;
190         }
191
192         if (!strcmp("orig_cpu", str)) {
193                 ftrace_dump_on_oops = DUMP_ORIG;
194                 return 1;
195         }
196
197         return 0;
198 }
199 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
200
201 static int __init stop_trace_on_warning(char *str)
202 {
203         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
204                 __disable_trace_on_warning = 1;
205         return 1;
206 }
207 __setup("traceoff_on_warning", stop_trace_on_warning);
208
209 static int __init boot_alloc_snapshot(char *str)
210 {
211         allocate_snapshot = true;
212         /* We also need the main ring buffer expanded */
213         ring_buffer_expanded = true;
214         return 1;
215 }
216 __setup("alloc_snapshot", boot_alloc_snapshot);
217
218
219 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
220
221 static int __init set_trace_boot_options(char *str)
222 {
223         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
224         return 0;
225 }
226 __setup("trace_options=", set_trace_boot_options);
227
228 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
229 static char *trace_boot_clock __initdata;
230
231 static int __init set_trace_boot_clock(char *str)
232 {
233         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
234         trace_boot_clock = trace_boot_clock_buf;
235         return 0;
236 }
237 __setup("trace_clock=", set_trace_boot_clock);
238
239 static int __init set_tracepoint_printk(char *str)
240 {
241         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
242                 tracepoint_printk = 1;
243         return 1;
244 }
245 __setup("tp_printk", set_tracepoint_printk);
246
247 unsigned long long ns2usecs(u64 nsec)
248 {
249         nsec += 500;
250         do_div(nsec, 1000);
251         return nsec;
252 }
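/*
 * Editor's note (not part of the original file): worked example of the
 * rounding above: ns2usecs(1500) = (1500 + 500) / 1000 = 2, i.e. the result
 * is rounded to the nearest microsecond rather than truncated.
 */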
253
254 /* trace_flags holds trace_options default values */
255 #define TRACE_DEFAULT_FLAGS                                             \
256         (FUNCTION_DEFAULT_FLAGS |                                       \
257          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
258          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
259          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
260          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
261
262 /* trace_options that are only supported by global_trace */
263 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
264                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
265
266 /* trace_flags that are default zero for instances */
267 #define ZEROED_TRACE_FLAGS \
268         (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
269
270 /*
271  * The global_trace is the descriptor that holds the top-level tracing
272  * buffers for the live tracing.
273  */
274 static struct trace_array global_trace = {
275         .trace_flags = TRACE_DEFAULT_FLAGS,
276 };
277
278 LIST_HEAD(ftrace_trace_arrays);
279
280 int trace_array_get(struct trace_array *this_tr)
281 {
282         struct trace_array *tr;
283         int ret = -ENODEV;
284
285         mutex_lock(&trace_types_lock);
286         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
287                 if (tr == this_tr) {
288                         tr->ref++;
289                         ret = 0;
290                         break;
291                 }
292         }
293         mutex_unlock(&trace_types_lock);
294
295         return ret;
296 }
297
298 static void __trace_array_put(struct trace_array *this_tr)
299 {
300         WARN_ON(!this_tr->ref);
301         this_tr->ref--;
302 }
303
304 /**
305  * trace_array_put - Decrement the reference counter for this trace array.
306  *
307  * NOTE: Use this when we no longer need the trace array returned by
308  * trace_array_get_by_name(). This ensures the trace array can be later
309  * destroyed.
310  *
311  */
312 void trace_array_put(struct trace_array *this_tr)
313 {
314         if (!this_tr)
315                 return;
316
317         mutex_lock(&trace_types_lock);
318         __trace_array_put(this_tr);
319         mutex_unlock(&trace_types_lock);
320 }
321 EXPORT_SYMBOL_GPL(trace_array_put);
322
323 int tracing_check_open_get_tr(struct trace_array *tr)
324 {
325         int ret;
326
327         ret = security_locked_down(LOCKDOWN_TRACEFS);
328         if (ret)
329                 return ret;
330
331         if (tracing_disabled)
332                 return -ENODEV;
333
334         if (tr && trace_array_get(tr) < 0)
335                 return -ENODEV;
336
337         return 0;
338 }
339
340 int call_filter_check_discard(struct trace_event_call *call, void *rec,
341                               struct trace_buffer *buffer,
342                               struct ring_buffer_event *event)
343 {
344         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
345             !filter_match_preds(call->filter, rec)) {
346                 __trace_event_discard_commit(buffer, event);
347                 return 1;
348         }
349
350         return 0;
351 }
352
353 void trace_free_pid_list(struct trace_pid_list *pid_list)
354 {
355         vfree(pid_list->pids);
356         kfree(pid_list);
357 }
358
359 /**
360  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
361  * @filtered_pids: The list of pids to check
362  * @search_pid: The PID to find in @filtered_pids
363  *
364  * Returns true if @search_pid is found in @filtered_pids, false otherwise.
365  */
366 bool
367 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
368 {
369         /*
370          * If pid_max changed after filtered_pids was created, we
371          * by default ignore all pids greater than the previous pid_max.
372          */
373         if (search_pid >= filtered_pids->pid_max)
374                 return false;
375
376         return test_bit(search_pid, filtered_pids->pids);
377 }
378
379 /**
380  * trace_ignore_this_task - should a task be ignored for tracing
381  * @filtered_pids: The list of pids to check
382  * @task: The task that should be ignored if not filtered
383  *
384  * Checks if @task should be traced or not from @filtered_pids.
385  * Returns true if @task should *NOT* be traced.
386  * Returns false if @task should be traced.
387  */
388 bool
389 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
390                        struct trace_pid_list *filtered_no_pids,
391                        struct task_struct *task)
392 {
393         /*
394          * If filtered_no_pids is not empty, and the task's pid is listed
395          * in filtered_no_pids, then return true.
396          * Otherwise, if filtered_pids is empty, that means we can
397          * trace all tasks. If it has content, then only trace pids
398          * within filtered_pids.
399          */
400
401         return (filtered_pids &&
402                 !trace_find_filtered_pid(filtered_pids, task->pid)) ||
403                 (filtered_no_pids &&
404                  trace_find_filtered_pid(filtered_no_pids, task->pid));
405 }
406
407 /**
408  * trace_filter_add_remove_task - Add or remove a task from a pid_list
409  * @pid_list: The list to modify
410  * @self: The current task for fork or NULL for exit
411  * @task: The task to add or remove
412  *
413  * If adding a task, if @self is defined, the task is only added if @self
414  * is also included in @pid_list. This happens on fork and tasks should
415  * only be added when the parent is listed. If @self is NULL, then the
416  * @task pid will be removed from the list, which would happen on exit
417  * of a task.
418  */
419 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
420                                   struct task_struct *self,
421                                   struct task_struct *task)
422 {
423         if (!pid_list)
424                 return;
425
426         /* For forks, we only add if the forking task is listed */
427         if (self) {
428                 if (!trace_find_filtered_pid(pid_list, self->pid))
429                         return;
430         }
431
432         /* Sorry, but we don't support pid_max changing after setting */
433         if (task->pid >= pid_list->pid_max)
434                 return;
435
436         /* "self" is set for forks, and NULL for exits */
437         if (self)
438                 set_bit(task->pid, pid_list->pids);
439         else
440                 clear_bit(task->pid, pid_list->pids);
441 }
442
443 /**
444  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
445  * @pid_list: The pid list to show
446  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
447  * @pos: The position of the file
448  *
449  * This is used by the seq_file "next" operation to iterate the pids
450  * listed in a trace_pid_list structure.
451  *
452  * Returns the pid+1 as we want to display pid of zero, but NULL would
453  * stop the iteration.
454  */
455 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
456 {
457         unsigned long pid = (unsigned long)v;
458
459         (*pos)++;
460
461         /* pid is already +1 of the actual previous bit */
462         pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
463
464         /* Return pid + 1 to allow zero to be represented */
465         if (pid < pid_list->pid_max)
466                 return (void *)(pid + 1);
467
468         return NULL;
469 }
470
471 /**
472  * trace_pid_start - Used for seq_file to start reading pid lists
473  * @pid_list: The pid list to show
474  * @pos: The position of the file
475  *
476  * This is used by seq_file "start" operation to start the iteration
477  * of listing pids.
478  *
479  * Returns the pid+1 as we want to display pid of zero, but NULL would
480  * stop the iteration.
481  */
482 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
483 {
484         unsigned long pid;
485         loff_t l = 0;
486
487         pid = find_first_bit(pid_list->pids, pid_list->pid_max);
488         if (pid >= pid_list->pid_max)
489                 return NULL;
490
491         /* Return pid + 1 so that zero can be the exit value */
492         for (pid++; pid && l < *pos;
493              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
494                 ;
495         return (void *)pid;
496 }
497
498 /**
499  * trace_pid_show - show the current pid in seq_file processing
500  * @m: The seq_file structure to write into
501  * @v: A void pointer of the pid (+1) value to display
502  *
503  * Can be directly used by seq_file operations to display the current
504  * pid value.
505  */
506 int trace_pid_show(struct seq_file *m, void *v)
507 {
508         unsigned long pid = (unsigned long)v - 1;
509
510         seq_printf(m, "%lu\n", pid);
511         return 0;
512 }
513
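/*
 * Editor's sketch (not part of the original file): how the three seq_file
 * helpers above are typically wired up. example_pid_list and the
 * example_pids_* names are hypothetical; in-tree users of these helpers
 * (e.g. the pid-filter files) follow this pattern.
 */
#if 0
static struct trace_pid_list *example_pid_list;

static void *example_pids_start(struct seq_file *m, loff_t *pos)
{
	return trace_pid_start(example_pid_list, pos);
}

static void *example_pids_next(struct seq_file *m, void *v, loff_t *pos)
{
	return trace_pid_next(example_pid_list, v, pos);
}

static void example_pids_stop(struct seq_file *m, void *v)
{
}

static const struct seq_operations example_pids_seq_ops = {
	.start	= example_pids_start,
	.next	= example_pids_next,
	.stop	= example_pids_stop,
	.show	= trace_pid_show,
};
#endif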
514 /* 128 should be much more than enough */
515 #define PID_BUF_SIZE            127
516
517 int trace_pid_write(struct trace_pid_list *filtered_pids,
518                     struct trace_pid_list **new_pid_list,
519                     const char __user *ubuf, size_t cnt)
520 {
521         struct trace_pid_list *pid_list;
522         struct trace_parser parser;
523         unsigned long val;
524         int nr_pids = 0;
525         ssize_t read = 0;
526         ssize_t ret = 0;
527         loff_t pos;
528         pid_t pid;
529
530         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
531                 return -ENOMEM;
532
533         /*
534          * Always recreate a new array. The write is an all or nothing
535          * operation. Always create a new array when adding new pids by
536          * the user. If the operation fails, then the current list is
537          * not modified.
538          */
539         pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
540         if (!pid_list) {
541                 trace_parser_put(&parser);
542                 return -ENOMEM;
543         }
544
545         pid_list->pid_max = READ_ONCE(pid_max);
546
547         /* Only truncating will shrink pid_max */
548         if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
549                 pid_list->pid_max = filtered_pids->pid_max;
550
551         pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
552         if (!pid_list->pids) {
553                 trace_parser_put(&parser);
554                 kfree(pid_list);
555                 return -ENOMEM;
556         }
557
558         if (filtered_pids) {
559                 /* copy the current bits to the new max */
560                 for_each_set_bit(pid, filtered_pids->pids,
561                                  filtered_pids->pid_max) {
562                         set_bit(pid, pid_list->pids);
563                         nr_pids++;
564                 }
565         }
566
567         while (cnt > 0) {
568
569                 pos = 0;
570
571                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
572                 if (ret < 0 || !trace_parser_loaded(&parser))
573                         break;
574
575                 read += ret;
576                 ubuf += ret;
577                 cnt -= ret;
578
579                 ret = -EINVAL;
580                 if (kstrtoul(parser.buffer, 0, &val))
581                         break;
582                 if (val >= pid_list->pid_max)
583                         break;
584
585                 pid = (pid_t)val;
586
587                 set_bit(pid, pid_list->pids);
588                 nr_pids++;
589
590                 trace_parser_clear(&parser);
591                 ret = 0;
592         }
593         trace_parser_put(&parser);
594
595         if (ret < 0) {
596                 trace_free_pid_list(pid_list);
597                 return ret;
598         }
599
600         if (!nr_pids) {
601                 /* Cleared the list of pids */
602                 trace_free_pid_list(pid_list);
603                 read = ret;
604                 pid_list = NULL;
605         }
606
607         *new_pid_list = pid_list;
608
609         return read;
610 }
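/*
 * Editor's sketch (not part of the original file): the usual calling pattern
 * for trace_pid_write() from a file's write handler. example_lock,
 * example_pid_list and example_pid_write are hypothetical, and the locking/
 * RCU details are simplified relative to the real in-tree users.
 */
#if 0
static ssize_t example_pid_write(struct file *filp, const char __user *ubuf,
				 size_t cnt, loff_t *ppos)
{
	struct trace_pid_list *filtered_pids, *new_pids;
	ssize_t ret;

	mutex_lock(&example_lock);
	filtered_pids = rcu_dereference_protected(example_pid_list,
					lockdep_is_held(&example_lock));

	/* Build a complete new list from the user's input */
	ret = trace_pid_write(filtered_pids, &new_pids, ubuf, cnt);
	if (ret < 0)
		goto out;

	/* Publish the new list, then free the old one once readers are done */
	rcu_assign_pointer(example_pid_list, new_pids);
	synchronize_rcu();
	if (filtered_pids)
		trace_free_pid_list(filtered_pids);

	*ppos += ret;
 out:
	mutex_unlock(&example_lock);
	return ret;
}
#endif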
611
612 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
613 {
614         u64 ts;
615
616         /* Early boot up does not have a buffer yet */
617         if (!buf->buffer)
618                 return trace_clock_local();
619
620         ts = ring_buffer_time_stamp(buf->buffer, cpu);
621         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
622
623         return ts;
624 }
625
626 u64 ftrace_now(int cpu)
627 {
628         return buffer_ftrace_now(&global_trace.array_buffer, cpu);
629 }
630
631 /**
632  * tracing_is_enabled - Show if global_trace has been disabled
633  *
634  * Shows if the global trace has been enabled or not. It uses the
635  * mirror flag "buffer_disabled" so it can be used in fast paths such as
636  * the irqsoff tracer. But it may be inaccurate due to races. If you
637  * need to know the accurate state, use tracing_is_on() which is a little
638  * slower, but accurate.
639  */
640 int tracing_is_enabled(void)
641 {
642         /*
643          * For quick access (irqsoff uses this in fast path), just
644          * return the mirror variable of the state of the ring buffer.
645          * It's a little racy, but we don't really care.
646          */
647         smp_rmb();
648         return !global_trace.buffer_disabled;
649 }
650
651 /*
652  * trace_buf_size is the size in bytes that is allocated
653  * for a buffer. Note, the number of bytes is always rounded
654  * to page size.
655  *
656  * This number is purposely set to a low number of 16384.
657  * If the dump on oops happens, it will be much appreciated
658  * to not have to wait for all that output. Anyway, this is
659  * configurable at both boot time and run time.
660  */
661 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
662
663 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
664
665 /* trace_types holds a link list of available tracers. */
666 static struct tracer            *trace_types __read_mostly;
667
668 /*
669  * trace_types_lock is used to protect the trace_types list.
670  */
671 DEFINE_MUTEX(trace_types_lock);
672
673 /*
674  * serialize the access of the ring buffer
675  *
676  * The ring buffer serializes readers, but this is only low-level protection.
677  * The validity of the events (returned by ring_buffer_peek() etc.)
678  * is not protected by the ring buffer.
679  *
680  * The content of events may become garbage if we allow other processes to
681  * consume these events concurrently:
682  *   A) the page of the consumed events may become a normal page
683  *      (not a reader page) in the ring buffer, and this page will be
684  *      rewritten by the event producer.
685  *   B) The page of the consumed events may become a page for splice_read,
686  *      and this page will be returned to the system.
687  *
688  * These primitives allow multiple processes to access different per-cpu
689  * ring buffers concurrently.
690  *
691  * These primitives don't distinguish read-only and read-consume access.
692  * Multiple read-only accesses are also serialized.
693  */
694
695 #ifdef CONFIG_SMP
696 static DECLARE_RWSEM(all_cpu_access_lock);
697 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
698
699 static inline void trace_access_lock(int cpu)
700 {
701         if (cpu == RING_BUFFER_ALL_CPUS) {
702                 /* gain it for accessing the whole ring buffer. */
703                 down_write(&all_cpu_access_lock);
704         } else {
705                 /* gain it for accessing a cpu ring buffer. */
706
707                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
708                 down_read(&all_cpu_access_lock);
709
710                 /* Secondly block other access to this @cpu ring buffer. */
711                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
712         }
713 }
714
715 static inline void trace_access_unlock(int cpu)
716 {
717         if (cpu == RING_BUFFER_ALL_CPUS) {
718                 up_write(&all_cpu_access_lock);
719         } else {
720                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
721                 up_read(&all_cpu_access_lock);
722         }
723 }
724
725 static inline void trace_access_lock_init(void)
726 {
727         int cpu;
728
729         for_each_possible_cpu(cpu)
730                 mutex_init(&per_cpu(cpu_access_lock, cpu));
731 }
732
733 #else
734
735 static DEFINE_MUTEX(access_lock);
736
737 static inline void trace_access_lock(int cpu)
738 {
739         (void)cpu;
740         mutex_lock(&access_lock);
741 }
742
743 static inline void trace_access_unlock(int cpu)
744 {
745         (void)cpu;
746         mutex_unlock(&access_lock);
747 }
748
749 static inline void trace_access_lock_init(void)
750 {
751 }
752
753 #endif
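/*
 * Editor's note (not part of the original file): the expected usage pattern
 * of the helpers above when consuming from one CPU's buffer is
 *
 *	trace_access_lock(cpu);
 *	... ring_buffer_consume()/ring_buffer_read_page() for that cpu ...
 *	trace_access_unlock(cpu);
 *
 * while passing RING_BUFFER_ALL_CPUS takes the access lock exclusively and
 * blocks all per-cpu readers.
 */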
754
755 #ifdef CONFIG_STACKTRACE
756 static void __ftrace_trace_stack(struct trace_buffer *buffer,
757                                  unsigned long flags,
758                                  int skip, int pc, struct pt_regs *regs);
759 static inline void ftrace_trace_stack(struct trace_array *tr,
760                                       struct trace_buffer *buffer,
761                                       unsigned long flags,
762                                       int skip, int pc, struct pt_regs *regs);
763
764 #else
765 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
766                                         unsigned long flags,
767                                         int skip, int pc, struct pt_regs *regs)
768 {
769 }
770 static inline void ftrace_trace_stack(struct trace_array *tr,
771                                       struct trace_buffer *buffer,
772                                       unsigned long flags,
773                                       int skip, int pc, struct pt_regs *regs)
774 {
775 }
776
777 #endif
778
779 static __always_inline void
780 trace_event_setup(struct ring_buffer_event *event,
781                   int type, unsigned long flags, int pc)
782 {
783         struct trace_entry *ent = ring_buffer_event_data(event);
784
785         tracing_generic_entry_update(ent, type, flags, pc);
786 }
787
788 static __always_inline struct ring_buffer_event *
789 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
790                           int type,
791                           unsigned long len,
792                           unsigned long flags, int pc)
793 {
794         struct ring_buffer_event *event;
795
796         event = ring_buffer_lock_reserve(buffer, len);
797         if (event != NULL)
798                 trace_event_setup(event, type, flags, pc);
799
800         return event;
801 }
802
803 void tracer_tracing_on(struct trace_array *tr)
804 {
805         if (tr->array_buffer.buffer)
806                 ring_buffer_record_on(tr->array_buffer.buffer);
807         /*
808          * This flag is looked at when buffers haven't been allocated
809          * yet, or by some tracers (like irqsoff), that just want to
810          * know if the ring buffer has been disabled, but it can handle
811          * races of where it gets disabled but we still do a record.
812          * As the check is in the fast path of the tracers, it is more
813          * important to be fast than accurate.
814          */
815         tr->buffer_disabled = 0;
816         /* Make the flag seen by readers */
817         smp_wmb();
818 }
819
820 /**
821  * tracing_on - enable tracing buffers
822  *
823  * This function enables tracing buffers that may have been
824  * disabled with tracing_off.
825  */
826 void tracing_on(void)
827 {
828         tracer_tracing_on(&global_trace);
829 }
830 EXPORT_SYMBOL_GPL(tracing_on);
831
832
833 static __always_inline void
834 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
835 {
836         __this_cpu_write(trace_taskinfo_save, true);
837
838         /* If this is the temp buffer, we need to commit fully */
839         if (this_cpu_read(trace_buffered_event) == event) {
840                 /* Length is in event->array[0] */
841                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
842                 /* Release the temp buffer */
843                 this_cpu_dec(trace_buffered_event_cnt);
844         } else
845                 ring_buffer_unlock_commit(buffer, event);
846 }
847
848 /**
849  * __trace_puts - write a constant string into the trace buffer.
850  * @ip:    The address of the caller
851  * @str:   The constant string to write
852  * @size:  The size of the string.
853  */
854 int __trace_puts(unsigned long ip, const char *str, int size)
855 {
856         struct ring_buffer_event *event;
857         struct trace_buffer *buffer;
858         struct print_entry *entry;
859         unsigned long irq_flags;
860         int alloc;
861         int pc;
862
863         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
864                 return 0;
865
866         pc = preempt_count();
867
868         if (unlikely(tracing_selftest_running || tracing_disabled))
869                 return 0;
870
871         alloc = sizeof(*entry) + size + 2; /* possible \n added */
872
873         local_save_flags(irq_flags);
874         buffer = global_trace.array_buffer.buffer;
875         ring_buffer_nest_start(buffer);
876         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
877                                             irq_flags, pc);
878         if (!event) {
879                 size = 0;
880                 goto out;
881         }
882
883         entry = ring_buffer_event_data(event);
884         entry->ip = ip;
885
886         memcpy(&entry->buf, str, size);
887
888         /* Add a newline if necessary */
889         if (entry->buf[size - 1] != '\n') {
890                 entry->buf[size] = '\n';
891                 entry->buf[size + 1] = '\0';
892         } else
893                 entry->buf[size] = '\0';
894
895         __buffer_unlock_commit(buffer, event);
896         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
897  out:
898         ring_buffer_nest_end(buffer);
899         return size;
900 }
901 EXPORT_SYMBOL_GPL(__trace_puts);
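/*
 * Editor's note (not part of the original file): __trace_puts() is rarely
 * called directly. The trace_puts() macro picks either this function or
 * __trace_bputs() depending on whether the string is a built-in constant,
 * and supplies the caller's address, e.g.:
 *
 *	trace_puts("reached the slow path\n");
 */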
902
903 /**
904  * __trace_bputs - write the pointer to a constant string into trace buffer
905  * @ip:    The address of the caller
906  * @str:   The constant string to write to the buffer to
907  */
908 int __trace_bputs(unsigned long ip, const char *str)
909 {
910         struct ring_buffer_event *event;
911         struct trace_buffer *buffer;
912         struct bputs_entry *entry;
913         unsigned long irq_flags;
914         int size = sizeof(struct bputs_entry);
915         int ret = 0;
916         int pc;
917
918         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
919                 return 0;
920
921         pc = preempt_count();
922
923         if (unlikely(tracing_selftest_running || tracing_disabled))
924                 return 0;
925
926         local_save_flags(irq_flags);
927         buffer = global_trace.array_buffer.buffer;
928
929         ring_buffer_nest_start(buffer);
930         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
931                                             irq_flags, pc);
932         if (!event)
933                 goto out;
934
935         entry = ring_buffer_event_data(event);
936         entry->ip                       = ip;
937         entry->str                      = str;
938
939         __buffer_unlock_commit(buffer, event);
940         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
941
942         ret = 1;
943  out:
944         ring_buffer_nest_end(buffer);
945         return ret;
946 }
947 EXPORT_SYMBOL_GPL(__trace_bputs);
948
949 #ifdef CONFIG_TRACER_SNAPSHOT
950 static void tracing_snapshot_instance_cond(struct trace_array *tr,
951                                            void *cond_data)
952 {
953         struct tracer *tracer = tr->current_trace;
954         unsigned long flags;
955
956         if (in_nmi()) {
957                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
958                 internal_trace_puts("*** snapshot is being ignored        ***\n");
959                 return;
960         }
961
962         if (!tr->allocated_snapshot) {
963                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
964                 internal_trace_puts("*** stopping trace here!   ***\n");
965                 tracing_off();
966                 return;
967         }
968
969         /* Note, snapshot can not be used when the tracer uses it */
970         if (tracer->use_max_tr) {
971                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
972                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
973                 return;
974         }
975
976         local_irq_save(flags);
977         update_max_tr(tr, current, smp_processor_id(), cond_data);
978         local_irq_restore(flags);
979 }
980
981 void tracing_snapshot_instance(struct trace_array *tr)
982 {
983         tracing_snapshot_instance_cond(tr, NULL);
984 }
985
986 /**
987  * tracing_snapshot - take a snapshot of the current buffer.
988  *
989  * This causes a swap between the snapshot buffer and the current live
990  * tracing buffer. You can use this to take snapshots of the live
991  * trace when some condition is triggered, but continue to trace.
992  *
993  * Note, make sure to allocate the snapshot with either
994  * a tracing_snapshot_alloc(), or by doing it manually
995  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
996  *
997  * If the snapshot buffer is not allocated, it will stop tracing.
998  * Basically making a permanent snapshot.
999  */
1000 void tracing_snapshot(void)
1001 {
1002         struct trace_array *tr = &global_trace;
1003
1004         tracing_snapshot_instance(tr);
1005 }
1006 EXPORT_SYMBOL_GPL(tracing_snapshot);
1007
1008 /**
1009  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1010  * @tr:         The tracing instance to snapshot
1011  * @cond_data:  The data to be tested conditionally, and possibly saved
1012  *
1013  * This is the same as tracing_snapshot() except that the snapshot is
1014  * conditional - the snapshot will only happen if the
1015  * cond_snapshot.update() implementation receiving the cond_data
1016  * returns true, which means that the trace array's cond_snapshot
1017  * update() operation used the cond_data to determine whether the
1018  * snapshot should be taken, and if it was, presumably saved it along
1019  * with the snapshot.
1020  */
1021 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1022 {
1023         tracing_snapshot_instance_cond(tr, cond_data);
1024 }
1025 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1026
1027 /**
1028  * tracing_snapshot_cond_data - get the user data associated with a snapshot
1029  * @tr:         The tracing instance
1030  *
1031  * When the user enables a conditional snapshot using
1032  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1033  * with the snapshot.  This accessor is used to retrieve it.
1034  *
1035  * Should not be called from cond_snapshot.update(), since it takes
1036  * the tr->max_lock lock, which the code calling
1037  * cond_snapshot.update() has already done.
1038  *
1039  * Returns the cond_data associated with the trace array's snapshot.
1040  */
1041 void *tracing_cond_snapshot_data(struct trace_array *tr)
1042 {
1043         void *cond_data = NULL;
1044
1045         arch_spin_lock(&tr->max_lock);
1046
1047         if (tr->cond_snapshot)
1048                 cond_data = tr->cond_snapshot->cond_data;
1049
1050         arch_spin_unlock(&tr->max_lock);
1051
1052         return cond_data;
1053 }
1054 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1055
1056 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1057                                         struct array_buffer *size_buf, int cpu_id);
1058 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1059
1060 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1061 {
1062         int ret;
1063
1064         if (!tr->allocated_snapshot) {
1065
1066                 /* allocate spare buffer */
1067                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1068                                    &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1069                 if (ret < 0)
1070                         return ret;
1071
1072                 tr->allocated_snapshot = true;
1073         }
1074
1075         return 0;
1076 }
1077
1078 static void free_snapshot(struct trace_array *tr)
1079 {
1080         /*
1081          * We don't free the ring buffer; instead, we resize it because
1082          * the max_tr ring buffer has some state (e.g. ring->clock) and
1083          * we want to preserve it.
1084          */
1085         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1086         set_buffer_entries(&tr->max_buffer, 1);
1087         tracing_reset_online_cpus(&tr->max_buffer);
1088         tr->allocated_snapshot = false;
1089 }
1090
1091 /**
1092  * tracing_alloc_snapshot - allocate snapshot buffer.
1093  *
1094  * This only allocates the snapshot buffer if it isn't already
1095  * allocated - it doesn't also take a snapshot.
1096  *
1097  * This is meant to be used in cases where the snapshot buffer needs
1098  * to be set up for events that can't sleep but need to be able to
1099  * trigger a snapshot.
1100  */
1101 int tracing_alloc_snapshot(void)
1102 {
1103         struct trace_array *tr = &global_trace;
1104         int ret;
1105
1106         ret = tracing_alloc_snapshot_instance(tr);
1107         WARN_ON(ret < 0);
1108
1109         return ret;
1110 }
1111 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1112
1113 /**
1114  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1115  *
1116  * This is similar to tracing_snapshot(), but it will allocate the
1117  * snapshot buffer if it isn't already allocated. Use this only
1118  * where it is safe to sleep, as the allocation may sleep.
1119  *
1120  * This causes a swap between the snapshot buffer and the current live
1121  * tracing buffer. You can use this to take snapshots of the live
1122  * trace when some condition is triggered, but continue to trace.
1123  */
1124 void tracing_snapshot_alloc(void)
1125 {
1126         int ret;
1127
1128         ret = tracing_alloc_snapshot();
1129         if (ret < 0)
1130                 return;
1131
1132         tracing_snapshot();
1133 }
1134 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1135
1136 /**
1137  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1138  * @tr:         The tracing instance
1139  * @cond_data:  User data to associate with the snapshot
1140  * @update:     Implementation of the cond_snapshot update function
1141  *
1142  * Check whether the conditional snapshot for the given instance has
1143  * already been enabled, or if the current tracer is already using a
1144  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1145  * save the cond_data and update function inside.
1146  *
1147  * Returns 0 if successful, error otherwise.
1148  */
1149 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1150                                  cond_update_fn_t update)
1151 {
1152         struct cond_snapshot *cond_snapshot;
1153         int ret = 0;
1154
1155         cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1156         if (!cond_snapshot)
1157                 return -ENOMEM;
1158
1159         cond_snapshot->cond_data = cond_data;
1160         cond_snapshot->update = update;
1161
1162         mutex_lock(&trace_types_lock);
1163
1164         ret = tracing_alloc_snapshot_instance(tr);
1165         if (ret)
1166                 goto fail_unlock;
1167
1168         if (tr->current_trace->use_max_tr) {
1169                 ret = -EBUSY;
1170                 goto fail_unlock;
1171         }
1172
1173         /*
1174          * The cond_snapshot can only change to NULL without the
1175          * trace_types_lock. We don't care if we race with it going
1176          * to NULL, but we want to make sure that it's not set to
1177          * something other than NULL when we get here, which we can
1178          * do safely with only holding the trace_types_lock and not
1179          * having to take the max_lock.
1180          */
1181         if (tr->cond_snapshot) {
1182                 ret = -EBUSY;
1183                 goto fail_unlock;
1184         }
1185
1186         arch_spin_lock(&tr->max_lock);
1187         tr->cond_snapshot = cond_snapshot;
1188         arch_spin_unlock(&tr->max_lock);
1189
1190         mutex_unlock(&trace_types_lock);
1191
1192         return ret;
1193
1194  fail_unlock:
1195         mutex_unlock(&trace_types_lock);
1196         kfree(cond_snapshot);
1197         return ret;
1198 }
1199 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
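/*
 * Editor's sketch (not part of the original file): minimal use of the
 * conditional snapshot API above. example_flag, example_update() and the
 * helper functions are hypothetical; a real in-tree user is the histogram
 * trigger "snapshot" action.
 */
#if 0
static bool example_flag;

/* Condition callback: take the snapshot only when example_flag is set */
static bool example_update(struct trace_array *tr, void *cond_data)
{
	return *(bool *)cond_data;
}

static int example_setup(struct trace_array *tr)
{
	/* Associate the data and callback with @tr's snapshot buffer */
	return tracing_snapshot_cond_enable(tr, &example_flag, example_update);
}

static void example_hit(struct trace_array *tr)
{
	/* On the event of interest: snapshots only if example_update() agrees */
	tracing_snapshot_cond(tr, &example_flag);
}

static void example_teardown(struct trace_array *tr)
{
	tracing_snapshot_cond_disable(tr);
}
#endif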
1200
1201 /**
1202  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1203  * @tr:         The tracing instance
1204  *
1205  * Check whether the conditional snapshot for the given instance is
1206  * enabled; if so, free the cond_snapshot associated with it,
1207  * otherwise return -EINVAL.
1208  *
1209  * Returns 0 if successful, error otherwise.
1210  */
1211 int tracing_snapshot_cond_disable(struct trace_array *tr)
1212 {
1213         int ret = 0;
1214
1215         arch_spin_lock(&tr->max_lock);
1216
1217         if (!tr->cond_snapshot)
1218                 ret = -EINVAL;
1219         else {
1220                 kfree(tr->cond_snapshot);
1221                 tr->cond_snapshot = NULL;
1222         }
1223
1224         arch_spin_unlock(&tr->max_lock);
1225
1226         return ret;
1227 }
1228 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1229 #else
1230 void tracing_snapshot(void)
1231 {
1232         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1233 }
1234 EXPORT_SYMBOL_GPL(tracing_snapshot);
1235 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1236 {
1237         WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1238 }
1239 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1240 int tracing_alloc_snapshot(void)
1241 {
1242         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1243         return -ENODEV;
1244 }
1245 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1246 void tracing_snapshot_alloc(void)
1247 {
1248         /* Give warning */
1249         tracing_snapshot();
1250 }
1251 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1252 void *tracing_cond_snapshot_data(struct trace_array *tr)
1253 {
1254         return NULL;
1255 }
1256 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1257 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1258 {
1259         return -ENODEV;
1260 }
1261 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1262 int tracing_snapshot_cond_disable(struct trace_array *tr)
1263 {
1264         return false;
1265 }
1266 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1267 #endif /* CONFIG_TRACER_SNAPSHOT */
1268
1269 void tracer_tracing_off(struct trace_array *tr)
1270 {
1271         if (tr->array_buffer.buffer)
1272                 ring_buffer_record_off(tr->array_buffer.buffer);
1273         /*
1274          * This flag is looked at when buffers haven't been allocated
1275          * yet, or by some tracers (like irqsoff), that just want to
1276          * know if the ring buffer has been disabled, but it can handle
1277          * races of where it gets disabled but we still do a record.
1278          * As the check is in the fast path of the tracers, it is more
1279          * important to be fast than accurate.
1280          */
1281         tr->buffer_disabled = 1;
1282         /* Make the flag seen by readers */
1283         smp_wmb();
1284 }
1285
1286 /**
1287  * tracing_off - turn off tracing buffers
1288  *
1289  * This function stops the tracing buffers from recording data.
1290  * It does not disable any overhead the tracers themselves may
1291  * be causing. This function simply causes all recording to
1292  * the ring buffers to fail.
1293  */
1294 void tracing_off(void)
1295 {
1296         tracer_tracing_off(&global_trace);
1297 }
1298 EXPORT_SYMBOL_GPL(tracing_off);
1299
1300 void disable_trace_on_warning(void)
1301 {
1302         if (__disable_trace_on_warning) {
1303                 trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1304                         "Disabling tracing due to warning\n");
1305                 tracing_off();
1306         }
1307 }
1308
1309 /**
1310  * tracer_tracing_is_on - show real state of ring buffer enabled
1311  * @tr : the trace array to know if ring buffer is enabled
1312  *
1313  * Shows real state of the ring buffer if it is enabled or not.
1314  */
1315 bool tracer_tracing_is_on(struct trace_array *tr)
1316 {
1317         if (tr->array_buffer.buffer)
1318                 return ring_buffer_record_is_on(tr->array_buffer.buffer);
1319         return !tr->buffer_disabled;
1320 }
1321
1322 /**
1323  * tracing_is_on - show state of ring buffers enabled
1324  */
1325 int tracing_is_on(void)
1326 {
1327         return tracer_tracing_is_on(&global_trace);
1328 }
1329 EXPORT_SYMBOL_GPL(tracing_is_on);
1330
1331 static int __init set_buf_size(char *str)
1332 {
1333         unsigned long buf_size;
1334
1335         if (!str)
1336                 return 0;
1337         buf_size = memparse(str, &str);
1338         /* nr_entries can not be zero */
1339         if (buf_size == 0)
1340                 return 0;
1341         trace_buf_size = buf_size;
1342         return 1;
1343 }
1344 __setup("trace_buf_size=", set_buf_size);
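/*
 * Editor's note (not part of the original file): memparse() accepts the
 * usual size suffixes, so e.g. booting with
 *
 *	trace_buf_size=16M
 *
 * requests a 16 MiB trace ring buffer (rounded to page size, as noted
 * earlier in this file).
 */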
1345
1346 static int __init set_tracing_thresh(char *str)
1347 {
1348         unsigned long threshold;
1349         int ret;
1350
1351         if (!str)
1352                 return 0;
1353         ret = kstrtoul(str, 0, &threshold);
1354         if (ret < 0)
1355                 return 0;
1356         tracing_thresh = threshold * 1000;
1357         return 1;
1358 }
1359 __setup("tracing_thresh=", set_tracing_thresh);
1360
1361 unsigned long nsecs_to_usecs(unsigned long nsecs)
1362 {
1363         return nsecs / 1000;
1364 }
1365
1366 /*
1367  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1368  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1369  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1370  * of strings in the order that the evals (enum) were defined.
1371  */
1372 #undef C
1373 #define C(a, b) b
1374
1375 /* These must match the bit positions in trace_iterator_flags */
1376 static const char *trace_options[] = {
1377         TRACE_FLAGS
1378         NULL
1379 };
1380
1381 static struct {
1382         u64 (*func)(void);
1383         const char *name;
1384         int in_ns;              /* is this clock in nanoseconds? */
1385 } trace_clocks[] = {
1386         { trace_clock_local,            "local",        1 },
1387         { trace_clock_global,           "global",       1 },
1388         { trace_clock_counter,          "counter",      0 },
1389         { trace_clock_jiffies,          "uptime",       0 },
1390         { trace_clock,                  "perf",         1 },
1391         { ktime_get_mono_fast_ns,       "mono",         1 },
1392         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1393         { ktime_get_boot_fast_ns,       "boot",         1 },
1394         ARCH_TRACE_CLOCKS
1395 };
1396
1397 bool trace_clock_in_ns(struct trace_array *tr)
1398 {
1399         if (trace_clocks[tr->clock_id].in_ns)
1400                 return true;
1401
1402         return false;
1403 }
1404
1405 /*
1406  * trace_parser_get_init - gets the buffer for trace parser
1407  */
1408 int trace_parser_get_init(struct trace_parser *parser, int size)
1409 {
1410         memset(parser, 0, sizeof(*parser));
1411
1412         parser->buffer = kmalloc(size, GFP_KERNEL);
1413         if (!parser->buffer)
1414                 return 1;
1415
1416         parser->size = size;
1417         return 0;
1418 }
1419
1420 /*
1421  * trace_parser_put - frees the buffer for trace parser
1422  */
1423 void trace_parser_put(struct trace_parser *parser)
1424 {
1425         kfree(parser->buffer);
1426         parser->buffer = NULL;
1427 }
1428
1429 /*
1430  * trace_get_user - reads the user input string separated by  space
1431  * (matched by isspace(ch))
1432  *
1433  * For each string found the 'struct trace_parser' is updated,
1434  * and the function returns.
1435  *
1436  * Returns number of bytes read.
1437  *
1438  * See kernel/trace/trace.h for 'struct trace_parser' details.
1439  */
1440 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1441         size_t cnt, loff_t *ppos)
1442 {
1443         char ch;
1444         size_t read = 0;
1445         ssize_t ret;
1446
1447         if (!*ppos)
1448                 trace_parser_clear(parser);
1449
1450         ret = get_user(ch, ubuf++);
1451         if (ret)
1452                 goto out;
1453
1454         read++;
1455         cnt--;
1456
1457         /*
1458          * The parser is not finished with the last write,
1459          * continue reading the user input without skipping spaces.
1460          */
1461         if (!parser->cont) {
1462                 /* skip white space */
1463                 while (cnt && isspace(ch)) {
1464                         ret = get_user(ch, ubuf++);
1465                         if (ret)
1466                                 goto out;
1467                         read++;
1468                         cnt--;
1469                 }
1470
1471                 parser->idx = 0;
1472
1473                 /* only spaces were written */
1474                 if (isspace(ch) || !ch) {
1475                         *ppos += read;
1476                         ret = read;
1477                         goto out;
1478                 }
1479         }
1480
1481         /* read the non-space input */
1482         while (cnt && !isspace(ch) && ch) {
1483                 if (parser->idx < parser->size - 1)
1484                         parser->buffer[parser->idx++] = ch;
1485                 else {
1486                         ret = -EINVAL;
1487                         goto out;
1488                 }
1489                 ret = get_user(ch, ubuf++);
1490                 if (ret)
1491                         goto out;
1492                 read++;
1493                 cnt--;
1494         }
1495
1496         /* We either got finished input or we have to wait for another call. */
1497         if (isspace(ch) || !ch) {
1498                 parser->buffer[parser->idx] = 0;
1499                 parser->cont = false;
1500         } else if (parser->idx < parser->size - 1) {
1501                 parser->cont = true;
1502                 parser->buffer[parser->idx++] = ch;
1503                 /* Make sure the parsed string always terminates with '\0'. */
1504                 parser->buffer[parser->idx] = 0;
1505         } else {
1506                 ret = -EINVAL;
1507                 goto out;
1508         }
1509
1510         *ppos += read;
1511         ret = read;
1512
1513 out:
1514         return ret;
1515 }
1516
1517 /* TODO add a seq_buf_to_buffer() */
1518 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1519 {
1520         int len;
1521
1522         if (trace_seq_used(s) <= s->seq.readpos)
1523                 return -EBUSY;
1524
1525         len = trace_seq_used(s) - s->seq.readpos;
1526         if (cnt > len)
1527                 cnt = len;
1528         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1529
1530         s->seq.readpos += cnt;
1531         return cnt;
1532 }
1533
1534 unsigned long __read_mostly     tracing_thresh;
1535 static const struct file_operations tracing_max_lat_fops;
1536
1537 #if (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1538         defined(CONFIG_FSNOTIFY)
1539
1540 static struct workqueue_struct *fsnotify_wq;
1541
1542 static void latency_fsnotify_workfn(struct work_struct *work)
1543 {
1544         struct trace_array *tr = container_of(work, struct trace_array,
1545                                               fsnotify_work);
1546         fsnotify(tr->d_max_latency->d_inode, FS_MODIFY,
1547                  tr->d_max_latency->d_inode, FSNOTIFY_EVENT_INODE, NULL, 0);
1548 }
1549
1550 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1551 {
1552         struct trace_array *tr = container_of(iwork, struct trace_array,
1553                                               fsnotify_irqwork);
1554         queue_work(fsnotify_wq, &tr->fsnotify_work);
1555 }
1556
1557 static void trace_create_maxlat_file(struct trace_array *tr,
1558                                      struct dentry *d_tracer)
1559 {
1560         INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1561         init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1562         tr->d_max_latency = trace_create_file("tracing_max_latency", 0644,
1563                                               d_tracer, &tr->max_latency,
1564                                               &tracing_max_lat_fops);
1565 }
1566
1567 __init static int latency_fsnotify_init(void)
1568 {
1569         fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1570                                       WQ_UNBOUND | WQ_HIGHPRI, 0);
1571         if (!fsnotify_wq) {
1572                 pr_err("Unable to allocate tr_max_lat_wq\n");
1573                 return -ENOMEM;
1574         }
1575         return 0;
1576 }
1577
1578 late_initcall_sync(latency_fsnotify_init);
1579
1580 void latency_fsnotify(struct trace_array *tr)
1581 {
1582         if (!fsnotify_wq)
1583                 return;
1584         /*
1585          * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1586          * possible that we are called from __schedule() or do_idle(), which
1587          * could cause a deadlock.
1588          */
1589         irq_work_queue(&tr->fsnotify_irqwork);
1590 }
1591
1592 /*
1593  * (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1594  *  defined(CONFIG_FSNOTIFY)
1595  */
1596 #else
1597
1598 #define trace_create_maxlat_file(tr, d_tracer)                          \
1599         trace_create_file("tracing_max_latency", 0644, d_tracer,        \
1600                           &tr->max_latency, &tracing_max_lat_fops)
1601
1602 #endif
1603
1604 #ifdef CONFIG_TRACER_MAX_TRACE
1605 /*
1606  * Copy the new maximum trace into the separate maximum-trace
1607  * structure. (this way the maximum trace is permanently saved,
1608  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1609  */
1610 static void
1611 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1612 {
1613         struct array_buffer *trace_buf = &tr->array_buffer;
1614         struct array_buffer *max_buf = &tr->max_buffer;
1615         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1616         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1617
1618         max_buf->cpu = cpu;
1619         max_buf->time_start = data->preempt_timestamp;
1620
1621         max_data->saved_latency = tr->max_latency;
1622         max_data->critical_start = data->critical_start;
1623         max_data->critical_end = data->critical_end;
1624
1625         strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1626         max_data->pid = tsk->pid;
1627         /*
1628          * If tsk == current, then use current_uid(), as that does not use
1629          * RCU. The irq tracer can be called out of RCU scope.
1630          */
1631         if (tsk == current)
1632                 max_data->uid = current_uid();
1633         else
1634                 max_data->uid = task_uid(tsk);
1635
1636         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1637         max_data->policy = tsk->policy;
1638         max_data->rt_priority = tsk->rt_priority;
1639
1640         /* record this task's comm */
1641         tracing_record_cmdline(tsk);
1642         latency_fsnotify(tr);
1643 }
1644
1645 /**
1646  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1647  * @tr: tracer
1648  * @tsk: the task with the latency
1649  * @cpu: The cpu that initiated the trace.
1650  * @cond_data: User data associated with a conditional snapshot
1651  *
1652  * Flip the buffers between the @tr and the max_tr and record information
1653  * about which task was the cause of this latency.
1654  */
1655 void
1656 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1657               void *cond_data)
1658 {
1659         if (tr->stop_count)
1660                 return;
1661
1662         WARN_ON_ONCE(!irqs_disabled());
1663
1664         if (!tr->allocated_snapshot) {
1665                 /* Only the nop tracer should hit this when disabling */
1666                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1667                 return;
1668         }
1669
1670         arch_spin_lock(&tr->max_lock);
1671
1672         /* Inherit the recordable setting from array_buffer */
1673         if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1674                 ring_buffer_record_on(tr->max_buffer.buffer);
1675         else
1676                 ring_buffer_record_off(tr->max_buffer.buffer);
1677
1678 #ifdef CONFIG_TRACER_SNAPSHOT
1679         if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1680                 goto out_unlock;
1681 #endif
1682         swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1683
1684         __update_max_tr(tr, tsk, cpu);
1685
1686  out_unlock:
1687         arch_spin_unlock(&tr->max_lock);
1688 }
1689
1690 /**
1691  * update_max_tr_single - only copy one trace over, and reset the rest
1692  * @tr: tracer
1693  * @tsk: task with the latency
1694  * @cpu: the cpu of the buffer to copy.
1695  *
1696  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1697  */
1698 void
1699 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1700 {
1701         int ret;
1702
1703         if (tr->stop_count)
1704                 return;
1705
1706         WARN_ON_ONCE(!irqs_disabled());
1707         if (!tr->allocated_snapshot) {
1708                 /* Only the nop tracer should hit this when disabling */
1709                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1710                 return;
1711         }
1712
1713         arch_spin_lock(&tr->max_lock);
1714
1715         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1716
1717         if (ret == -EBUSY) {
1718                 /*
1719                  * We failed to swap the buffer due to a commit taking
1720                  * place on this CPU. We fail to record, but we reset
1721                  * the max trace buffer (no one writes directly to it)
1722                  * and flag that it failed.
1723                  */
1724                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1725                         "Failed to swap buffers due to commit in progress\n");
1726         }
1727
1728         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1729
1730         __update_max_tr(tr, tsk, cpu);
1731         arch_spin_unlock(&tr->max_lock);
1732 }
1733 #endif /* CONFIG_TRACER_MAX_TRACE */
1734
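/*
 * Wait until the ring buffer for iter->cpu_file has data to read (or has
 * reached the requested fullness when @full is set). Iterators that carry
 * static per-CPU buffer iterators (the "trace" file) read a fixed snapshot,
 * so there is nothing to wait for in that case.
 */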
1735 static int wait_on_pipe(struct trace_iterator *iter, int full)
1736 {
1737         /* Iterators are static, they should be filled or empty */
1738         if (trace_buffer_iter(iter, iter->cpu_file))
1739                 return 0;
1740
1741         return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1742                                 full);
1743 }
1744
1745 #ifdef CONFIG_FTRACE_STARTUP_TEST
1746 static bool selftests_can_run;
1747
1748 struct trace_selftests {
1749         struct list_head                list;
1750         struct tracer                   *type;
1751 };
1752
1753 static LIST_HEAD(postponed_selftests);
1754
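/*
 * Remember a tracer whose selftest cannot run yet because it registered
 * too early in boot; init_trace_selftests() runs the postponed tests
 * later, from a core_initcall.
 */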
1755 static int save_selftest(struct tracer *type)
1756 {
1757         struct trace_selftests *selftest;
1758
1759         selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1760         if (!selftest)
1761                 return -ENOMEM;
1762
1763         selftest->type = type;
1764         list_add(&selftest->list, &postponed_selftests);
1765         return 0;
1766 }
1767
1768 static int run_tracer_selftest(struct tracer *type)
1769 {
1770         struct trace_array *tr = &global_trace;
1771         struct tracer *saved_tracer = tr->current_trace;
1772         int ret;
1773
1774         if (!type->selftest || tracing_selftest_disabled)
1775                 return 0;
1776
1777         /*
1778          * If a tracer registers early in boot up (before scheduling is
1779          * initialized and such), then do not run its selftests yet.
1780          * Instead, run it a little later in the boot process.
1781          */
1782         if (!selftests_can_run)
1783                 return save_selftest(type);
1784
1785         /*
1786          * Run a selftest on this tracer.
1787          * Here we reset the trace buffer, and set the current
1788          * tracer to be this tracer. The tracer can then run some
1789          * internal tracing to verify that everything is in order.
1790          * If we fail, we do not register this tracer.
1791          */
1792         tracing_reset_online_cpus(&tr->array_buffer);
1793
1794         tr->current_trace = type;
1795
1796 #ifdef CONFIG_TRACER_MAX_TRACE
1797         if (type->use_max_tr) {
1798                 /* If we expanded the buffers, make sure the max is expanded too */
1799                 if (ring_buffer_expanded)
1800                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1801                                            RING_BUFFER_ALL_CPUS);
1802                 tr->allocated_snapshot = true;
1803         }
1804 #endif
1805
1806         /* the test is responsible for initializing and enabling */
1807         pr_info("Testing tracer %s: ", type->name);
1808         ret = type->selftest(type, tr);
1809         /* the test is responsible for resetting too */
1810         tr->current_trace = saved_tracer;
1811         if (ret) {
1812                 printk(KERN_CONT "FAILED!\n");
1813                 /* Add the warning after printing 'FAILED' */
1814                 WARN_ON(1);
1815                 return -1;
1816         }
1817         /* Only reset on passing, to avoid touching corrupted buffers */
1818         tracing_reset_online_cpus(&tr->array_buffer);
1819
1820 #ifdef CONFIG_TRACER_MAX_TRACE
1821         if (type->use_max_tr) {
1822                 tr->allocated_snapshot = false;
1823
1824                 /* Shrink the max buffer again */
1825                 if (ring_buffer_expanded)
1826                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1827                                            RING_BUFFER_ALL_CPUS);
1828         }
1829 #endif
1830
1831         printk(KERN_CONT "PASSED\n");
1832         return 0;
1833 }
1834
1835 static __init int init_trace_selftests(void)
1836 {
1837         struct trace_selftests *p, *n;
1838         struct tracer *t, **last;
1839         int ret;
1840
1841         selftests_can_run = true;
1842
1843         mutex_lock(&trace_types_lock);
1844
1845         if (list_empty(&postponed_selftests))
1846                 goto out;
1847
1848         pr_info("Running postponed tracer tests:\n");
1849
1850         tracing_selftest_running = true;
1851         list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1852                 /* This loop can take minutes when sanitizers are enabled, so
1853          * let's make sure we allow RCU processing.
1854                  */
1855                 cond_resched();
1856                 ret = run_tracer_selftest(p->type);
1857                 /* If the test fails, then warn and remove from available_tracers */
1858                 if (ret < 0) {
1859                         WARN(1, "tracer: %s failed selftest, disabling\n",
1860                              p->type->name);
1861                         last = &trace_types;
1862                         for (t = trace_types; t; t = t->next) {
1863                                 if (t == p->type) {
1864                                         *last = t->next;
1865                                         break;
1866                                 }
1867                                 last = &t->next;
1868                         }
1869                 }
1870                 list_del(&p->list);
1871                 kfree(p);
1872         }
1873         tracing_selftest_running = false;
1874
1875  out:
1876         mutex_unlock(&trace_types_lock);
1877
1878         return 0;
1879 }
1880 core_initcall(init_trace_selftests);
1881 #else
1882 static inline int run_tracer_selftest(struct tracer *type)
1883 {
1884         return 0;
1885 }
1886 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1887
1888 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1889
1890 static void __init apply_trace_boot_options(void);
1891
1892 /**
1893  * register_tracer - register a tracer with the ftrace system.
1894  * @type: the plugin for the tracer
1895  *
1896  * Register a new plugin tracer.
1897  */
1898 int __init register_tracer(struct tracer *type)
1899 {
1900         struct tracer *t;
1901         int ret = 0;
1902
1903         if (!type->name) {
1904                 pr_info("Tracer must have a name\n");
1905                 return -1;
1906         }
1907
1908         if (strlen(type->name) >= MAX_TRACER_SIZE) {
1909                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1910                 return -1;
1911         }
1912
1913         if (security_locked_down(LOCKDOWN_TRACEFS)) {
1914                 pr_warn("Can not register tracer %s due to lockdown\n",
1915                            type->name);
1916                 return -EPERM;
1917         }
1918
1919         mutex_lock(&trace_types_lock);
1920
1921         tracing_selftest_running = true;
1922
1923         for (t = trace_types; t; t = t->next) {
1924                 if (strcmp(type->name, t->name) == 0) {
1925                         /* already found */
1926                         pr_info("Tracer %s already registered\n",
1927                                 type->name);
1928                         ret = -1;
1929                         goto out;
1930                 }
1931         }
1932
1933         if (!type->set_flag)
1934                 type->set_flag = &dummy_set_flag;
1935         if (!type->flags) {
1936                 /* allocate a dummy tracer_flags */
1937                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1938                 if (!type->flags) {
1939                         ret = -ENOMEM;
1940                         goto out;
1941                 }
1942                 type->flags->val = 0;
1943                 type->flags->opts = dummy_tracer_opt;
1944         } else
1945                 if (!type->flags->opts)
1946                         type->flags->opts = dummy_tracer_opt;
1947
1948         /* store the tracer for __set_tracer_option */
1949         type->flags->trace = type;
1950
1951         ret = run_tracer_selftest(type);
1952         if (ret < 0)
1953                 goto out;
1954
1955         type->next = trace_types;
1956         trace_types = type;
1957         add_tracer_options(&global_trace, type);
1958
1959  out:
1960         tracing_selftest_running = false;
1961         mutex_unlock(&trace_types_lock);
1962
1963         if (ret || !default_bootup_tracer)
1964                 goto out_unlock;
1965
1966         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1967                 goto out_unlock;
1968
1969         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1970         /* Do we want this tracer to start on bootup? */
1971         tracing_set_tracer(&global_trace, type->name);
1972         default_bootup_tracer = NULL;
1973
1974         apply_trace_boot_options();
1975
1976         /* Disable other selftests; they cannot run while this tracer is active. */
1977         tracing_selftest_disabled = true;
1978 #ifdef CONFIG_FTRACE_STARTUP_TEST
1979         printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1980                type->name);
1981 #endif
1982
1983  out_unlock:
1984         return ret;
1985 }
1986
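/*
 * Clear a single per-CPU ring buffer of @buf. Recording is disabled and
 * all in-flight commits are allowed to finish before the reset, then
 * recording is re-enabled.
 */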
1987 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
1988 {
1989         struct trace_buffer *buffer = buf->buffer;
1990
1991         if (!buffer)
1992                 return;
1993
1994         ring_buffer_record_disable(buffer);
1995
1996         /* Make sure all commits have finished */
1997         synchronize_rcu();
1998         ring_buffer_reset_cpu(buffer, cpu);
1999
2000         ring_buffer_record_enable(buffer);
2001 }
2002
2003 void tracing_reset_online_cpus(struct array_buffer *buf)
2004 {
2005         struct trace_buffer *buffer = buf->buffer;
2006         int cpu;
2007
2008         if (!buffer)
2009                 return;
2010
2011         ring_buffer_record_disable(buffer);
2012
2013         /* Make sure all commits have finished */
2014         synchronize_rcu();
2015
2016         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2017
2018         for_each_online_cpu(cpu)
2019                 ring_buffer_reset_cpu(buffer, cpu);
2020
2021         ring_buffer_record_enable(buffer);
2022 }
2023
2024 /* Must have trace_types_lock held */
2025 void tracing_reset_all_online_cpus(void)
2026 {
2027         struct trace_array *tr;
2028
2029         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2030                 if (!tr->clear_trace)
2031                         continue;
2032                 tr->clear_trace = false;
2033                 tracing_reset_online_cpus(&tr->array_buffer);
2034 #ifdef CONFIG_TRACER_MAX_TRACE
2035                 tracing_reset_online_cpus(&tr->max_buffer);
2036 #endif
2037         }
2038 }
2039
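/*
 * Maps a PID (up to PID_MAX_DEFAULT) to its thread group id so trace
 * output can show TGIDs. Entries are filled by trace_save_tgid() once
 * the map has been allocated.
 */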
2040 static int *tgid_map;
2041
2042 #define SAVED_CMDLINES_DEFAULT 128
2043 #define NO_CMDLINE_MAP UINT_MAX
2044 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
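/*
 * map_pid_to_cmdline maps a PID to a slot in saved_cmdlines, and
 * map_cmdline_to_pid is the reverse mapping so that a recycled slot can
 * have its stale PID entry invalidated (see trace_save_cmdline()).
 */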
2045 struct saved_cmdlines_buffer {
2046         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2047         unsigned *map_cmdline_to_pid;
2048         unsigned cmdline_num;
2049         int cmdline_idx;
2050         char *saved_cmdlines;
2051 };
2052 static struct saved_cmdlines_buffer *savedcmd;
2053
2054 /* temporarily disable recording */
2055 static atomic_t trace_record_taskinfo_disabled __read_mostly;
2056
2057 static inline char *get_saved_cmdlines(int idx)
2058 {
2059         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2060 }
2061
2062 static inline void set_cmdline(int idx, const char *cmdline)
2063 {
2064         strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2065 }
2066
2067 static int allocate_cmdlines_buffer(unsigned int val,
2068                                     struct saved_cmdlines_buffer *s)
2069 {
2070         s->map_cmdline_to_pid = kmalloc_array(val,
2071                                               sizeof(*s->map_cmdline_to_pid),
2072                                               GFP_KERNEL);
2073         if (!s->map_cmdline_to_pid)
2074                 return -ENOMEM;
2075
2076         s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2077         if (!s->saved_cmdlines) {
2078                 kfree(s->map_cmdline_to_pid);
2079                 return -ENOMEM;
2080         }
2081
2082         s->cmdline_idx = 0;
2083         s->cmdline_num = val;
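        /*
         * NO_CMDLINE_MAP is UINT_MAX, so filling every byte with 0xff via
         * memset() marks each entry of both maps as unmapped.
         */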
2084         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2085                sizeof(s->map_pid_to_cmdline));
2086         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2087                val * sizeof(*s->map_cmdline_to_pid));
2088
2089         return 0;
2090 }
2091
2092 static int trace_create_savedcmd(void)
2093 {
2094         int ret;
2095
2096         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2097         if (!savedcmd)
2098                 return -ENOMEM;
2099
2100         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2101         if (ret < 0) {
2102                 kfree(savedcmd);
2103                 savedcmd = NULL;
2104                 return -ENOMEM;
2105         }
2106
2107         return 0;
2108 }
2109
2110 int is_tracing_stopped(void)
2111 {
2112         return global_trace.stop_count;
2113 }
2114
2115 /**
2116  * tracing_start - quick start of the tracer
2117  *
2118  * If tracing is enabled but was stopped by tracing_stop,
2119  * this will start the tracer back up.
2120  */
2121 void tracing_start(void)
2122 {
2123         struct trace_buffer *buffer;
2124         unsigned long flags;
2125
2126         if (tracing_disabled)
2127                 return;
2128
2129         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2130         if (--global_trace.stop_count) {
2131                 if (global_trace.stop_count < 0) {
2132                         /* Someone screwed up their debugging */
2133                         WARN_ON_ONCE(1);
2134                         global_trace.stop_count = 0;
2135                 }
2136                 goto out;
2137         }
2138
2139         /* Prevent the buffers from switching */
2140         arch_spin_lock(&global_trace.max_lock);
2141
2142         buffer = global_trace.array_buffer.buffer;
2143         if (buffer)
2144                 ring_buffer_record_enable(buffer);
2145
2146 #ifdef CONFIG_TRACER_MAX_TRACE
2147         buffer = global_trace.max_buffer.buffer;
2148         if (buffer)
2149                 ring_buffer_record_enable(buffer);
2150 #endif
2151
2152         arch_spin_unlock(&global_trace.max_lock);
2153
2154  out:
2155         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2156 }
2157
2158 static void tracing_start_tr(struct trace_array *tr)
2159 {
2160         struct trace_buffer *buffer;
2161         unsigned long flags;
2162
2163         if (tracing_disabled)
2164                 return;
2165
2166         /* If global, we need to also start the max tracer */
2167         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2168                 return tracing_start();
2169
2170         raw_spin_lock_irqsave(&tr->start_lock, flags);
2171
2172         if (--tr->stop_count) {
2173                 if (tr->stop_count < 0) {
2174                         /* Someone screwed up their debugging */
2175                         WARN_ON_ONCE(1);
2176                         tr->stop_count = 0;
2177                 }
2178                 goto out;
2179         }
2180
2181         buffer = tr->array_buffer.buffer;
2182         if (buffer)
2183                 ring_buffer_record_enable(buffer);
2184
2185  out:
2186         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2187 }
2188
2189 /**
2190  * tracing_stop - quick stop of the tracer
2191  *
2192  * Light weight way to stop tracing. Use in conjunction with
2193  * tracing_start.
2194  */
2195 void tracing_stop(void)
2196 {
2197         struct trace_buffer *buffer;
2198         unsigned long flags;
2199
2200         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2201         if (global_trace.stop_count++)
2202                 goto out;
2203
2204         /* Prevent the buffers from switching */
2205         arch_spin_lock(&global_trace.max_lock);
2206
2207         buffer = global_trace.array_buffer.buffer;
2208         if (buffer)
2209                 ring_buffer_record_disable(buffer);
2210
2211 #ifdef CONFIG_TRACER_MAX_TRACE
2212         buffer = global_trace.max_buffer.buffer;
2213         if (buffer)
2214                 ring_buffer_record_disable(buffer);
2215 #endif
2216
2217         arch_spin_unlock(&global_trace.max_lock);
2218
2219  out:
2220         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2221 }
2222
2223 static void tracing_stop_tr(struct trace_array *tr)
2224 {
2225         struct trace_buffer *buffer;
2226         unsigned long flags;
2227
2228         /* If global, we need to also stop the max tracer */
2229         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2230                 return tracing_stop();
2231
2232         raw_spin_lock_irqsave(&tr->start_lock, flags);
2233         if (tr->stop_count++)
2234                 goto out;
2235
2236         buffer = tr->array_buffer.buffer;
2237         if (buffer)
2238                 ring_buffer_record_disable(buffer);
2239
2240  out:
2241         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2242 }
2243
2244 static int trace_save_cmdline(struct task_struct *tsk)
2245 {
2246         unsigned pid, idx;
2247
2248         /* treat recording of idle task as a success */
2249         if (!tsk->pid)
2250                 return 1;
2251
2252         if (unlikely(tsk->pid > PID_MAX_DEFAULT))
2253                 return 0;
2254
2255         /*
2256          * It's not the end of the world if we don't get
2257          * the lock, but we also don't want to spin
2258          * nor do we want to disable interrupts,
2259          * so if we miss here, then better luck next time.
2260          */
2261         if (!arch_spin_trylock(&trace_cmdline_lock))
2262                 return 0;
2263
2264         idx = savedcmd->map_pid_to_cmdline[tsk->pid];
2265         if (idx == NO_CMDLINE_MAP) {
2266                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2267
2268                 /*
2269                  * Check whether the cmdline buffer at idx has a pid
2270                  * mapped. We are going to overwrite that entry so we
2271                  * need to clear the map_pid_to_cmdline. Otherwise we
2272                  * would read the new comm for the old pid.
2273                  */
2274                 pid = savedcmd->map_cmdline_to_pid[idx];
2275                 if (pid != NO_CMDLINE_MAP)
2276                         savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
2277
2278                 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2279                 savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
2280
2281                 savedcmd->cmdline_idx = idx;
2282         }
2283
2284         set_cmdline(idx, tsk->comm);
2285
2286         arch_spin_unlock(&trace_cmdline_lock);
2287
2288         return 1;
2289 }
2290
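/*
 * Look up the saved comm for @pid. Callers serialize against updates by
 * holding trace_cmdline_lock; trace_find_cmdline() below is the locking
 * wrapper.
 */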
2291 static void __trace_find_cmdline(int pid, char comm[])
2292 {
2293         unsigned map;
2294
2295         if (!pid) {
2296                 strcpy(comm, "<idle>");
2297                 return;
2298         }
2299
2300         if (WARN_ON_ONCE(pid < 0)) {
2301                 strcpy(comm, "<XXX>");
2302                 return;
2303         }
2304
2305         if (pid > PID_MAX_DEFAULT) {
2306                 strcpy(comm, "<...>");
2307                 return;
2308         }
2309
2310         map = savedcmd->map_pid_to_cmdline[pid];
2311         if (map != NO_CMDLINE_MAP)
2312                 strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2313         else
2314                 strcpy(comm, "<...>");
2315 }
2316
2317 void trace_find_cmdline(int pid, char comm[])
2318 {
2319         preempt_disable();
2320         arch_spin_lock(&trace_cmdline_lock);
2321
2322         __trace_find_cmdline(pid, comm);
2323
2324         arch_spin_unlock(&trace_cmdline_lock);
2325         preempt_enable();
2326 }
2327
2328 int trace_find_tgid(int pid)
2329 {
2330         if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
2331                 return 0;
2332
2333         return tgid_map[pid];
2334 }
2335
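/*
 * Record the tgid of @tsk in tgid_map. Returns 0 (so the caller retries
 * on a later event) if the map is not allocated or the PID is out of
 * range; recording the idle task is treated as a success.
 */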
2336 static int trace_save_tgid(struct task_struct *tsk)
2337 {
2338         /* treat recording of idle task as a success */
2339         if (!tsk->pid)
2340                 return 1;
2341
2342         if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT))
2343                 return 0;
2344
2345         tgid_map[tsk->pid] = tsk->tgid;
2346         return 1;
2347 }
2348
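/*
 * Return true if there is nothing to record for this event: the caller
 * did not ask for cmdline or tgid recording, recording is currently
 * disabled, or no event on this CPU has requested a new save since the
 * last one.
 */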
2349 static bool tracing_record_taskinfo_skip(int flags)
2350 {
2351         if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2352                 return true;
2353         if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
2354                 return true;
2355         if (!__this_cpu_read(trace_taskinfo_save))
2356                 return true;
2357         return false;
2358 }
2359
2360 /**
2361  * tracing_record_taskinfo - record the task info of a task
2362  *
2363  * @task:  task to record
2364  * @flags: TRACE_RECORD_CMDLINE for recording comm
2365  *         TRACE_RECORD_TGID for recording tgid
2366  */
2367 void tracing_record_taskinfo(struct task_struct *task, int flags)
2368 {
2369         bool done;
2370
2371         if (tracing_record_taskinfo_skip(flags))
2372                 return;
2373
2374         /*
2375          * Record as much task information as possible. If some fail, continue
2376          * to try to record the others.
2377          */
2378         done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2379         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2380
2381         /* If recording any information failed, retry again soon. */
2382         if (!done)
2383                 return;
2384
2385         __this_cpu_write(trace_taskinfo_save, false);
2386 }
2387
2388 /**
2389  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2390  *
2391  * @prev: previous task during sched_switch
2392  * @next: next task during sched_switch
2393  * @flags: TRACE_RECORD_CMDLINE for recording comm
2394  *         TRACE_RECORD_TGID for recording tgid
2395  */
2396 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2397                                           struct task_struct *next, int flags)
2398 {
2399         bool done;
2400
2401         if (tracing_record_taskinfo_skip(flags))
2402                 return;
2403
2404         /*
2405          * Record as much task information as possible. If some fail, continue
2406          * to try to record the others.
2407          */
2408         done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2409         done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2410         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2411         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2412
2413         /* If recording any information failed, retry again soon. */
2414         if (!done)
2415                 return;
2416
2417         __this_cpu_write(trace_taskinfo_save, false);
2418 }
2419
2420 /* Helpers to record a specific task information */
2421 void tracing_record_cmdline(struct task_struct *task)
2422 {
2423         tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2424 }
2425
2426 void tracing_record_tgid(struct task_struct *task)
2427 {
2428         tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2429 }
2430
2431 /*
2432  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2433  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2434  * simplifies those functions and keeps them in sync.
2435  */
2436 enum print_line_t trace_handle_return(struct trace_seq *s)
2437 {
2438         return trace_seq_has_overflowed(s) ?
2439                 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2440 }
2441 EXPORT_SYMBOL_GPL(trace_handle_return);
2442
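/*
 * Fill in the fields common to every trace entry: the recording task's
 * pid, the entry type, and the flag bits derived from the irq and
 * preemption state captured in @flags and @pc.
 */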
2443 void
2444 tracing_generic_entry_update(struct trace_entry *entry, unsigned short type,
2445                              unsigned long flags, int pc)
2446 {
2447         struct task_struct *tsk = current;
2448
2449         entry->preempt_count            = pc & 0xff;
2450         entry->pid                      = (tsk) ? tsk->pid : 0;
2451         entry->type                     = type;
2452         entry->flags =
2453 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
2454                 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
2455 #else
2456                 TRACE_FLAG_IRQS_NOSUPPORT |
2457 #endif
2458                 ((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
2459                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
2460                 ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
2461                 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
2462                 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
2463 }
2464 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
2465
2466 struct ring_buffer_event *
2467 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2468                           int type,
2469                           unsigned long len,
2470                           unsigned long flags, int pc)
2471 {
2472         return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
2473 }
2474
2475 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2476 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2477 static int trace_buffered_event_ref;
2478
2479 /**
2480  * trace_buffered_event_enable - enable buffering events
2481  *
2482  * When events are being filtered, it is quicker to use a temporary
2483  * buffer to write the event data into if there's a likely chance
2484  * that it will not be committed. The discard of the ring buffer
2485  * that it will not be committed. Discarding a reserved ring buffer
2486  * event is not as fast as committing it, and is much slower than
2487  * copying the data in from a temporary buffer at commit time.
2488  * When an event is to be filtered, allocate per cpu buffers to
2489  * write the event data into, and if the event is filtered and discarded
2490  * it is simply dropped, otherwise, the entire data is to be committed
2491  * in one shot.
2492  */
2493 void trace_buffered_event_enable(void)
2494 {
2495         struct ring_buffer_event *event;
2496         struct page *page;
2497         int cpu;
2498
2499         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2500
2501         if (trace_buffered_event_ref++)
2502                 return;
2503
2504         for_each_tracing_cpu(cpu) {
2505                 page = alloc_pages_node(cpu_to_node(cpu),
2506                                         GFP_KERNEL | __GFP_NORETRY, 0);
2507                 if (!page)
2508                         goto failed;
2509
2510                 event = page_address(page);
2511                 memset(event, 0, sizeof(*event));
2512
2513                 per_cpu(trace_buffered_event, cpu) = event;
2514
2515                 preempt_disable();
2516                 if (cpu == smp_processor_id() &&
2517                     this_cpu_read(trace_buffered_event) !=
2518                     per_cpu(trace_buffered_event, cpu))
2519                         WARN_ON_ONCE(1);
2520                 preempt_enable();
2521         }
2522
2523         return;
2524  failed:
2525         trace_buffered_event_disable();
2526 }
2527
2528 static void enable_trace_buffered_event(void *data)
2529 {
2530         /* Probably not needed, but do it anyway */
2531         smp_rmb();
2532         this_cpu_dec(trace_buffered_event_cnt);
2533 }
2534
2535 static void disable_trace_buffered_event(void *data)
2536 {
2537         this_cpu_inc(trace_buffered_event_cnt);
2538 }
2539
2540 /**
2541  * trace_buffered_event_disable - disable buffering events
2542  *
2543  * When a filter is removed, it is faster to not use the buffered
2544  * events, and to commit directly into the ring buffer. Free up
2545  * the temp buffers when there are no more users. This requires
2546  * special synchronization with current events.
2547  */
2548 void trace_buffered_event_disable(void)
2549 {
2550         int cpu;
2551
2552         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2553
2554         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2555                 return;
2556
2557         if (--trace_buffered_event_ref)
2558                 return;
2559
2560         preempt_disable();
2561         /* For each CPU, set the buffer as used. */
2562         smp_call_function_many(tracing_buffer_mask,
2563                                disable_trace_buffered_event, NULL, 1);
2564         preempt_enable();
2565
2566         /* Wait for all current users to finish */
2567         synchronize_rcu();
2568
2569         for_each_tracing_cpu(cpu) {
2570                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2571                 per_cpu(trace_buffered_event, cpu) = NULL;
2572         }
2573         /*
2574          * Make sure trace_buffered_event is NULL before clearing
2575          * trace_buffered_event_cnt.
2576          */
2577         smp_wmb();
2578
2579         preempt_disable();
2580         /* Do the work on each cpu */
2581         smp_call_function_many(tracing_buffer_mask,
2582                                enable_trace_buffered_event, NULL, 1);
2583         preempt_enable();
2584 }
2585
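/*
 * temp_buffer catches events when the instance buffer could not reserve
 * space (e.g. tracing is off) but the event has conditional triggers that
 * still need to inspect the data; see trace_event_buffer_lock_reserve().
 */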
2586 static struct trace_buffer *temp_buffer;
2587
2588 struct ring_buffer_event *
2589 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2590                           struct trace_event_file *trace_file,
2591                           int type, unsigned long len,
2592                           unsigned long flags, int pc)
2593 {
2594         struct ring_buffer_event *entry;
2595         int val;
2596
2597         *current_rb = trace_file->tr->array_buffer.buffer;
2598
2599         if (!ring_buffer_time_stamp_abs(*current_rb) && (trace_file->flags &
2600              (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2601             (entry = this_cpu_read(trace_buffered_event))) {
2602                 /* Try to use the per cpu buffer first */
2603                 val = this_cpu_inc_return(trace_buffered_event_cnt);
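                /*
                 * val == 1 means we are the sole, non-nested user of this
                 * CPU's buffered event page; nested callers fall through
                 * to a normal ring buffer reservation below.
                 */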
2604                 if (val == 1) {
2605                         trace_event_setup(entry, type, flags, pc);
2606                         entry->array[0] = len;
2607                         return entry;
2608                 }
2609                 this_cpu_dec(trace_buffered_event_cnt);
2610         }
2611
2612         entry = __trace_buffer_lock_reserve(*current_rb,
2613                                             type, len, flags, pc);
2614         /*
2615          * If tracing is off, but we have triggers enabled,
2616          * we still need to look at the event data. Use the temp_buffer
2617          * to store the trace event for the trigger to use. It's recursion
2618          * safe and will not be recorded anywhere.
2619          */
2620         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2621                 *current_rb = temp_buffer;
2622                 entry = __trace_buffer_lock_reserve(*current_rb,
2623                                                     type, len, flags, pc);
2624         }
2625         return entry;
2626 }
2627 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2628
2629 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2630 static DEFINE_MUTEX(tracepoint_printk_mutex);
2631
2632 static void output_printk(struct trace_event_buffer *fbuffer)
2633 {
2634         struct trace_event_call *event_call;
2635         struct trace_event_file *file;
2636         struct trace_event *event;
2637         unsigned long flags;
2638         struct trace_iterator *iter = tracepoint_print_iter;
2639
2640         /* We should never get here if iter is NULL */
2641         if (WARN_ON_ONCE(!iter))
2642                 return;
2643
2644         event_call = fbuffer->trace_file->event_call;
2645         if (!event_call || !event_call->event.funcs ||
2646             !event_call->event.funcs->trace)
2647                 return;
2648
2649         file = fbuffer->trace_file;
2650         if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2651             (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2652              !filter_match_preds(file->filter, fbuffer->entry)))
2653                 return;
2654
2655         event = &fbuffer->trace_file->event_call->event;
2656
2657         spin_lock_irqsave(&tracepoint_iter_lock, flags);
2658         trace_seq_init(&iter->seq);
2659         iter->ent = fbuffer->entry;
2660         event_call->event.funcs->trace(iter, 0, event);
2661         trace_seq_putc(&iter->seq, 0);
2662         printk("%s", iter->seq.buffer);
2663
2664         spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2665 }
2666
2667 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2668                              void *buffer, size_t *lenp,
2669                              loff_t *ppos)
2670 {
2671         int save_tracepoint_printk;
2672         int ret;
2673
2674         mutex_lock(&tracepoint_printk_mutex);
2675         save_tracepoint_printk = tracepoint_printk;
2676
2677         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2678
2679         /*
2680          * This will force exiting early, as tracepoint_printk
2681          * is always zero when tracepoint_print_iter is not allocated.
2682          */
2683         if (!tracepoint_print_iter)
2684                 tracepoint_printk = 0;
2685
2686         if (save_tracepoint_printk == tracepoint_printk)
2687                 goto out;
2688
2689         if (tracepoint_printk)
2690                 static_key_enable(&tracepoint_printk_key.key);
2691         else
2692                 static_key_disable(&tracepoint_printk_key.key);
2693
2694  out:
2695         mutex_unlock(&tracepoint_printk_mutex);
2696
2697         return ret;
2698 }
2699
2700 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2701 {
2702         if (static_key_false(&tracepoint_printk_key.key))
2703                 output_printk(fbuffer);
2704
2705         event_trigger_unlock_commit_regs(fbuffer->trace_file, fbuffer->buffer,
2706                                     fbuffer->event, fbuffer->entry,
2707                                     fbuffer->flags, fbuffer->pc, fbuffer->regs);
2708 }
2709 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2710
2711 /*
2712  * Skip 3:
2713  *
2714  *   trace_buffer_unlock_commit_regs()
2715  *   trace_event_buffer_commit()
2716  *   trace_event_raw_event_xxx()
2717  */
2718 # define STACK_SKIP 3
2719
2720 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2721                                      struct trace_buffer *buffer,
2722                                      struct ring_buffer_event *event,
2723                                      unsigned long flags, int pc,
2724                                      struct pt_regs *regs)
2725 {
2726         __buffer_unlock_commit(buffer, event);
2727
2728         /*
2729          * If regs is not set, then skip the necessary functions.
2730          * Note, we can still get here via blktrace, wakeup tracer
2731          * and mmiotrace, but that's ok if they lose a function or
2732          * two. They are not that meaningful.
2733          */
2734         ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs);
2735         ftrace_trace_userstack(buffer, flags, pc);
2736 }
2737
2738 /*
2739  * Similar to trace_buffer_unlock_commit_regs() but does not dump the stack.
2740  */
2741 void
2742 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2743                                    struct ring_buffer_event *event)
2744 {
2745         __buffer_unlock_commit(buffer, event);
2746 }
2747
2748 static void
2749 trace_process_export(struct trace_export *export,
2750                struct ring_buffer_event *event)
2751 {
2752         struct trace_entry *entry;
2753         unsigned int size = 0;
2754
2755         entry = ring_buffer_event_data(event);
2756         size = ring_buffer_event_length(event);
2757         export->write(export, entry, size);
2758 }
2759
2760 static DEFINE_MUTEX(ftrace_export_lock);
2761
2762 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
2763
2764 static DEFINE_STATIC_KEY_FALSE(ftrace_exports_enabled);
2765
2766 static inline void ftrace_exports_enable(void)
2767 {
2768         static_branch_enable(&ftrace_exports_enabled);
2769 }
2770
2771 static inline void ftrace_exports_disable(void)
2772 {
2773         static_branch_disable(&ftrace_exports_enabled);
2774 }
2775
2776 static void ftrace_exports(struct ring_buffer_event *event)
2777 {
2778         struct trace_export *export;
2779
2780         preempt_disable_notrace();
2781
2782         export = rcu_dereference_raw_check(ftrace_exports_list);
2783         while (export) {
2784                 trace_process_export(export, event);
2785                 export = rcu_dereference_raw_check(export->next);
2786         }
2787
2788         preempt_enable_notrace();
2789 }
2790
2791 static inline void
2792 add_trace_export(struct trace_export **list, struct trace_export *export)
2793 {
2794         rcu_assign_pointer(export->next, *list);
2795         /*
2796          * We are entering export into the list but another
2797          * CPU might be walking that list. We need to make sure
2798          * the export->next pointer is valid before another CPU sees
2799          * the export pointer included into the list.
2800          */
2801         rcu_assign_pointer(*list, export);
2802 }
2803
2804 static inline int
2805 rm_trace_export(struct trace_export **list, struct trace_export *export)
2806 {
2807         struct trace_export **p;
2808
2809         for (p = list; *p != NULL; p = &(*p)->next)
2810                 if (*p == export)
2811                         break;
2812
2813         if (*p != export)
2814                 return -1;
2815
2816         rcu_assign_pointer(*p, (*p)->next);
2817
2818         return 0;
2819 }
2820
2821 static inline void
2822 add_ftrace_export(struct trace_export **list, struct trace_export *export)
2823 {
2824         if (*list == NULL)
2825                 ftrace_exports_enable();
2826
2827         add_trace_export(list, export);
2828 }
2829
2830 static inline int
2831 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
2832 {
2833         int ret;
2834
2835         ret = rm_trace_export(list, export);
2836         if (*list == NULL)
2837                 ftrace_exports_disable();
2838
2839         return ret;
2840 }
2841
2842 int register_ftrace_export(struct trace_export *export)
2843 {
2844         if (WARN_ON_ONCE(!export->write))
2845                 return -1;
2846
2847         mutex_lock(&ftrace_export_lock);
2848
2849         add_ftrace_export(&ftrace_exports_list, export);
2850
2851         mutex_unlock(&ftrace_export_lock);
2852
2853         return 0;
2854 }
2855 EXPORT_SYMBOL_GPL(register_ftrace_export);
2856
2857 int unregister_ftrace_export(struct trace_export *export)
2858 {
2859         int ret;
2860
2861         mutex_lock(&ftrace_export_lock);
2862
2863         ret = rm_ftrace_export(&ftrace_exports_list, export);
2864
2865         mutex_unlock(&ftrace_export_lock);
2866
2867         return ret;
2868 }
2869 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
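/*
 * Illustrative sketch (not part of this file): a module that wants raw
 * trace events pushed to its own sink could register an export. The
 * callback below mirrors how trace_process_export() invokes
 * export->write(export, entry, size); the names are hypothetical and the
 * exact prototype lives in include/linux/trace.h.
 *
 *	static void my_export_write(struct trace_export *export,
 *				    const void *entry, unsigned int size)
 *	{
 *		// forward the raw trace_entry bytes to some transport
 *	}
 *
 *	static struct trace_export my_export = {
 *		.write	= my_export_write,
 *	};
 *
 *	register_ftrace_export(&my_export);
 *	...
 *	unregister_ftrace_export(&my_export);
 */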
2870
2871 void
2872 trace_function(struct trace_array *tr,
2873                unsigned long ip, unsigned long parent_ip, unsigned long flags,
2874                int pc)
2875 {
2876         struct trace_event_call *call = &event_function;
2877         struct trace_buffer *buffer = tr->array_buffer.buffer;
2878         struct ring_buffer_event *event;
2879         struct ftrace_entry *entry;
2880
2881         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2882                                             flags, pc);
2883         if (!event)
2884                 return;
2885         entry   = ring_buffer_event_data(event);
2886         entry->ip                       = ip;
2887         entry->parent_ip                = parent_ip;
2888
2889         if (!call_filter_check_discard(call, entry, buffer, event)) {
2890                 if (static_branch_unlikely(&ftrace_exports_enabled))
2891                         ftrace_exports(event);
2892                 __buffer_unlock_commit(buffer, event);
2893         }
2894 }
2895
2896 #ifdef CONFIG_STACKTRACE
2897
2898 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2899 #define FTRACE_KSTACK_NESTING   4
2900
2901 #define FTRACE_KSTACK_ENTRIES   (PAGE_SIZE / FTRACE_KSTACK_NESTING)
2902
2903 struct ftrace_stack {
2904         unsigned long           calls[FTRACE_KSTACK_ENTRIES];
2905 };
2906
2907
2908 struct ftrace_stacks {
2909         struct ftrace_stack     stacks[FTRACE_KSTACK_NESTING];
2910 };
2911
2912 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
2913 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2914
2915 static void __ftrace_trace_stack(struct trace_buffer *buffer,
2916                                  unsigned long flags,
2917                                  int skip, int pc, struct pt_regs *regs)
2918 {
2919         struct trace_event_call *call = &event_kernel_stack;
2920         struct ring_buffer_event *event;
2921         unsigned int size, nr_entries;
2922         struct ftrace_stack *fstack;
2923         struct stack_entry *entry;
2924         int stackidx;
2925
2926         /*
2927          * Add one, for this function and the call to stack_trace_save().
2928          * If regs is set, then these functions will not be in the way.
2929          */
2930 #ifndef CONFIG_UNWINDER_ORC
2931         if (!regs)
2932                 skip++;
2933 #endif
2934
2935         /*
2936          * Since events can happen in NMIs there's no safe way to
2937          * share a single per-CPU stack buffer. Instead, a slot of
2938          * ftrace_stacks is reserved per nesting level, so an interrupt
2939          * or NMI that comes in simply uses the next slot.
2940          */
2941         preempt_disable_notrace();
2942
2943         stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
2944
2945         /* This should never happen. If it does, yell once and skip */
2946         if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
2947                 goto out;
2948
2949         /*
2950          * The above __this_cpu_inc_return() is 'atomic' cpu local. An
2951          * interrupt will either see the value pre increment or post
2952          * increment. If the interrupt happens pre increment it will have
2953          * restored the counter when it returns.  We just need a barrier to
2954          * keep gcc from moving things around.
2955          */
2956         barrier();
2957
2958         fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
2959         size = ARRAY_SIZE(fstack->calls);
2960
2961         if (regs) {
2962                 nr_entries = stack_trace_save_regs(regs, fstack->calls,
2963                                                    size, skip);
2964         } else {
2965                 nr_entries = stack_trace_save(fstack->calls, size, skip);
2966         }
2967
2968         size = nr_entries * sizeof(unsigned long);
2969         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2970                                             sizeof(*entry) + size, flags, pc);
2971         if (!event)
2972                 goto out;
2973         entry = ring_buffer_event_data(event);
2974
2975         memcpy(&entry->caller, fstack->calls, size);
2976         entry->size = nr_entries;
2977
2978         if (!call_filter_check_discard(call, entry, buffer, event))
2979                 __buffer_unlock_commit(buffer, event);
2980
2981  out:
2982         /* Again, don't let gcc optimize things here */
2983         barrier();
2984         __this_cpu_dec(ftrace_stack_reserve);
2985         preempt_enable_notrace();
2986
2987 }
2988
2989 static inline void ftrace_trace_stack(struct trace_array *tr,
2990                                       struct trace_buffer *buffer,
2991                                       unsigned long flags,
2992                                       int skip, int pc, struct pt_regs *regs)
2993 {
2994         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2995                 return;
2996
2997         __ftrace_trace_stack(buffer, flags, skip, pc, regs);
2998 }
2999
3000 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
3001                    int pc)
3002 {
3003         struct trace_buffer *buffer = tr->array_buffer.buffer;
3004
3005         if (rcu_is_watching()) {
3006                 __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
3007                 return;
3008         }
3009
3010         /*
3011          * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
3012          * but if the above rcu_is_watching() failed, then the NMI
3013          * triggered someplace critical, and rcu_irq_enter() should
3014          * not be called from NMI.
3015          */
3016         if (unlikely(in_nmi()))
3017                 return;
3018
3019         rcu_irq_enter_irqson();
3020         __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
3021         rcu_irq_exit_irqson();
3022 }
3023
3024 /**
3025  * trace_dump_stack - record a stack back trace in the trace buffer
3026  * @skip: Number of functions to skip (helper handlers)
3027  */
3028 void trace_dump_stack(int skip)
3029 {
3030         unsigned long flags;
3031
3032         if (tracing_disabled || tracing_selftest_running)
3033                 return;
3034
3035         local_save_flags(flags);
3036
3037 #ifndef CONFIG_UNWINDER_ORC
3038         /* Skip 1 to skip this function. */
3039         skip++;
3040 #endif
3041         __ftrace_trace_stack(global_trace.array_buffer.buffer,
3042                              flags, skip, preempt_count(), NULL);
3043 }
3044 EXPORT_SYMBOL_GPL(trace_dump_stack);
3045
3046 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3047 static DEFINE_PER_CPU(int, user_stack_count);
3048
3049 static void
3050 ftrace_trace_userstack(struct trace_buffer *buffer, unsigned long flags, int pc)
3051 {
3052         struct trace_event_call *call = &event_user_stack;
3053         struct ring_buffer_event *event;
3054         struct userstack_entry *entry;
3055
3056         if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
3057                 return;
3058
3059         /*
3060          * NMIs cannot handle page faults, even with fixups.
3061          * Saving the user stack can (and often does) fault.
3062          */
3063         if (unlikely(in_nmi()))
3064                 return;
3065
3066         /*
3067          * prevent recursion, since the user stack tracing may
3068          * trigger other kernel events.
3069          */
3070         preempt_disable();
3071         if (__this_cpu_read(user_stack_count))
3072                 goto out;
3073
3074         __this_cpu_inc(user_stack_count);
3075
3076         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3077                                             sizeof(*entry), flags, pc);
3078         if (!event)
3079                 goto out_drop_count;
3080         entry   = ring_buffer_event_data(event);
3081
3082         entry->tgid             = current->tgid;
3083         memset(&entry->caller, 0, sizeof(entry->caller));
3084
3085         stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3086         if (!call_filter_check_discard(call, entry, buffer, event))
3087                 __buffer_unlock_commit(buffer, event);
3088
3089  out_drop_count:
3090         __this_cpu_dec(user_stack_count);
3091  out:
3092         preempt_enable();
3093 }
3094 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3095 static void ftrace_trace_userstack(struct trace_buffer *buffer,
3096                                    unsigned long flags, int pc)
3097 {
3098 }
3099 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3100
3101 #endif /* CONFIG_STACKTRACE */
3102
3103 /* created for use with alloc_percpu */
3104 struct trace_buffer_struct {
3105         int nesting;
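        /* four buffers: one per context that can nest (task, softirq, irq, NMI) */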
3106         char buffer[4][TRACE_BUF_SIZE];
3107 };
3108
3109 static struct trace_buffer_struct *trace_percpu_buffer;
3110
3111 /*
3112  * This allows for lockless recording.  If we're nested too deeply, then
3113  * this returns NULL.
3114  */
3115 static char *get_trace_buf(void)
3116 {
3117         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3118
3119         if (!buffer || buffer->nesting >= 4)
3120                 return NULL;
3121
3122         buffer->nesting++;
3123
3124         /* Interrupts must see nesting incremented before we use the buffer */
3125         barrier();
3126         return &buffer->buffer[buffer->nesting][0];
3127 }
3128
3129 static void put_trace_buf(void)
3130 {
3131         /* Don't let the decrement of nesting leak before this */
3132         barrier();
3133         this_cpu_dec(trace_percpu_buffer->nesting);
3134 }
3135
3136 static int alloc_percpu_trace_buffer(void)
3137 {
3138         struct trace_buffer_struct *buffers;
3139
3140         buffers = alloc_percpu(struct trace_buffer_struct);
3141         if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3142                 return -ENOMEM;
3143
3144         trace_percpu_buffer = buffers;
3145         return 0;
3146 }
3147
3148 static int buffers_allocated;
3149
3150 void trace_printk_init_buffers(void)
3151 {
3152         if (buffers_allocated)
3153                 return;
3154
3155         if (alloc_percpu_trace_buffer())
3156                 return;
3157
3158         /* trace_printk() is for debug use only. Don't use it in production. */
3159
3160         pr_warn("\n");
3161         pr_warn("**********************************************************\n");
3162         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3163         pr_warn("**                                                      **\n");
3164         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3165         pr_warn("**                                                      **\n");
3166         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3167         pr_warn("** unsafe for production use.                           **\n");
3168         pr_warn("**                                                      **\n");
3169         pr_warn("** If you see this message and you are not debugging    **\n");
3170         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3171         pr_warn("**                                                      **\n");
3172         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3173         pr_warn("**********************************************************\n");
3174
3175         /* Expand the buffers to set size */
3176         tracing_update_buffers();
3177
3178         buffers_allocated = 1;
3179
3180         /*
3181          * trace_printk_init_buffers() can be called by modules.
3182          * If that happens, then we need to start cmdline recording
3183          * directly here. If the global_trace.buffer is already
3184          * allocated here, then this was called by module code.
3185          */
3186         if (global_trace.array_buffer.buffer)
3187                 tracing_start_cmdline_record();
3188 }
3189 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3190
3191 void trace_printk_start_comm(void)
3192 {
3193         /* Start tracing comms if trace printk is set */
3194         if (!buffers_allocated)
3195                 return;
3196         tracing_start_cmdline_record();
3197 }
3198
3199 static void trace_printk_start_stop_comm(int enabled)
3200 {
3201         if (!buffers_allocated)
3202                 return;
3203
3204         if (enabled)
3205                 tracing_start_cmdline_record();
3206         else
3207                 tracing_stop_cmdline_record();
3208 }
3209
3210 /**
3211  * trace_vbprintk - write binary msg to tracing buffer
3212  * @ip:    The address of the caller
3213  * @fmt:   The string format to write to the buffer
3214  * @args:  Arguments for @fmt
3215  */
3216 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3217 {
3218         struct trace_event_call *call = &event_bprint;
3219         struct ring_buffer_event *event;
3220         struct trace_buffer *buffer;
3221         struct trace_array *tr = &global_trace;
3222         struct bprint_entry *entry;
3223         unsigned long flags;
3224         char *tbuffer;
3225         int len = 0, size, pc;
3226
3227         if (unlikely(tracing_selftest_running || tracing_disabled))
3228                 return 0;
3229
3230         /* Don't pollute graph traces with trace_vprintk internals */
3231         pause_graph_tracing();
3232
3233         pc = preempt_count();
3234         preempt_disable_notrace();
3235
3236         tbuffer = get_trace_buf();
3237         if (!tbuffer) {
3238                 len = 0;
3239                 goto out_nobuffer;
3240         }
3241
3242         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3243
3244         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3245                 goto out_put;
3246
3247         local_save_flags(flags);
3248         size = sizeof(*entry) + sizeof(u32) * len;
3249         buffer = tr->array_buffer.buffer;
3250         ring_buffer_nest_start(buffer);
3251         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3252                                             flags, pc);
3253         if (!event)
3254                 goto out;
3255         entry = ring_buffer_event_data(event);
3256         entry->ip                       = ip;
3257         entry->fmt                      = fmt;
3258
3259         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3260         if (!call_filter_check_discard(call, entry, buffer, event)) {
3261                 __buffer_unlock_commit(buffer, event);
3262                 ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
3263         }
3264
3265 out:
3266         ring_buffer_nest_end(buffer);
3267 out_put:
3268         put_trace_buf();
3269
3270 out_nobuffer:
3271         preempt_enable_notrace();
3272         unpause_graph_tracing();
3273
3274         return len;
3275 }
3276 EXPORT_SYMBOL_GPL(trace_vbprintk);
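/*
 * Illustrative sketch (guarded out, not compiled here): trace_vbprintk()
 * is normally reached through a varargs wrapper that builds the va_list,
 * such as the __trace_bprintk() helper in kernel/trace/trace_printk.c.
 * The wrapper name below is hypothetical.
 */
#if 0
static int example_bprintk(unsigned long ip, const char *fmt, ...)
{
        va_list ap;
        int ret;

        va_start(ap, fmt);
        /* binary-pack the arguments into the ring buffer */
        ret = trace_vbprintk(ip, fmt, ap);
        va_end(ap);
        return ret;
}
#endif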
3277
3278 __printf(3, 0)
3279 static int
3280 __trace_array_vprintk(struct trace_buffer *buffer,
3281                       unsigned long ip, const char *fmt, va_list args)
3282 {
3283         struct trace_event_call *call = &event_print;
3284         struct ring_buffer_event *event;
3285         int len = 0, size, pc;
3286         struct print_entry *entry;
3287         unsigned long flags;
3288         char *tbuffer;
3289
3290         if (tracing_disabled || tracing_selftest_running)
3291                 return 0;
3292
3293         /* Don't pollute graph traces with trace_vprintk internals */
3294         pause_graph_tracing();
3295
3296         pc = preempt_count();
3297         preempt_disable_notrace();
3298
3299
3300         tbuffer = get_trace_buf();
3301         if (!tbuffer) {
3302                 len = 0;
3303                 goto out_nobuffer;
3304         }
3305
3306         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3307
3308         local_save_flags(flags);
3309         size = sizeof(*entry) + len + 1;
3310         ring_buffer_nest_start(buffer);
3311         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3312                                             flags, pc);
3313         if (!event)
3314                 goto out;
3315         entry = ring_buffer_event_data(event);
3316         entry->ip = ip;
3317
3318         memcpy(&entry->buf, tbuffer, len + 1);
3319         if (!call_filter_check_discard(call, entry, buffer, event)) {
3320                 __buffer_unlock_commit(buffer, event);
3321                 ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
3322         }
3323
3324 out:
3325         ring_buffer_nest_end(buffer);
3326         put_trace_buf();
3327
3328 out_nobuffer:
3329         preempt_enable_notrace();
3330         unpause_graph_tracing();
3331
3332         return len;
3333 }
3334
3335 __printf(3, 0)
3336 int trace_array_vprintk(struct trace_array *tr,
3337                         unsigned long ip, const char *fmt, va_list args)
3338 {
3339         return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3340 }
3341
3342 __printf(3, 0)
3343 int trace_array_printk(struct trace_array *tr,
3344                        unsigned long ip, const char *fmt, ...)
3345 {
3346         int ret;
3347         va_list ap;
3348
3349         if (!tr)
3350                 return -ENOENT;
3351
3352         /* This is only allowed for created instances */
3353         if (tr == &global_trace)
3354                 return 0;
3355
3356         if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3357                 return 0;
3358
3359         va_start(ap, fmt);
3360         ret = trace_array_vprintk(tr, ip, fmt, ap);
3361         va_end(ap);
3362         return ret;
3363 }
3364 EXPORT_SYMBOL_GPL(trace_array_printk);
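/*
 * Illustrative sketch (guarded out, not compiled here): module-style code
 * writing into its own named instance with the API above. The instance
 * name "example_instance" is hypothetical, and the trace_printk per-cpu
 * buffers are assumed to have been set up already (see
 * trace_printk_init_buffers() above).
 */
#if 0
static void example_instance_printk(void)
{
        struct trace_array *tr;

        /* Gets (or creates) the instance and takes a reference on it. */
        tr = trace_array_get_by_name("example_instance");
        if (!tr)
                return;

        trace_array_printk(tr, _THIS_IP_, "answer=%d\n", 42);

        trace_array_put(tr);
}
#endif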
3365
3366 __printf(3, 4)
3367 int trace_array_printk_buf(struct trace_buffer *buffer,
3368                            unsigned long ip, const char *fmt, ...)
3369 {
3370         int ret;
3371         va_list ap;
3372
3373         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3374                 return 0;
3375
3376         va_start(ap, fmt);
3377         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3378         va_end(ap);
3379         return ret;
3380 }
3381
3382 __printf(2, 0)
3383 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3384 {
3385         return trace_array_vprintk(&global_trace, ip, fmt, args);
3386 }
3387 EXPORT_SYMBOL_GPL(trace_vprintk);
3388
3389 static void trace_iterator_increment(struct trace_iterator *iter)
3390 {
3391         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3392
3393         iter->idx++;
3394         if (buf_iter)
3395                 ring_buffer_iter_advance(buf_iter);
3396 }
3397
3398 static struct trace_entry *
3399 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3400                 unsigned long *lost_events)
3401 {
3402         struct ring_buffer_event *event;
3403         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3404
3405         if (buf_iter) {
3406                 event = ring_buffer_iter_peek(buf_iter, ts);
3407                 if (lost_events)
3408                         *lost_events = ring_buffer_iter_dropped(buf_iter) ?
3409                                 (unsigned long)-1 : 0;
3410         } else {
3411                 event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3412                                          lost_events);
3413         }
3414
3415         if (event) {
3416                 iter->ent_size = ring_buffer_event_length(event);
3417                 return ring_buffer_event_data(event);
3418         }
3419         iter->ent_size = 0;
3420         return NULL;
3421 }
3422
3423 static struct trace_entry *
3424 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3425                   unsigned long *missing_events, u64 *ent_ts)
3426 {
3427         struct trace_buffer *buffer = iter->array_buffer->buffer;
3428         struct trace_entry *ent, *next = NULL;
3429         unsigned long lost_events = 0, next_lost = 0;
3430         int cpu_file = iter->cpu_file;
3431         u64 next_ts = 0, ts;
3432         int next_cpu = -1;
3433         int next_size = 0;
3434         int cpu;
3435
3436         /*
3437          * If we are in a per_cpu trace file, don't bother iterating over
3438          * all CPUs; just peek at that one directly.
3439          */
3440         if (cpu_file > RING_BUFFER_ALL_CPUS) {
3441                 if (ring_buffer_empty_cpu(buffer, cpu_file))
3442                         return NULL;
3443                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3444                 if (ent_cpu)
3445                         *ent_cpu = cpu_file;
3446
3447                 return ent;
3448         }
3449
3450         for_each_tracing_cpu(cpu) {
3451
3452                 if (ring_buffer_empty_cpu(buffer, cpu))
3453                         continue;
3454
3455                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3456
3457                 /*
3458                  * Pick the entry with the smallest timestamp:
3459                  */
3460                 if (ent && (!next || ts < next_ts)) {
3461                         next = ent;
3462                         next_cpu = cpu;
3463                         next_ts = ts;
3464                         next_lost = lost_events;
3465                         next_size = iter->ent_size;
3466                 }
3467         }
3468
3469         iter->ent_size = next_size;
3470
3471         if (ent_cpu)
3472                 *ent_cpu = next_cpu;
3473
3474         if (ent_ts)
3475                 *ent_ts = next_ts;
3476
3477         if (missing_events)
3478                 *missing_events = next_lost;
3479
3480         return next;
3481 }
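/*
 * Example of the selection above: if CPU0's next entry has ts == 1000,
 * CPU1's has ts == 950 and CPU2's buffer is empty, __find_next_entry()
 * returns CPU1's entry with *ent_cpu == 1, so consumers see the per-CPU
 * buffers merged in global timestamp order.
 */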
3482
3483 #define STATIC_TEMP_BUF_SIZE    128
3484 static char static_temp_buf[STATIC_TEMP_BUF_SIZE];
3485
3486 /* Find the next real entry, without updating the iterator itself */
3487 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3488                                           int *ent_cpu, u64 *ent_ts)
3489 {
3490         /* __find_next_entry will reset ent_size */
3491         int ent_size = iter->ent_size;
3492         struct trace_entry *entry;
3493
3494         /*
3495          * If called from ftrace_dump(), then the iter->temp buffer
3496          * will be the static_temp_buf and not created from kmalloc.
3497          * If the entry size is greater than the buffer, we cannot
3498          * save it. Just return NULL in that case. This is only
3499          * used to add markers when two consecutive events' time
3500          * stamps have a large delta. See trace_print_lat_context().
3501          */
3502         if (iter->temp == static_temp_buf &&
3503             STATIC_TEMP_BUF_SIZE < ent_size)
3504                 return NULL;
3505
3506         /*
3507          * The __find_next_entry() may call peek_next_entry(), which may
3508          * call ring_buffer_peek() that may make the contents of iter->ent
3509          * undefined. Need to copy iter->ent now.
3510          */
3511         if (iter->ent && iter->ent != iter->temp) {
3512                 if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3513                     !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3514                         kfree(iter->temp);
3515                         iter->temp = kmalloc(iter->ent_size, GFP_KERNEL);
3516                         if (!iter->temp)
3517                                 return NULL;
3518                 }
3519                 memcpy(iter->temp, iter->ent, iter->ent_size);
3520                 iter->temp_size = iter->ent_size;
3521                 iter->ent = iter->temp;
3522         }
3523         entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3524         /* Put back the original ent_size */
3525         iter->ent_size = ent_size;
3526
3527         return entry;
3528 }
3529
3530 /* Find the next real entry, and increment the iterator to the next entry */
3531 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3532 {
3533         iter->ent = __find_next_entry(iter, &iter->cpu,
3534                                       &iter->lost_events, &iter->ts);
3535
3536         if (iter->ent)
3537                 trace_iterator_increment(iter);
3538
3539         return iter->ent ? iter : NULL;
3540 }
3541
3542 static void trace_consume(struct trace_iterator *iter)
3543 {
3544         ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3545                             &iter->lost_events);
3546 }
3547
3548 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3549 {
3550         struct trace_iterator *iter = m->private;
3551         int i = (int)*pos;
3552         void *ent;
3553
3554         WARN_ON_ONCE(iter->leftover);
3555
3556         (*pos)++;
3557
3558         /* can't go backwards */
3559         if (iter->idx > i)
3560                 return NULL;
3561
3562         if (iter->idx < 0)
3563                 ent = trace_find_next_entry_inc(iter);
3564         else
3565                 ent = iter;
3566
3567         while (ent && iter->idx < i)
3568                 ent = trace_find_next_entry_inc(iter);
3569
3570         iter->pos = *pos;
3571
3572         return ent;
3573 }
3574
3575 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3576 {
3577         struct ring_buffer_iter *buf_iter;
3578         unsigned long entries = 0;
3579         u64 ts;
3580
3581         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
3582
3583         buf_iter = trace_buffer_iter(iter, cpu);
3584         if (!buf_iter)
3585                 return;
3586
3587         ring_buffer_iter_reset(buf_iter);
3588
3589         /*
3590          * We could have the case with the max latency tracers
3591          * that a reset never took place on a cpu. This is evident
3592          * by the timestamp being before the start of the buffer.
3593          */
3594         while (ring_buffer_iter_peek(buf_iter, &ts)) {
3595                 if (ts >= iter->array_buffer->time_start)
3596                         break;
3597                 entries++;
3598                 ring_buffer_iter_advance(buf_iter);
3599         }
3600
3601         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
3602 }
3603
3604 /*
3605  * The current tracer is copied to avoid taking a global lock
3606  * all around.
3607  */
3608 static void *s_start(struct seq_file *m, loff_t *pos)
3609 {
3610         struct trace_iterator *iter = m->private;
3611         struct trace_array *tr = iter->tr;
3612         int cpu_file = iter->cpu_file;
3613         void *p = NULL;
3614         loff_t l = 0;
3615         int cpu;
3616
3617         /*
3618          * copy the tracer to avoid using a global lock all around.
3619          * iter->trace is a copy of current_trace, the pointer to the
3620          * name may be used instead of a strcmp(), as iter->trace->name
3621          * will point to the same string as current_trace->name.
3622          */
3623         mutex_lock(&trace_types_lock);
3624         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3625                 *iter->trace = *tr->current_trace;
3626         mutex_unlock(&trace_types_lock);
3627
3628 #ifdef CONFIG_TRACER_MAX_TRACE
3629         if (iter->snapshot && iter->trace->use_max_tr)
3630                 return ERR_PTR(-EBUSY);
3631 #endif
3632
3633         if (!iter->snapshot)
3634                 atomic_inc(&trace_record_taskinfo_disabled);
3635
3636         if (*pos != iter->pos) {
3637                 iter->ent = NULL;
3638                 iter->cpu = 0;
3639                 iter->idx = -1;
3640
3641                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
3642                         for_each_tracing_cpu(cpu)
3643                                 tracing_iter_reset(iter, cpu);
3644                 } else
3645                         tracing_iter_reset(iter, cpu_file);
3646
3647                 iter->leftover = 0;
3648                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3649                         ;
3650
3651         } else {
3652                 /*
3653                  * If we overflowed the seq_file before, then we want
3654                  * to just reuse the trace_seq buffer again.
3655                  */
3656                 if (iter->leftover)
3657                         p = iter;
3658                 else {
3659                         l = *pos - 1;
3660                         p = s_next(m, p, &l);
3661                 }
3662         }
3663
3664         trace_event_read_lock();
3665         trace_access_lock(cpu_file);
3666         return p;
3667 }
3668
3669 static void s_stop(struct seq_file *m, void *p)
3670 {
3671         struct trace_iterator *iter = m->private;
3672
3673 #ifdef CONFIG_TRACER_MAX_TRACE
3674         if (iter->snapshot && iter->trace->use_max_tr)
3675                 return;
3676 #endif
3677
3678         if (!iter->snapshot)
3679                 atomic_dec(&trace_record_taskinfo_disabled);
3680
3681         trace_access_unlock(iter->cpu_file);
3682         trace_event_read_unlock();
3683 }
3684
3685 static void
3686 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
3687                       unsigned long *entries, int cpu)
3688 {
3689         unsigned long count;
3690
3691         count = ring_buffer_entries_cpu(buf->buffer, cpu);
3692         /*
3693          * If this buffer has skipped entries, then we hold all
3694          * entries for the trace and we need to ignore the
3695          * ones before the time stamp.
3696          */
3697         if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3698                 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3699                 /* total is the same as the entries */
3700                 *total = count;
3701         } else
3702                 *total = count +
3703                         ring_buffer_overrun_cpu(buf->buffer, cpu);
3704         *entries = count;
3705 }
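/*
 * Worked example for the accounting above: if a CPU buffer currently
 * holds 900 readable entries and 100 older events were overwritten
 * (overrun), then *entries == 900 and *total == 1000.  When the CPU has
 * skipped_entries set (max latency tracers), those are subtracted and
 * *total == *entries.
 */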
3706
3707 static void
3708 get_total_entries(struct array_buffer *buf,
3709                   unsigned long *total, unsigned long *entries)
3710 {
3711         unsigned long t, e;
3712         int cpu;
3713
3714         *total = 0;
3715         *entries = 0;
3716
3717         for_each_tracing_cpu(cpu) {
3718                 get_total_entries_cpu(buf, &t, &e, cpu);
3719                 *total += t;
3720                 *entries += e;
3721         }
3722 }
3723
3724 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
3725 {
3726         unsigned long total, entries;
3727
3728         if (!tr)
3729                 tr = &global_trace;
3730
3731         get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
3732
3733         return entries;
3734 }
3735
3736 unsigned long trace_total_entries(struct trace_array *tr)
3737 {
3738         unsigned long total, entries;
3739
3740         if (!tr)
3741                 tr = &global_trace;
3742
3743         get_total_entries(&tr->array_buffer, &total, &entries);
3744
3745         return entries;
3746 }
3747
3748 static void print_lat_help_header(struct seq_file *m)
3749 {
3750         seq_puts(m, "#                  _------=> CPU#            \n"
3751                     "#                 / _-----=> irqs-off        \n"
3752                     "#                | / _----=> need-resched    \n"
3753                     "#                || / _---=> hardirq/softirq \n"
3754                     "#                ||| / _--=> preempt-depth   \n"
3755                     "#                |||| /     delay            \n"
3756                     "#  cmd     pid   ||||| time  |   caller      \n"
3757                     "#     \\   /      |||||  \\    |   /         \n");
3758 }
3759
3760 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
3761 {
3762         unsigned long total;
3763         unsigned long entries;
3764
3765         get_total_entries(buf, &total, &entries);
3766         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
3767                    entries, total, num_online_cpus());
3768         seq_puts(m, "#\n");
3769 }
3770
3771 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
3772                                    unsigned int flags)
3773 {
3774         bool tgid = flags & TRACE_ITER_RECORD_TGID;
3775
3776         print_event_info(buf, m);
3777
3778         seq_printf(m, "#           TASK-PID   %s  CPU#   TIMESTAMP  FUNCTION\n", tgid ? "TGID     " : "");
3779         seq_printf(m, "#              | |     %s    |       |         |\n",      tgid ? "  |      " : "");
3780 }
3781
3782 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
3783                                        unsigned int flags)
3784 {
3785         bool tgid = flags & TRACE_ITER_RECORD_TGID;
3786         const char *space = "          ";
3787         int prec = tgid ? 10 : 2;
3788
3789         print_event_info(buf, m);
3790
3791         seq_printf(m, "#                          %.*s  _-----=> irqs-off\n", prec, space);
3792         seq_printf(m, "#                          %.*s / _----=> need-resched\n", prec, space);
3793         seq_printf(m, "#                          %.*s| / _---=> hardirq/softirq\n", prec, space);
3794         seq_printf(m, "#                          %.*s|| / _--=> preempt-depth\n", prec, space);
3795         seq_printf(m, "#                          %.*s||| /     delay\n", prec, space);
3796         seq_printf(m, "#           TASK-PID %.*sCPU#  ||||    TIMESTAMP  FUNCTION\n", prec, "   TGID   ");
3797         seq_printf(m, "#              | |   %.*s  |   ||||       |         |\n", prec, "     |    ");
3798 }
3799
3800 void
3801 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3802 {
3803         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3804         struct array_buffer *buf = iter->array_buffer;
3805         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3806         struct tracer *type = iter->trace;
3807         unsigned long entries;
3808         unsigned long total;
3809         const char *name = "preemption";
3810
3811         name = type->name;
3812
3813         get_total_entries(buf, &total, &entries);
3814
3815         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3816                    name, UTS_RELEASE);
3817         seq_puts(m, "# -----------------------------------"
3818                  "---------------------------------\n");
3819         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3820                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3821                    nsecs_to_usecs(data->saved_latency),
3822                    entries,
3823                    total,
3824                    buf->cpu,
3825 #if defined(CONFIG_PREEMPT_NONE)
3826                    "server",
3827 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3828                    "desktop",
3829 #elif defined(CONFIG_PREEMPT)
3830                    "preempt",
3831 #elif defined(CONFIG_PREEMPT_RT)
3832                    "preempt_rt",
3833 #else
3834                    "unknown",
3835 #endif
3836                    /* These are reserved for later use */
3837                    0, 0, 0, 0);
3838 #ifdef CONFIG_SMP
3839         seq_printf(m, " #P:%d)\n", num_online_cpus());
3840 #else
3841         seq_puts(m, ")\n");
3842 #endif
3843         seq_puts(m, "#    -----------------\n");
3844         seq_printf(m, "#    | task: %.16s-%d "
3845                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3846                    data->comm, data->pid,
3847                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3848                    data->policy, data->rt_priority);
3849         seq_puts(m, "#    -----------------\n");
3850
3851         if (data->critical_start) {
3852                 seq_puts(m, "#  => started at: ");
3853                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3854                 trace_print_seq(m, &iter->seq);
3855                 seq_puts(m, "\n#  => ended at:   ");
3856                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3857                 trace_print_seq(m, &iter->seq);
3858                 seq_puts(m, "\n#\n");
3859         }
3860
3861         seq_puts(m, "#\n");
3862 }
3863
3864 static void test_cpu_buff_start(struct trace_iterator *iter)
3865 {
3866         struct trace_seq *s = &iter->seq;
3867         struct trace_array *tr = iter->tr;
3868
3869         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3870                 return;
3871
3872         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3873                 return;
3874
3875         if (cpumask_available(iter->started) &&
3876             cpumask_test_cpu(iter->cpu, iter->started))
3877                 return;
3878
3879         if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
3880                 return;
3881
3882         if (cpumask_available(iter->started))
3883                 cpumask_set_cpu(iter->cpu, iter->started);
3884
3885         /* Don't print started cpu buffer for the first entry of the trace */
3886         if (iter->idx > 1)
3887                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3888                                 iter->cpu);
3889 }
3890
3891 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3892 {
3893         struct trace_array *tr = iter->tr;
3894         struct trace_seq *s = &iter->seq;
3895         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3896         struct trace_entry *entry;
3897         struct trace_event *event;
3898
3899         entry = iter->ent;
3900
3901         test_cpu_buff_start(iter);
3902
3903         event = ftrace_find_event(entry->type);
3904
3905         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3906                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3907                         trace_print_lat_context(iter);
3908                 else
3909                         trace_print_context(iter);
3910         }
3911
3912         if (trace_seq_has_overflowed(s))
3913                 return TRACE_TYPE_PARTIAL_LINE;
3914
3915         if (event)
3916                 return event->funcs->trace(iter, sym_flags, event);
3917
3918         trace_seq_printf(s, "Unknown type %d\n", entry->type);
3919
3920         return trace_handle_return(s);
3921 }
3922
3923 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3924 {
3925         struct trace_array *tr = iter->tr;
3926         struct trace_seq *s = &iter->seq;
3927         struct trace_entry *entry;
3928         struct trace_event *event;
3929
3930         entry = iter->ent;
3931
3932         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3933                 trace_seq_printf(s, "%d %d %llu ",
3934                                  entry->pid, iter->cpu, iter->ts);
3935
3936         if (trace_seq_has_overflowed(s))
3937                 return TRACE_TYPE_PARTIAL_LINE;
3938
3939         event = ftrace_find_event(entry->type);
3940         if (event)
3941                 return event->funcs->raw(iter, 0, event);
3942
3943         trace_seq_printf(s, "%d ?\n", entry->type);
3944
3945         return trace_handle_return(s);
3946 }
3947
3948 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3949 {
3950         struct trace_array *tr = iter->tr;
3951         struct trace_seq *s = &iter->seq;
3952         unsigned char newline = '\n';
3953         struct trace_entry *entry;
3954         struct trace_event *event;
3955
3956         entry = iter->ent;
3957
3958         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3959                 SEQ_PUT_HEX_FIELD(s, entry->pid);
3960                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
3961                 SEQ_PUT_HEX_FIELD(s, iter->ts);
3962                 if (trace_seq_has_overflowed(s))
3963                         return TRACE_TYPE_PARTIAL_LINE;
3964         }
3965
3966         event = ftrace_find_event(entry->type);
3967         if (event) {
3968                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
3969                 if (ret != TRACE_TYPE_HANDLED)
3970                         return ret;
3971         }
3972
3973         SEQ_PUT_FIELD(s, newline);
3974
3975         return trace_handle_return(s);
3976 }
3977
3978 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3979 {
3980         struct trace_array *tr = iter->tr;
3981         struct trace_seq *s = &iter->seq;
3982         struct trace_entry *entry;
3983         struct trace_event *event;
3984
3985         entry = iter->ent;
3986
3987         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3988                 SEQ_PUT_FIELD(s, entry->pid);
3989                 SEQ_PUT_FIELD(s, iter->cpu);
3990                 SEQ_PUT_FIELD(s, iter->ts);
3991                 if (trace_seq_has_overflowed(s))
3992                         return TRACE_TYPE_PARTIAL_LINE;
3993         }
3994
3995         event = ftrace_find_event(entry->type);
3996         return event ? event->funcs->binary(iter, 0, event) :
3997                 TRACE_TYPE_HANDLED;
3998 }
3999
4000 int trace_empty(struct trace_iterator *iter)
4001 {
4002         struct ring_buffer_iter *buf_iter;
4003         int cpu;
4004
4005         /* If we are looking at one CPU buffer, only check that one */
4006         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4007                 cpu = iter->cpu_file;
4008                 buf_iter = trace_buffer_iter(iter, cpu);
4009                 if (buf_iter) {
4010                         if (!ring_buffer_iter_empty(buf_iter))
4011                                 return 0;
4012                 } else {
4013                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4014                                 return 0;
4015                 }
4016                 return 1;
4017         }
4018
4019         for_each_tracing_cpu(cpu) {
4020                 buf_iter = trace_buffer_iter(iter, cpu);
4021                 if (buf_iter) {
4022                         if (!ring_buffer_iter_empty(buf_iter))
4023                                 return 0;
4024                 } else {
4025                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4026                                 return 0;
4027                 }
4028         }
4029
4030         return 1;
4031 }
4032
4033 /*  Called with trace_event_read_lock() held. */
4034 enum print_line_t print_trace_line(struct trace_iterator *iter)
4035 {
4036         struct trace_array *tr = iter->tr;
4037         unsigned long trace_flags = tr->trace_flags;
4038         enum print_line_t ret;
4039
4040         if (iter->lost_events) {
4041                 if (iter->lost_events == (unsigned long)-1)
4042                         trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4043                                          iter->cpu);
4044                 else
4045                         trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4046                                          iter->cpu, iter->lost_events);
4047                 if (trace_seq_has_overflowed(&iter->seq))
4048                         return TRACE_TYPE_PARTIAL_LINE;
4049         }
4050
4051         if (iter->trace && iter->trace->print_line) {
4052                 ret = iter->trace->print_line(iter);
4053                 if (ret != TRACE_TYPE_UNHANDLED)
4054                         return ret;
4055         }
4056
4057         if (iter->ent->type == TRACE_BPUTS &&
4058                         trace_flags & TRACE_ITER_PRINTK &&
4059                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4060                 return trace_print_bputs_msg_only(iter);
4061
4062         if (iter->ent->type == TRACE_BPRINT &&
4063                         trace_flags & TRACE_ITER_PRINTK &&
4064                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4065                 return trace_print_bprintk_msg_only(iter);
4066
4067         if (iter->ent->type == TRACE_PRINT &&
4068                         trace_flags & TRACE_ITER_PRINTK &&
4069                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4070                 return trace_print_printk_msg_only(iter);
4071
4072         if (trace_flags & TRACE_ITER_BIN)
4073                 return print_bin_fmt(iter);
4074
4075         if (trace_flags & TRACE_ITER_HEX)
4076                 return print_hex_fmt(iter);
4077
4078         if (trace_flags & TRACE_ITER_RAW)
4079                 return print_raw_fmt(iter);
4080
4081         return print_trace_fmt(iter);
4082 }
4083
4084 void trace_latency_header(struct seq_file *m)
4085 {
4086         struct trace_iterator *iter = m->private;
4087         struct trace_array *tr = iter->tr;
4088
4089         /* print nothing if the buffers are empty */
4090         if (trace_empty(iter))
4091                 return;
4092
4093         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4094                 print_trace_header(m, iter);
4095
4096         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4097                 print_lat_help_header(m);
4098 }
4099
4100 void trace_default_header(struct seq_file *m)
4101 {
4102         struct trace_iterator *iter = m->private;
4103         struct trace_array *tr = iter->tr;
4104         unsigned long trace_flags = tr->trace_flags;
4105
4106         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4107                 return;
4108
4109         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4110                 /* print nothing if the buffers are empty */
4111                 if (trace_empty(iter))
4112                         return;
4113                 print_trace_header(m, iter);
4114                 if (!(trace_flags & TRACE_ITER_VERBOSE))
4115                         print_lat_help_header(m);
4116         } else {
4117                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4118                         if (trace_flags & TRACE_ITER_IRQ_INFO)
4119                                 print_func_help_header_irq(iter->array_buffer,
4120                                                            m, trace_flags);
4121                         else
4122                                 print_func_help_header(iter->array_buffer, m,
4123                                                        trace_flags);
4124                 }
4125         }
4126 }
4127
4128 static void test_ftrace_alive(struct seq_file *m)
4129 {
4130         if (!ftrace_is_dead())
4131                 return;
4132         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4133                     "#          MAY BE MISSING FUNCTION EVENTS\n");
4134 }
4135
4136 #ifdef CONFIG_TRACER_MAX_TRACE
4137 static void show_snapshot_main_help(struct seq_file *m)
4138 {
4139         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4140                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4141                     "#                      Takes a snapshot of the main buffer.\n"
4142                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4143                     "#                      (Doesn't have to be '2' works with any number that\n"
4144                     "#                       is not a '0' or '1')\n");
4145 }
4146
4147 static void show_snapshot_percpu_help(struct seq_file *m)
4148 {
4149         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4150 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4151         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4152                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
4153 #else
4154         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4155                     "#                     Must use main snapshot file to allocate.\n");
4156 #endif
4157         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4158                     "#                      (Doesn't have to be '2' works with any number that\n"
4159                     "#                       is not a '0' or '1')\n");
4160 }
4161
4162 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4163 {
4164         if (iter->tr->allocated_snapshot)
4165                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4166         else
4167                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4168
4169         seq_puts(m, "# Snapshot commands:\n");
4170         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4171                 show_snapshot_main_help(m);
4172         else
4173                 show_snapshot_percpu_help(m);
4174 }
4175 #else
4176 /* Should never be called */
4177 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4178 #endif
4179
4180 static int s_show(struct seq_file *m, void *v)
4181 {
4182         struct trace_iterator *iter = v;
4183         int ret;
4184
4185         if (iter->ent == NULL) {
4186                 if (iter->tr) {
4187                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
4188                         seq_puts(m, "#\n");
4189                         test_ftrace_alive(m);
4190                 }
4191                 if (iter->snapshot && trace_empty(iter))
4192                         print_snapshot_help(m, iter);
4193                 else if (iter->trace && iter->trace->print_header)
4194                         iter->trace->print_header(m);
4195                 else
4196                         trace_default_header(m);
4197
4198         } else if (iter->leftover) {
4199                 /*
4200                  * If we filled the seq_file buffer earlier, we
4201                  * want to just show it now.
4202                  */
4203                 ret = trace_print_seq(m, &iter->seq);
4204
4205                 /* ret should this time be zero, but you never know */
4206                 iter->leftover = ret;
4207
4208         } else {
4209                 print_trace_line(iter);
4210                 ret = trace_print_seq(m, &iter->seq);
4211                 /*
4212                  * If we overflow the seq_file buffer, then it will
4213                  * ask us for this data again at start up.
4214                  * Use that instead.
4215                  *  ret is 0 if seq_file write succeeded.
4216                  *        -1 otherwise.
4217                  */
4218                 iter->leftover = ret;
4219         }
4220
4221         return 0;
4222 }
4223
4224 /*
4225  * Should be used after trace_array_get(); trace_types_lock
4226  * ensures that i_cdev was already initialized.
4227  */
4228 static inline int tracing_get_cpu(struct inode *inode)
4229 {
4230         if (inode->i_cdev) /* See trace_create_cpu_file() */
4231                 return (long)inode->i_cdev - 1;
4232         return RING_BUFFER_ALL_CPUS;
4233 }
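/*
 * Example of the encoding above: trace_create_cpu_file() stores cpu + 1
 * in i_cdev, so CPU 0 is stored as 1 and a NULL i_cdev unambiguously
 * means "all CPUs" (RING_BUFFER_ALL_CPUS).
 */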
4234
4235 static const struct seq_operations tracer_seq_ops = {
4236         .start          = s_start,
4237         .next           = s_next,
4238         .stop           = s_stop,
4239         .show           = s_show,
4240 };
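/*
 * When the trace file is read, the seq_file core drives these callbacks
 * in the usual order: s_start() takes the locks and positions the
 * iterator, then s_show()/s_next() repeat until the seq buffer fills or
 * the entries run out, then s_stop() drops the locks.  Each subsequent
 * read() resumes the cycle from the saved position.
 */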
4241
4242 static struct trace_iterator *
4243 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4244 {
4245         struct trace_array *tr = inode->i_private;
4246         struct trace_iterator *iter;
4247         int cpu;
4248
4249         if (tracing_disabled)
4250                 return ERR_PTR(-ENODEV);
4251
4252         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4253         if (!iter)
4254                 return ERR_PTR(-ENOMEM);
4255
4256         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4257                                     GFP_KERNEL);
4258         if (!iter->buffer_iter)
4259                 goto release;
4260
4261         /*
4262          * trace_find_next_entry() may need to save off iter->ent.
4263          * It will place it into the iter->temp buffer. As most
4264          * events are less than 128, allocate a buffer of that size.
4265          * If one is greater, then trace_find_next_entry() will
4266          * allocate a new buffer to adjust for the bigger iter->ent.
4267          * It's not critical if it fails to get allocated here.
4268          */
4269         iter->temp = kmalloc(128, GFP_KERNEL);
4270         if (iter->temp)
4271                 iter->temp_size = 128;
4272
4273         /*
4274          * We make a copy of the current tracer to avoid concurrent
4275          * changes on it while we are reading.
4276          */
4277         mutex_lock(&trace_types_lock);
4278         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4279         if (!iter->trace)
4280                 goto fail;
4281
4282         *iter->trace = *tr->current_trace;
4283
4284         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4285                 goto fail;
4286
4287         iter->tr = tr;
4288
4289 #ifdef CONFIG_TRACER_MAX_TRACE
4290         /* Currently only the top directory has a snapshot */
4291         if (tr->current_trace->print_max || snapshot)
4292                 iter->array_buffer = &tr->max_buffer;
4293         else
4294 #endif
4295                 iter->array_buffer = &tr->array_buffer;
4296         iter->snapshot = snapshot;
4297         iter->pos = -1;
4298         iter->cpu_file = tracing_get_cpu(inode);
4299         mutex_init(&iter->mutex);
4300
4301         /* Notify the tracer early; before we stop tracing. */
4302         if (iter->trace->open)
4303                 iter->trace->open(iter);
4304
4305         /* Annotate start of buffers if we had overruns */
4306         if (ring_buffer_overruns(iter->array_buffer->buffer))
4307                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4308
4309         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4310         if (trace_clocks[tr->clock_id].in_ns)
4311                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4312
4313         /*
4314          * If pause-on-trace is enabled, then stop the trace while
4315          * dumping, unless this is the "snapshot" file
4316          */
4317         if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4318                 tracing_stop_tr(tr);
4319
4320         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4321                 for_each_tracing_cpu(cpu) {
4322                         iter->buffer_iter[cpu] =
4323                                 ring_buffer_read_prepare(iter->array_buffer->buffer,
4324                                                          cpu, GFP_KERNEL);
4325                 }
4326                 ring_buffer_read_prepare_sync();
4327                 for_each_tracing_cpu(cpu) {
4328                         ring_buffer_read_start(iter->buffer_iter[cpu]);
4329                         tracing_iter_reset(iter, cpu);
4330                 }
4331         } else {
4332                 cpu = iter->cpu_file;
4333                 iter->buffer_iter[cpu] =
4334                         ring_buffer_read_prepare(iter->array_buffer->buffer,
4335                                                  cpu, GFP_KERNEL);
4336                 ring_buffer_read_prepare_sync();
4337                 ring_buffer_read_start(iter->buffer_iter[cpu]);
4338                 tracing_iter_reset(iter, cpu);
4339         }
4340
4341         mutex_unlock(&trace_types_lock);
4342
4343         return iter;
4344
4345  fail:
4346         mutex_unlock(&trace_types_lock);
4347         kfree(iter->trace);
4348         kfree(iter->temp);
4349         kfree(iter->buffer_iter);
4350 release:
4351         seq_release_private(inode, file);
4352         return ERR_PTR(-ENOMEM);
4353 }
4354
4355 int tracing_open_generic(struct inode *inode, struct file *filp)
4356 {
4357         int ret;
4358
4359         ret = tracing_check_open_get_tr(NULL);
4360         if (ret)
4361                 return ret;
4362
4363         filp->private_data = inode->i_private;
4364         return 0;
4365 }
4366
4367 bool tracing_is_disabled(void)
4368 {
4369         return tracing_disabled ? true : false;
4370 }
4371
4372 /*
4373  * Open and update trace_array ref count.
4374  * Must have the current trace_array passed to it.
4375  */
4376 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4377 {
4378         struct trace_array *tr = inode->i_private;
4379         int ret;
4380
4381         ret = tracing_check_open_get_tr(tr);
4382         if (ret)
4383                 return ret;
4384
4385         filp->private_data = inode->i_private;
4386
4387         return 0;
4388 }
4389
4390 static int tracing_release(struct inode *inode, struct file *file)
4391 {
4392         struct trace_array *tr = inode->i_private;
4393         struct seq_file *m = file->private_data;
4394         struct trace_iterator *iter;
4395         int cpu;
4396
4397         if (!(file->f_mode & FMODE_READ)) {
4398                 trace_array_put(tr);
4399                 return 0;
4400         }
4401
4402         /* Writes do not use seq_file */
4403         iter = m->private;
4404         mutex_lock(&trace_types_lock);
4405
4406         for_each_tracing_cpu(cpu) {
4407                 if (iter->buffer_iter[cpu])
4408                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
4409         }
4410
4411         if (iter->trace && iter->trace->close)
4412                 iter->trace->close(iter);
4413
4414         if (!iter->snapshot && tr->stop_count)
4415                 /* reenable tracing if it was previously enabled */
4416                 tracing_start_tr(tr);
4417
4418         __trace_array_put(tr);
4419
4420         mutex_unlock(&trace_types_lock);
4421
4422         mutex_destroy(&iter->mutex);
4423         free_cpumask_var(iter->started);
4424         kfree(iter->temp);
4425         kfree(iter->trace);
4426         kfree(iter->buffer_iter);
4427         seq_release_private(inode, file);
4428
4429         return 0;
4430 }
4431
4432 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4433 {
4434         struct trace_array *tr = inode->i_private;
4435
4436         trace_array_put(tr);
4437         return 0;
4438 }
4439
4440 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4441 {
4442         struct trace_array *tr = inode->i_private;
4443
4444         trace_array_put(tr);
4445
4446         return single_release(inode, file);
4447 }
4448
4449 static int tracing_open(struct inode *inode, struct file *file)
4450 {
4451         struct trace_array *tr = inode->i_private;
4452         struct trace_iterator *iter;
4453         int ret;
4454
4455         ret = tracing_check_open_get_tr(tr);
4456         if (ret)
4457                 return ret;
4458
4459         /* If this file was open for write, then erase contents */
4460         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4461                 int cpu = tracing_get_cpu(inode);
4462                 struct array_buffer *trace_buf = &tr->array_buffer;
4463
4464 #ifdef CONFIG_TRACER_MAX_TRACE
4465                 if (tr->current_trace->print_max)
4466                         trace_buf = &tr->max_buffer;
4467 #endif
4468
4469                 if (cpu == RING_BUFFER_ALL_CPUS)
4470                         tracing_reset_online_cpus(trace_buf);
4471                 else
4472                         tracing_reset_cpu(trace_buf, cpu);
4473         }
4474
4475         if (file->f_mode & FMODE_READ) {
4476                 iter = __tracing_open(inode, file, false);
4477                 if (IS_ERR(iter))
4478                         ret = PTR_ERR(iter);
4479                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4480                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
4481         }
4482
4483         if (ret < 0)
4484                 trace_array_put(tr);
4485
4486         return ret;
4487 }
4488
4489 /*
4490  * Some tracers are not suitable for instance buffers.
4491  * A tracer is always available for the global array (toplevel)
4492  * or if it explicitly states that it is.
4493  */
4494 static bool
4495 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4496 {
4497         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4498 }
4499
4500 /* Find the next tracer that this trace array may use */
4501 static struct tracer *
4502 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4503 {
4504         while (t && !trace_ok_for_array(t, tr))
4505                 t = t->next;
4506
4507         return t;
4508 }
4509
4510 static void *
4511 t_next(struct seq_file *m, void *v, loff_t *pos)
4512 {
4513         struct trace_array *tr = m->private;
4514         struct tracer *t = v;
4515
4516         (*pos)++;
4517
4518         if (t)
4519                 t = get_tracer_for_array(tr, t->next);
4520
4521         return t;
4522 }
4523
4524 static void *t_start(struct seq_file *m, loff_t *pos)
4525 {
4526         struct trace_array *tr = m->private;
4527         struct tracer *t;
4528         loff_t l = 0;
4529
4530         mutex_lock(&trace_types_lock);
4531
4532         t = get_tracer_for_array(tr, trace_types);
4533         for (; t && l < *pos; t = t_next(m, t, &l))
4534                         ;
4535
4536         return t;
4537 }
4538
4539 static void t_stop(struct seq_file *m, void *p)
4540 {
4541         mutex_unlock(&trace_types_lock);
4542 }
4543
4544 static int t_show(struct seq_file *m, void *v)
4545 {
4546         struct tracer *t = v;
4547
4548         if (!t)
4549                 return 0;
4550
4551         seq_puts(m, t->name);
4552         if (t->next)
4553                 seq_putc(m, ' ');
4554         else
4555                 seq_putc(m, '\n');
4556
4557         return 0;
4558 }
4559
4560 static const struct seq_operations show_traces_seq_ops = {
4561         .start          = t_start,
4562         .next           = t_next,
4563         .stop           = t_stop,
4564         .show           = t_show,
4565 };
4566
4567 static int show_traces_open(struct inode *inode, struct file *file)
4568 {
4569         struct trace_array *tr = inode->i_private;
4570         struct seq_file *m;
4571         int ret;
4572
4573         ret = tracing_check_open_get_tr(tr);
4574         if (ret)
4575                 return ret;
4576
4577         ret = seq_open(file, &show_traces_seq_ops);
4578         if (ret) {
4579                 trace_array_put(tr);
4580                 return ret;
4581         }
4582
4583         m = file->private_data;
4584         m->private = tr;
4585
4586         return 0;
4587 }
4588
4589 static int show_traces_release(struct inode *inode, struct file *file)
4590 {
4591         struct trace_array *tr = inode->i_private;
4592
4593         trace_array_put(tr);
4594         return seq_release(inode, file);
4595 }
4596
4597 static ssize_t
4598 tracing_write_stub(struct file *filp, const char __user *ubuf,
4599                    size_t count, loff_t *ppos)
4600 {
4601         return count;
4602 }
4603
4604 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4605 {
4606         int ret;
4607
4608         if (file->f_mode & FMODE_READ)
4609                 ret = seq_lseek(file, offset, whence);
4610         else
4611                 file->f_pos = ret = 0;
4612
4613         return ret;
4614 }
4615
4616 static const struct file_operations tracing_fops = {
4617         .open           = tracing_open,
4618         .read           = seq_read,
4619         .write          = tracing_write_stub,
4620         .llseek         = tracing_lseek,
4621         .release        = tracing_release,
4622 };
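/*
 * These operations back the "trace" file in tracefs (commonly mounted at
 * /sys/kernel/tracing): reads stream the buffer through the seq_file
 * iterator above, writes are accepted but ignored, and opening for write
 * with O_TRUNC (e.g. "> trace" from a shell) clears the buffer in
 * tracing_open().
 */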
4623
4624 static const struct file_operations show_traces_fops = {
4625         .open           = show_traces_open,
4626         .read           = seq_read,
4627         .llseek         = seq_lseek,
4628         .release        = show_traces_release,
4629 };
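/*
 * These operations back the "available_tracers" file: t_show() emits each
 * registered tracer name that is allowed for this instance, separated by
 * spaces, so reading it typically yields something like
 * "function_graph function nop".
 */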
4630
4631 static ssize_t
4632 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4633                      size_t count, loff_t *ppos)
4634 {
4635         struct trace_array *tr = file_inode(filp)->i_private;
4636         char *mask_str;
4637         int len;
4638
4639         len = snprintf(NULL, 0, "%*pb\n",
4640                        cpumask_pr_args(tr->tracing_cpumask)) + 1;
4641         mask_str = kmalloc(len, GFP_KERNEL);
4642         if (!mask_str)
4643                 return -ENOMEM;
4644
4645         len = snprintf(mask_str, len, "%*pb\n",
4646                        cpumask_pr_args(tr->tracing_cpumask));
4647         if (len >= count) {
4648                 count = -EINVAL;
4649                 goto out_err;
4650         }
4651         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
4652
4653 out_err:
4654         kfree(mask_str);
4655
4656         return count;
4657 }
4658
4659 int tracing_set_cpumask(struct trace_array *tr,
4660                         cpumask_var_t tracing_cpumask_new)
4661 {
4662         int cpu;
4663
4664         if (!tr)
4665                 return -EINVAL;
4666
4667         local_irq_disable();
4668         arch_spin_lock(&tr->max_lock);
4669         for_each_tracing_cpu(cpu) {
4670                 /*
4671                  * Increase/decrease the disabled counter if we are
4672                  * about to flip a bit in the cpumask:
4673                  */
4674                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4675                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4676                         atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
4677                         ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
4678                 }
4679                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4680                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4681                         atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
4682                         ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
4683                 }
4684         }
4685         arch_spin_unlock(&tr->max_lock);
4686         local_irq_enable();
4687
4688         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4689
4690         return 0;
4691 }
4692
4693 static ssize_t
4694 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4695                       size_t count, loff_t *ppos)
4696 {
4697         struct trace_array *tr = file_inode(filp)->i_private;
4698         cpumask_var_t tracing_cpumask_new;
4699         int err;
4700
4701         if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4702                 return -ENOMEM;
4703
4704         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4705         if (err)
4706                 goto err_free;
4707
4708         err = tracing_set_cpumask(tr, tracing_cpumask_new);
4709         if (err)
4710                 goto err_free;
4711
4712         free_cpumask_var(tracing_cpumask_new);
4713
4714         return count;
4715
4716 err_free:
4717         free_cpumask_var(tracing_cpumask_new);
4718
4719         return err;
4720 }
4721
4722 static const struct file_operations tracing_cpumask_fops = {
4723         .open           = tracing_open_generic_tr,
4724         .read           = tracing_cpumask_read,
4725         .write          = tracing_cpumask_write,
4726         .release        = tracing_release_generic_tr,
4727         .llseek         = generic_file_llseek,
4728 };
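/*
 * Example for the cpumask interface above: the mask is read and written
 * as a hex cpumask string, so on an 8-CPU system
 *
 *      echo 0f > tracing_cpumask
 *
 * restricts recording to CPUs 0-3 and reading the file back reports "0f".
 */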
4729
4730 static int tracing_trace_options_show(struct seq_file *m, void *v)
4731 {
4732         struct tracer_opt *trace_opts;
4733         struct trace_array *tr = m->private;
4734         u32 tracer_flags;
4735         int i;
4736
4737         mutex_lock(&trace_types_lock);
4738         tracer_flags = tr->current_trace->flags->val;
4739         trace_opts = tr->current_trace->flags->opts;
4740
4741         for (i = 0; trace_options[i]; i++) {
4742                 if (tr->trace_flags & (1 << i))
4743                         seq_printf(m, "%s\n", trace_options[i]);
4744                 else
4745                         seq_printf(m, "no%s\n", trace_options[i]);
4746         }
4747
4748         for (i = 0; trace_opts[i].name; i++) {
4749                 if (tracer_flags & trace_opts[i].bit)
4750                         seq_printf(m, "%s\n", trace_opts[i].name);
4751                 else
4752                         seq_printf(m, "no%s\n", trace_opts[i].name);
4753         }
4754         mutex_unlock(&trace_types_lock);
4755
4756         return 0;
4757 }
4758
4759 static int __set_tracer_option(struct trace_array *tr,
4760                                struct tracer_flags *tracer_flags,
4761                                struct tracer_opt *opts, int neg)
4762 {
4763         struct tracer *trace = tracer_flags->trace;
4764         int ret;
4765
4766         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4767         if (ret)
4768                 return ret;
4769
4770         if (neg)
4771                 tracer_flags->val &= ~opts->bit;
4772         else
4773                 tracer_flags->val |= opts->bit;
4774         return 0;
4775 }
4776
4777 /* Try to assign a tracer specific option */
4778 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4779 {
4780         struct tracer *trace = tr->current_trace;
4781         struct tracer_flags *tracer_flags = trace->flags;
4782         struct tracer_opt *opts = NULL;
4783         int i;
4784
4785         for (i = 0; tracer_flags->opts[i].name; i++) {
4786                 opts = &tracer_flags->opts[i];
4787
4788                 if (strcmp(cmp, opts->name) == 0)
4789                         return __set_tracer_option(tr, trace->flags, opts, neg);
4790         }
4791
4792         return -EINVAL;
4793 }
4794
4795 /* Some tracers require overwrite to stay enabled */
4796 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4797 {
4798         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4799                 return -1;
4800
4801         return 0;
4802 }
4803
4804 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4805 {
4806         if ((mask == TRACE_ITER_RECORD_TGID) ||
4807             (mask == TRACE_ITER_RECORD_CMD))
4808                 lockdep_assert_held(&event_mutex);
4809
4810         /* do nothing if flag is already set */
4811         if (!!(tr->trace_flags & mask) == !!enabled)
4812                 return 0;
4813
4814         /* Give the tracer a chance to approve the change */
4815         if (tr->current_trace->flag_changed)
4816                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4817                         return -EINVAL;
4818
4819         if (enabled)
4820                 tr->trace_flags |= mask;
4821         else
4822                 tr->trace_flags &= ~mask;
4823
4824         if (mask == TRACE_ITER_RECORD_CMD)
4825                 trace_event_enable_cmd_record(enabled);
4826
4827         if (mask == TRACE_ITER_RECORD_TGID) {
4828                 if (!tgid_map)
4829                         tgid_map = kvcalloc(PID_MAX_DEFAULT + 1,
4830                                            sizeof(*tgid_map),
4831                                            GFP_KERNEL);
4832                 if (!tgid_map) {
4833                         tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
4834                         return -ENOMEM;
4835                 }
4836
4837                 trace_event_enable_tgid_record(enabled);
4838         }
4839
4840         if (mask == TRACE_ITER_EVENT_FORK)
4841                 trace_event_follow_fork(tr, enabled);
4842
4843         if (mask == TRACE_ITER_FUNC_FORK)
4844                 ftrace_pid_follow_fork(tr, enabled);
4845
4846         if (mask == TRACE_ITER_OVERWRITE) {
4847                 ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
4848 #ifdef CONFIG_TRACER_MAX_TRACE
4849                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4850 #endif
4851         }
4852
4853         if (mask == TRACE_ITER_PRINTK) {
4854                 trace_printk_start_stop_comm(enabled);
4855                 trace_printk_control(enabled);
4856         }
4857
4858         return 0;
4859 }
4860
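/*
 * trace_set_options() handles one option token as written to the
 * trace_options file: either a core flag name from trace_options[] or a
 * tracer-specific option, optionally prefixed with "no" to clear it.
 * Illustrative shell usage, assuming tracefs is mounted at the usual
 * /sys/kernel/tracing:
 *
 *	echo overwrite   > /sys/kernel/tracing/trace_options
 *	echo nooverwrite > /sys/kernel/tracing/trace_options
 */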
4861 int trace_set_options(struct trace_array *tr, char *option)
4862 {
4863         char *cmp;
4864         int neg = 0;
4865         int ret;
4866         size_t orig_len = strlen(option);
4867         int len;
4868
4869         cmp = strstrip(option);
4870
4871         len = str_has_prefix(cmp, "no");
4872         if (len)
4873                 neg = 1;
4874
4875         cmp += len;
4876
4877         mutex_lock(&event_mutex);
4878         mutex_lock(&trace_types_lock);
4879
4880         ret = match_string(trace_options, -1, cmp);
4881         /* If the option is not a core flag, try the tracer-specific options */
4882         if (ret < 0)
4883                 ret = set_tracer_option(tr, cmp, neg);
4884         else
4885                 ret = set_tracer_flag(tr, 1 << ret, !neg);
4886
4887         mutex_unlock(&trace_types_lock);
4888         mutex_unlock(&event_mutex);
4889
4890         /*
4891          * If the first trailing whitespace is replaced with '\0' by strstrip,
4892          * turn it back into a space.
4893          */
4894         if (orig_len > strlen(option))
4895                 option[strlen(option)] = ' ';
4896
4897         return ret;
4898 }
4899
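/*
 * apply_trace_boot_options() replays the comma-separated list saved from
 * the trace_options= kernel command line parameter, feeding each token to
 * trace_set_options() above. For example, booting with something like
 * "trace_options=sym-addr,nooverwrite" would apply those two options here
 * (the option names are only illustrative).
 */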
4900 static void __init apply_trace_boot_options(void)
4901 {
4902         char *buf = trace_boot_options_buf;
4903         char *option;
4904
4905         while (true) {
4906                 option = strsep(&buf, ",");
4907
4908                 if (!option)
4909                         break;
4910
4911                 if (*option)
4912                         trace_set_options(&global_trace, option);
4913
4914                 /* Put back the comma to allow this to be called again */
4915                 if (buf)
4916                         *(buf - 1) = ',';
4917         }
4918 }
4919
4920 static ssize_t
4921 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4922                         size_t cnt, loff_t *ppos)
4923 {
4924         struct seq_file *m = filp->private_data;
4925         struct trace_array *tr = m->private;
4926         char buf[64];
4927         int ret;
4928
4929         if (cnt >= sizeof(buf))
4930                 return -EINVAL;
4931
4932         if (copy_from_user(buf, ubuf, cnt))
4933                 return -EFAULT;
4934
4935         buf[cnt] = 0;
4936
4937         ret = trace_set_options(tr, buf);
4938         if (ret < 0)
4939                 return ret;
4940
4941         *ppos += cnt;
4942
4943         return cnt;
4944 }
4945
4946 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4947 {
4948         struct trace_array *tr = inode->i_private;
4949         int ret;
4950
4951         ret = tracing_check_open_get_tr(tr);
4952         if (ret)
4953                 return ret;
4954
4955         ret = single_open(file, tracing_trace_options_show, inode->i_private);
4956         if (ret < 0)
4957                 trace_array_put(tr);
4958
4959         return ret;
4960 }
4961
4962 static const struct file_operations tracing_iter_fops = {
4963         .open           = tracing_trace_options_open,
4964         .read           = seq_read,
4965         .llseek         = seq_lseek,
4966         .release        = tracing_single_release_tr,
4967         .write          = tracing_trace_options_write,
4968 };
4969
4970 static const char readme_msg[] =
4971         "tracing mini-HOWTO:\n\n"
4972         "# echo 0 > tracing_on : quick way to disable tracing\n"
4973         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4974         " Important files:\n"
4975         "  trace\t\t\t- The static contents of the buffer\n"
4976         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
4977         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4978         "  current_tracer\t- function and latency tracers\n"
4979         "  available_tracers\t- list of configured tracers for current_tracer\n"
4980         "  error_log\t- error log for failed commands (that support it)\n"
4981         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
4982         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
4983         "  trace_clock\t\t- change the clock used to order events\n"
4984         "       local:   Per cpu clock but may not be synced across CPUs\n"
4985         "      global:   Synced across CPUs but slows tracing down.\n"
4986         "     counter:   Not a clock, but just an increment\n"
4987         "      uptime:   Jiffy counter from time of boot\n"
4988         "        perf:   Same clock that perf events use\n"
4989 #ifdef CONFIG_X86_64
4990         "     x86-tsc:   TSC cycle counter\n"
4991 #endif
4992         "\n  timestamp_mode\t- view the mode used to timestamp events\n"
4993         "       delta:   Delta difference against a buffer-wide timestamp\n"
4994         "    absolute:   Absolute (standalone) timestamp\n"
4995         "\n  trace_marker\t\t- Writes into this file are written into the kernel buffer\n"
4996         "\n  trace_marker_raw\t\t- Writes into this file are written as binary data into the kernel buffer\n"
4997         "  tracing_cpumask\t- Limit which CPUs to trace\n"
4998         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4999         "\t\t\t  Remove sub-buffer with rmdir\n"
5000         "  trace_options\t\t- Set format or modify how tracing happens\n"
5001         "\t\t\t  Disable an option by prefixing 'no' to the\n"
5002         "\t\t\t  option name\n"
5003         "  saved_cmdlines_size\t- echo the number of entries to store in the comm-pid list\n"
5004 #ifdef CONFIG_DYNAMIC_FTRACE
5005         "\n  available_filter_functions - list of functions that can be filtered on\n"
5006         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
5007         "\t\t\t  functions\n"
5008         "\t     accepts: func_full_name or glob-matching-pattern\n"
5009         "\t     modules: Can select a group via module\n"
5010         "\t      Format: :mod:<module-name>\n"
5011         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5012         "\t    triggers: a command to perform when function is hit\n"
5013         "\t      Format: <function>:<trigger>[:count]\n"
5014         "\t     trigger: traceon, traceoff\n"
5015         "\t\t      enable_event:<system>:<event>\n"
5016         "\t\t      disable_event:<system>:<event>\n"
5017 #ifdef CONFIG_STACKTRACE
5018         "\t\t      stacktrace\n"
5019 #endif
5020 #ifdef CONFIG_TRACER_SNAPSHOT
5021         "\t\t      snapshot\n"
5022 #endif
5023         "\t\t      dump\n"
5024         "\t\t      cpudump\n"
5025         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5026         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5027         "\t     The first one will disable tracing every time do_fault is hit\n"
5028         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5029         "\t       The first time do_trap is hit and it disables tracing, the\n"
5030         "\t       counter will decrement to 2. If tracing is already disabled,\n"
5031         "\t       the counter will not decrement. It only decrements when the\n"
5032         "\t       trigger did work\n"
5033         "\t     To remove trigger without count:\n"
5034         "\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
5035         "\t     To remove trigger with a count:\n"
5036         "\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
5037         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5038         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5039         "\t    modules: Can select a group via module command :mod:\n"
5040         "\t    Does not accept triggers\n"
5041 #endif /* CONFIG_DYNAMIC_FTRACE */
5042 #ifdef CONFIG_FUNCTION_TRACER
5043         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5044         "\t\t    (function)\n"
5045         "  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5046         "\t\t    (function)\n"
5047 #endif
5048 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5049         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5050         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5051         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5052 #endif
5053 #ifdef CONFIG_TRACER_SNAPSHOT
5054         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5055         "\t\t\t  snapshot buffer. Read the contents for more\n"
5056         "\t\t\t  information\n"
5057 #endif
5058 #ifdef CONFIG_STACK_TRACER
5059         "  stack_trace\t\t- Shows the max stack trace when active\n"
5060         "  stack_max_size\t- Shows current max stack size that was traced\n"
5061         "\t\t\t  Write into this file to reset the max size (trigger a\n"
5062         "\t\t\t  new trace)\n"
5063 #ifdef CONFIG_DYNAMIC_FTRACE
5064         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5065         "\t\t\t  traces\n"
5066 #endif
5067 #endif /* CONFIG_STACK_TRACER */
5068 #ifdef CONFIG_DYNAMIC_EVENTS
5069         "  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5070         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5071 #endif
5072 #ifdef CONFIG_KPROBE_EVENTS
5073         "  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5074         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5075 #endif
5076 #ifdef CONFIG_UPROBE_EVENTS
5077         "  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5078         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5079 #endif
5080 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5081         "\t  accepts: event-definitions (one definition per line)\n"
5082         "\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
5083         "\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
5084 #ifdef CONFIG_HIST_TRIGGERS
5085         "\t           s:[synthetic/]<event> <field> [<field>]\n"
5086 #endif
5087         "\t           -:[<group>/]<event>\n"
5088 #ifdef CONFIG_KPROBE_EVENTS
5089         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5090   "place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5091 #endif
5092 #ifdef CONFIG_UPROBE_EVENTS
5093   "   place (uprobe): <path>:<offset>[(ref_ctr_offset)]\n"
5094 #endif
5095         "\t     args: <name>=fetcharg[:type]\n"
5096         "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
5097 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5098         "\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5099 #else
5100         "\t           $stack<index>, $stack, $retval, $comm,\n"
5101 #endif
5102         "\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5103         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
5104         "\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5105         "\t           <type>\\[<array-size>\\]\n"
5106 #ifdef CONFIG_HIST_TRIGGERS
5107         "\t    field: <stype> <name>;\n"
5108         "\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5109         "\t           [unsigned] char/int/long\n"
5110 #endif
5111 #endif
5112         "  events/\t\t- Directory containing all trace event subsystems:\n"
5113         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5114         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
5115         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5116         "\t\t\t  events\n"
5117         "      filter\t\t- If set, only events passing filter are traced\n"
5118         "  events/<system>/<event>/\t- Directory containing control files for\n"
5119         "\t\t\t  <event>:\n"
5120         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5121         "      filter\t\t- If set, only events passing filter are traced\n"
5122         "      trigger\t\t- If set, a command to perform when event is hit\n"
5123         "\t    Format: <trigger>[:count][if <filter>]\n"
5124         "\t   trigger: traceon, traceoff\n"
5125         "\t            enable_event:<system>:<event>\n"
5126         "\t            disable_event:<system>:<event>\n"
5127 #ifdef CONFIG_HIST_TRIGGERS
5128         "\t            enable_hist:<system>:<event>\n"
5129         "\t            disable_hist:<system>:<event>\n"
5130 #endif
5131 #ifdef CONFIG_STACKTRACE
5132         "\t\t    stacktrace\n"
5133 #endif
5134 #ifdef CONFIG_TRACER_SNAPSHOT
5135         "\t\t    snapshot\n"
5136 #endif
5137 #ifdef CONFIG_HIST_TRIGGERS
5138         "\t\t    hist (see below)\n"
5139 #endif
5140         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5141         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5142         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5143         "\t                  events/block/block_unplug/trigger\n"
5144         "\t   The first disables tracing every time block_unplug is hit.\n"
5145         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5146         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5147         "\t     is hit and has a value greater than 1 for the 'nr_rq' event field.\n"
5148         "\t   Like function triggers, the counter is only decremented if the\n"
5149         "\t    trigger actually enabled or disabled tracing.\n"
5150         "\t   To remove a trigger without a count:\n"
5151         "\t     echo '!<trigger>' > <system>/<event>/trigger\n"
5152         "\t   To remove a trigger with a count:\n"
5153         "\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
5154         "\t   Filters can be ignored when removing a trigger.\n"
5155 #ifdef CONFIG_HIST_TRIGGERS
5156         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5157         "\t    Format: hist:keys=<field1[,field2,...]>\n"
5158         "\t            [:values=<field1[,field2,...]>]\n"
5159         "\t            [:sort=<field1[,field2,...]>]\n"
5160         "\t            [:size=#entries]\n"
5161         "\t            [:pause][:continue][:clear]\n"
5162         "\t            [:name=histname1]\n"
5163         "\t            [:<handler>.<action>]\n"
5164         "\t            [if <filter>]\n\n"
5165         "\t    When a matching event is hit, an entry is added to a hash\n"
5166         "\t    table using the key(s) and value(s) named, and the value of a\n"
5167         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
5168         "\t    correspond to fields in the event's format description.  Keys\n"
5169         "\t    can be any field, or the special string 'stacktrace'.\n"
5170         "\t    Compound keys consisting of up to two fields can be specified\n"
5171         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5172         "\t    fields.  Sort keys consisting of up to two fields can be\n"
5173         "\t    specified using the 'sort' keyword.  The sort direction can\n"
5174         "\t    be modified by appending '.descending' or '.ascending' to a\n"
5175         "\t    sort field.  The 'size' parameter can be used to specify more\n"
5176         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
5177         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
5178         "\t    its histogram data will be shared with other triggers of the\n"
5179         "\t    same name, and trigger hits will update this common data.\n\n"
5180         "\t    Reading the 'hist' file for the event will dump the hash\n"
5181         "\t    table in its entirety to stdout.  If there are multiple hist\n"
5182         "\t    triggers attached to an event, there will be a table for each\n"
5183         "\t    trigger in the output.  The table displayed for a named\n"
5184         "\t    trigger will be the same as any other instance having the\n"
5185         "\t    same name.  The default format used to display a given field\n"
5186         "\t    can be modified by appending any of the following modifiers\n"
5187         "\t    to the field name, as applicable:\n\n"
5188         "\t            .hex        display a number as a hex value\n"
5189         "\t            .sym        display an address as a symbol\n"
5190         "\t            .sym-offset display an address as a symbol and offset\n"
5191         "\t            .execname   display a common_pid as a program name\n"
5192         "\t            .syscall    display a syscall id as a syscall name\n"
5193         "\t            .log2       display log2 value rather than raw number\n"
5194         "\t            .usecs      display a common_timestamp in microseconds\n\n"
5195         "\t    The 'pause' parameter can be used to pause an existing hist\n"
5196         "\t    trigger or to start a hist trigger but not log any events\n"
5197         "\t    until told to do so.  'continue' can be used to start or\n"
5198         "\t    restart a paused hist trigger.\n\n"
5199         "\t    The 'clear' parameter will clear the contents of a running\n"
5200         "\t    hist trigger and leave its current paused/active state\n"
5201         "\t    unchanged.\n\n"
5202         "\t    The enable_hist and disable_hist triggers can be used to\n"
5203         "\t    have one event conditionally start and stop another event's\n"
5204         "\t    already-attached hist trigger.  The syntax is analogous to\n"
5205         "\t    the enable_event and disable_event triggers.\n\n"
5206         "\t    Hist trigger handlers and actions are executed whenever\n"
5207         "\t    a histogram entry is added or updated.  They take the form:\n\n"
5208         "\t        <handler>.<action>\n\n"
5209         "\t    The available handlers are:\n\n"
5210         "\t        onmatch(matching.event)  - invoke on addition or update\n"
5211         "\t        onmax(var)               - invoke if var exceeds current max\n"
5212         "\t        onchange(var)            - invoke action if var changes\n\n"
5213         "\t    The available actions are:\n\n"
5214         "\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5215         "\t        save(field,...)                      - save current event fields\n"
5216 #ifdef CONFIG_TRACER_SNAPSHOT
5217         "\t        snapshot()                           - snapshot the trace buffer\n"
5218 #endif
5219 #endif
5220 ;
5221
5222 static ssize_t
5223 tracing_readme_read(struct file *filp, char __user *ubuf,
5224                        size_t cnt, loff_t *ppos)
5225 {
5226         return simple_read_from_buffer(ubuf, cnt, ppos,
5227                                         readme_msg, strlen(readme_msg));
5228 }
5229
5230 static const struct file_operations tracing_readme_fops = {
5231         .open           = tracing_open_generic,
5232         .read           = tracing_readme_read,
5233         .llseek         = generic_file_llseek,
5234 };
5235
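/*
 * The saved_tgids seq_file below walks tgid_map (indexed by PID and filled
 * in while the record-tgid option is set) and emits one "<pid> <tgid>"
 * pair per line for every PID with a known tgid. Illustrative output,
 * assuming the usual tracefs mount point:
 *
 *	# cat /sys/kernel/tracing/saved_tgids
 *	1234 1230
 *
 * (the numbers above are made up for illustration)
 */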
5236 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5237 {
5238         int *ptr = v;
5239
5240         if (*pos || m->count)
5241                 ptr++;
5242
5243         (*pos)++;
5244
5245         for (; ptr <= &tgid_map[PID_MAX_DEFAULT]; ptr++) {
5246                 if (trace_find_tgid(*ptr))
5247                         return ptr;
5248         }
5249
5250         return NULL;
5251 }
5252
5253 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5254 {
5255         void *v;
5256         loff_t l = 0;
5257
5258         if (!tgid_map)
5259                 return NULL;
5260
5261         v = &tgid_map[0];
5262         while (l <= *pos) {
5263                 v = saved_tgids_next(m, v, &l);
5264                 if (!v)
5265                         return NULL;
5266         }
5267
5268         return v;
5269 }
5270
5271 static void saved_tgids_stop(struct seq_file *m, void *v)
5272 {
5273 }
5274
5275 static int saved_tgids_show(struct seq_file *m, void *v)
5276 {
5277         int pid = (int *)v - tgid_map;
5278
5279         seq_printf(m, "%d %d\n", pid, trace_find_tgid(pid));
5280         return 0;
5281 }
5282
5283 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5284         .start          = saved_tgids_start,
5285         .stop           = saved_tgids_stop,
5286         .next           = saved_tgids_next,
5287         .show           = saved_tgids_show,
5288 };
5289
5290 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5291 {
5292         int ret;
5293
5294         ret = tracing_check_open_get_tr(NULL);
5295         if (ret)
5296                 return ret;
5297
5298         return seq_open(filp, &tracing_saved_tgids_seq_ops);
5299 }
5300
5301
5302 static const struct file_operations tracing_saved_tgids_fops = {
5303         .open           = tracing_saved_tgids_open,
5304         .read           = seq_read,
5305         .llseek         = seq_lseek,
5306         .release        = seq_release,
5307 };
5308
5309 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5310 {
5311         unsigned int *ptr = v;
5312
5313         if (*pos || m->count)
5314                 ptr++;
5315
5316         (*pos)++;
5317
5318         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5319              ptr++) {
5320                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5321                         continue;
5322
5323                 return ptr;
5324         }
5325
5326         return NULL;
5327 }
5328
5329 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5330 {
5331         void *v;
5332         loff_t l = 0;
5333
5334         preempt_disable();
5335         arch_spin_lock(&trace_cmdline_lock);
5336
5337         v = &savedcmd->map_cmdline_to_pid[0];
5338         while (l <= *pos) {
5339                 v = saved_cmdlines_next(m, v, &l);
5340                 if (!v)
5341                         return NULL;
5342         }
5343
5344         return v;
5345 }
5346
5347 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5348 {
5349         arch_spin_unlock(&trace_cmdline_lock);
5350         preempt_enable();
5351 }
5352
5353 static int saved_cmdlines_show(struct seq_file *m, void *v)
5354 {
5355         char buf[TASK_COMM_LEN];
5356         unsigned int *pid = v;
5357
5358         __trace_find_cmdline(*pid, buf);
5359         seq_printf(m, "%d %s\n", *pid, buf);
5360         return 0;
5361 }
5362
5363 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5364         .start          = saved_cmdlines_start,
5365         .next           = saved_cmdlines_next,
5366         .stop           = saved_cmdlines_stop,
5367         .show           = saved_cmdlines_show,
5368 };
5369
5370 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5371 {
5372         int ret;
5373
5374         ret = tracing_check_open_get_tr(NULL);
5375         if (ret)
5376                 return ret;
5377
5378         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5379 }
5380
5381 static const struct file_operations tracing_saved_cmdlines_fops = {
5382         .open           = tracing_saved_cmdlines_open,
5383         .read           = seq_read,
5384         .llseek         = seq_lseek,
5385         .release        = seq_release,
5386 };
5387
5388 static ssize_t
5389 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5390                                  size_t cnt, loff_t *ppos)
5391 {
5392         char buf[64];
5393         int r;
5394
5395         arch_spin_lock(&trace_cmdline_lock);
5396         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5397         arch_spin_unlock(&trace_cmdline_lock);
5398
5399         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5400 }
5401
5402 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5403 {
5404         kfree(s->saved_cmdlines);
5405         kfree(s->map_cmdline_to_pid);
5406         kfree(s);
5407 }
5408
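/*
 * Resizing the saved cmdlines cache allocates a new buffer of @val entries,
 * swaps it in under trace_cmdline_lock, and only then frees the old buffer,
 * so lookups never see a half-initialized cache. Illustrative shell usage,
 * assuming the usual tracefs mount point:
 *
 *	echo 4096 > /sys/kernel/tracing/saved_cmdlines_size
 */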
5409 static int tracing_resize_saved_cmdlines(unsigned int val)
5410 {
5411         struct saved_cmdlines_buffer *s, *savedcmd_temp;
5412
5413         s = kmalloc(sizeof(*s), GFP_KERNEL);
5414         if (!s)
5415                 return -ENOMEM;
5416
5417         if (allocate_cmdlines_buffer(val, s) < 0) {
5418                 kfree(s);
5419                 return -ENOMEM;
5420         }
5421
5422         arch_spin_lock(&trace_cmdline_lock);
5423         savedcmd_temp = savedcmd;
5424         savedcmd = s;
5425         arch_spin_unlock(&trace_cmdline_lock);
5426         free_saved_cmdlines_buffer(savedcmd_temp);
5427
5428         return 0;
5429 }
5430
5431 static ssize_t
5432 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5433                                   size_t cnt, loff_t *ppos)
5434 {
5435         unsigned long val;
5436         int ret;
5437
5438         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5439         if (ret)
5440                 return ret;
5441
5442         /* must have at least 1 entry and at most PID_MAX_DEFAULT */
5443         if (!val || val > PID_MAX_DEFAULT)
5444                 return -EINVAL;
5445
5446         ret = tracing_resize_saved_cmdlines((unsigned int)val);
5447         if (ret < 0)
5448                 return ret;
5449
5450         *ppos += cnt;
5451
5452         return cnt;
5453 }
5454
5455 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5456         .open           = tracing_open_generic,
5457         .read           = tracing_saved_cmdlines_size_read,
5458         .write          = tracing_saved_cmdlines_size_write,
5459 };
5460
5461 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5462 static union trace_eval_map_item *
5463 update_eval_map(union trace_eval_map_item *ptr)
5464 {
5465         if (!ptr->map.eval_string) {
5466                 if (ptr->tail.next) {
5467                         ptr = ptr->tail.next;
5468                         /* Set ptr to the next real item (skip head) */
5469                         ptr++;
5470                 } else
5471                         return NULL;
5472         }
5473         return ptr;
5474 }
5475
5476 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5477 {
5478         union trace_eval_map_item *ptr = v;
5479
5480         /*
5481          * Paranoid! If ptr points to end, we don't want to increment past it.
5482          * This really should never happen.
5483          */
5484         (*pos)++;
5485         ptr = update_eval_map(ptr);
5486         if (WARN_ON_ONCE(!ptr))
5487                 return NULL;
5488
5489         ptr++;
5490         ptr = update_eval_map(ptr);
5491
5492         return ptr;
5493 }
5494
5495 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5496 {
5497         union trace_eval_map_item *v;
5498         loff_t l = 0;
5499
5500         mutex_lock(&trace_eval_mutex);
5501
5502         v = trace_eval_maps;
5503         if (v)
5504                 v++;
5505
5506         while (v && l < *pos) {
5507                 v = eval_map_next(m, v, &l);
5508         }
5509
5510         return v;
5511 }
5512
5513 static void eval_map_stop(struct seq_file *m, void *v)
5514 {
5515         mutex_unlock(&trace_eval_mutex);
5516 }
5517
5518 static int eval_map_show(struct seq_file *m, void *v)
5519 {
5520         union trace_eval_map_item *ptr = v;
5521
5522         seq_printf(m, "%s %ld (%s)\n",
5523                    ptr->map.eval_string, ptr->map.eval_value,
5524                    ptr->map.system);
5525
5526         return 0;
5527 }
5528
5529 static const struct seq_operations tracing_eval_map_seq_ops = {
5530         .start          = eval_map_start,
5531         .next           = eval_map_next,
5532         .stop           = eval_map_stop,
5533         .show           = eval_map_show,
5534 };
5535
5536 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5537 {
5538         int ret;
5539
5540         ret = tracing_check_open_get_tr(NULL);
5541         if (ret)
5542                 return ret;
5543
5544         return seq_open(filp, &tracing_eval_map_seq_ops);
5545 }
5546
5547 static const struct file_operations tracing_eval_map_fops = {
5548         .open           = tracing_eval_map_open,
5549         .read           = seq_read,
5550         .llseek         = seq_lseek,
5551         .release        = seq_release,
5552 };
5553
5554 static inline union trace_eval_map_item *
5555 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5556 {
5557         /* Return tail of array given the head */
5558         return ptr + ptr->head.length + 1;
5559 }
5560
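/*
 * A sketch of one chunk appended to trace_eval_maps, as built by
 * trace_insert_eval_map_file() below:
 *
 *	[ head: mod, length ][ map 0 ] ... [ map len-1 ][ tail: next ]
 *
 * trace_eval_jmp_to_tail() above skips from a head entry to its tail so
 * that chunks from multiple modules can be chained through tail.next.
 */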
5561 static void
5562 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5563                            int len)
5564 {
5565         struct trace_eval_map **stop;
5566         struct trace_eval_map **map;
5567         union trace_eval_map_item *map_array;
5568         union trace_eval_map_item *ptr;
5569
5570         stop = start + len;
5571
5572         /*
5573          * The trace_eval_maps contains the map plus a head and tail item,
5574          * where the head holds the module and length of array, and the
5575          * tail holds a pointer to the next list.
5576          */
5577         map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5578         if (!map_array) {
5579                 pr_warn("Unable to allocate trace eval mapping\n");
5580                 return;
5581         }
5582
5583         mutex_lock(&trace_eval_mutex);
5584
5585         if (!trace_eval_maps)
5586                 trace_eval_maps = map_array;
5587         else {
5588                 ptr = trace_eval_maps;
5589                 for (;;) {
5590                         ptr = trace_eval_jmp_to_tail(ptr);
5591                         if (!ptr->tail.next)
5592                                 break;
5593                         ptr = ptr->tail.next;
5594
5595                 }
5596                 ptr->tail.next = map_array;
5597         }
5598         map_array->head.mod = mod;
5599         map_array->head.length = len;
5600         map_array++;
5601
5602         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5603                 map_array->map = **map;
5604                 map_array++;
5605         }
5606         memset(map_array, 0, sizeof(*map_array));
5607
5608         mutex_unlock(&trace_eval_mutex);
5609 }
5610
5611 static void trace_create_eval_file(struct dentry *d_tracer)
5612 {
5613         trace_create_file("eval_map", 0444, d_tracer,
5614                           NULL, &tracing_eval_map_fops);
5615 }
5616
5617 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5618 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5619 static inline void trace_insert_eval_map_file(struct module *mod,
5620                               struct trace_eval_map **start, int len) { }
5621 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5622
5623 static void trace_insert_eval_map(struct module *mod,
5624                                   struct trace_eval_map **start, int len)
5625 {
5626         struct trace_eval_map **map;
5627
5628         if (len <= 0)
5629                 return;
5630
5631         map = start;
5632
5633         trace_event_eval_update(map, len);
5634
5635         trace_insert_eval_map_file(mod, start, len);
5636 }
5637
5638 static ssize_t
5639 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5640                        size_t cnt, loff_t *ppos)
5641 {
5642         struct trace_array *tr = filp->private_data;
5643         char buf[MAX_TRACER_SIZE+2];
5644         int r;
5645
5646         mutex_lock(&trace_types_lock);
5647         r = sprintf(buf, "%s\n", tr->current_trace->name);
5648         mutex_unlock(&trace_types_lock);
5649
5650         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5651 }
5652
5653 int tracer_init(struct tracer *t, struct trace_array *tr)
5654 {
5655         tracing_reset_online_cpus(&tr->array_buffer);
5656         return t->init(tr);
5657 }
5658
5659 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
5660 {
5661         int cpu;
5662
5663         for_each_tracing_cpu(cpu)
5664                 per_cpu_ptr(buf->data, cpu)->entries = val;
5665 }
5666
5667 #ifdef CONFIG_TRACER_MAX_TRACE
5668 /* resize @trace_buf's per-cpu entries to match @size_buf's entries */
5669 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
5670                                         struct array_buffer *size_buf, int cpu_id)
5671 {
5672         int cpu, ret = 0;
5673
5674         if (cpu_id == RING_BUFFER_ALL_CPUS) {
5675                 for_each_tracing_cpu(cpu) {
5676                         ret = ring_buffer_resize(trace_buf->buffer,
5677                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5678                         if (ret < 0)
5679                                 break;
5680                         per_cpu_ptr(trace_buf->data, cpu)->entries =
5681                                 per_cpu_ptr(size_buf->data, cpu)->entries;
5682                 }
5683         } else {
5684                 ret = ring_buffer_resize(trace_buf->buffer,
5685                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5686                 if (ret == 0)
5687                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5688                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
5689         }
5690
5691         return ret;
5692 }
5693 #endif /* CONFIG_TRACER_MAX_TRACE */
5694
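/*
 * __tracing_resize_ring_buffer() resizes either a single CPU's buffer or
 * all of them (@cpu == RING_BUFFER_ALL_CPUS) and, when the current tracer
 * uses the max/snapshot buffer, keeps that buffer the same size as the
 * main one. It is reached, for instance, from writes to the buffer_size_kb
 * files, which pass the requested size down in bytes.
 */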
5695 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5696                                         unsigned long size, int cpu)
5697 {
5698         int ret;
5699
5700         /*
5701          * If kernel or user changes the size of the ring buffer
5702          * we use the size that was given, and we can forget about
5703          * expanding it later.
5704          */
5705         ring_buffer_expanded = true;
5706
5707         /* May be called before buffers are initialized */
5708         if (!tr->array_buffer.buffer)
5709                 return 0;
5710
5711         ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
5712         if (ret < 0)
5713                 return ret;
5714
5715 #ifdef CONFIG_TRACER_MAX_TRACE
5716         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
5717             !tr->current_trace->use_max_tr)
5718                 goto out;
5719
5720         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5721         if (ret < 0) {
5722                 int r = resize_buffer_duplicate_size(&tr->array_buffer,
5723                                                      &tr->array_buffer, cpu);
5724                 if (r < 0) {
5725                         /*
5726                          * AARGH! We are left with a differently
5727                          * sized max buffer!
5728                          * The max buffer is our "snapshot" buffer.
5729                          * When a tracer needs a snapshot (one of the
5730                          * latency tracers), it swaps the max buffer
5731                          * with the saved snapshot. We succeeded in
5732                          * updating the size of the main buffer, but failed
5733                          * to update the size of the max buffer. And when we tried
5734                          * to reset the main buffer to the original size, we
5735                          * failed there too. This is very unlikely to
5736                          * happen, but if it does, warn and kill all
5737                          * tracing.
5738                          */
5739                         WARN_ON(1);
5740                         tracing_disabled = 1;
5741                 }
5742                 return ret;
5743         }
5744
5745         if (cpu == RING_BUFFER_ALL_CPUS)
5746                 set_buffer_entries(&tr->max_buffer, size);
5747         else
5748                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
5749
5750  out:
5751 #endif /* CONFIG_TRACER_MAX_TRACE */
5752
5753         if (cpu == RING_BUFFER_ALL_CPUS)
5754                 set_buffer_entries(&tr->array_buffer, size);
5755         else
5756                 per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size;
5757
5758         return ret;
5759 }
5760
5761 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5762                                   unsigned long size, int cpu_id)
5763 {
5764         int ret = size;
5765
5766         mutex_lock(&trace_types_lock);
5767
5768         if (cpu_id != RING_BUFFER_ALL_CPUS) {
5769                 /* make sure this cpu is enabled in the mask */
5770                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
5771                         ret = -EINVAL;
5772                         goto out;
5773                 }
5774         }
5775
5776         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5777         if (ret < 0)
5778                 ret = -ENOMEM;
5779
5780 out:
5781         mutex_unlock(&trace_types_lock);
5782
5783         return ret;
5784 }
5785
5786
5787 /**
5788  * tracing_update_buffers - used by tracing facility to expand ring buffers
5789  *
5790  * To save memory when tracing is configured in but never used, the
5791  * ring buffers are set to a minimum size. But once a user starts to
5792  * use the tracing facility, they need to grow to their
5793  * default size.
5794  *
5795  * This function is to be called when a tracer is about to be used.
5796  */
5797 int tracing_update_buffers(void)
5798 {
5799         int ret = 0;
5800
5801         mutex_lock(&trace_types_lock);
5802         if (!ring_buffer_expanded)
5803                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
5804                                                 RING_BUFFER_ALL_CPUS);
5805         mutex_unlock(&trace_types_lock);
5806
5807         return ret;
5808 }
5809
5810 struct trace_option_dentry;
5811
5812 static void
5813 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
5814
5815 /*
5816  * Used to clear out the tracer before deletion of an instance.
5817  * Must have trace_types_lock held.
5818  */
5819 static void tracing_set_nop(struct trace_array *tr)
5820 {
5821         if (tr->current_trace == &nop_trace)
5822                 return;
5823
5824         tr->current_trace->enabled--;
5825
5826         if (tr->current_trace->reset)
5827                 tr->current_trace->reset(tr);
5828
5829         tr->current_trace = &nop_trace;
5830 }
5831
5832 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5833 {
5834         /* Only enable if the directory has been created already. */
5835         if (!tr->dir)
5836                 return;
5837
5838         create_trace_option_files(tr, t);
5839 }
5840
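/*
 * tracing_set_tracer() switches @tr to the tracer registered under @buf,
 * tearing down the previous tracer and allocating or freeing the snapshot
 * buffer as the new tracer requires. From user space this corresponds to,
 * e.g. (assuming the usual tracefs mount point):
 *
 *	echo function > /sys/kernel/tracing/current_tracer
 *	echo nop      > /sys/kernel/tracing/current_tracer
 */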
5841 int tracing_set_tracer(struct trace_array *tr, const char *buf)
5842 {
5843         struct tracer *t;
5844 #ifdef CONFIG_TRACER_MAX_TRACE
5845         bool had_max_tr;
5846 #endif
5847         int ret = 0;
5848
5849         mutex_lock(&trace_types_lock);
5850
5851         if (!ring_buffer_expanded) {
5852                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5853                                                 RING_BUFFER_ALL_CPUS);
5854                 if (ret < 0)
5855                         goto out;
5856                 ret = 0;
5857         }
5858
5859         for (t = trace_types; t; t = t->next) {
5860                 if (strcmp(t->name, buf) == 0)
5861                         break;
5862         }
5863         if (!t) {
5864                 ret = -EINVAL;
5865                 goto out;
5866         }
5867         if (t == tr->current_trace)
5868                 goto out;
5869
5870 #ifdef CONFIG_TRACER_SNAPSHOT
5871         if (t->use_max_tr) {
5872                 arch_spin_lock(&tr->max_lock);
5873                 if (tr->cond_snapshot)
5874                         ret = -EBUSY;
5875                 arch_spin_unlock(&tr->max_lock);
5876                 if (ret)
5877                         goto out;
5878         }
5879 #endif
5880         /* Some tracers are not allowed to be enabled from the kernel command line */
5881         if (system_state < SYSTEM_RUNNING && t->noboot) {
5882                 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
5883                         t->name);
5884                 goto out;
5885         }
5886
5887         /* Some tracers are only allowed for the top level buffer */
5888         if (!trace_ok_for_array(t, tr)) {
5889                 ret = -EINVAL;
5890                 goto out;
5891         }
5892
5893         /* If trace pipe files are being read, we can't change the tracer */
5894         if (tr->trace_ref) {
5895                 ret = -EBUSY;
5896                 goto out;
5897         }
5898
5899         trace_branch_disable();
5900
5901         tr->current_trace->enabled--;
5902
5903         if (tr->current_trace->reset)
5904                 tr->current_trace->reset(tr);
5905
5906         /* Current trace needs to be nop_trace before synchronize_rcu */
5907         tr->current_trace = &nop_trace;
5908
5909 #ifdef CONFIG_TRACER_MAX_TRACE
5910         had_max_tr = tr->allocated_snapshot;
5911
5912         if (had_max_tr && !t->use_max_tr) {
5913                 /*
5914                  * We need to make sure that the update_max_tr sees that
5915                  * current_trace changed to nop_trace to keep it from
5916                  * swapping the buffers after we resize it.
5917                  * The update_max_tr is called with interrupts disabled,
5918                  * so a synchronize_rcu() is sufficient.
5919                  */
5920                 synchronize_rcu();
5921                 free_snapshot(tr);
5922         }
5923 #endif
5924
5925 #ifdef CONFIG_TRACER_MAX_TRACE
5926         if (t->use_max_tr && !had_max_tr) {
5927                 ret = tracing_alloc_snapshot_instance(tr);
5928                 if (ret < 0)
5929                         goto out;
5930         }
5931 #endif
5932
5933         if (t->init) {
5934                 ret = tracer_init(t, tr);
5935                 if (ret)
5936                         goto out;
5937         }
5938
5939         tr->current_trace = t;
5940         tr->current_trace->enabled++;
5941         trace_branch_enable(tr);
5942  out:
5943         mutex_unlock(&trace_types_lock);
5944
5945         return ret;
5946 }
5947
5948 static ssize_t
5949 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
5950                         size_t cnt, loff_t *ppos)
5951 {
5952         struct trace_array *tr = filp->private_data;
5953         char buf[MAX_TRACER_SIZE+1];
5954         int i;
5955         size_t ret;
5956         int err;
5957
5958         ret = cnt;
5959
5960         if (cnt > MAX_TRACER_SIZE)
5961                 cnt = MAX_TRACER_SIZE;
5962
5963         if (copy_from_user(buf, ubuf, cnt))
5964                 return -EFAULT;
5965
5966         buf[cnt] = 0;
5967
5968         /* strip trailing whitespace. */
5969         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
5970                 buf[i] = 0;
5971
5972         err = tracing_set_tracer(tr, buf);
5973         if (err)
5974                 return err;
5975
5976         *ppos += ret;
5977
5978         return ret;
5979 }
5980
5981 static ssize_t
5982 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
5983                    size_t cnt, loff_t *ppos)
5984 {
5985         char buf[64];
5986         int r;
5987
5988         r = snprintf(buf, sizeof(buf), "%ld\n",
5989                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
5990         if (r > sizeof(buf))
5991                 r = sizeof(buf);
5992         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5993 }
5994
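/*
 * The tracing_nsecs_*() helpers expose a nanosecond variable in
 * microseconds: reads convert with nsecs_to_usecs() and writes multiply
 * the user's value by 1000. Writing 100 to tracing_thresh, for example,
 * stores 100000 ns (100 us).
 */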
5995 static ssize_t
5996 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
5997                     size_t cnt, loff_t *ppos)
5998 {
5999         unsigned long val;
6000         int ret;
6001
6002         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6003         if (ret)
6004                 return ret;
6005
6006         *ptr = val * 1000;
6007
6008         return cnt;
6009 }
6010
6011 static ssize_t
6012 tracing_thresh_read(struct file *filp, char __user *ubuf,
6013                     size_t cnt, loff_t *ppos)
6014 {
6015         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6016 }
6017
6018 static ssize_t
6019 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6020                      size_t cnt, loff_t *ppos)
6021 {
6022         struct trace_array *tr = filp->private_data;
6023         int ret;
6024
6025         mutex_lock(&trace_types_lock);
6026         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6027         if (ret < 0)
6028                 goto out;
6029
6030         if (tr->current_trace->update_thresh) {
6031                 ret = tr->current_trace->update_thresh(tr);
6032                 if (ret < 0)
6033                         goto out;
6034         }
6035
6036         ret = cnt;
6037 out:
6038         mutex_unlock(&trace_types_lock);
6039
6040         return ret;
6041 }
6042
6043 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6044
6045 static ssize_t
6046 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6047                      size_t cnt, loff_t *ppos)
6048 {
6049         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
6050 }
6051
6052 static ssize_t
6053 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6054                       size_t cnt, loff_t *ppos)
6055 {
6056         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
6057 }
6058
6059 #endif
6060
6061 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6062 {
6063         struct trace_array *tr = inode->i_private;
6064         struct trace_iterator *iter;
6065         int ret;
6066
6067         ret = tracing_check_open_get_tr(tr);
6068         if (ret)
6069                 return ret;
6070
6071         mutex_lock(&trace_types_lock);
6072
6073         /* create a buffer to store the information to pass to userspace */
6074         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6075         if (!iter) {
6076                 ret = -ENOMEM;
6077                 __trace_array_put(tr);
6078                 goto out;
6079         }
6080
6081         trace_seq_init(&iter->seq);
6082         iter->trace = tr->current_trace;
6083
6084         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6085                 ret = -ENOMEM;
6086                 goto fail;
6087         }
6088
6089         /* trace pipe does not show start of buffer */
6090         cpumask_setall(iter->started);
6091
6092         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6093                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
6094
6095         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6096         if (trace_clocks[tr->clock_id].in_ns)
6097                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6098
6099         iter->tr = tr;
6100         iter->array_buffer = &tr->array_buffer;
6101         iter->cpu_file = tracing_get_cpu(inode);
6102         mutex_init(&iter->mutex);
6103         filp->private_data = iter;
6104
6105         if (iter->trace->pipe_open)
6106                 iter->trace->pipe_open(iter);
6107
6108         nonseekable_open(inode, filp);
6109
6110         tr->trace_ref++;
6111 out:
6112         mutex_unlock(&trace_types_lock);
6113         return ret;
6114
6115 fail:
6116         kfree(iter);
6117         __trace_array_put(tr);
6118         mutex_unlock(&trace_types_lock);
6119         return ret;
6120 }
6121
6122 static int tracing_release_pipe(struct inode *inode, struct file *file)
6123 {
6124         struct trace_iterator *iter = file->private_data;
6125         struct trace_array *tr = inode->i_private;
6126
6127         mutex_lock(&trace_types_lock);
6128
6129         tr->trace_ref--;
6130
6131         if (iter->trace->pipe_close)
6132                 iter->trace->pipe_close(iter);
6133
6134         mutex_unlock(&trace_types_lock);
6135
6136         free_cpumask_var(iter->started);
6137         mutex_destroy(&iter->mutex);
6138         kfree(iter);
6139
6140         trace_array_put(tr);
6141
6142         return 0;
6143 }
6144
6145 static __poll_t
6146 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6147 {
6148         struct trace_array *tr = iter->tr;
6149
6150         /* Iterators are static, they should be filled or empty */
6151         if (trace_buffer_iter(iter, iter->cpu_file))
6152                 return EPOLLIN | EPOLLRDNORM;
6153
6154         if (tr->trace_flags & TRACE_ITER_BLOCK)
6155                 /*
6156                  * Always select as readable when in blocking mode
6157                  */
6158                 return EPOLLIN | EPOLLRDNORM;
6159         else
6160                 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6161                                              filp, poll_table);
6162 }
6163
6164 static __poll_t
6165 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6166 {
6167         struct trace_iterator *iter = filp->private_data;
6168
6169         return trace_poll(iter, filp, poll_table);
6170 }
6171
6172 /* Must be called with iter->mutex held. */
6173 static int tracing_wait_pipe(struct file *filp)
6174 {
6175         struct trace_iterator *iter = filp->private_data;
6176         int ret;
6177
6178         while (trace_empty(iter)) {
6179
6180                 if ((filp->f_flags & O_NONBLOCK)) {
6181                         return -EAGAIN;
6182                 }
6183
6184                 /*
6185                  * We block until we read something and tracing is disabled.
6186                  * We still block if tracing is disabled, but we have never
6187                  * read anything. This allows a user to cat this file, and
6188                  * then enable tracing. But after we have read something,
6189                  * we give an EOF when tracing is again disabled.
6190                  *
6191                  * iter->pos will be 0 if we haven't read anything.
6192                  */
6193                 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6194                         break;
6195
6196                 mutex_unlock(&iter->mutex);
6197
6198                 ret = wait_on_pipe(iter, 0);
6199
6200                 mutex_lock(&iter->mutex);
6201
6202                 if (ret)
6203                         return ret;
6204         }
6205
6206         return 1;
6207 }
6208
6209 /*
6210  * Consumer reader.
6211  */
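/*
 * This is the read side of trace_pipe: entries are consumed as they are
 * copied out, and iter->mutex below keeps a second reader on the same
 * file descriptor from interleaving with (and corrupting) this one.
 * Typical use, assuming the usual tracefs mount point:
 *
 *	cat /sys/kernel/tracing/trace_pipe
 */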
6212 static ssize_t
6213 tracing_read_pipe(struct file *filp, char __user *ubuf,
6214                   size_t cnt, loff_t *ppos)
6215 {
6216         struct trace_iterator *iter = filp->private_data;
6217         ssize_t sret;
6218
6219         /*
6220          * Avoid more than one consumer on a single file descriptor
6221          * This is just a matter of traces coherency, the ring buffer itself
6222          * is protected.
6223          */
6224         mutex_lock(&iter->mutex);
6225
6226         /* return any leftover data */
6227         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6228         if (sret != -EBUSY)
6229                 goto out;
6230
6231         trace_seq_init(&iter->seq);
6232
6233         if (iter->trace->read) {
6234                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6235                 if (sret)
6236                         goto out;
6237         }
6238
6239 waitagain:
6240         sret = tracing_wait_pipe(filp);
6241         if (sret <= 0)
6242                 goto out;
6243
6244         /* stop when tracing is finished */
6245         if (trace_empty(iter)) {
6246                 sret = 0;
6247                 goto out;
6248         }
6249
6250         if (cnt >= PAGE_SIZE)
6251                 cnt = PAGE_SIZE - 1;
6252
6253         /* reset all but tr, trace, and overruns */
6254         memset(&iter->seq, 0,
6255                sizeof(struct trace_iterator) -
6256                offsetof(struct trace_iterator, seq));
6257         cpumask_clear(iter->started);
6258         trace_seq_init(&iter->seq);
6259         iter->pos = -1;
6260
6261         trace_event_read_lock();
6262         trace_access_lock(iter->cpu_file);
6263         while (trace_find_next_entry_inc(iter) != NULL) {
6264                 enum print_line_t ret;
6265                 int save_len = iter->seq.seq.len;
6266
6267                 ret = print_trace_line(iter);
6268                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6269                         /* don't print partial lines */
6270                         iter->seq.seq.len = save_len;
6271                         break;
6272                 }
6273                 if (ret != TRACE_TYPE_NO_CONSUME)
6274                         trace_consume(iter);
6275
6276                 if (trace_seq_used(&iter->seq) >= cnt)
6277                         break;
6278
6279                 /*
6280                  * Setting the full flag means we reached the trace_seq buffer
6281                  * size, so we should have exited via the partial-line condition
6282                  * above. If we get here, a trace_seq_* function was misused.
6283                  */
6284                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6285                           iter->ent->type);
6286         }
6287         trace_access_unlock(iter->cpu_file);
6288         trace_event_read_unlock();
6289
6290         /* Now copy what we have to the user */
6291         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6292         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6293                 trace_seq_init(&iter->seq);
6294
6295         /*
6296          * If there was nothing to send to user, in spite of consuming trace
6297          * entries, go back to wait for more entries.
6298          */
6299         if (sret == -EBUSY)
6300                 goto waitagain;
6301
6302 out:
6303         mutex_unlock(&iter->mutex);
6304
6305         return sret;
6306 }
6307
6308 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6309                                      unsigned int idx)
6310 {
6311         __free_page(spd->pages[idx]);
6312 }
6313
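/*
 * tracing_fill_pipe_page() formats consumed trace entries into iter->seq
 * until the page-sized seq buffer fills up, the remaining splice budget
 * @rem runs out, or there are no more entries. It returns the unused part
 * of @rem so the splice loop below knows when to stop.
 */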
6314 static size_t
6315 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6316 {
6317         size_t count;
6318         int save_len;
6319         int ret;
6320
6321         /* Seq buffer is page-sized, exactly what we need. */
6322         for (;;) {
6323                 save_len = iter->seq.seq.len;
6324                 ret = print_trace_line(iter);
6325
6326                 if (trace_seq_has_overflowed(&iter->seq)) {
6327                         iter->seq.seq.len = save_len;
6328                         break;
6329                 }
6330
6331                 /*
6332                  * This should not be hit, because a partial line should
6333                  * only be returned if iter->seq overflowed. But check it
6334                  * anyway to be safe.
6335                  */
6336                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6337                         iter->seq.seq.len = save_len;
6338                         break;
6339                 }
6340
6341                 count = trace_seq_used(&iter->seq) - save_len;
6342                 if (rem < count) {
6343                         rem = 0;
6344                         iter->seq.seq.len = save_len;
6345                         break;
6346                 }
6347
6348                 if (ret != TRACE_TYPE_NO_CONSUME)
6349                         trace_consume(iter);
6350                 rem -= count;
6351                 if (!trace_find_next_entry_inc(iter))   {
6352                         rem = 0;
6353                         iter->ent = NULL;
6354                         break;
6355                 }
6356         }
6357
6358         return rem;
6359 }
6360
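/*
 * splice_read() handler for trace_pipe: formats consumed trace entries into
 * freshly allocated pages and feeds them to the pipe via splice_to_pipe().
 * This path copies the formatted text, unlike the per-cpu buffer splice
 * further below, which hands over whole ring-buffer pages.
 */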
6361 static ssize_t tracing_splice_read_pipe(struct file *filp,
6362                                         loff_t *ppos,
6363                                         struct pipe_inode_info *pipe,
6364                                         size_t len,
6365                                         unsigned int flags)
6366 {
6367         struct page *pages_def[PIPE_DEF_BUFFERS];
6368         struct partial_page partial_def[PIPE_DEF_BUFFERS];
6369         struct trace_iterator *iter = filp->private_data;
6370         struct splice_pipe_desc spd = {
6371                 .pages          = pages_def,
6372                 .partial        = partial_def,
6373                 .nr_pages       = 0, /* This gets updated below. */
6374                 .nr_pages_max   = PIPE_DEF_BUFFERS,
6375                 .ops            = &default_pipe_buf_ops,
6376                 .spd_release    = tracing_spd_release_pipe,
6377         };
6378         ssize_t ret;
6379         size_t rem;
6380         unsigned int i;
6381
6382         if (splice_grow_spd(pipe, &spd))
6383                 return -ENOMEM;
6384
6385         mutex_lock(&iter->mutex);
6386
6387         if (iter->trace->splice_read) {
6388                 ret = iter->trace->splice_read(iter, filp,
6389                                                ppos, pipe, len, flags);
6390                 if (ret)
6391                         goto out_err;
6392         }
6393
6394         ret = tracing_wait_pipe(filp);
6395         if (ret <= 0)
6396                 goto out_err;
6397
6398         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6399                 ret = -EFAULT;
6400                 goto out_err;
6401         }
6402
6403         trace_event_read_lock();
6404         trace_access_lock(iter->cpu_file);
6405
6406         /* Fill as many pages as possible. */
6407         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6408                 spd.pages[i] = alloc_page(GFP_KERNEL);
6409                 if (!spd.pages[i])
6410                         break;
6411
6412                 rem = tracing_fill_pipe_page(rem, iter);
6413
6414                 /* Copy the data into the page, so we can start over. */
6415                 ret = trace_seq_to_buffer(&iter->seq,
6416                                           page_address(spd.pages[i]),
6417                                           trace_seq_used(&iter->seq));
6418                 if (ret < 0) {
6419                         __free_page(spd.pages[i]);
6420                         break;
6421                 }
6422                 spd.partial[i].offset = 0;
6423                 spd.partial[i].len = trace_seq_used(&iter->seq);
6424
6425                 trace_seq_init(&iter->seq);
6426         }
6427
6428         trace_access_unlock(iter->cpu_file);
6429         trace_event_read_unlock();
6430         mutex_unlock(&iter->mutex);
6431
6432         spd.nr_pages = i;
6433
6434         if (i)
6435                 ret = splice_to_pipe(pipe, &spd);
6436         else
6437                 ret = 0;
6438 out:
6439         splice_shrink_spd(&spd);
6440         return ret;
6441
6442 out_err:
6443         mutex_unlock(&iter->mutex);
6444         goto out;
6445 }
6446
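/*
 * Read handler for the buffer_size_kb files. Reports the ring buffer size in
 * KB; when all CPUs are selected it prints "X" if the per-cpu sizes differ,
 * and "(expanded: N)" while the ring buffer is still at its minimal
 * boot-time size.
 */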
6447 static ssize_t
6448 tracing_entries_read(struct file *filp, char __user *ubuf,
6449                      size_t cnt, loff_t *ppos)
6450 {
6451         struct inode *inode = file_inode(filp);
6452         struct trace_array *tr = inode->i_private;
6453         int cpu = tracing_get_cpu(inode);
6454         char buf[64];
6455         int r = 0;
6456         ssize_t ret;
6457
6458         mutex_lock(&trace_types_lock);
6459
6460         if (cpu == RING_BUFFER_ALL_CPUS) {
6461                 int cpu, buf_size_same;
6462                 unsigned long size;
6463
6464                 size = 0;
6465                 buf_size_same = 1;
6466                 /* check if all per-CPU buffer sizes are the same */
6467                 for_each_tracing_cpu(cpu) {
6468                         /* fill in the size from the first enabled CPU */
6469                         if (size == 0)
6470                                 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6471                         if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6472                                 buf_size_same = 0;
6473                                 break;
6474                         }
6475                 }
6476
6477                 if (buf_size_same) {
6478                         if (!ring_buffer_expanded)
6479                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
6480                                             size >> 10,
6481                                             trace_buf_size >> 10);
6482                         else
6483                                 r = sprintf(buf, "%lu\n", size >> 10);
6484                 } else
6485                         r = sprintf(buf, "X\n");
6486         } else
6487                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6488
6489         mutex_unlock(&trace_types_lock);
6490
6491         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6492         return ret;
6493 }
6494
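/*
 * Write handler for buffer_size_kb: the value is taken in KB and the ring
 * buffer (one CPU or all CPUs, depending on which file was opened) is
 * resized accordingly, e.g.:
 *
 *   echo 4096 > buffer_size_kb    # resize each per-cpu buffer to 4 MB
 */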
6495 static ssize_t
6496 tracing_entries_write(struct file *filp, const char __user *ubuf,
6497                       size_t cnt, loff_t *ppos)
6498 {
6499         struct inode *inode = file_inode(filp);
6500         struct trace_array *tr = inode->i_private;
6501         unsigned long val;
6502         int ret;
6503
6504         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6505         if (ret)
6506                 return ret;
6507
6508         /* must have at least 1 entry */
6509         if (!val)
6510                 return -EINVAL;
6511
6512         /* value is in KB */
6513         val <<= 10;
6514         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6515         if (ret < 0)
6516                 return ret;
6517
6518         *ppos += cnt;
6519
6520         return cnt;
6521 }
6522
6523 static ssize_t
6524 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6525                                 size_t cnt, loff_t *ppos)
6526 {
6527         struct trace_array *tr = filp->private_data;
6528         char buf[64];
6529         int r, cpu;
6530         unsigned long size = 0, expanded_size = 0;
6531
6532         mutex_lock(&trace_types_lock);
6533         for_each_tracing_cpu(cpu) {
6534                 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
6535                 if (!ring_buffer_expanded)
6536                         expanded_size += trace_buf_size >> 10;
6537         }
6538         if (ring_buffer_expanded)
6539                 r = sprintf(buf, "%lu\n", size);
6540         else
6541                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6542         mutex_unlock(&trace_types_lock);
6543
6544         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6545 }
6546
6547 static ssize_t
6548 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6549                           size_t cnt, loff_t *ppos)
6550 {
6551         /*
6552          * There is no need to read what the user has written; this function
6553          * exists only so that using "echo" on this file does not return an error.
6554          */
6555
6556         *ppos += cnt;
6557
6558         return cnt;
6559 }
6560
6561 static int
6562 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6563 {
6564         struct trace_array *tr = inode->i_private;
6565
6566         /* disable tracing? */
6567         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6568                 tracer_tracing_off(tr);
6569         /* resize the ring buffer to 0 */
6570         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6571
6572         trace_array_put(tr);
6573
6574         return 0;
6575 }
6576
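/*
 * Write handler for the trace_marker file: the user string is copied into a
 * TRACE_PRINT event (truncated to TRACE_BUF_SIZE), any triggers attached to
 * the trace_marker event are run, and a trailing newline and NUL are
 * guaranteed. If the copy from user space faults, the string "<faulted>" is
 * recorded instead and -EFAULT is returned.
 */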
6577 static ssize_t
6578 tracing_mark_write(struct file *filp, const char __user *ubuf,
6579                                         size_t cnt, loff_t *fpos)
6580 {
6581         struct trace_array *tr = filp->private_data;
6582         struct ring_buffer_event *event;
6583         enum event_trigger_type tt = ETT_NONE;
6584         struct trace_buffer *buffer;
6585         struct print_entry *entry;
6586         unsigned long irq_flags;
6587         ssize_t written;
6588         int size;
6589         int len;
6590
6591 /* Used in tracing_mark_raw_write() as well */
6592 #define FAULTED_STR "<faulted>"
6593 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
6594
6595         if (tracing_disabled)
6596                 return -EINVAL;
6597
6598         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6599                 return -EINVAL;
6600
6601         if (cnt > TRACE_BUF_SIZE)
6602                 cnt = TRACE_BUF_SIZE;
6603
6604         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6605
6606         local_save_flags(irq_flags);
6607         size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
6608
6609         /* If shorter than "<faulted>", make sure there is still room for it */
6610         if (cnt < FAULTED_SIZE)
6611                 size += FAULTED_SIZE - cnt;
6612
6613         buffer = tr->array_buffer.buffer;
6614         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6615                                             irq_flags, preempt_count());
6616         if (unlikely(!event))
6617                 /* Ring buffer disabled, return as if not open for write */
6618                 return -EBADF;
6619
6620         entry = ring_buffer_event_data(event);
6621         entry->ip = _THIS_IP_;
6622
6623         len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6624         if (len) {
6625                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6626                 cnt = FAULTED_SIZE;
6627                 written = -EFAULT;
6628         } else
6629                 written = cnt;
6630         len = cnt;
6631
6632         if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
6633                 /* do not add \n before testing triggers, but add \0 */
6634                 entry->buf[cnt] = '\0';
6635                 tt = event_triggers_call(tr->trace_marker_file, entry, event);
6636         }
6637
6638         if (entry->buf[cnt - 1] != '\n') {
6639                 entry->buf[cnt] = '\n';
6640                 entry->buf[cnt + 1] = '\0';
6641         } else
6642                 entry->buf[cnt] = '\0';
6643
6644         __buffer_unlock_commit(buffer, event);
6645
6646         if (tt)
6647                 event_triggers_post_call(tr->trace_marker_file, tt);
6648
6649         if (written > 0)
6650                 *fpos += written;
6651
6652         return written;
6653 }
6654
6655 /* Limit it for now to 3K (including tag) */
6656 #define RAW_DATA_MAX_SIZE (1024*3)
6657
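/*
 * Write handler for the trace_marker_raw file: the payload must start with a
 * 4-byte tag id and is stored verbatim in a TRACE_RAW_DATA event, capped at
 * RAW_DATA_MAX_SIZE.
 */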
6658 static ssize_t
6659 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6660                                         size_t cnt, loff_t *fpos)
6661 {
6662         struct trace_array *tr = filp->private_data;
6663         struct ring_buffer_event *event;
6664         struct trace_buffer *buffer;
6665         struct raw_data_entry *entry;
6666         unsigned long irq_flags;
6667         ssize_t written;
6668         int size;
6669         int len;
6670
6671 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6672
6673         if (tracing_disabled)
6674                 return -EINVAL;
6675
6676         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6677                 return -EINVAL;
6678
6679         /* The marker must at least have a tag id */
6680         if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
6681                 return -EINVAL;
6682
6683         if (cnt > TRACE_BUF_SIZE)
6684                 cnt = TRACE_BUF_SIZE;
6685
6686         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6687
6688         local_save_flags(irq_flags);
6689         size = sizeof(*entry) + cnt;
6690         if (cnt < FAULT_SIZE_ID)
6691                 size += FAULT_SIZE_ID - cnt;
6692
6693         buffer = tr->array_buffer.buffer;
6694         event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
6695                                             irq_flags, preempt_count());
6696         if (!event)
6697                 /* Ring buffer disabled, return as if not open for write */
6698                 return -EBADF;
6699
6700         entry = ring_buffer_event_data(event);
6701
6702         len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
6703         if (len) {
6704                 entry->id = -1;
6705                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6706                 written = -EFAULT;
6707         } else
6708                 written = cnt;
6709
6710         __buffer_unlock_commit(buffer, event);
6711
6712         if (written > 0)
6713                 *fpos += written;
6714
6715         return written;
6716 }
6717
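/*
 * The trace_clock file: reading lists the available clocks with the current
 * one in brackets; writing a clock name switches the ring buffer clock and
 * resets the buffers, since old and new timestamps are not comparable.
 */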
6718 static int tracing_clock_show(struct seq_file *m, void *v)
6719 {
6720         struct trace_array *tr = m->private;
6721         int i;
6722
6723         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
6724                 seq_printf(m,
6725                         "%s%s%s%s", i ? " " : "",
6726                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
6727                         i == tr->clock_id ? "]" : "");
6728         seq_putc(m, '\n');
6729
6730         return 0;
6731 }
6732
6733 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
6734 {
6735         int i;
6736
6737         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
6738                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
6739                         break;
6740         }
6741         if (i == ARRAY_SIZE(trace_clocks))
6742                 return -EINVAL;
6743
6744         mutex_lock(&trace_types_lock);
6745
6746         tr->clock_id = i;
6747
6748         ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
6749
6750         /*
6751          * New clock may not be consistent with the previous clock.
6752          * Reset the buffer so that it doesn't have incomparable timestamps.
6753          */
6754         tracing_reset_online_cpus(&tr->array_buffer);
6755
6756 #ifdef CONFIG_TRACER_MAX_TRACE
6757         if (tr->max_buffer.buffer)
6758                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
6759         tracing_reset_online_cpus(&tr->max_buffer);
6760 #endif
6761
6762         mutex_unlock(&trace_types_lock);
6763
6764         return 0;
6765 }
6766
6767 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6768                                    size_t cnt, loff_t *fpos)
6769 {
6770         struct seq_file *m = filp->private_data;
6771         struct trace_array *tr = m->private;
6772         char buf[64];
6773         const char *clockstr;
6774         int ret;
6775
6776         if (cnt >= sizeof(buf))
6777                 return -EINVAL;
6778
6779         if (copy_from_user(buf, ubuf, cnt))
6780                 return -EFAULT;
6781
6782         buf[cnt] = 0;
6783
6784         clockstr = strstrip(buf);
6785
6786         ret = tracing_set_clock(tr, clockstr);
6787         if (ret)
6788                 return ret;
6789
6790         *fpos += cnt;
6791
6792         return cnt;
6793 }
6794
6795 static int tracing_clock_open(struct inode *inode, struct file *file)
6796 {
6797         struct trace_array *tr = inode->i_private;
6798         int ret;
6799
6800         ret = tracing_check_open_get_tr(tr);
6801         if (ret)
6802                 return ret;
6803
6804         ret = single_open(file, tracing_clock_show, inode->i_private);
6805         if (ret < 0)
6806                 trace_array_put(tr);
6807
6808         return ret;
6809 }
6810
6811 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
6812 {
6813         struct trace_array *tr = m->private;
6814
6815         mutex_lock(&trace_types_lock);
6816
6817         if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
6818                 seq_puts(m, "delta [absolute]\n");
6819         else
6820                 seq_puts(m, "[delta] absolute\n");
6821
6822         mutex_unlock(&trace_types_lock);
6823
6824         return 0;
6825 }
6826
6827 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
6828 {
6829         struct trace_array *tr = inode->i_private;
6830         int ret;
6831
6832         ret = tracing_check_open_get_tr(tr);
6833         if (ret)
6834                 return ret;
6835
6836         ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
6837         if (ret < 0)
6838                 trace_array_put(tr);
6839
6840         return ret;
6841 }
6842
6843 int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs)
6844 {
6845         int ret = 0;
6846
6847         mutex_lock(&trace_types_lock);
6848
6849         if (abs && tr->time_stamp_abs_ref++)
6850                 goto out;
6851
6852         if (!abs) {
6853                 if (WARN_ON_ONCE(!tr->time_stamp_abs_ref)) {
6854                         ret = -EINVAL;
6855                         goto out;
6856                 }
6857
6858                 if (--tr->time_stamp_abs_ref)
6859                         goto out;
6860         }
6861
6862         ring_buffer_set_time_stamp_abs(tr->array_buffer.buffer, abs);
6863
6864 #ifdef CONFIG_TRACER_MAX_TRACE
6865         if (tr->max_buffer.buffer)
6866                 ring_buffer_set_time_stamp_abs(tr->max_buffer.buffer, abs);
6867 #endif
6868  out:
6869         mutex_unlock(&trace_types_lock);
6870
6871         return ret;
6872 }
6873
6874 struct ftrace_buffer_info {
6875         struct trace_iterator   iter;
6876         void                    *spare;
6877         unsigned int            spare_cpu;
6878         unsigned int            read;
6879 };
6880
6881 #ifdef CONFIG_TRACER_SNAPSHOT
6882 static int tracing_snapshot_open(struct inode *inode, struct file *file)
6883 {
6884         struct trace_array *tr = inode->i_private;
6885         struct trace_iterator *iter;
6886         struct seq_file *m;
6887         int ret;
6888
6889         ret = tracing_check_open_get_tr(tr);
6890         if (ret)
6891                 return ret;
6892
6893         if (file->f_mode & FMODE_READ) {
6894                 iter = __tracing_open(inode, file, true);
6895                 if (IS_ERR(iter))
6896                         ret = PTR_ERR(iter);
6897         } else {
6898                 /* Writes still need the seq_file to hold the private data */
6899                 ret = -ENOMEM;
6900                 m = kzalloc(sizeof(*m), GFP_KERNEL);
6901                 if (!m)
6902                         goto out;
6903                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6904                 if (!iter) {
6905                         kfree(m);
6906                         goto out;
6907                 }
6908                 ret = 0;
6909
6910                 iter->tr = tr;
6911                 iter->array_buffer = &tr->max_buffer;
6912                 iter->cpu_file = tracing_get_cpu(inode);
6913                 m->private = iter;
6914                 file->private_data = m;
6915         }
6916 out:
6917         if (ret < 0)
6918                 trace_array_put(tr);
6919
6920         return ret;
6921 }
6922
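/*
 * Write handler for the snapshot file; the accepted values match the switch
 * below:
 *   0 - free the snapshot buffer (only when all CPUs are selected)
 *   1 - allocate the snapshot buffer if needed and swap it with the live
 *       buffer (per-cpu swap only if the ring buffer supports it)
 *   * - any other value clears the snapshot buffer contents
 */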
6923 static ssize_t
6924 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
6925                        loff_t *ppos)
6926 {
6927         struct seq_file *m = filp->private_data;
6928         struct trace_iterator *iter = m->private;
6929         struct trace_array *tr = iter->tr;
6930         unsigned long val;
6931         int ret;
6932
6933         ret = tracing_update_buffers();
6934         if (ret < 0)
6935                 return ret;
6936
6937         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6938         if (ret)
6939                 return ret;
6940
6941         mutex_lock(&trace_types_lock);
6942
6943         if (tr->current_trace->use_max_tr) {
6944                 ret = -EBUSY;
6945                 goto out;
6946         }
6947
6948         arch_spin_lock(&tr->max_lock);
6949         if (tr->cond_snapshot)
6950                 ret = -EBUSY;
6951         arch_spin_unlock(&tr->max_lock);
6952         if (ret)
6953                 goto out;
6954
6955         switch (val) {
6956         case 0:
6957                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6958                         ret = -EINVAL;
6959                         break;
6960                 }
6961                 if (tr->allocated_snapshot)
6962                         free_snapshot(tr);
6963                 break;
6964         case 1:
6965 /* Only allow per-cpu swap if the ring buffer supports it */
6966 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
6967                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6968                         ret = -EINVAL;
6969                         break;
6970                 }
6971 #endif
6972                 if (tr->allocated_snapshot)
6973                         ret = resize_buffer_duplicate_size(&tr->max_buffer,
6974                                         &tr->array_buffer, iter->cpu_file);
6975                 else
6976                         ret = tracing_alloc_snapshot_instance(tr);
6977                 if (ret < 0)
6978                         break;
6979                 local_irq_disable();
6980                 /* Now, we're going to swap */
6981                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6982                         update_max_tr(tr, current, smp_processor_id(), NULL);
6983                 else
6984                         update_max_tr_single(tr, current, iter->cpu_file);
6985                 local_irq_enable();
6986                 break;
6987         default:
6988                 if (tr->allocated_snapshot) {
6989                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6990                                 tracing_reset_online_cpus(&tr->max_buffer);
6991                         else
6992                                 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
6993                 }
6994                 break;
6995         }
6996
6997         if (ret >= 0) {
6998                 *ppos += cnt;
6999                 ret = cnt;
7000         }
7001 out:
7002         mutex_unlock(&trace_types_lock);
7003         return ret;
7004 }
7005
7006 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7007 {
7008         struct seq_file *m = file->private_data;
7009         int ret;
7010
7011         ret = tracing_release(inode, file);
7012
7013         if (file->f_mode & FMODE_READ)
7014                 return ret;
7015
7016         /* If write only, the seq_file is just a stub */
7017         if (m)
7018                 kfree(m->private);
7019         kfree(m);
7020
7021         return 0;
7022 }
7023
7024 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7025 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7026                                     size_t count, loff_t *ppos);
7027 static int tracing_buffers_release(struct inode *inode, struct file *file);
7028 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7029                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7030
7031 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7032 {
7033         struct ftrace_buffer_info *info;
7034         int ret;
7035
7036         /* The following checks for tracefs lockdown */
7037         ret = tracing_buffers_open(inode, filp);
7038         if (ret < 0)
7039                 return ret;
7040
7041         info = filp->private_data;
7042
7043         if (info->iter.trace->use_max_tr) {
7044                 tracing_buffers_release(inode, filp);
7045                 return -EBUSY;
7046         }
7047
7048         info->iter.snapshot = true;
7049         info->iter.array_buffer = &info->iter.tr->max_buffer;
7050
7051         return ret;
7052 }
7053
7054 #endif /* CONFIG_TRACER_SNAPSHOT */
7055
7056
7057 static const struct file_operations tracing_thresh_fops = {
7058         .open           = tracing_open_generic,
7059         .read           = tracing_thresh_read,
7060         .write          = tracing_thresh_write,
7061         .llseek         = generic_file_llseek,
7062 };
7063
7064 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7065 static const struct file_operations tracing_max_lat_fops = {
7066         .open           = tracing_open_generic,
7067         .read           = tracing_max_lat_read,
7068         .write          = tracing_max_lat_write,
7069         .llseek         = generic_file_llseek,
7070 };
7071 #endif
7072
7073 static const struct file_operations set_tracer_fops = {
7074         .open           = tracing_open_generic,
7075         .read           = tracing_set_trace_read,
7076         .write          = tracing_set_trace_write,
7077         .llseek         = generic_file_llseek,
7078 };
7079
7080 static const struct file_operations tracing_pipe_fops = {
7081         .open           = tracing_open_pipe,
7082         .poll           = tracing_poll_pipe,
7083         .read           = tracing_read_pipe,
7084         .splice_read    = tracing_splice_read_pipe,
7085         .release        = tracing_release_pipe,
7086         .llseek         = no_llseek,
7087 };
7088
7089 static const struct file_operations tracing_entries_fops = {
7090         .open           = tracing_open_generic_tr,
7091         .read           = tracing_entries_read,
7092         .write          = tracing_entries_write,
7093         .llseek         = generic_file_llseek,
7094         .release        = tracing_release_generic_tr,
7095 };
7096
7097 static const struct file_operations tracing_total_entries_fops = {
7098         .open           = tracing_open_generic_tr,
7099         .read           = tracing_total_entries_read,
7100         .llseek         = generic_file_llseek,
7101         .release        = tracing_release_generic_tr,
7102 };
7103
7104 static const struct file_operations tracing_free_buffer_fops = {
7105         .open           = tracing_open_generic_tr,
7106         .write          = tracing_free_buffer_write,
7107         .release        = tracing_free_buffer_release,
7108 };
7109
7110 static const struct file_operations tracing_mark_fops = {
7111         .open           = tracing_open_generic_tr,
7112         .write          = tracing_mark_write,
7113         .llseek         = generic_file_llseek,
7114         .release        = tracing_release_generic_tr,
7115 };
7116
7117 static const struct file_operations tracing_mark_raw_fops = {
7118         .open           = tracing_open_generic_tr,
7119         .write          = tracing_mark_raw_write,
7120         .llseek         = generic_file_llseek,
7121         .release        = tracing_release_generic_tr,
7122 };
7123
7124 static const struct file_operations trace_clock_fops = {
7125         .open           = tracing_clock_open,
7126         .read           = seq_read,
7127         .llseek         = seq_lseek,
7128         .release        = tracing_single_release_tr,
7129         .write          = tracing_clock_write,
7130 };
7131
7132 static const struct file_operations trace_time_stamp_mode_fops = {
7133         .open           = tracing_time_stamp_mode_open,
7134         .read           = seq_read,
7135         .llseek         = seq_lseek,
7136         .release        = tracing_single_release_tr,
7137 };
7138
7139 #ifdef CONFIG_TRACER_SNAPSHOT
7140 static const struct file_operations snapshot_fops = {
7141         .open           = tracing_snapshot_open,
7142         .read           = seq_read,
7143         .write          = tracing_snapshot_write,
7144         .llseek         = tracing_lseek,
7145         .release        = tracing_snapshot_release,
7146 };
7147
7148 static const struct file_operations snapshot_raw_fops = {
7149         .open           = snapshot_raw_open,
7150         .read           = tracing_buffers_read,
7151         .release        = tracing_buffers_release,
7152         .splice_read    = tracing_buffers_splice_read,
7153         .llseek         = no_llseek,
7154 };
7155
7156 #endif /* CONFIG_TRACER_SNAPSHOT */
7157
7158 #define TRACING_LOG_ERRS_MAX    8
7159 #define TRACING_LOG_LOC_MAX     128
7160
7161 #define CMD_PREFIX "  Command: "
7162
7163 struct err_info {
7164         const char      **errs; /* ptr to loc-specific array of err strings */
7165         u8              type;   /* index into errs -> specific err string */
7166         u8              pos;    /* caret position in cmd; MAX_FILTER_STR_VAL = 256 fits in a u8 */
7167         u64             ts;
7168 };
7169
7170 struct tracing_log_err {
7171         struct list_head        list;
7172         struct err_info         info;
7173         char                    loc[TRACING_LOG_LOC_MAX]; /* err location */
7174         char                    cmd[MAX_FILTER_STR_VAL]; /* what caused err */
7175 };
7176
7177 static DEFINE_MUTEX(tracing_err_log_lock);
7178
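/*
 * Return a tracing_log_err record to fill in: allocate a new one until
 * TRACING_LOG_ERRS_MAX entries exist, then recycle the oldest entry.
 * The caller must hold tracing_err_log_lock.
 */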
7179 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
7180 {
7181         struct tracing_log_err *err;
7182
7183         if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7184                 err = kzalloc(sizeof(*err), GFP_KERNEL);
7185                 if (!err)
7186                         err = ERR_PTR(-ENOMEM);
7187                 tr->n_err_log_entries++;
7188
7189                 return err;
7190         }
7191
7192         err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7193         list_del(&err->list);
7194
7195         return err;
7196 }
7197
7198 /**
7199  * err_pos - find the position of a string within a command for error careting
7200  * @cmd: The tracing command that caused the error
7201  * @str: The string to position the caret at within @cmd
7202  *
7203  * Finds the position of the first occurrence of @str within @cmd.  The
7204  * return value can be passed to tracing_log_err() for caret placement
7205  * within @cmd.
7206  *
7207  * Returns the index within @cmd of the first occurrence of @str or 0
7208  * if @str was not found.
7209  */
7210 unsigned int err_pos(char *cmd, const char *str)
7211 {
7212         char *found;
7213
7214         if (WARN_ON(!strlen(cmd)))
7215                 return 0;
7216
7217         found = strstr(cmd, str);
7218         if (found)
7219                 return found - cmd;
7220
7221         return 0;
7222 }
7223
7224 /**
7225  * tracing_log_err - write an error to the tracing error log
7226  * @tr: The associated trace array for the error (NULL for top level array)
7227  * @loc: A string describing where the error occurred
7228  * @cmd: The tracing command that caused the error
7229  * @errs: The array of loc-specific static error strings
7230  * @type: The index into errs[], which produces the specific static err string
7231  * @pos: The position the caret should be placed in the cmd
7232  *
7233  * Writes an error into tracing/error_log of the form:
7234  *
7235  * <loc>: error: <text>
7236  *   Command: <cmd>
7237  *              ^
7238  *
7239  * tracing/error_log is a small log file containing the last
7240  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7241  * unless there has been a tracing error, and the error log can be
7242  * cleared and have its memory freed by writing the empty string in
7243  * truncation mode to it, i.e. echo > tracing/error_log.
7244  *
7245  * NOTE: the @errs array along with the @type param are used to
7246  * produce a static error string - this string is not copied and saved
7247  * when the error is logged - only a pointer to it is saved.  See
7248  * existing callers for examples of how static strings are typically
7249  * defined for use with tracing_log_err().
7250  */
7251 void tracing_log_err(struct trace_array *tr,
7252                      const char *loc, const char *cmd,
7253                      const char **errs, u8 type, u8 pos)
7254 {
7255         struct tracing_log_err *err;
7256
7257         if (!tr)
7258                 tr = &global_trace;
7259
7260         mutex_lock(&tracing_err_log_lock);
7261         err = get_tracing_log_err(tr);
7262         if (PTR_ERR(err) == -ENOMEM) {
7263                 mutex_unlock(&tracing_err_log_lock);
7264                 return;
7265         }
7266
7267         snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7268         snprintf(err->cmd, MAX_FILTER_STR_VAL, "\n" CMD_PREFIX "%s\n", cmd);
7269
7270         err->info.errs = errs;
7271         err->info.type = type;
7272         err->info.pos = pos;
7273         err->info.ts = local_clock();
7274
7275         list_add_tail(&err->list, &tr->err_log);
7276         mutex_unlock(&tracing_err_log_lock);
7277 }
7278
7279 static void clear_tracing_err_log(struct trace_array *tr)
7280 {
7281         struct tracing_log_err *err, *next;
7282
7283         mutex_lock(&tracing_err_log_lock);
7284         list_for_each_entry_safe(err, next, &tr->err_log, list) {
7285                 list_del(&err->list);
7286                 kfree(err);
7287         }
7288
7289         tr->n_err_log_entries = 0;
7290         mutex_unlock(&tracing_err_log_lock);
7291 }
7292
7293 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7294 {
7295         struct trace_array *tr = m->private;
7296
7297         mutex_lock(&tracing_err_log_lock);
7298
7299         return seq_list_start(&tr->err_log, *pos);
7300 }
7301
7302 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7303 {
7304         struct trace_array *tr = m->private;
7305
7306         return seq_list_next(v, &tr->err_log, pos);
7307 }
7308
7309 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7310 {
7311         mutex_unlock(&tracing_err_log_lock);
7312 }
7313
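/* Print the caret line: pad past CMD_PREFIX, then @pos spaces, then "^". */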
7314 static void tracing_err_log_show_pos(struct seq_file *m, u8 pos)
7315 {
7316         u8 i;
7317
7318         for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7319                 seq_putc(m, ' ');
7320         for (i = 0; i < pos; i++)
7321                 seq_putc(m, ' ');
7322         seq_puts(m, "^\n");
7323 }
7324
7325 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7326 {
7327         struct tracing_log_err *err = v;
7328
7329         if (err) {
7330                 const char *err_text = err->info.errs[err->info.type];
7331                 u64 sec = err->info.ts;
7332                 u32 nsec;
7333
7334                 nsec = do_div(sec, NSEC_PER_SEC);
7335                 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7336                            err->loc, err_text);
7337                 seq_printf(m, "%s", err->cmd);
7338                 tracing_err_log_show_pos(m, err->info.pos);
7339         }
7340
7341         return 0;
7342 }
7343
7344 static const struct seq_operations tracing_err_log_seq_ops = {
7345         .start  = tracing_err_log_seq_start,
7346         .next   = tracing_err_log_seq_next,
7347         .stop   = tracing_err_log_seq_stop,
7348         .show   = tracing_err_log_seq_show
7349 };
7350
7351 static int tracing_err_log_open(struct inode *inode, struct file *file)
7352 {
7353         struct trace_array *tr = inode->i_private;
7354         int ret = 0;
7355
7356         ret = tracing_check_open_get_tr(tr);
7357         if (ret)
7358                 return ret;
7359
7360         /* If this file was opened for write, then erase contents */
7361         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7362                 clear_tracing_err_log(tr);
7363
7364         if (file->f_mode & FMODE_READ) {
7365                 ret = seq_open(file, &tracing_err_log_seq_ops);
7366                 if (!ret) {
7367                         struct seq_file *m = file->private_data;
7368                         m->private = tr;
7369                 } else {
7370                         trace_array_put(tr);
7371                 }
7372         }
7373         return ret;
7374 }
7375
7376 static ssize_t tracing_err_log_write(struct file *file,
7377                                      const char __user *buffer,
7378                                      size_t count, loff_t *ppos)
7379 {
7380         return count;
7381 }
7382
7383 static int tracing_err_log_release(struct inode *inode, struct file *file)
7384 {
7385         struct trace_array *tr = inode->i_private;
7386
7387         trace_array_put(tr);
7388
7389         if (file->f_mode & FMODE_READ)
7390                 seq_release(inode, file);
7391
7392         return 0;
7393 }
7394
7395 static const struct file_operations tracing_err_log_fops = {
7396         .open           = tracing_err_log_open,
7397         .write          = tracing_err_log_write,
7398         .read           = seq_read,
7399         .llseek         = seq_lseek,
7400         .release        = tracing_err_log_release,
7401 };
7402
7403 static int tracing_buffers_open(struct inode *inode, struct file *filp)
7404 {
7405         struct trace_array *tr = inode->i_private;
7406         struct ftrace_buffer_info *info;
7407         int ret;
7408
7409         ret = tracing_check_open_get_tr(tr);
7410         if (ret)
7411                 return ret;
7412
7413         info = kzalloc(sizeof(*info), GFP_KERNEL);
7414         if (!info) {
7415                 trace_array_put(tr);
7416                 return -ENOMEM;
7417         }
7418
7419         mutex_lock(&trace_types_lock);
7420
7421         info->iter.tr           = tr;
7422         info->iter.cpu_file     = tracing_get_cpu(inode);
7423         info->iter.trace        = tr->current_trace;
7424         info->iter.array_buffer = &tr->array_buffer;
7425         info->spare             = NULL;
7426         /* Force reading the ring buffer on the first read */
7427         info->read              = (unsigned int)-1;
7428
7429         filp->private_data = info;
7430
7431         tr->trace_ref++;
7432
7433         mutex_unlock(&trace_types_lock);
7434
7435         ret = nonseekable_open(inode, filp);
7436         if (ret < 0)
7437                 trace_array_put(tr);
7438
7439         return ret;
7440 }
7441
7442 static __poll_t
7443 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
7444 {
7445         struct ftrace_buffer_info *info = filp->private_data;
7446         struct trace_iterator *iter = &info->iter;
7447
7448         return trace_poll(iter, filp, poll_table);
7449 }
7450
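/*
 * read() handler for trace_pipe_raw (and snapshot_raw): pulls one ring-buffer
 * page at a time into a cached spare page and copies it to user space,
 * blocking (unless O_NONBLOCK) while the buffer is empty.
 */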
7451 static ssize_t
7452 tracing_buffers_read(struct file *filp, char __user *ubuf,
7453                      size_t count, loff_t *ppos)
7454 {
7455         struct ftrace_buffer_info *info = filp->private_data;
7456         struct trace_iterator *iter = &info->iter;
7457         ssize_t ret = 0;
7458         ssize_t size;
7459
7460         if (!count)
7461                 return 0;
7462
7463 #ifdef CONFIG_TRACER_MAX_TRACE
7464         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7465                 return -EBUSY;
7466 #endif
7467
7468         if (!info->spare) {
7469                 info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
7470                                                           iter->cpu_file);
7471                 if (IS_ERR(info->spare)) {
7472                         ret = PTR_ERR(info->spare);
7473                         info->spare = NULL;
7474                 } else {
7475                         info->spare_cpu = iter->cpu_file;
7476                 }
7477         }
7478         if (!info->spare)
7479                 return ret;
7480
7481         /* Is there previously read data left to copy out? */
7482         if (info->read < PAGE_SIZE)
7483                 goto read;
7484
7485  again:
7486         trace_access_lock(iter->cpu_file);
7487         ret = ring_buffer_read_page(iter->array_buffer->buffer,
7488                                     &info->spare,
7489                                     count,
7490                                     iter->cpu_file, 0);
7491         trace_access_unlock(iter->cpu_file);
7492
7493         if (ret < 0) {
7494                 if (trace_empty(iter)) {
7495                         if ((filp->f_flags & O_NONBLOCK))
7496                                 return -EAGAIN;
7497
7498                         ret = wait_on_pipe(iter, 0);
7499                         if (ret)
7500                                 return ret;
7501
7502                         goto again;
7503                 }
7504                 return 0;
7505         }
7506
7507         info->read = 0;
7508  read:
7509         size = PAGE_SIZE - info->read;
7510         if (size > count)
7511                 size = count;
7512
7513         ret = copy_to_user(ubuf, info->spare + info->read, size);
7514         if (ret == size)
7515                 return -EFAULT;
7516
7517         size -= ret;
7518
7519         *ppos += size;
7520         info->read += size;
7521
7522         return size;
7523 }
7524
7525 static int tracing_buffers_release(struct inode *inode, struct file *file)
7526 {
7527         struct ftrace_buffer_info *info = file->private_data;
7528         struct trace_iterator *iter = &info->iter;
7529
7530         mutex_lock(&trace_types_lock);
7531
7532         iter->tr->trace_ref--;
7533
7534         __trace_array_put(iter->tr);
7535
7536         if (info->spare)
7537                 ring_buffer_free_read_page(iter->array_buffer->buffer,
7538                                            info->spare_cpu, info->spare);
7539         kfree(info);
7540
7541         mutex_unlock(&trace_types_lock);
7542
7543         return 0;
7544 }
7545
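/*
 * A refcounted reference to a ring-buffer page handed out to a pipe by
 * tracing_buffers_splice_read(); the page is returned to the ring buffer
 * only when the last reference is dropped.
 */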
7546 struct buffer_ref {
7547         struct trace_buffer     *buffer;
7548         void                    *page;
7549         int                     cpu;
7550         refcount_t              refcount;
7551 };
7552
7553 static void buffer_ref_release(struct buffer_ref *ref)
7554 {
7555         if (!refcount_dec_and_test(&ref->refcount))
7556                 return;
7557         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
7558         kfree(ref);
7559 }
7560
7561 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
7562                                     struct pipe_buffer *buf)
7563 {
7564         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7565
7566         buffer_ref_release(ref);
7567         buf->private = 0;
7568 }
7569
7570 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
7571                                 struct pipe_buffer *buf)
7572 {
7573         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7574
7575         if (refcount_read(&ref->refcount) > INT_MAX/2)
7576                 return false;
7577
7578         refcount_inc(&ref->refcount);
7579         return true;
7580 }
7581
7582 /* Pipe buffer operations for a buffer. */
7583 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
7584         .release                = buffer_pipe_buf_release,
7585         .get                    = buffer_pipe_buf_get,
7586 };
7587
7588 /*
7589  * Callback from splice_to_pipe(), if we need to release some pages
7590  * at the end of the spd in case we errored out while filling the pipe.
7591  */
7592 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
7593 {
7594         struct buffer_ref *ref =
7595                 (struct buffer_ref *)spd->partial[i].private;
7596
7597         buffer_ref_release(ref);
7598         spd->partial[i].private = 0;
7599 }
7600
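/*
 * splice_read() handler for the per-cpu trace_pipe_raw files: whole
 * ring-buffer pages are handed to the pipe without copying, each wrapped in
 * a buffer_ref so the page goes back to the ring buffer once the pipe is
 * done with it. The file offset must be page aligned and len is rounded
 * down to a multiple of the page size (but must be at least one page).
 */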
7601 static ssize_t
7602 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7603                             struct pipe_inode_info *pipe, size_t len,
7604                             unsigned int flags)
7605 {
7606         struct ftrace_buffer_info *info = file->private_data;
7607         struct trace_iterator *iter = &info->iter;
7608         struct partial_page partial_def[PIPE_DEF_BUFFERS];
7609         struct page *pages_def[PIPE_DEF_BUFFERS];
7610         struct splice_pipe_desc spd = {
7611                 .pages          = pages_def,
7612                 .partial        = partial_def,
7613                 .nr_pages_max   = PIPE_DEF_BUFFERS,
7614                 .ops            = &buffer_pipe_buf_ops,
7615                 .spd_release    = buffer_spd_release,
7616         };
7617         struct buffer_ref *ref;
7618         int entries, i;
7619         ssize_t ret = 0;
7620
7621 #ifdef CONFIG_TRACER_MAX_TRACE
7622         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7623                 return -EBUSY;
7624 #endif
7625
7626         if (*ppos & (PAGE_SIZE - 1))
7627                 return -EINVAL;
7628
7629         if (len & (PAGE_SIZE - 1)) {
7630                 if (len < PAGE_SIZE)
7631                         return -EINVAL;
7632                 len &= PAGE_MASK;
7633         }
7634
7635         if (splice_grow_spd(pipe, &spd))
7636                 return -ENOMEM;
7637
7638  again:
7639         trace_access_lock(iter->cpu_file);
7640         entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
7641
7642         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
7643                 struct page *page;
7644                 int r;
7645
7646                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
7647                 if (!ref) {
7648                         ret = -ENOMEM;
7649                         break;
7650                 }
7651
7652                 refcount_set(&ref->refcount, 1);
7653                 ref->buffer = iter->array_buffer->buffer;
7654                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
7655                 if (IS_ERR(ref->page)) {
7656                         ret = PTR_ERR(ref->page);
7657                         ref->page = NULL;
7658                         kfree(ref);
7659                         break;
7660                 }
7661                 ref->cpu = iter->cpu_file;
7662
7663                 r = ring_buffer_read_page(ref->buffer, &ref->page,
7664                                           len, iter->cpu_file, 1);
7665                 if (r < 0) {
7666                         ring_buffer_free_read_page(ref->buffer, ref->cpu,
7667                                                    ref->page);
7668                         kfree(ref);
7669                         break;
7670                 }
7671
7672                 page = virt_to_page(ref->page);
7673
7674                 spd.pages[i] = page;
7675                 spd.partial[i].len = PAGE_SIZE;
7676                 spd.partial[i].offset = 0;
7677                 spd.partial[i].private = (unsigned long)ref;
7678                 spd.nr_pages++;
7679                 *ppos += PAGE_SIZE;
7680
7681                 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
7682         }
7683
7684         trace_access_unlock(iter->cpu_file);
7685         spd.nr_pages = i;
7686
7687         /* did we read anything? */
7688         if (!spd.nr_pages) {
7689                 if (ret)
7690                         goto out;
7691
7692                 ret = -EAGAIN;
7693                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
7694                         goto out;
7695
7696                 ret = wait_on_pipe(iter, iter->tr->buffer_percent);
7697                 if (ret)
7698                         goto out;
7699
7700                 goto again;
7701         }
7702
7703         ret = splice_to_pipe(pipe, &spd);
7704 out:
7705         splice_shrink_spd(&spd);
7706
7707         return ret;
7708 }
7709
7710 static const struct file_operations tracing_buffers_fops = {
7711         .open           = tracing_buffers_open,
7712         .read           = tracing_buffers_read,
7713         .poll           = tracing_buffers_poll,
7714         .release        = tracing_buffers_release,
7715         .splice_read    = tracing_buffers_splice_read,
7716         .llseek         = no_llseek,
7717 };
7718
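/*
 * Read handler for the per-cpu "stats" file: reports entry, overrun, byte,
 * dropped and read event counts plus the oldest and current timestamps for
 * one CPU's ring buffer.
 */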
7719 static ssize_t
7720 tracing_stats_read(struct file *filp, char __user *ubuf,
7721                    size_t count, loff_t *ppos)
7722 {
7723         struct inode *inode = file_inode(filp);
7724         struct trace_array *tr = inode->i_private;
7725         struct array_buffer *trace_buf = &tr->array_buffer;
7726         int cpu = tracing_get_cpu(inode);
7727         struct trace_seq *s;
7728         unsigned long cnt;
7729         unsigned long long t;
7730         unsigned long usec_rem;
7731
7732         s = kmalloc(sizeof(*s), GFP_KERNEL);
7733         if (!s)
7734                 return -ENOMEM;
7735
7736         trace_seq_init(s);
7737
7738         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
7739         trace_seq_printf(s, "entries: %ld\n", cnt);
7740
7741         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
7742         trace_seq_printf(s, "overrun: %ld\n", cnt);
7743
7744         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
7745         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
7746
7747         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
7748         trace_seq_printf(s, "bytes: %ld\n", cnt);
7749
7750         if (trace_clocks[tr->clock_id].in_ns) {
7751                 /* local or global for trace_clock */
7752                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7753                 usec_rem = do_div(t, USEC_PER_SEC);
7754                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
7755                                                                 t, usec_rem);
7756
7757                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
7758                 usec_rem = do_div(t, USEC_PER_SEC);
7759                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
7760         } else {
7761                 /* counter or tsc mode for trace_clock */
7762                 trace_seq_printf(s, "oldest event ts: %llu\n",
7763                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7764
7765                 trace_seq_printf(s, "now ts: %llu\n",
7766                                 ring_buffer_time_stamp(trace_buf->buffer, cpu));
7767         }
7768
7769         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
7770         trace_seq_printf(s, "dropped events: %ld\n", cnt);
7771
7772         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
7773         trace_seq_printf(s, "read events: %ld\n", cnt);
7774
7775         count = simple_read_from_buffer(ubuf, count, ppos,
7776                                         s->buffer, trace_seq_used(s));
7777
7778         kfree(s);
7779
7780         return count;
7781 }
7782
7783 static const struct file_operations tracing_stats_fops = {
7784         .open           = tracing_open_generic_tr,
7785         .read           = tracing_stats_read,
7786         .llseek         = generic_file_llseek,
7787         .release        = tracing_release_generic_tr,
7788 };
7789
7790 #ifdef CONFIG_DYNAMIC_FTRACE
7791
7792 static ssize_t
7793 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
7794                   size_t cnt, loff_t *ppos)
7795 {
7796         ssize_t ret;
7797         char *buf;
7798         int r;
7799
7800         /* 256 should be plenty to hold the amount needed */
7801         buf = kmalloc(256, GFP_KERNEL);
7802         if (!buf)
7803                 return -ENOMEM;
7804
7805         r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
7806                       ftrace_update_tot_cnt,
7807                       ftrace_number_of_pages,
7808                       ftrace_number_of_groups);
7809
7810         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7811         kfree(buf);
7812         return ret;
7813 }
7814
7815 static const struct file_operations tracing_dyn_info_fops = {
7816         .open           = tracing_open_generic,
7817         .read           = tracing_read_dyn_info,
7818         .llseek         = generic_file_llseek,
7819 };
7820 #endif /* CONFIG_DYNAMIC_FTRACE */
7821
7822 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
7823 static void
7824 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
7825                 struct trace_array *tr, struct ftrace_probe_ops *ops,
7826                 void *data)
7827 {
7828         tracing_snapshot_instance(tr);
7829 }
7830
7831 static void
7832 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
7833                       struct trace_array *tr, struct ftrace_probe_ops *ops,
7834                       void *data)
7835 {
7836         struct ftrace_func_mapper *mapper = data;
7837         long *count = NULL;
7838
7839         if (mapper)
7840                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7841
7842         if (count) {
7843
7844                 if (*count <= 0)
7845                         return;
7846
7847                 (*count)--;
7848         }
7849
7850         tracing_snapshot_instance(tr);
7851 }
7852
7853 static int
7854 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
7855                       struct ftrace_probe_ops *ops, void *data)
7856 {
7857         struct ftrace_func_mapper *mapper = data;
7858         long *count = NULL;
7859
7860         seq_printf(m, "%ps:", (void *)ip);
7861
7862         seq_puts(m, "snapshot");
7863
7864         if (mapper)
7865                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7866
7867         if (count)
7868                 seq_printf(m, ":count=%ld\n", *count);
7869         else
7870                 seq_puts(m, ":unlimited\n");
7871
7872         return 0;
7873 }
7874
7875 static int
7876 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
7877                      unsigned long ip, void *init_data, void **data)
7878 {
7879         struct ftrace_func_mapper *mapper = *data;
7880
7881         if (!mapper) {
7882                 mapper = allocate_ftrace_func_mapper();
7883                 if (!mapper)
7884                         return -ENOMEM;
7885                 *data = mapper;
7886         }
7887
7888         return ftrace_func_mapper_add_ip(mapper, ip, init_data);
7889 }
7890
7891 static void
7892 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
7893                      unsigned long ip, void *data)
7894 {
7895         struct ftrace_func_mapper *mapper = data;
7896
7897         if (!ip) {
7898                 if (!mapper)
7899                         return;
7900                 free_ftrace_func_mapper(mapper, NULL);
7901                 return;
7902         }
7903
7904         ftrace_func_mapper_remove_ip(mapper, ip);
7905 }
7906
7907 static struct ftrace_probe_ops snapshot_probe_ops = {
7908         .func                   = ftrace_snapshot,
7909         .print                  = ftrace_snapshot_print,
7910 };
7911
7912 static struct ftrace_probe_ops snapshot_count_probe_ops = {
7913         .func                   = ftrace_count_snapshot,
7914         .print                  = ftrace_snapshot_print,
7915         .init                   = ftrace_snapshot_init,
7916         .free                   = ftrace_snapshot_free,
7917 };
7918
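/*
 * Parse the "snapshot" command written to set_ftrace_filter, e.g.
 * (the function name below is only an example):
 *
 *   echo 'do_fault:snapshot' > set_ftrace_filter
 *   echo 'do_fault:snapshot:5' > set_ftrace_filter
 *
 * An optional ":count" limits how many snapshots the probe will take;
 * prefixing the glob with '!' removes the probe.
 */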
7919 static int
7920 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
7921                                char *glob, char *cmd, char *param, int enable)
7922 {
7923         struct ftrace_probe_ops *ops;
7924         void *count = (void *)-1;
7925         char *number;
7926         int ret;
7927
7928         if (!tr)
7929                 return -ENODEV;
7930
7931         /* hash funcs only work with set_ftrace_filter */
7932         if (!enable)
7933                 return -EINVAL;
7934
7935         ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
7936
7937         if (glob[0] == '!')
7938                 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
7939
7940         if (!param)
7941                 goto out_reg;
7942
7943         number = strsep(&param, ":");
7944
7945         if (!strlen(number))
7946                 goto out_reg;
7947
7948         /*
7949          * We use the callback data field (which is a pointer)
7950          * as our counter.
7951          */
7952         ret = kstrtoul(number, 0, (unsigned long *)&count);
7953         if (ret)
7954                 return ret;
7955
7956  out_reg:
7957         ret = tracing_alloc_snapshot_instance(tr);
7958         if (ret < 0)
7959                 goto out;
7960
7961         ret = register_ftrace_function_probe(glob, tr, ops, count);
7962
7963  out:
7964         return ret < 0 ? ret : 0;
7965 }
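
/*
 * Example of the command format parsed by the callback above, as
 * written to set_ftrace_filter (the function glob "schedule" is only
 * illustrative):
 *
 *   echo 'schedule:snapshot' > set_ftrace_filter      # snapshot on every hit
 *   echo 'schedule:snapshot:5' > set_ftrace_filter    # at most 5 snapshots
 *   echo '!schedule:snapshot' > set_ftrace_filter     # remove the probe
 */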
7966
7967 static struct ftrace_func_command ftrace_snapshot_cmd = {
7968         .name                   = "snapshot",
7969         .func                   = ftrace_trace_snapshot_callback,
7970 };
7971
7972 static __init int register_snapshot_cmd(void)
7973 {
7974         return register_ftrace_command(&ftrace_snapshot_cmd);
7975 }
7976 #else
7977 static inline __init int register_snapshot_cmd(void) { return 0; }
7978 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
7979
7980 static struct dentry *tracing_get_dentry(struct trace_array *tr)
7981 {
7982         if (WARN_ON(!tr->dir))
7983                 return ERR_PTR(-ENODEV);
7984
7985         /* Top directory uses NULL as the parent */
7986         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
7987                 return NULL;
7988
7989         /* All sub buffers have a descriptor */
7990         return tr->dir;
7991 }
7992
7993 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
7994 {
7995         struct dentry *d_tracer;
7996
7997         if (tr->percpu_dir)
7998                 return tr->percpu_dir;
7999
8000         d_tracer = tracing_get_dentry(tr);
8001         if (IS_ERR(d_tracer))
8002                 return NULL;
8003
8004         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8005
8006         MEM_FAIL(!tr->percpu_dir,
8007                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8008
8009         return tr->percpu_dir;
8010 }
8011
8012 static struct dentry *
8013 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8014                       void *data, long cpu, const struct file_operations *fops)
8015 {
8016         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8017
8018         if (ret) /* See tracing_get_cpu() */
8019                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
8020         return ret;
8021 }
8022
8023 static void
8024 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8025 {
8026         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8027         struct dentry *d_cpu;
8028         char cpu_dir[30]; /* 30 characters should be more than enough */
8029
8030         if (!d_percpu)
8031                 return;
8032
8033         snprintf(cpu_dir, 30, "cpu%ld", cpu);
8034         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8035         if (!d_cpu) {
8036                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8037                 return;
8038         }
8039
8040         /* per cpu trace_pipe */
8041         trace_create_cpu_file("trace_pipe", 0444, d_cpu,
8042                                 tr, cpu, &tracing_pipe_fops);
8043
8044         /* per cpu trace */
8045         trace_create_cpu_file("trace", 0644, d_cpu,
8046                                 tr, cpu, &tracing_fops);
8047
8048         trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
8049                                 tr, cpu, &tracing_buffers_fops);
8050
8051         trace_create_cpu_file("stats", 0444, d_cpu,
8052                                 tr, cpu, &tracing_stats_fops);
8053
8054         trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
8055                                 tr, cpu, &tracing_entries_fops);
8056
8057 #ifdef CONFIG_TRACER_SNAPSHOT
8058         trace_create_cpu_file("snapshot", 0644, d_cpu,
8059                                 tr, cpu, &snapshot_fops);
8060
8061         trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
8062                                 tr, cpu, &snapshot_raw_fops);
8063 #endif
8064 }
8065
8066 #ifdef CONFIG_FTRACE_SELFTEST
8067 /* Let selftest have access to static functions in this file */
8068 #include "trace_selftest.c"
8069 #endif
8070
8071 static ssize_t
8072 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8073                         loff_t *ppos)
8074 {
8075         struct trace_option_dentry *topt = filp->private_data;
8076         char *buf;
8077
8078         if (topt->flags->val & topt->opt->bit)
8079                 buf = "1\n";
8080         else
8081                 buf = "0\n";
8082
8083         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8084 }
8085
8086 static ssize_t
8087 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8088                          loff_t *ppos)
8089 {
8090         struct trace_option_dentry *topt = filp->private_data;
8091         unsigned long val;
8092         int ret;
8093
8094         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8095         if (ret)
8096                 return ret;
8097
8098         if (val != 0 && val != 1)
8099                 return -EINVAL;
8100
8101         if (!!(topt->flags->val & topt->opt->bit) != val) {
8102                 mutex_lock(&trace_types_lock);
8103                 ret = __set_tracer_option(topt->tr, topt->flags,
8104                                           topt->opt, !val);
8105                 mutex_unlock(&trace_types_lock);
8106                 if (ret)
8107                         return ret;
8108         }
8109
8110         *ppos += cnt;
8111
8112         return cnt;
8113 }
8114
8115
8116 static const struct file_operations trace_options_fops = {
8117         .open = tracing_open_generic,
8118         .read = trace_options_read,
8119         .write = trace_options_write,
8120         .llseek = generic_file_llseek,
8121 };
8122
8123 /*
8124  * In order to pass in both the trace_array descriptor and the index
8125  * to the flag that the trace option file represents, the trace_array
8126  * has a character array of trace_flags_index[], which holds the index
8127  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8128  * The address of this character array is passed to the flag option file
8129  * read/write callbacks.
8130  *
8131  * In order to extract both the index and the trace_array descriptor,
8132  * get_tr_index() uses the following algorithm.
8133  *
8134  *   idx = *ptr;
8135  *
8136  * As the pointer points at its own slot in the index array, and the
8137  * value stored there is that slot's position (index[1] == 1).
8138  *
8139  * Then, to get the trace_array descriptor, subtract that index from
8140  * the pointer to arrive at the start of the index array:
8141  *
8142  *   ptr - idx == &index[0]
8143  *
8144  * Then a simple container_of() from that pointer gets us to the
8145  * trace_array descriptor.
8146  */
8147 static void get_tr_index(void *data, struct trace_array **ptr,
8148                          unsigned int *pindex)
8149 {
8150         *pindex = *(unsigned char *)data;
8151
8152         *ptr = container_of(data - *pindex, struct trace_array,
8153                             trace_flags_index);
8154 }
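
/*
 * A minimal, self-contained sketch of the trick described above (not
 * kernel code; "struct foo" and the helper name are hypothetical).
 * Each byte of idx[] holds its own position, so a pointer to idx[i]
 * is enough to recover both i and the enclosing structure:
 *
 *	struct foo {
 *		unsigned char idx[4];		// idx[i] == i
 *	};
 *
 *	static struct foo *foo_from_index(void *data, unsigned int *i)
 *	{
 *		*i = *(unsigned char *)data;
 *		return container_of(data - *i, struct foo, idx);
 *	}
 */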
8155
8156 static ssize_t
8157 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8158                         loff_t *ppos)
8159 {
8160         void *tr_index = filp->private_data;
8161         struct trace_array *tr;
8162         unsigned int index;
8163         char *buf;
8164
8165         get_tr_index(tr_index, &tr, &index);
8166
8167         if (tr->trace_flags & (1 << index))
8168                 buf = "1\n";
8169         else
8170                 buf = "0\n";
8171
8172         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8173 }
8174
8175 static ssize_t
8176 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8177                          loff_t *ppos)
8178 {
8179         void *tr_index = filp->private_data;
8180         struct trace_array *tr;
8181         unsigned int index;
8182         unsigned long val;
8183         int ret;
8184
8185         get_tr_index(tr_index, &tr, &index);
8186
8187         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8188         if (ret)
8189                 return ret;
8190
8191         if (val != 0 && val != 1)
8192                 return -EINVAL;
8193
8194         mutex_lock(&event_mutex);
8195         mutex_lock(&trace_types_lock);
8196         ret = set_tracer_flag(tr, 1 << index, val);
8197         mutex_unlock(&trace_types_lock);
8198         mutex_unlock(&event_mutex);
8199
8200         if (ret < 0)
8201                 return ret;
8202
8203         *ppos += cnt;
8204
8205         return cnt;
8206 }
8207
8208 static const struct file_operations trace_options_core_fops = {
8209         .open = tracing_open_generic,
8210         .read = trace_options_core_read,
8211         .write = trace_options_core_write,
8212         .llseek = generic_file_llseek,
8213 };
8214
8215 struct dentry *trace_create_file(const char *name,
8216                                  umode_t mode,
8217                                  struct dentry *parent,
8218                                  void *data,
8219                                  const struct file_operations *fops)
8220 {
8221         struct dentry *ret;
8222
8223         ret = tracefs_create_file(name, mode, parent, data, fops);
8224         if (!ret)
8225                 pr_warn("Could not create tracefs '%s' entry\n", name);
8226
8227         return ret;
8228 }
8229
8230
8231 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8232 {
8233         struct dentry *d_tracer;
8234
8235         if (tr->options)
8236                 return tr->options;
8237
8238         d_tracer = tracing_get_dentry(tr);
8239         if (IS_ERR(d_tracer))
8240                 return NULL;
8241
8242         tr->options = tracefs_create_dir("options", d_tracer);
8243         if (!tr->options) {
8244                 pr_warn("Could not create tracefs directory 'options'\n");
8245                 return NULL;
8246         }
8247
8248         return tr->options;
8249 }
8250
8251 static void
8252 create_trace_option_file(struct trace_array *tr,
8253                          struct trace_option_dentry *topt,
8254                          struct tracer_flags *flags,
8255                          struct tracer_opt *opt)
8256 {
8257         struct dentry *t_options;
8258
8259         t_options = trace_options_init_dentry(tr);
8260         if (!t_options)
8261                 return;
8262
8263         topt->flags = flags;
8264         topt->opt = opt;
8265         topt->tr = tr;
8266
8267         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
8268                                     &trace_options_fops);
8269
8270 }
8271
8272 static void
8273 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8274 {
8275         struct trace_option_dentry *topts;
8276         struct trace_options *tr_topts;
8277         struct tracer_flags *flags;
8278         struct tracer_opt *opts;
8279         int cnt;
8280         int i;
8281
8282         if (!tracer)
8283                 return;
8284
8285         flags = tracer->flags;
8286
8287         if (!flags || !flags->opts)
8288                 return;
8289
8290         /*
8291          * If this is an instance, only create flags for tracers
8292          * the instance may have.
8293          */
8294         if (!trace_ok_for_array(tracer, tr))
8295                 return;
8296
8297         for (i = 0; i < tr->nr_topts; i++) {
8298                 /* Make sure there are no duplicate flags. */
8299                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8300                         return;
8301         }
8302
8303         opts = flags->opts;
8304
8305         for (cnt = 0; opts[cnt].name; cnt++)
8306                 ;
8307
8308         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8309         if (!topts)
8310                 return;
8311
8312         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8313                             GFP_KERNEL);
8314         if (!tr_topts) {
8315                 kfree(topts);
8316                 return;
8317         }
8318
8319         tr->topts = tr_topts;
8320         tr->topts[tr->nr_topts].tracer = tracer;
8321         tr->topts[tr->nr_topts].topts = topts;
8322         tr->nr_topts++;
8323
8324         for (cnt = 0; opts[cnt].name; cnt++) {
8325                 create_trace_option_file(tr, &topts[cnt], flags,
8326                                          &opts[cnt]);
8327                 MEM_FAIL(topts[cnt].entry == NULL,
8328                           "Failed to create trace option: %s",
8329                           opts[cnt].name);
8330         }
8331 }
8332
8333 static struct dentry *
8334 create_trace_option_core_file(struct trace_array *tr,
8335                               const char *option, long index)
8336 {
8337         struct dentry *t_options;
8338
8339         t_options = trace_options_init_dentry(tr);
8340         if (!t_options)
8341                 return NULL;
8342
8343         return trace_create_file(option, 0644, t_options,
8344                                  (void *)&tr->trace_flags_index[index],
8345                                  &trace_options_core_fops);
8346 }
8347
8348 static void create_trace_options_dir(struct trace_array *tr)
8349 {
8350         struct dentry *t_options;
8351         bool top_level = tr == &global_trace;
8352         int i;
8353
8354         t_options = trace_options_init_dentry(tr);
8355         if (!t_options)
8356                 return;
8357
8358         for (i = 0; trace_options[i]; i++) {
8359                 if (top_level ||
8360                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
8361                         create_trace_option_core_file(tr, trace_options[i], i);
8362         }
8363 }
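
/*
 * The result is a set of boolean files under the options/ directory of
 * the top level tracing directory and of each instance, one file per
 * trace flag. Each file reads back "0" or "1" and accepts only those
 * two values, e.g. (path and option name are illustrative):
 *
 *   cat /sys/kernel/tracing/options/sym-offset
 *   echo 1 > /sys/kernel/tracing/options/sym-offset
 */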
8364
8365 static ssize_t
8366 rb_simple_read(struct file *filp, char __user *ubuf,
8367                size_t cnt, loff_t *ppos)
8368 {
8369         struct trace_array *tr = filp->private_data;
8370         char buf[64];
8371         int r;
8372
8373         r = tracer_tracing_is_on(tr);
8374         r = sprintf(buf, "%d\n", r);
8375
8376         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8377 }
8378
8379 static ssize_t
8380 rb_simple_write(struct file *filp, const char __user *ubuf,
8381                 size_t cnt, loff_t *ppos)
8382 {
8383         struct trace_array *tr = filp->private_data;
8384         struct trace_buffer *buffer = tr->array_buffer.buffer;
8385         unsigned long val;
8386         int ret;
8387
8388         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8389         if (ret)
8390                 return ret;
8391
8392         if (buffer) {
8393                 mutex_lock(&trace_types_lock);
8394                 if (!!val == tracer_tracing_is_on(tr)) {
8395                         val = 0; /* do nothing */
8396                 } else if (val) {
8397                         tracer_tracing_on(tr);
8398                         if (tr->current_trace->start)
8399                                 tr->current_trace->start(tr);
8400                 } else {
8401                         tracer_tracing_off(tr);
8402                         if (tr->current_trace->stop)
8403                                 tr->current_trace->stop(tr);
8404                 }
8405                 mutex_unlock(&trace_types_lock);
8406         }
8407
8408         (*ppos)++;
8409
8410         return cnt;
8411 }
8412
8413 static const struct file_operations rb_simple_fops = {
8414         .open           = tracing_open_generic_tr,
8415         .read           = rb_simple_read,
8416         .write          = rb_simple_write,
8417         .release        = tracing_release_generic_tr,
8418         .llseek         = default_llseek,
8419 };
8420
8421 static ssize_t
8422 buffer_percent_read(struct file *filp, char __user *ubuf,
8423                     size_t cnt, loff_t *ppos)
8424 {
8425         struct trace_array *tr = filp->private_data;
8426         char buf[64];
8427         int r;
8428
8429         r = tr->buffer_percent;
8430         r = sprintf(buf, "%d\n", r);
8431
8432         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8433 }
8434
8435 static ssize_t
8436 buffer_percent_write(struct file *filp, const char __user *ubuf,
8437                      size_t cnt, loff_t *ppos)
8438 {
8439         struct trace_array *tr = filp->private_data;
8440         unsigned long val;
8441         int ret;
8442
8443         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8444         if (ret)
8445                 return ret;
8446
8447         if (val > 100)
8448                 return -EINVAL;
8449
8450         if (!val)
8451                 val = 1;
8452
8453         tr->buffer_percent = val;
8454
8455         (*ppos)++;
8456
8457         return cnt;
8458 }
8459
8460 static const struct file_operations buffer_percent_fops = {
8461         .open           = tracing_open_generic_tr,
8462         .read           = buffer_percent_read,
8463         .write          = buffer_percent_write,
8464         .release        = tracing_release_generic_tr,
8465         .llseek         = default_llseek,
8466 };
8467
8468 static struct dentry *trace_instance_dir;
8469
8470 static void
8471 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
8472
8473 static int
8474 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
8475 {
8476         enum ring_buffer_flags rb_flags;
8477
8478         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
8479
8480         buf->tr = tr;
8481
8482         buf->buffer = ring_buffer_alloc(size, rb_flags);
8483         if (!buf->buffer)
8484                 return -ENOMEM;
8485
8486         buf->data = alloc_percpu(struct trace_array_cpu);
8487         if (!buf->data) {
8488                 ring_buffer_free(buf->buffer);
8489                 buf->buffer = NULL;
8490                 return -ENOMEM;
8491         }
8492
8493         /* Allocate the first page for all buffers */
8494         set_buffer_entries(&tr->array_buffer,
8495                            ring_buffer_size(tr->array_buffer.buffer, 0));
8496
8497         return 0;
8498 }
8499
8500 static int allocate_trace_buffers(struct trace_array *tr, int size)
8501 {
8502         int ret;
8503
8504         ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
8505         if (ret)
8506                 return ret;
8507
8508 #ifdef CONFIG_TRACER_MAX_TRACE
8509         ret = allocate_trace_buffer(tr, &tr->max_buffer,
8510                                     allocate_snapshot ? size : 1);
8511         if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
8512                 ring_buffer_free(tr->array_buffer.buffer);
8513                 tr->array_buffer.buffer = NULL;
8514                 free_percpu(tr->array_buffer.data);
8515                 tr->array_buffer.data = NULL;
8516                 return -ENOMEM;
8517         }
8518         tr->allocated_snapshot = allocate_snapshot;
8519
8520         /*
8521          * Only the top level trace array gets its snapshot allocated
8522          * from the kernel command line.
8523          */
8524         allocate_snapshot = false;
8525 #endif
8526
8527         return 0;
8528 }
8529
8530 static void free_trace_buffer(struct array_buffer *buf)
8531 {
8532         if (buf->buffer) {
8533                 ring_buffer_free(buf->buffer);
8534                 buf->buffer = NULL;
8535                 free_percpu(buf->data);
8536                 buf->data = NULL;
8537         }
8538 }
8539
8540 static void free_trace_buffers(struct trace_array *tr)
8541 {
8542         if (!tr)
8543                 return;
8544
8545         free_trace_buffer(&tr->array_buffer);
8546
8547 #ifdef CONFIG_TRACER_MAX_TRACE
8548         free_trace_buffer(&tr->max_buffer);
8549 #endif
8550 }
8551
8552 static void init_trace_flags_index(struct trace_array *tr)
8553 {
8554         int i;
8555
8556         /* Used by the trace options files */
8557         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
8558                 tr->trace_flags_index[i] = i;
8559 }
8560
8561 static void __update_tracer_options(struct trace_array *tr)
8562 {
8563         struct tracer *t;
8564
8565         for (t = trace_types; t; t = t->next)
8566                 add_tracer_options(tr, t);
8567 }
8568
8569 static void update_tracer_options(struct trace_array *tr)
8570 {
8571         mutex_lock(&trace_types_lock);
8572         __update_tracer_options(tr);
8573         mutex_unlock(&trace_types_lock);
8574 }
8575
8576 /* Must have trace_types_lock held */
8577 struct trace_array *trace_array_find(const char *instance)
8578 {
8579         struct trace_array *tr, *found = NULL;
8580
8581         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8582                 if (tr->name && strcmp(tr->name, instance) == 0) {
8583                         found = tr;
8584                         break;
8585                 }
8586         }
8587
8588         return found;
8589 }
8590
8591 struct trace_array *trace_array_find_get(const char *instance)
8592 {
8593         struct trace_array *tr;
8594
8595         mutex_lock(&trace_types_lock);
8596         tr = trace_array_find(instance);
8597         if (tr)
8598                 tr->ref++;
8599         mutex_unlock(&trace_types_lock);
8600
8601         return tr;
8602 }
8603
8604 static struct trace_array *trace_array_create(const char *name)
8605 {
8606         struct trace_array *tr;
8607         int ret;
8608
8609         ret = -ENOMEM;
8610         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
8611         if (!tr)
8612                 return ERR_PTR(ret);
8613
8614         tr->name = kstrdup(name, GFP_KERNEL);
8615         if (!tr->name)
8616                 goto out_free_tr;
8617
8618         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
8619                 goto out_free_tr;
8620
8621         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
8622
8623         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
8624
8625         raw_spin_lock_init(&tr->start_lock);
8626
8627         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8628
8629         tr->current_trace = &nop_trace;
8630
8631         INIT_LIST_HEAD(&tr->systems);
8632         INIT_LIST_HEAD(&tr->events);
8633         INIT_LIST_HEAD(&tr->hist_vars);
8634         INIT_LIST_HEAD(&tr->err_log);
8635
8636         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
8637                 goto out_free_tr;
8638
8639         tr->dir = tracefs_create_dir(name, trace_instance_dir);
8640         if (!tr->dir)
8641                 goto out_free_tr;
8642
8643         ret = event_trace_add_tracer(tr->dir, tr);
8644         if (ret) {
8645                 tracefs_remove(tr->dir);
8646                 goto out_free_tr;
8647         }
8648
8649         ftrace_init_trace_array(tr);
8650
8651         init_tracer_tracefs(tr, tr->dir);
8652         init_trace_flags_index(tr);
8653         __update_tracer_options(tr);
8654
8655         list_add(&tr->list, &ftrace_trace_arrays);
8656
8657         tr->ref++;
8658
8659
8660         return tr;
8661
8662  out_free_tr:
8663         free_trace_buffers(tr);
8664         free_cpumask_var(tr->tracing_cpumask);
8665         kfree(tr->name);
8666         kfree(tr);
8667
8668         return ERR_PTR(ret);
8669 }
8670
8671 static int instance_mkdir(const char *name)
8672 {
8673         struct trace_array *tr;
8674         int ret;
8675
8676         mutex_lock(&event_mutex);
8677         mutex_lock(&trace_types_lock);
8678
8679         ret = -EEXIST;
8680         if (trace_array_find(name))
8681                 goto out_unlock;
8682
8683         tr = trace_array_create(name);
8684
8685         ret = PTR_ERR_OR_ZERO(tr);
8686
8687 out_unlock:
8688         mutex_unlock(&trace_types_lock);
8689         mutex_unlock(&event_mutex);
8690         return ret;
8691 }
8692
8693 /**
8694  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
8695  * @name: The name of the trace array to be looked up/created.
8696  *
8697  * Returns a pointer to the trace array with the given name, or
8698  * NULL if it cannot be created.
8699  *
8700  * NOTE: This function increments the reference counter associated with the
8701  * trace array returned. This makes sure it cannot be freed while in use.
8702  * Use trace_array_put() once the trace array is no longer needed.
8703  * If the trace_array is to be freed, trace_array_destroy() needs to
8704  * be called after the trace_array_put(), or simply let user space delete
8705  * it from the tracefs instances directory. But until the
8706  * trace_array_put() is called, user space can not delete it.
8707  *
8708  */
8709 struct trace_array *trace_array_get_by_name(const char *name)
8710 {
8711         struct trace_array *tr;
8712
8713         mutex_lock(&event_mutex);
8714         mutex_lock(&trace_types_lock);
8715
8716         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8717                 if (tr->name && strcmp(tr->name, name) == 0)
8718                         goto out_unlock;
8719         }
8720
8721         tr = trace_array_create(name);
8722
8723         if (IS_ERR(tr))
8724                 tr = NULL;
8725 out_unlock:
8726         if (tr)
8727                 tr->ref++;
8728
8729         mutex_unlock(&trace_types_lock);
8730         mutex_unlock(&event_mutex);
8731         return tr;
8732 }
8733 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
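
/*
 * Sketch of the intended calling pattern from kernel code (the
 * instance name is illustrative and error handling is trimmed):
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("my_instance");
 *	if (!tr)
 *		return -ENOMEM;
 *	...
 *	trace_array_put(tr);
 *	// and only if the instance should also be removed from tracefs:
 *	trace_array_destroy(tr);
 */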
8734
8735 static int __remove_instance(struct trace_array *tr)
8736 {
8737         int i;
8738
8739         /* Reference counter for a newly created trace array = 1. */
8740         if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
8741                 return -EBUSY;
8742
8743         list_del(&tr->list);
8744
8745         /* Disable all the flags that were enabled coming in */
8746         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
8747                 if ((1 << i) & ZEROED_TRACE_FLAGS)
8748                         set_tracer_flag(tr, 1 << i, 0);
8749         }
8750
8751         tracing_set_nop(tr);
8752         clear_ftrace_function_probes(tr);
8753         event_trace_del_tracer(tr);
8754         ftrace_clear_pids(tr);
8755         ftrace_destroy_function_files(tr);
8756         tracefs_remove(tr->dir);
8757         free_trace_buffers(tr);
8758
8759         for (i = 0; i < tr->nr_topts; i++) {
8760                 kfree(tr->topts[i].topts);
8761         }
8762         kfree(tr->topts);
8763
8764         free_cpumask_var(tr->tracing_cpumask);
8765         kfree(tr->name);
8766         kfree(tr);
8767         tr = NULL;
8768
8769         return 0;
8770 }
8771
8772 int trace_array_destroy(struct trace_array *this_tr)
8773 {
8774         struct trace_array *tr;
8775         int ret;
8776
8777         if (!this_tr)
8778                 return -EINVAL;
8779
8780         mutex_lock(&event_mutex);
8781         mutex_lock(&trace_types_lock);
8782
8783         ret = -ENODEV;
8784
8785         /* Making sure trace array exists before destroying it. */
8786         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8787                 if (tr == this_tr) {
8788                         ret = __remove_instance(tr);
8789                         break;
8790                 }
8791         }
8792
8793         mutex_unlock(&trace_types_lock);
8794         mutex_unlock(&event_mutex);
8795
8796         return ret;
8797 }
8798 EXPORT_SYMBOL_GPL(trace_array_destroy);
8799
8800 static int instance_rmdir(const char *name)
8801 {
8802         struct trace_array *tr;
8803         int ret;
8804
8805         mutex_lock(&event_mutex);
8806         mutex_lock(&trace_types_lock);
8807
8808         ret = -ENODEV;
8809         tr = trace_array_find(name);
8810         if (tr)
8811                 ret = __remove_instance(tr);
8812
8813         mutex_unlock(&trace_types_lock);
8814         mutex_unlock(&event_mutex);
8815
8816         return ret;
8817 }
8818
8819 static __init void create_trace_instances(struct dentry *d_tracer)
8820 {
8821         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
8822                                                          instance_mkdir,
8823                                                          instance_rmdir);
8824         if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
8825                 return;
8826 }
8827
8828 static void
8829 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
8830 {
8831         struct trace_event_file *file;
8832         int cpu;
8833
8834         trace_create_file("available_tracers", 0444, d_tracer,
8835                         tr, &show_traces_fops);
8836
8837         trace_create_file("current_tracer", 0644, d_tracer,
8838                         tr, &set_tracer_fops);
8839
8840         trace_create_file("tracing_cpumask", 0644, d_tracer,
8841                           tr, &tracing_cpumask_fops);
8842
8843         trace_create_file("trace_options", 0644, d_tracer,
8844                           tr, &tracing_iter_fops);
8845
8846         trace_create_file("trace", 0644, d_tracer,
8847                           tr, &tracing_fops);
8848
8849         trace_create_file("trace_pipe", 0444, d_tracer,
8850                           tr, &tracing_pipe_fops);
8851
8852         trace_create_file("buffer_size_kb", 0644, d_tracer,
8853                           tr, &tracing_entries_fops);
8854
8855         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
8856                           tr, &tracing_total_entries_fops);
8857
8858         trace_create_file("free_buffer", 0200, d_tracer,
8859                           tr, &tracing_free_buffer_fops);
8860
8861         trace_create_file("trace_marker", 0220, d_tracer,
8862                           tr, &tracing_mark_fops);
8863
8864         file = __find_event_file(tr, "ftrace", "print");
8865         if (file && file->dir)
8866                 trace_create_file("trigger", 0644, file->dir, file,
8867                                   &event_trigger_fops);
8868         tr->trace_marker_file = file;
8869
8870         trace_create_file("trace_marker_raw", 0220, d_tracer,
8871                           tr, &tracing_mark_raw_fops);
8872
8873         trace_create_file("trace_clock", 0644, d_tracer, tr,
8874                           &trace_clock_fops);
8875
8876         trace_create_file("tracing_on", 0644, d_tracer,
8877                           tr, &rb_simple_fops);
8878
8879         trace_create_file("timestamp_mode", 0444, d_tracer, tr,
8880                           &trace_time_stamp_mode_fops);
8881
8882         tr->buffer_percent = 50;
8883
8884         trace_create_file("buffer_percent", 0444, d_tracer,
8885                         tr, &buffer_percent_fops);
8886
8887         create_trace_options_dir(tr);
8888
8889 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
8890         trace_create_maxlat_file(tr, d_tracer);
8891 #endif
8892
8893         if (ftrace_create_function_files(tr, d_tracer))
8894                 MEM_FAIL(1, "Could not allocate function filter files");
8895
8896 #ifdef CONFIG_TRACER_SNAPSHOT
8897         trace_create_file("snapshot", 0644, d_tracer,
8898                           tr, &snapshot_fops);
8899 #endif
8900
8901         trace_create_file("error_log", 0644, d_tracer,
8902                           tr, &tracing_err_log_fops);
8903
8904         for_each_tracing_cpu(cpu)
8905                 tracing_init_tracefs_percpu(tr, cpu);
8906
8907         ftrace_init_tracefs(tr, d_tracer);
8908 }
8909
8910 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
8911 {
8912         struct vfsmount *mnt;
8913         struct file_system_type *type;
8914
8915         /*
8916          * To maintain backward compatibility for tools that mount
8917          * debugfs to get to the tracing facility, tracefs is automatically
8918          * mounted to the debugfs/tracing directory.
8919          */
8920         type = get_fs_type("tracefs");
8921         if (!type)
8922                 return NULL;
8923         mnt = vfs_submount(mntpt, type, "tracefs", NULL);
8924         put_filesystem(type);
8925         if (IS_ERR(mnt))
8926                 return NULL;
8927         mntget(mnt);
8928
8929         return mnt;
8930 }
8931
8932 /**
8933  * tracing_init_dentry - initialize top level trace array
8934  *
8935  * This is called when creating files or directories in the tracing
8936  * directory. It is called via fs_initcall() by any of the boot up code
8937  * and expects to return the dentry of the top level tracing directory.
8938  */
8939 struct dentry *tracing_init_dentry(void)
8940 {
8941         struct trace_array *tr = &global_trace;
8942
8943         if (security_locked_down(LOCKDOWN_TRACEFS)) {
8944                 pr_warn("Tracing disabled due to lockdown\n");
8945                 return ERR_PTR(-EPERM);
8946         }
8947
8948         /* The top level trace array uses NULL as parent */
8949         if (tr->dir)
8950                 return NULL;
8951
8952         if (WARN_ON(!tracefs_initialized()) ||
8953                 (IS_ENABLED(CONFIG_DEBUG_FS) &&
8954                  WARN_ON(!debugfs_initialized())))
8955                 return ERR_PTR(-ENODEV);
8956
8957         /*
8958          * As there may still be users that expect the tracing
8959          * files to exist in debugfs/tracing, we must automount
8960          * the tracefs file system there, so older tools still
8961          * work with the newer kernel.
8962          */
8963         tr->dir = debugfs_create_automount("tracing", NULL,
8964                                            trace_automount, NULL);
8965
8966         return NULL;
8967 }
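
/*
 * In practice this means the same files end up reachable both at the
 * tracefs mount point and through the debugfs automount, e.g. (the
 * mount points below are the conventional defaults, not guaranteed):
 *
 *   /sys/kernel/tracing/trace
 *   /sys/kernel/debug/tracing/trace
 */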
8968
8969 extern struct trace_eval_map *__start_ftrace_eval_maps[];
8970 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
8971
8972 static void __init trace_eval_init(void)
8973 {
8974         int len;
8975
8976         len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
8977         trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
8978 }
8979
8980 #ifdef CONFIG_MODULES
8981 static void trace_module_add_evals(struct module *mod)
8982 {
8983         if (!mod->num_trace_evals)
8984                 return;
8985
8986         /*
8987          * Modules with bad taint do not have events created; do
8988          * not bother with enums either.
8989          */
8990         if (trace_module_has_bad_taint(mod))
8991                 return;
8992
8993         trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
8994 }
8995
8996 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
8997 static void trace_module_remove_evals(struct module *mod)
8998 {
8999         union trace_eval_map_item *map;
9000         union trace_eval_map_item **last = &trace_eval_maps;
9001
9002         if (!mod->num_trace_evals)
9003                 return;
9004
9005         mutex_lock(&trace_eval_mutex);
9006
9007         map = trace_eval_maps;
9008
9009         while (map) {
9010                 if (map->head.mod == mod)
9011                         break;
9012                 map = trace_eval_jmp_to_tail(map);
9013                 last = &map->tail.next;
9014                 map = map->tail.next;
9015         }
9016         if (!map)
9017                 goto out;
9018
9019         *last = trace_eval_jmp_to_tail(map)->tail.next;
9020         kfree(map);
9021  out:
9022         mutex_unlock(&trace_eval_mutex);
9023 }
9024 #else
9025 static inline void trace_module_remove_evals(struct module *mod) { }
9026 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9027
9028 static int trace_module_notify(struct notifier_block *self,
9029                                unsigned long val, void *data)
9030 {
9031         struct module *mod = data;
9032
9033         switch (val) {
9034         case MODULE_STATE_COMING:
9035                 trace_module_add_evals(mod);
9036                 break;
9037         case MODULE_STATE_GOING:
9038                 trace_module_remove_evals(mod);
9039                 break;
9040         }
9041
9042         return 0;
9043 }
9044
9045 static struct notifier_block trace_module_nb = {
9046         .notifier_call = trace_module_notify,
9047         .priority = 0,
9048 };
9049 #endif /* CONFIG_MODULES */
9050
9051 static __init int tracer_init_tracefs(void)
9052 {
9053         struct dentry *d_tracer;
9054
9055         trace_access_lock_init();
9056
9057         d_tracer = tracing_init_dentry();
9058         if (IS_ERR(d_tracer))
9059                 return 0;
9060
9061         event_trace_init();
9062
9063         init_tracer_tracefs(&global_trace, d_tracer);
9064         ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
9065
9066         trace_create_file("tracing_thresh", 0644, d_tracer,
9067                         &global_trace, &tracing_thresh_fops);
9068
9069         trace_create_file("README", 0444, d_tracer,
9070                         NULL, &tracing_readme_fops);
9071
9072         trace_create_file("saved_cmdlines", 0444, d_tracer,
9073                         NULL, &tracing_saved_cmdlines_fops);
9074
9075         trace_create_file("saved_cmdlines_size", 0644, d_tracer,
9076                           NULL, &tracing_saved_cmdlines_size_fops);
9077
9078         trace_create_file("saved_tgids", 0444, d_tracer,
9079                         NULL, &tracing_saved_tgids_fops);
9080
9081         trace_eval_init();
9082
9083         trace_create_eval_file(d_tracer);
9084
9085 #ifdef CONFIG_MODULES
9086         register_module_notifier(&trace_module_nb);
9087 #endif
9088
9089 #ifdef CONFIG_DYNAMIC_FTRACE
9090         trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
9091                         NULL, &tracing_dyn_info_fops);
9092 #endif
9093
9094         create_trace_instances(d_tracer);
9095
9096         update_tracer_options(&global_trace);
9097
9098         return 0;
9099 }
9100
9101 static int trace_panic_handler(struct notifier_block *this,
9102                                unsigned long event, void *unused)
9103 {
9104         if (ftrace_dump_on_oops)
9105                 ftrace_dump(ftrace_dump_on_oops);
9106         return NOTIFY_OK;
9107 }
9108
9109 static struct notifier_block trace_panic_notifier = {
9110         .notifier_call  = trace_panic_handler,
9111         .next           = NULL,
9112         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
9113 };
9114
9115 static int trace_die_handler(struct notifier_block *self,
9116                              unsigned long val,
9117                              void *data)
9118 {
9119         switch (val) {
9120         case DIE_OOPS:
9121                 if (ftrace_dump_on_oops)
9122                         ftrace_dump(ftrace_dump_on_oops);
9123                 break;
9124         default:
9125                 break;
9126         }
9127         return NOTIFY_OK;
9128 }
9129
9130 static struct notifier_block trace_die_notifier = {
9131         .notifier_call = trace_die_handler,
9132         .priority = 200
9133 };
9134
9135 /*
9136  * printk is set to a max of 1024; we really don't need it that big.
9137  * Nothing should be printing 1000 characters anyway.
9138  */
9139 #define TRACE_MAX_PRINT         1000
9140
9141 /*
9142  * Define here KERN_TRACE so that we have one place to modify
9143  * it if we decide to change what log level the ftrace dump
9144  * should be at.
9145  */
9146 #define KERN_TRACE              KERN_EMERG
9147
9148 void
9149 trace_printk_seq(struct trace_seq *s)
9150 {
9151         /* Probably should print a warning here. */
9152         if (s->seq.len >= TRACE_MAX_PRINT)
9153                 s->seq.len = TRACE_MAX_PRINT;
9154
9155         /*
9156          * More paranoid code. Although the buffer size is set to
9157          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
9158          * an extra layer of protection.
9159          */
9160         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
9161                 s->seq.len = s->seq.size - 1;
9162
9163         /* Should already be NUL terminated, but we are paranoid. */
9164         s->buffer[s->seq.len] = 0;
9165
9166         printk(KERN_TRACE "%s", s->buffer);
9167
9168         trace_seq_init(s);
9169 }
9170
9171 void trace_init_global_iter(struct trace_iterator *iter)
9172 {
9173         iter->tr = &global_trace;
9174         iter->trace = iter->tr->current_trace;
9175         iter->cpu_file = RING_BUFFER_ALL_CPUS;
9176         iter->array_buffer = &global_trace.array_buffer;
9177
9178         if (iter->trace && iter->trace->open)
9179                 iter->trace->open(iter);
9180
9181         /* Annotate start of buffers if we had overruns */
9182         if (ring_buffer_overruns(iter->array_buffer->buffer))
9183                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
9184
9185         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
9186         if (trace_clocks[iter->tr->clock_id].in_ns)
9187                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
9188 }
9189
9190 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
9191 {
9192         /* use static because iter can be a bit big for the stack */
9193         static struct trace_iterator iter;
9194         static atomic_t dump_running;
9195         struct trace_array *tr = &global_trace;
9196         unsigned int old_userobj;
9197         unsigned long flags;
9198         int cnt = 0, cpu;
9199
9200         /* Only allow one dump user at a time. */
9201         if (atomic_inc_return(&dump_running) != 1) {
9202                 atomic_dec(&dump_running);
9203                 return;
9204         }
9205
9206         /*
9207          * Always turn off tracing when we dump.
9208          * We don't need to show trace output of what happens
9209          * between multiple crashes.
9210          *
9211          * If the user does a sysrq-z, then they can re-enable
9212          * tracing with echo 1 > tracing_on.
9213          */
9214         tracing_off();
9215
9216         local_irq_save(flags);
9217         printk_nmi_direct_enter();
9218
9219         /* Simulate the iterator */
9220         trace_init_global_iter(&iter);
9221         /* Can not use kmalloc for iter.temp */
9222         iter.temp = static_temp_buf;
9223         iter.temp_size = STATIC_TEMP_BUF_SIZE;
9224
9225         for_each_tracing_cpu(cpu) {
9226                 atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9227         }
9228
9229         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
9230
9231         /* don't look at user memory in panic mode */
9232         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
9233
9234         switch (oops_dump_mode) {
9235         case DUMP_ALL:
9236                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9237                 break;
9238         case DUMP_ORIG:
9239                 iter.cpu_file = raw_smp_processor_id();
9240                 break;
9241         case DUMP_NONE:
9242                 goto out_enable;
9243         default:
9244                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
9245                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9246         }
9247
9248         printk(KERN_TRACE "Dumping ftrace buffer:\n");
9249
9250         /* Did function tracer already get disabled? */
9251         if (ftrace_is_dead()) {
9252                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
9253                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
9254         }
9255
9256         /*
9257          * We need to stop all tracing on all CPUs to read
9258          * the next buffer. This is a bit expensive, but is
9259          * not done often. We fill all that we can read,
9260          * and then release the locks again.
9261          */
9262
9263         while (!trace_empty(&iter)) {
9264
9265                 if (!cnt)
9266                         printk(KERN_TRACE "---------------------------------\n");
9267
9268                 cnt++;
9269
9270                 trace_iterator_reset(&iter);
9271                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
9272
9273                 if (trace_find_next_entry_inc(&iter) != NULL) {
9274                         int ret;
9275
9276                         ret = print_trace_line(&iter);
9277                         if (ret != TRACE_TYPE_NO_CONSUME)
9278                                 trace_consume(&iter);
9279                 }
9280                 touch_nmi_watchdog();
9281
9282                 trace_printk_seq(&iter.seq);
9283         }
9284
9285         if (!cnt)
9286                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
9287         else
9288                 printk(KERN_TRACE "---------------------------------\n");
9289
9290  out_enable:
9291         tr->trace_flags |= old_userobj;
9292
9293         for_each_tracing_cpu(cpu) {
9294                 atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9295         }
9296         atomic_dec(&dump_running);
9297         printk_nmi_direct_exit();
9298         local_irq_restore(flags);
9299 }
9300 EXPORT_SYMBOL_GPL(ftrace_dump);
9301
9302 int trace_run_command(const char *buf, int (*createfn)(int, char **))
9303 {
9304         char **argv;
9305         int argc, ret;
9306
9307         argc = 0;
9308         ret = 0;
9309         argv = argv_split(GFP_KERNEL, buf, &argc);
9310         if (!argv)
9311                 return -ENOMEM;
9312
9313         if (argc)
9314                 ret = createfn(argc, argv);
9315
9316         argv_free(argv);
9317
9318         return ret;
9319 }
9320
9321 #define WRITE_BUFSIZE  4096
9322
9323 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
9324                                 size_t count, loff_t *ppos,
9325                                 int (*createfn)(int, char **))
9326 {
9327         char *kbuf, *buf, *tmp;
9328         int ret = 0;
9329         size_t done = 0;
9330         size_t size;
9331
9332         kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
9333         if (!kbuf)
9334                 return -ENOMEM;
9335
9336         while (done < count) {
9337                 size = count - done;
9338
9339                 if (size >= WRITE_BUFSIZE)
9340                         size = WRITE_BUFSIZE - 1;
9341
9342                 if (copy_from_user(kbuf, buffer + done, size)) {
9343                         ret = -EFAULT;
9344                         goto out;
9345                 }
9346                 kbuf[size] = '\0';
9347                 buf = kbuf;
9348                 do {
9349                         tmp = strchr(buf, '\n');
9350                         if (tmp) {
9351                                 *tmp = '\0';
9352                                 size = tmp - buf + 1;
9353                         } else {
9354                                 size = strlen(buf);
9355                                 if (done + size < count) {
9356                                         if (buf != kbuf)
9357                                                 break;
9358                                         /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
9359                                         pr_warn("Line length is too long: Should be less than %d\n",
9360                                                 WRITE_BUFSIZE - 2);
9361                                         ret = -EINVAL;
9362                                         goto out;
9363                                 }
9364                         }
9365                         done += size;
9366
9367                         /* Remove comments */
9368                         tmp = strchr(buf, '#');
9369
9370                         if (tmp)
9371                                 *tmp = '\0';
9372
9373                         ret = trace_run_command(buf, createfn);
9374                         if (ret)
9375                                 goto out;
9376                         buf += size;
9377
9378                 } while (done < count);
9379         }
9380         ret = done;
9381
9382 out:
9383         kfree(kbuf);
9384
9385         return ret;
9386 }
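
/*
 * Input format accepted above: one command per line, anything after a
 * '#' on a line is stripped as a comment, and a single line must fit
 * in WRITE_BUFSIZE - 2 bytes. What the commands themselves look like
 * depends on the createfn passed in; the kprobe_events syntax below is
 * only an illustration:
 *
 *   # probe definitions
 *   p:myprobe do_sys_open
 *   r:myretprobe do_sys_open $retval
 */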
9387
9388 __init static int tracer_alloc_buffers(void)
9389 {
9390         int ring_buf_size;
9391         int ret = -ENOMEM;
9392
9393
9394         if (security_locked_down(LOCKDOWN_TRACEFS)) {
9395                 pr_warn("Tracing disabled due to lockdown\n");
9396                 return -EPERM;
9397         }
9398
9399         /*
9400          * Make sure we don't accidentally add more trace options
9401          * than we have bits for.
9402          */
9403         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
9404
9405         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
9406                 goto out;
9407
9408         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
9409                 goto out_free_buffer_mask;
9410
9411         /* Only allocate trace_printk buffers if a trace_printk exists */
9412         if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
9413                 /* Must be called before global_trace.buffer is allocated */
9414                 trace_printk_init_buffers();
9415
9416         /* To save memory, keep the ring buffer size to its minimum */
9417         if (ring_buffer_expanded)
9418                 ring_buf_size = trace_buf_size;
9419         else
9420                 ring_buf_size = 1;
9421
9422         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
9423         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
9424
9425         raw_spin_lock_init(&global_trace.start_lock);
9426
9427         /*
9428          * The prepare callback allocates some memory for the ring buffer. We
9429          * don't free the buffer if the CPU goes down. If we were to free
9430          * the buffer, then the user would lose any trace that was in the
9431          * buffer. The memory will be removed once the "instance" is removed.
9432          */
9433         ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
9434                                       "trace/RB:prepare", trace_rb_cpu_prepare,
9435                                       NULL);
9436         if (ret < 0)
9437                 goto out_free_cpumask;
9438         /* Used for event triggers */
9439         ret = -ENOMEM;
9440         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
9441         if (!temp_buffer)
9442                 goto out_rm_hp_state;
9443
9444         if (trace_create_savedcmd() < 0)
9445                 goto out_free_temp_buffer;
9446
9447         /* TODO: make the number of buffers hot pluggable with CPUs */
9448         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
9449                 MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
9450                 goto out_free_savedcmd;
9451         }
9452
9453         if (global_trace.buffer_disabled)
9454                 tracing_off();
9455
9456         if (trace_boot_clock) {
9457                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
9458                 if (ret < 0)
9459                         pr_warn("Trace clock %s not defined, going back to default\n",
9460                                 trace_boot_clock);
9461         }
9462
9463         /*
9464          * register_tracer() might reference current_trace, so it
9465          * needs to be set before we register anything. This is
9466          * just a bootstrap of current_trace anyway.
9467          */
9468         global_trace.current_trace = &nop_trace;
9469
9470         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9471
9472         ftrace_init_global_array_ops(&global_trace);
9473
9474         init_trace_flags_index(&global_trace);
9475
9476         register_tracer(&nop_trace);
9477
9478         /* Function tracing may start here (via kernel command line) */
9479         init_function_trace();
9480
9481         /* All seems OK, enable tracing */
9482         tracing_disabled = 0;
9483
9484         atomic_notifier_chain_register(&panic_notifier_list,
9485                                        &trace_panic_notifier);
9486
9487         register_die_notifier(&trace_die_notifier);
9488
9489         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
9490
9491         INIT_LIST_HEAD(&global_trace.systems);
9492         INIT_LIST_HEAD(&global_trace.events);
9493         INIT_LIST_HEAD(&global_trace.hist_vars);
9494         INIT_LIST_HEAD(&global_trace.err_log);
9495         list_add(&global_trace.list, &ftrace_trace_arrays);
9496
9497         apply_trace_boot_options();
9498
9499         register_snapshot_cmd();
9500
9501         return 0;
9502
9503 out_free_savedcmd:
9504         free_saved_cmdlines_buffer(savedcmd);
9505 out_free_temp_buffer:
9506         ring_buffer_free(temp_buffer);
9507 out_rm_hp_state:
9508         cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
9509 out_free_cpumask:
9510         free_cpumask_var(global_trace.tracing_cpumask);
9511 out_free_buffer_mask:
9512         free_cpumask_var(tracing_buffer_mask);
9513 out:
9514         return ret;
9515 }
9516
9517 void __init early_trace_init(void)
9518 {
9519         if (tracepoint_printk) {
9520                 tracepoint_print_iter =
9521                         kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
9522                 if (MEM_FAIL(!tracepoint_print_iter,
9523                              "Failed to allocate trace iterator\n"))
9524                         tracepoint_printk = 0;
9525                 else
9526                         static_key_enable(&tracepoint_printk_key.key);
9527         }
9528         tracer_alloc_buffers();
9529 }
9530
9531 void __init trace_init(void)
9532 {
9533         trace_event_init();
9534 }
9535
9536 __init static int clear_boot_tracer(void)
9537 {
9538         /*
9539          * The default bootup tracer name points into an init section
9540          * that is freed after boot. This function is called at late
9541          * initcall time. If the boot tracer was never registered,
9542          * clear the pointer to prevent a later registration from
9543          * accessing memory that is about to be freed.
9544          */
9545         if (!default_bootup_tracer)
9546                 return 0;
9547
9548         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
9549                default_bootup_tracer);
9550         default_bootup_tracer = NULL;
9551
9552         return 0;
9553 }
9554
9555 fs_initcall(tracer_init_tracefs);
9556 late_initcall_sync(clear_boot_tracer);
9557
9558 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
9559 __init static int tracing_set_default_clock(void)
9560 {
9561         /* sched_clock_stable() is determined in late_initcall */
9562         if (!trace_boot_clock && !sched_clock_stable()) {
9563                 if (security_locked_down(LOCKDOWN_TRACEFS)) {
9564                         pr_warn("Can not set tracing clock due to lockdown\n");
9565                         return -EPERM;
9566                 }
9567
9568                 printk(KERN_WARNING
9569                        "Unstable clock detected, switching default tracing clock to \"global\"\n"
9570                        "If you want to keep using the local clock, then add:\n"
9571                        "  \"trace_clock=local\"\n"
9572                        "on the kernel command line\n");
9573                 tracing_set_clock(&global_trace, "global");
9574         }
9575
9576         return 0;
9577 }
9578 late_initcall_sync(tracing_set_default_clock);
9579 #endif