kernel/trace/trace.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/notifier.h>
23 #include <linux/irqflags.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/poll.h>
43 #include <linux/nmi.h>
44 #include <linux/fs.h>
45 #include <linux/trace.h>
46 #include <linux/sched/clock.h>
47 #include <linux/sched/rt.h>
48 #include <linux/fsnotify.h>
49 #include <linux/irq_work.h>
50 #include <linux/workqueue.h>
51
52 #include "trace.h"
53 #include "trace_output.h"
54
55 /*
56  * On boot up, the ring buffer is set to the minimum size, so that
57  * we do not waste memory on systems that are not using tracing.
58  */
59 bool ring_buffer_expanded;
60
61 /*
62  * We need to change this state when a selftest is running.
63  * A selftest will look into the ring-buffer to count the
64  * entries inserted during the selftest, although concurrent
65  * insertions into the ring-buffer, such as from trace_printk, could occur
66  * at the same time, giving false positive or negative results.
67  */
68 static bool __read_mostly tracing_selftest_running;
69
70 /*
71  * If a tracer is running, we do not want to run SELFTEST.
72  */
73 bool __read_mostly tracing_selftest_disabled;
74
75 /* Pipe tracepoints to printk */
76 struct trace_iterator *tracepoint_print_iter;
77 int tracepoint_printk;
78 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
79
80 /* For tracers that don't implement custom flags */
81 static struct tracer_opt dummy_tracer_opt[] = {
82         { }
83 };
84
85 static int
86 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
87 {
88         return 0;
89 }
90
91 /*
92  * To prevent the comm cache from being overwritten when no
93  * tracing is active, only save the comm when a trace event
94  * occurred.
95  * occurs.
96 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
97
98 /*
99  * Kill all tracing for good (never come back).
100  * It is initialized to 1, and is set back to zero only if the
101  * initialization of the tracer is successful. That is the only
102  * place that ever clears it.
103  */
104 static int tracing_disabled = 1;
105
106 cpumask_var_t __read_mostly     tracing_buffer_mask;
107
108 /*
109  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
110  *
111  * If there is an oops (or kernel panic) and ftrace_dump_on_oops
112  * is set, then ftrace_dump is called. This will output the contents
113  * of the ftrace buffers to the console.  This is very useful for
114  * capturing traces that lead to crashes and outputting them to a
115  * serial console.
116  *
117  * It is off by default, but you can enable it either by specifying
118  * "ftrace_dump_on_oops" on the kernel command line, or by setting
119  * /proc/sys/kernel/ftrace_dump_on_oops
120  * Set it to 1 to dump the buffers of all CPUs
121  * Set it to 2 to dump only the buffer of the CPU that triggered the oops
122  */
123
124 enum ftrace_dump_mode ftrace_dump_on_oops;
125
126 /* When set, tracing will stop when a WARN*() is hit */
127 int __disable_trace_on_warning;
128
129 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
130 /* Map of enums to their values, for "eval_map" file */
131 struct trace_eval_map_head {
132         struct module                   *mod;
133         unsigned long                   length;
134 };
135
136 union trace_eval_map_item;
137
138 struct trace_eval_map_tail {
139         /*
140          * "end" is first and points to NULL as it must be different
141          * from "mod" or "eval_string"
142          */
143         union trace_eval_map_item       *next;
144         const char                      *end;   /* points to NULL */
145 };
146
147 static DEFINE_MUTEX(trace_eval_mutex);
148
149 /*
150  * The trace_eval_maps are saved in an array with two extra elements,
151  * one at the beginning, and one at the end. The beginning item contains
152  * the count of the saved maps (head.length), and the module they
153  * belong to if not built in (head.mod). The ending item contains a
154  * pointer to the next array of saved eval_map items.
155  */
156 union trace_eval_map_item {
157         struct trace_eval_map           map;
158         struct trace_eval_map_head      head;
159         struct trace_eval_map_tail      tail;
160 };
161
162 static union trace_eval_map_item *trace_eval_maps;
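
/*
 * Illustrative sketch of the layout described above (hedged; this walker is
 * not part of this file's code and the function name is made up).  Each
 * saved block is laid out as: one head item, head.length map items, then
 * one tail item whose tail.next points at the next saved block (or NULL).
 */
static void __maybe_unused eval_map_layout_example(void)
{
	union trace_eval_map_item *ptr = trace_eval_maps;

	while (ptr) {
		unsigned long nr = ptr->head.length;
		union trace_eval_map_item *map = ptr + 1;	/* first saved map */

		while (nr--)
			map++;				/* skip the saved maps */

		ptr = map->tail.next;			/* the tail follows the maps */
	}
}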
163 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
164
165 int tracing_set_tracer(struct trace_array *tr, const char *buf);
166 static void ftrace_trace_userstack(struct trace_buffer *buffer,
167                                    unsigned long flags, int pc);
168
169 #define MAX_TRACER_SIZE         100
170 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
171 static char *default_bootup_tracer;
172
173 static bool allocate_snapshot;
174
175 static int __init set_cmdline_ftrace(char *str)
176 {
177         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
178         default_bootup_tracer = bootup_tracer_buf;
179         /* We are using ftrace early, expand it */
180         ring_buffer_expanded = true;
181         return 1;
182 }
183 __setup("ftrace=", set_cmdline_ftrace);
184
185 static int __init set_ftrace_dump_on_oops(char *str)
186 {
187         if (*str++ != '=' || !*str) {
188                 ftrace_dump_on_oops = DUMP_ALL;
189                 return 1;
190         }
191
192         if (!strcmp("orig_cpu", str)) {
193                 ftrace_dump_on_oops = DUMP_ORIG;
194                 return 1;
195         }
196
197         return 0;
198 }
199 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
200
201 static int __init stop_trace_on_warning(char *str)
202 {
203         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
204                 __disable_trace_on_warning = 1;
205         return 1;
206 }
207 __setup("traceoff_on_warning", stop_trace_on_warning);
208
209 static int __init boot_alloc_snapshot(char *str)
210 {
211         allocate_snapshot = true;
212         /* We also need the main ring buffer expanded */
213         ring_buffer_expanded = true;
214         return 1;
215 }
216 __setup("alloc_snapshot", boot_alloc_snapshot);
217
218
219 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
220
221 static int __init set_trace_boot_options(char *str)
222 {
223         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
224         return 0;
225 }
226 __setup("trace_options=", set_trace_boot_options);
227
228 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
229 static char *trace_boot_clock __initdata;
230
231 static int __init set_trace_boot_clock(char *str)
232 {
233         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
234         trace_boot_clock = trace_boot_clock_buf;
235         return 0;
236 }
237 __setup("trace_clock=", set_trace_boot_clock);
238
239 static int __init set_tracepoint_printk(char *str)
240 {
241         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
242                 tracepoint_printk = 1;
243         return 1;
244 }
245 __setup("tp_printk", set_tracepoint_printk);
246
247 unsigned long long ns2usecs(u64 nsec)
248 {
249         nsec += 500;
250         do_div(nsec, 1000);
251         return nsec;
252 }
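
/*
 * Worked example of the rounding above (illustrative): 1499 ns becomes
 * (1499 + 500) / 1000 = 1 us, while 1500 ns becomes (1500 + 500) / 1000 = 2 us,
 * i.e. the conversion rounds to the nearest microsecond.
 */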
253
254 /* trace_flags holds trace_options default values */
255 #define TRACE_DEFAULT_FLAGS                                             \
256         (FUNCTION_DEFAULT_FLAGS |                                       \
257          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
258          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
259          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
260          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
261
262 /* trace_options that are only supported by global_trace */
263 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
264                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
265
266 /* trace_flags that are default zero for instances */
267 #define ZEROED_TRACE_FLAGS \
268         (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
269
270 /*
271  * The global_trace is the descriptor that holds the top-level tracing
272  * buffers for the live tracing.
273  */
274 static struct trace_array global_trace = {
275         .trace_flags = TRACE_DEFAULT_FLAGS,
276 };
277
278 LIST_HEAD(ftrace_trace_arrays);
279
280 int trace_array_get(struct trace_array *this_tr)
281 {
282         struct trace_array *tr;
283         int ret = -ENODEV;
284
285         mutex_lock(&trace_types_lock);
286         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
287                 if (tr == this_tr) {
288                         tr->ref++;
289                         ret = 0;
290                         break;
291                 }
292         }
293         mutex_unlock(&trace_types_lock);
294
295         return ret;
296 }
297
298 static void __trace_array_put(struct trace_array *this_tr)
299 {
300         WARN_ON(!this_tr->ref);
301         this_tr->ref--;
302 }
303
304 /**
305  * trace_array_put - Decrement the reference counter for this trace array.
306  *
307  * NOTE: Use this when we no longer need the trace array returned by
308  * trace_array_get_by_name(). This ensures the trace array can be later
309  * destroyed.
310  *
311  */
312 void trace_array_put(struct trace_array *this_tr)
313 {
314         if (!this_tr)
315                 return;
316
317         mutex_lock(&trace_types_lock);
318         __trace_array_put(this_tr);
319         mutex_unlock(&trace_types_lock);
320 }
321 EXPORT_SYMBOL_GPL(trace_array_put);
322
323 int tracing_check_open_get_tr(struct trace_array *tr)
324 {
325         int ret;
326
327         ret = security_locked_down(LOCKDOWN_TRACEFS);
328         if (ret)
329                 return ret;
330
331         if (tracing_disabled)
332                 return -ENODEV;
333
334         if (tr && trace_array_get(tr) < 0)
335                 return -ENODEV;
336
337         return 0;
338 }
339
340 int call_filter_check_discard(struct trace_event_call *call, void *rec,
341                               struct trace_buffer *buffer,
342                               struct ring_buffer_event *event)
343 {
344         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
345             !filter_match_preds(call->filter, rec)) {
346                 __trace_event_discard_commit(buffer, event);
347                 return 1;
348         }
349
350         return 0;
351 }
352
353 void trace_free_pid_list(struct trace_pid_list *pid_list)
354 {
355         vfree(pid_list->pids);
356         kfree(pid_list);
357 }
358
359 /**
360  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
361  * @filtered_pids: The list of pids to check
362  * @search_pid: The PID to find in @filtered_pids
363  *
364  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
365  */
366 bool
367 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
368 {
369         /*
370          * If pid_max changed after filtered_pids was created, we
371          * by default ignore all pids greater than the previous pid_max.
372          */
373         if (search_pid >= filtered_pids->pid_max)
374                 return false;
375
376         return test_bit(search_pid, filtered_pids->pids);
377 }
378
379 /**
380  * trace_ignore_this_task - should a task be ignored for tracing
381  * @filtered_pids: The list of pids to check
382  * @task: The task that should be ignored if not filtered
383  *
384  * Checks if @task should be traced or not from @filtered_pids.
385  * Returns true if @task should *NOT* be traced.
386  * Returns false if @task should be traced.
387  */
388 bool
389 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
390                        struct trace_pid_list *filtered_no_pids,
391                        struct task_struct *task)
392 {
393         /*
394          * If filtered_no_pids is not empty, and the task's pid is listed
395          * in filtered_no_pids, then return true.
396          * Otherwise, if filtered_pids is empty, that means we can
397          * trace all tasks. If it has content, then only trace pids
398          * within filtered_pids.
399          */
400
401         return (filtered_pids &&
402                 !trace_find_filtered_pid(filtered_pids, task->pid)) ||
403                 (filtered_no_pids &&
404                  trace_find_filtered_pid(filtered_no_pids, task->pid));
405 }
406
407 /**
408  * trace_filter_add_remove_task - Add or remove a task from a pid_list
409  * @pid_list: The list to modify
410  * @self: The current task for fork or NULL for exit
411  * @task: The task to add or remove
412  *
413  * If adding a task, if @self is defined, the task is only added if @self
414  * is also included in @pid_list. This happens on fork and tasks should
415  * only be added when the parent is listed. If @self is NULL, then the
416  * @task pid will be removed from the list, which would happen on exit
417  * of a task.
418  */
419 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
420                                   struct task_struct *self,
421                                   struct task_struct *task)
422 {
423         if (!pid_list)
424                 return;
425
426         /* For forks, we only add if the forking task is listed */
427         if (self) {
428                 if (!trace_find_filtered_pid(pid_list, self->pid))
429                         return;
430         }
431
432         /* Sorry, but we don't support pid_max changing after setting */
433         if (task->pid >= pid_list->pid_max)
434                 return;
435
436         /* "self" is set for forks, and NULL for exits */
437         if (self)
438                 set_bit(task->pid, pid_list->pids);
439         else
440                 clear_bit(task->pid, pid_list->pids);
441 }
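
/*
 * Illustrative sketch of how the helper above is typically wired up (hedged;
 * these probe functions are hypothetical, not part of this file): on fork the
 * parent is passed as @self, so a child is only added when its parent is
 * already in the list; on exit NULL is passed, so the pid is cleared.
 */
static void __maybe_unused example_sched_fork_probe(void *data,
						    struct task_struct *self,
						    struct task_struct *task)
{
	struct trace_pid_list *pid_list = data;

	trace_filter_add_remove_task(pid_list, self, task);
}

static void __maybe_unused example_sched_exit_probe(void *data,
						    struct task_struct *task)
{
	struct trace_pid_list *pid_list = data;

	trace_filter_add_remove_task(pid_list, NULL, task);
}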
442
443 /**
444  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
445  * @pid_list: The pid list to show
446  * @v: The last pid that was shown (the actual pid plus 1, so that zero can be displayed)
447  * @pos: The position of the file
448  *
449  * This is used by the seq_file "next" operation to iterate the pids
450  * listed in a trace_pid_list structure.
451  *
452  * Returns the pid+1 as we want to display pid of zero, but NULL would
453  * stop the iteration.
454  */
455 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
456 {
457         unsigned long pid = (unsigned long)v;
458
459         (*pos)++;
460
461         /* pid is already +1 of the actual previous bit */
462         pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
463
464         /* Return pid + 1 to allow zero to be represented */
465         if (pid < pid_list->pid_max)
466                 return (void *)(pid + 1);
467
468         return NULL;
469 }
470
471 /**
472  * trace_pid_start - Used for seq_file to start reading pid lists
473  * @pid_list: The pid list to show
474  * @pos: The position of the file
475  *
476  * This is used by seq_file "start" operation to start the iteration
477  * of listing pids.
478  *
479  * Returns the pid+1 as we want to display pid of zero, but NULL would
480  * stop the iteration.
481  */
482 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
483 {
484         unsigned long pid;
485         loff_t l = 0;
486
487         pid = find_first_bit(pid_list->pids, pid_list->pid_max);
488         if (pid >= pid_list->pid_max)
489                 return NULL;
490
491         /* Return pid + 1 so that zero can be the exit value */
492         for (pid++; pid && l < *pos;
493              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
494                 ;
495         return (void *)pid;
496 }
497
498 /**
499  * trace_pid_show - show the current pid in seq_file processing
500  * @m: The seq_file structure to write into
501  * @v: A void pointer of the pid (+1) value to display
502  *
503  * Can be directly used by seq_file operations to display the current
504  * pid value.
505  */
506 int trace_pid_show(struct seq_file *m, void *v)
507 {
508         unsigned long pid = (unsigned long)v - 1;
509
510         seq_printf(m, "%lu\n", pid);
511         return 0;
512 }
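
/*
 * Illustrative sketch (hedged; these names are hypothetical): the three
 * helpers above are meant to back a seq_file interface.  Note the +1
 * convention: start/next hand out "pid + 1" so that pid 0 is not confused
 * with the NULL that ends the iteration, and trace_pid_show() subtracts the
 * 1 again before printing.
 */
static void *example_pids_start(struct seq_file *m, loff_t *pos)
{
	struct trace_pid_list *pid_list = m->private;

	return trace_pid_start(pid_list, pos);
}

static void *example_pids_next(struct seq_file *m, void *v, loff_t *pos)
{
	struct trace_pid_list *pid_list = m->private;

	return trace_pid_next(pid_list, v, pos);
}

static void example_pids_stop(struct seq_file *m, void *v)
{
}

static const struct seq_operations example_pids_seq_ops __maybe_unused = {
	.start	= example_pids_start,
	.next	= example_pids_next,
	.stop	= example_pids_stop,
	.show	= trace_pid_show,
};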
513
514 /* 128 should be much more than enough */
515 #define PID_BUF_SIZE            127
516
517 int trace_pid_write(struct trace_pid_list *filtered_pids,
518                     struct trace_pid_list **new_pid_list,
519                     const char __user *ubuf, size_t cnt)
520 {
521         struct trace_pid_list *pid_list;
522         struct trace_parser parser;
523         unsigned long val;
524         int nr_pids = 0;
525         ssize_t read = 0;
526         ssize_t ret = 0;
527         loff_t pos;
528         pid_t pid;
529
530         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
531                 return -ENOMEM;
532
533         /*
534          * Always recreate a new array. The write is an all-or-nothing
535          * operation: a new array is always built when the user adds pids,
536          * and if the operation fails, the current list is left
537          * unmodified.
538          */
539         pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
540         if (!pid_list) {
541                 trace_parser_put(&parser);
542                 return -ENOMEM;
543         }
544
545         pid_list->pid_max = READ_ONCE(pid_max);
546
547         /* Only truncating will shrink pid_max */
548         if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
549                 pid_list->pid_max = filtered_pids->pid_max;
550
551         pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
552         if (!pid_list->pids) {
553                 trace_parser_put(&parser);
554                 kfree(pid_list);
555                 return -ENOMEM;
556         }
557
558         if (filtered_pids) {
559                 /* copy the current bits to the new max */
560                 for_each_set_bit(pid, filtered_pids->pids,
561                                  filtered_pids->pid_max) {
562                         set_bit(pid, pid_list->pids);
563                         nr_pids++;
564                 }
565         }
566
567         while (cnt > 0) {
568
569                 pos = 0;
570
571                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
572                 if (ret < 0 || !trace_parser_loaded(&parser))
573                         break;
574
575                 read += ret;
576                 ubuf += ret;
577                 cnt -= ret;
578
579                 ret = -EINVAL;
580                 if (kstrtoul(parser.buffer, 0, &val))
581                         break;
582                 if (val >= pid_list->pid_max)
583                         break;
584
585                 pid = (pid_t)val;
586
587                 set_bit(pid, pid_list->pids);
588                 nr_pids++;
589
590                 trace_parser_clear(&parser);
591                 ret = 0;
592         }
593         trace_parser_put(&parser);
594
595         if (ret < 0) {
596                 trace_free_pid_list(pid_list);
597                 return ret;
598         }
599
600         if (!nr_pids) {
601                 /* Cleared the list of pids */
602                 trace_free_pid_list(pid_list);
603                 read = ret;
604                 pid_list = NULL;
605         }
606
607         *new_pid_list = pid_list;
608
609         return read;
610 }
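
/*
 * Illustrative sketch (hedged; not one of this file's file_operations): a
 * typical ->write() handler hands the user buffer to trace_pid_write() and,
 * on success, publishes the freshly built list before retiring the old one.
 * Real callers retire the old list under their own locking/RCU rules.
 */
static ssize_t __maybe_unused example_pids_store(struct trace_pid_list **listp,
						 const char __user *ubuf,
						 size_t cnt)
{
	struct trace_pid_list *filtered_pids = *listp;
	struct trace_pid_list *pid_list;
	ssize_t ret;

	ret = trace_pid_write(filtered_pids, &pid_list, ubuf, cnt);
	if (ret < 0)
		return ret;

	*listp = pid_list;			/* publish the new list (may be NULL) */
	if (filtered_pids)
		trace_free_pid_list(filtered_pids);	/* retire the old one */

	return ret;
}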
611
612 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
613 {
614         u64 ts;
615
616         /* Early boot up does not have a buffer yet */
617         if (!buf->buffer)
618                 return trace_clock_local();
619
620         ts = ring_buffer_time_stamp(buf->buffer, cpu);
621         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
622
623         return ts;
624 }
625
626 u64 ftrace_now(int cpu)
627 {
628         return buffer_ftrace_now(&global_trace.array_buffer, cpu);
629 }
630
631 /**
632  * tracing_is_enabled - Show if global_trace has been enabled
633  *
634  * Shows if the global trace has been enabled or not. It uses the
635  * mirror flag "buffer_disabled" to be used in fast paths such as for
636  * the irqsoff tracer. But it may be inaccurate due to races. If you
637  * need to know the accurate state, use tracing_is_on() which is a little
638  * slower, but accurate.
639  */
640 int tracing_is_enabled(void)
641 {
642         /*
643          * For quick access (irqsoff uses this in fast path), just
644          * return the mirror variable of the state of the ring buffer.
645          * It's a little racy, but we don't really care.
646          */
647         smp_rmb();
648         return !global_trace.buffer_disabled;
649 }
650
651 /*
652  * trace_buf_size is the size in bytes that is allocated
653  * for a buffer. Note, the number of bytes is always rounded
654  * to page size.
655  *
656  * This number is purposely set to a low value of 16384.
657  * If a dump on oops happens, it is much appreciated not to have
658  * to wait for all that output. In any case, this is configurable
659  * at both boot time and run time.
660  */
661 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
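
/*
 * Worked out (illustrative): 16384 * 88 = 1441792 bytes, i.e. exactly 352
 * pages of 4 KiB per CPU before any ring-buffer rounding.
 */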
662
663 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
664
665 /* trace_types holds a link list of available tracers. */
666 static struct tracer            *trace_types __read_mostly;
667
668 /*
669  * trace_types_lock is used to protect the trace_types list.
670  */
671 DEFINE_MUTEX(trace_types_lock);
672
673 /*
674  * Serialize access to the ring buffer.
675  *
676  * The ring buffer serializes readers, but that is only low-level protection.
677  * The validity of the events (returned by ring_buffer_peek() etc.)
678  * is not protected by the ring buffer.
679  *
680  * The content of events may become garbage if we allow other processes to
681  * consume these events concurrently:
682  *   A) the page of the consumed events may become a normal page
683  *      (not a reader page) in the ring buffer, and this page will be rewritten
684  *      by the event producer.
685  *   B) the page of the consumed events may become a page for splice_read,
686  *      and this page will be returned to the system.
687  *
688  * These primitives allow multiple processes to access different per-CPU
689  * ring buffers concurrently.
690  *
691  * These primitives don't distinguish read-only and read-consume access.
692  * Multiple read-only accesses are also serialized.
693  */
694
695 #ifdef CONFIG_SMP
696 static DECLARE_RWSEM(all_cpu_access_lock);
697 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
698
699 static inline void trace_access_lock(int cpu)
700 {
701         if (cpu == RING_BUFFER_ALL_CPUS) {
702                 /* gain it for accessing the whole ring buffer. */
703                 down_write(&all_cpu_access_lock);
704         } else {
705                 /* gain it for accessing a cpu ring buffer. */
706
707                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
708                 down_read(&all_cpu_access_lock);
709
710                 /* Secondly block other access to this @cpu ring buffer. */
711                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
712         }
713 }
714
715 static inline void trace_access_unlock(int cpu)
716 {
717         if (cpu == RING_BUFFER_ALL_CPUS) {
718                 up_write(&all_cpu_access_lock);
719         } else {
720                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
721                 up_read(&all_cpu_access_lock);
722         }
723 }
724
725 static inline void trace_access_lock_init(void)
726 {
727         int cpu;
728
729         for_each_possible_cpu(cpu)
730                 mutex_init(&per_cpu(cpu_access_lock, cpu));
731 }
732
733 #else
734
735 static DEFINE_MUTEX(access_lock);
736
737 static inline void trace_access_lock(int cpu)
738 {
739         (void)cpu;
740         mutex_lock(&access_lock);
741 }
742
743 static inline void trace_access_unlock(int cpu)
744 {
745         (void)cpu;
746         mutex_unlock(&access_lock);
747 }
748
749 static inline void trace_access_lock_init(void)
750 {
751 }
752
753 #endif
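
/*
 * Illustrative sketch (hedged; not one of this file's readers): a consuming
 * reader brackets its reads with the primitives above so that the pages it
 * is consuming cannot be recycled by the producer or handed to splice while
 * it still looks at them.
 */
static void __maybe_unused example_consuming_read(struct trace_buffer *buffer,
						  int cpu)
{
	trace_access_lock(cpu);
	/* e.g. ring_buffer_consume(buffer, cpu, NULL, NULL); */
	trace_access_unlock(cpu);
}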
754
755 #ifdef CONFIG_STACKTRACE
756 static void __ftrace_trace_stack(struct trace_buffer *buffer,
757                                  unsigned long flags,
758                                  int skip, int pc, struct pt_regs *regs);
759 static inline void ftrace_trace_stack(struct trace_array *tr,
760                                       struct trace_buffer *buffer,
761                                       unsigned long flags,
762                                       int skip, int pc, struct pt_regs *regs);
763
764 #else
765 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
766                                         unsigned long flags,
767                                         int skip, int pc, struct pt_regs *regs)
768 {
769 }
770 static inline void ftrace_trace_stack(struct trace_array *tr,
771                                       struct trace_buffer *buffer,
772                                       unsigned long flags,
773                                       int skip, int pc, struct pt_regs *regs)
774 {
775 }
776
777 #endif
778
779 static __always_inline void
780 trace_event_setup(struct ring_buffer_event *event,
781                   int type, unsigned long flags, int pc)
782 {
783         struct trace_entry *ent = ring_buffer_event_data(event);
784
785         tracing_generic_entry_update(ent, type, flags, pc);
786 }
787
788 static __always_inline struct ring_buffer_event *
789 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
790                           int type,
791                           unsigned long len,
792                           unsigned long flags, int pc)
793 {
794         struct ring_buffer_event *event;
795
796         event = ring_buffer_lock_reserve(buffer, len);
797         if (event != NULL)
798                 trace_event_setup(event, type, flags, pc);
799
800         return event;
801 }
802
803 void tracer_tracing_on(struct trace_array *tr)
804 {
805         if (tr->array_buffer.buffer)
806                 ring_buffer_record_on(tr->array_buffer.buffer);
807         /*
808          * This flag is looked at when buffers haven't been allocated
809          * yet, or by some tracers (like irqsoff), that just want to
810          * know if the ring buffer has been disabled, but it can handle
811          * races where it gets disabled while we still do a record.
812          * As the check is in the fast path of the tracers, it is more
813          * important to be fast than accurate.
814          */
815         tr->buffer_disabled = 0;
816         /* Make the flag seen by readers */
817         smp_wmb();
818 }
819
820 /**
821  * tracing_on - enable tracing buffers
822  *
823  * This function enables tracing buffers that may have been
824  * disabled with tracing_off.
825  */
826 void tracing_on(void)
827 {
828         tracer_tracing_on(&global_trace);
829 }
830 EXPORT_SYMBOL_GPL(tracing_on);
831
832
833 static __always_inline void
834 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
835 {
836         __this_cpu_write(trace_taskinfo_save, true);
837
838         /* If this is the temp buffer, we need to commit fully */
839         if (this_cpu_read(trace_buffered_event) == event) {
840                 /* Length is in event->array[0] */
841                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
842                 /* Release the temp buffer */
843                 this_cpu_dec(trace_buffered_event_cnt);
844         } else
845                 ring_buffer_unlock_commit(buffer, event);
846 }
847
848 /**
849  * __trace_puts - write a constant string into the trace buffer.
850  * @ip:    The address of the caller
851  * @str:   The constant string to write
852  * @size:  The size of the string.
853  */
854 int __trace_puts(unsigned long ip, const char *str, int size)
855 {
856         struct ring_buffer_event *event;
857         struct trace_buffer *buffer;
858         struct print_entry *entry;
859         unsigned long irq_flags;
860         int alloc;
861         int pc;
862
863         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
864                 return 0;
865
866         pc = preempt_count();
867
868         if (unlikely(tracing_selftest_running || tracing_disabled))
869                 return 0;
870
871         alloc = sizeof(*entry) + size + 2; /* possible \n added */
872
873         local_save_flags(irq_flags);
874         buffer = global_trace.array_buffer.buffer;
875         ring_buffer_nest_start(buffer);
876         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, 
877                                             irq_flags, pc);
878         if (!event) {
879                 size = 0;
880                 goto out;
881         }
882
883         entry = ring_buffer_event_data(event);
884         entry->ip = ip;
885
886         memcpy(&entry->buf, str, size);
887
888         /* Add a newline if necessary */
889         if (entry->buf[size - 1] != '\n') {
890                 entry->buf[size] = '\n';
891                 entry->buf[size + 1] = '\0';
892         } else
893                 entry->buf[size] = '\0';
894
895         __buffer_unlock_commit(buffer, event);
896         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
897  out:
898         ring_buffer_nest_end(buffer);
899         return size;
900 }
901 EXPORT_SYMBOL_GPL(__trace_puts);
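
/*
 * Usage note (illustrative): callers normally reach this through the
 * trace_puts() wrapper from the headers rather than calling __trace_puts()
 * directly, e.g.:
 *
 *	trace_puts("reached the slow path\n");
 *
 * For a string literal the wrapper may instead record only the string's
 * address via __trace_bputs() (see below), which is cheaper than copying
 * the bytes into the ring buffer.
 */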
902
903 /**
904  * __trace_bputs - write the pointer to a constant string into trace buffer
905  * @ip:    The address of the caller
906  * @str:   The constant string to write to the buffer
907  */
908 int __trace_bputs(unsigned long ip, const char *str)
909 {
910         struct ring_buffer_event *event;
911         struct trace_buffer *buffer;
912         struct bputs_entry *entry;
913         unsigned long irq_flags;
914         int size = sizeof(struct bputs_entry);
915         int ret = 0;
916         int pc;
917
918         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
919                 return 0;
920
921         pc = preempt_count();
922
923         if (unlikely(tracing_selftest_running || tracing_disabled))
924                 return 0;
925
926         local_save_flags(irq_flags);
927         buffer = global_trace.array_buffer.buffer;
928
929         ring_buffer_nest_start(buffer);
930         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
931                                             irq_flags, pc);
932         if (!event)
933                 goto out;
934
935         entry = ring_buffer_event_data(event);
936         entry->ip                       = ip;
937         entry->str                      = str;
938
939         __buffer_unlock_commit(buffer, event);
940         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
941
942         ret = 1;
943  out:
944         ring_buffer_nest_end(buffer);
945         return ret;
946 }
947 EXPORT_SYMBOL_GPL(__trace_bputs);
948
949 #ifdef CONFIG_TRACER_SNAPSHOT
950 void tracing_snapshot_instance_cond(struct trace_array *tr, void *cond_data)
951 {
952         struct tracer *tracer = tr->current_trace;
953         unsigned long flags;
954
955         if (in_nmi()) {
956                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
957                 internal_trace_puts("*** snapshot is being ignored        ***\n");
958                 return;
959         }
960
961         if (!tr->allocated_snapshot) {
962                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
963                 internal_trace_puts("*** stopping trace here!   ***\n");
964                 tracing_off();
965                 return;
966         }
967
968         /* Note, snapshot can not be used when the tracer uses it */
969         if (tracer->use_max_tr) {
970                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
971                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
972                 return;
973         }
974
975         local_irq_save(flags);
976         update_max_tr(tr, current, smp_processor_id(), cond_data);
977         local_irq_restore(flags);
978 }
979
980 void tracing_snapshot_instance(struct trace_array *tr)
981 {
982         tracing_snapshot_instance_cond(tr, NULL);
983 }
984
985 /**
986  * tracing_snapshot - take a snapshot of the current buffer.
987  *
988  * This causes a swap between the snapshot buffer and the current live
989  * tracing buffer. You can use this to take snapshots of the live
990  * trace when some condition is triggered, but continue to trace.
991  *
992  * Note, make sure to allocate the snapshot with either
993  * a tracing_snapshot_alloc(), or by doing it manually
994  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
995  *
996  * If the snapshot buffer is not allocated, it will stop tracing.
997  * Basically making a permanent snapshot.
998  */
999 void tracing_snapshot(void)
1000 {
1001         struct trace_array *tr = &global_trace;
1002
1003         tracing_snapshot_instance(tr);
1004 }
1005 EXPORT_SYMBOL_GPL(tracing_snapshot);
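
/*
 * Illustrative sketch (hedged; this helper is hypothetical): a typical user
 * allocates the spare buffer once from a context that may sleep, and then
 * calls tracing_snapshot() from wherever the interesting condition fires
 * (tracing_alloc_snapshot() and tracing_snapshot_alloc() appear below).
 */
static int __maybe_unused example_snapshot_on_condition(bool condition)
{
	int ret;

	ret = tracing_alloc_snapshot();	/* may sleep; do this up front */
	if (ret < 0)
		return ret;

	if (condition)
		tracing_snapshot();	/* does not sleep; NMI context is rejected above */

	return 0;
}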
1006
1007 /**
1008  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1009  * @tr:         The tracing instance to snapshot
1010  * @cond_data:  The data to be tested conditionally, and possibly saved
1011  *
1012  * This is the same as tracing_snapshot() except that the snapshot is
1013  * conditional - the snapshot will only happen if the
1014  * cond_snapshot.update() implementation receiving the cond_data
1015  * returns true, which means that the trace array's cond_snapshot
1016  * update() operation used the cond_data to determine whether the
1017  * snapshot should be taken, and if it was, presumably saved it along
1018  * with the snapshot.
1019  */
1020 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1021 {
1022         tracing_snapshot_instance_cond(tr, cond_data);
1023 }
1024 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1025
1026 /**
1027  * tracing_snapshot_cond_data - get the user data associated with a snapshot
1028  * @tr:         The tracing instance
1029  *
1030  * When the user enables a conditional snapshot using
1031  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1032  * with the snapshot.  This accessor is used to retrieve it.
1033  *
1034  * Should not be called from cond_snapshot.update(), since it takes
1035  * the tr->max_lock, which the code calling
1036  * cond_snapshot.update() has already taken.
1037  *
1038  * Returns the cond_data associated with the trace array's snapshot.
1039  */
1040 void *tracing_cond_snapshot_data(struct trace_array *tr)
1041 {
1042         void *cond_data = NULL;
1043
1044         arch_spin_lock(&tr->max_lock);
1045
1046         if (tr->cond_snapshot)
1047                 cond_data = tr->cond_snapshot->cond_data;
1048
1049         arch_spin_unlock(&tr->max_lock);
1050
1051         return cond_data;
1052 }
1053 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1054
1055 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1056                                         struct array_buffer *size_buf, int cpu_id);
1057 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1058
1059 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1060 {
1061         int ret;
1062
1063         if (!tr->allocated_snapshot) {
1064
1065                 /* allocate spare buffer */
1066                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1067                                    &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1068                 if (ret < 0)
1069                         return ret;
1070
1071                 tr->allocated_snapshot = true;
1072         }
1073
1074         return 0;
1075 }
1076
1077 static void free_snapshot(struct trace_array *tr)
1078 {
1079         /*
1080          * We don't free the ring buffer; instead, we resize it because
1081          * the max_tr ring buffer has some state (e.g. ring->clock) and
1082          * we want to preserve it.
1083          */
1084         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1085         set_buffer_entries(&tr->max_buffer, 1);
1086         tracing_reset_online_cpus(&tr->max_buffer);
1087         tr->allocated_snapshot = false;
1088 }
1089
1090 /**
1091  * tracing_alloc_snapshot - allocate snapshot buffer.
1092  *
1093  * This only allocates the snapshot buffer if it isn't already
1094  * allocated - it doesn't also take a snapshot.
1095  *
1096  * This is meant to be used in cases where the snapshot buffer needs
1097  * to be set up for events that can't sleep but need to be able to
1098  * trigger a snapshot.
1099  */
1100 int tracing_alloc_snapshot(void)
1101 {
1102         struct trace_array *tr = &global_trace;
1103         int ret;
1104
1105         ret = tracing_alloc_snapshot_instance(tr);
1106         WARN_ON(ret < 0);
1107
1108         return ret;
1109 }
1110 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1111
1112 /**
1113  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1114  *
1115  * This is similar to tracing_snapshot(), but it will allocate the
1116  * snapshot buffer if it isn't already allocated. Use this only
1117  * where it is safe to sleep, as the allocation may sleep.
1118  *
1119  * This causes a swap between the snapshot buffer and the current live
1120  * tracing buffer. You can use this to take snapshots of the live
1121  * trace when some condition is triggered, but continue to trace.
1122  */
1123 void tracing_snapshot_alloc(void)
1124 {
1125         int ret;
1126
1127         ret = tracing_alloc_snapshot();
1128         if (ret < 0)
1129                 return;
1130
1131         tracing_snapshot();
1132 }
1133 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1134
1135 /**
1136  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1137  * @tr:         The tracing instance
1138  * @cond_data:  User data to associate with the snapshot
1139  * @update:     Implementation of the cond_snapshot update function
1140  *
1141  * Check whether the conditional snapshot for the given instance has
1142  * already been enabled, or if the current tracer is already using a
1143  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1144  * save the cond_data and update function inside.
1145  *
1146  * Returns 0 if successful, error otherwise.
1147  */
1148 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1149                                  cond_update_fn_t update)
1150 {
1151         struct cond_snapshot *cond_snapshot;
1152         int ret = 0;
1153
1154         cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1155         if (!cond_snapshot)
1156                 return -ENOMEM;
1157
1158         cond_snapshot->cond_data = cond_data;
1159         cond_snapshot->update = update;
1160
1161         mutex_lock(&trace_types_lock);
1162
1163         ret = tracing_alloc_snapshot_instance(tr);
1164         if (ret)
1165                 goto fail_unlock;
1166
1167         if (tr->current_trace->use_max_tr) {
1168                 ret = -EBUSY;
1169                 goto fail_unlock;
1170         }
1171
1172         /*
1173          * The cond_snapshot can only change to NULL without the
1174          * trace_types_lock. We don't care if we race with it going
1175          * to NULL, but we want to make sure that it's not set to
1176          * something other than NULL when we get here, which we can
1177          * do safely with only holding the trace_types_lock and not
1178          * having to take the max_lock.
1179          */
1180         if (tr->cond_snapshot) {
1181                 ret = -EBUSY;
1182                 goto fail_unlock;
1183         }
1184
1185         arch_spin_lock(&tr->max_lock);
1186         tr->cond_snapshot = cond_snapshot;
1187         arch_spin_unlock(&tr->max_lock);
1188
1189         mutex_unlock(&trace_types_lock);
1190
1191         return ret;
1192
1193  fail_unlock:
1194         mutex_unlock(&trace_types_lock);
1195         kfree(cond_snapshot);
1196         return ret;
1197 }
1198 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
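
/*
 * Illustrative sketch (hedged; the structure and threshold logic are made
 * up): a user of the conditional snapshot API supplies an update() callback
 * that decides, each time tracing_snapshot_cond() is called, whether the
 * buffers should really be swapped.
 */
struct example_cond_data {
	u64 threshold;
	u64 last_value;
};

static bool __maybe_unused example_cond_update(struct trace_array *tr,
					       void *cond_data)
{
	struct example_cond_data *cond = cond_data;

	/* Only snapshot once the observed value crosses the threshold */
	return cond->last_value >= cond->threshold;
}

/*
 * Enable:   tracing_snapshot_cond_enable(tr, &cond, example_cond_update);
 * Trigger:  tracing_snapshot_cond(tr, &cond);
 * Disable:  tracing_snapshot_cond_disable(tr);
 */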
1199
1200 /**
1201  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1202  * @tr:         The tracing instance
1203  *
1204  * Check whether the conditional snapshot for the given instance is
1205  * enabled; if so, free the cond_snapshot associated with it,
1206  * otherwise return -EINVAL.
1207  *
1208  * Returns 0 if successful, error otherwise.
1209  */
1210 int tracing_snapshot_cond_disable(struct trace_array *tr)
1211 {
1212         int ret = 0;
1213
1214         arch_spin_lock(&tr->max_lock);
1215
1216         if (!tr->cond_snapshot)
1217                 ret = -EINVAL;
1218         else {
1219                 kfree(tr->cond_snapshot);
1220                 tr->cond_snapshot = NULL;
1221         }
1222
1223         arch_spin_unlock(&tr->max_lock);
1224
1225         return ret;
1226 }
1227 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1228 #else
1229 void tracing_snapshot(void)
1230 {
1231         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1232 }
1233 EXPORT_SYMBOL_GPL(tracing_snapshot);
1234 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1235 {
1236         WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1237 }
1238 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1239 int tracing_alloc_snapshot(void)
1240 {
1241         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1242         return -ENODEV;
1243 }
1244 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1245 void tracing_snapshot_alloc(void)
1246 {
1247         /* Give warning */
1248         tracing_snapshot();
1249 }
1250 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1251 void *tracing_cond_snapshot_data(struct trace_array *tr)
1252 {
1253         return NULL;
1254 }
1255 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1256 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1257 {
1258         return -ENODEV;
1259 }
1260 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1261 int tracing_snapshot_cond_disable(struct trace_array *tr)
1262 {
1263         return false;
1264 }
1265 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1266 #endif /* CONFIG_TRACER_SNAPSHOT */
1267
1268 void tracer_tracing_off(struct trace_array *tr)
1269 {
1270         if (tr->array_buffer.buffer)
1271                 ring_buffer_record_off(tr->array_buffer.buffer);
1272         /*
1273          * This flag is looked at when buffers haven't been allocated
1274          * yet, or by some tracers (like irqsoff), that just want to
1275          * know if the ring buffer has been disabled, but it can handle
1276          * races where it gets disabled while we still do a record.
1277          * As the check is in the fast path of the tracers, it is more
1278          * important to be fast than accurate.
1279          */
1280         tr->buffer_disabled = 1;
1281         /* Make the flag seen by readers */
1282         smp_wmb();
1283 }
1284
1285 /**
1286  * tracing_off - turn off tracing buffers
1287  *
1288  * This function stops the tracing buffers from recording data.
1289  * It does not disable any overhead the tracers themselves may
1290  * be causing. This function simply causes all recording to
1291  * the ring buffers to fail.
1292  */
1293 void tracing_off(void)
1294 {
1295         tracer_tracing_off(&global_trace);
1296 }
1297 EXPORT_SYMBOL_GPL(tracing_off);
1298
1299 void disable_trace_on_warning(void)
1300 {
1301         if (__disable_trace_on_warning)
1302                 tracing_off();
1303 }
1304
1305 /**
1306  * tracer_tracing_is_on - show the real state of the ring buffer
1307  * @tr : the trace array to check
1308  *
1309  * Shows the real state of the ring buffer: whether it is enabled or not.
1310  */
1311 bool tracer_tracing_is_on(struct trace_array *tr)
1312 {
1313         if (tr->array_buffer.buffer)
1314                 return ring_buffer_record_is_on(tr->array_buffer.buffer);
1315         return !tr->buffer_disabled;
1316 }
1317
1318 /**
1319  * tracing_is_on - show state of ring buffers enabled
1320  */
1321 int tracing_is_on(void)
1322 {
1323         return tracer_tracing_is_on(&global_trace);
1324 }
1325 EXPORT_SYMBOL_GPL(tracing_is_on);
1326
1327 static int __init set_buf_size(char *str)
1328 {
1329         unsigned long buf_size;
1330
1331         if (!str)
1332                 return 0;
1333         buf_size = memparse(str, &str);
1334         /* nr_entries can not be zero */
1335         if (buf_size == 0)
1336                 return 0;
1337         trace_buf_size = buf_size;
1338         return 1;
1339 }
1340 __setup("trace_buf_size=", set_buf_size);
1341
1342 static int __init set_tracing_thresh(char *str)
1343 {
1344         unsigned long threshold;
1345         int ret;
1346
1347         if (!str)
1348                 return 0;
1349         ret = kstrtoul(str, 0, &threshold);
1350         if (ret < 0)
1351                 return 0;
1352         tracing_thresh = threshold * 1000;
1353         return 1;
1354 }
1355 __setup("tracing_thresh=", set_tracing_thresh);
1356
1357 unsigned long nsecs_to_usecs(unsigned long nsecs)
1358 {
1359         return nsecs / 1000;
1360 }
1361
1362 /*
1363  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1364  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1365  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1366  * of strings in the order that the evals (enum) were defined.
1367  */
1368 #undef C
1369 #define C(a, b) b
1370
1371 /* These must match the bit positions in trace_iterator_flags */
1372 static const char *trace_options[] = {
1373         TRACE_FLAGS
1374         NULL
1375 };
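
/*
 * Illustrative sketch of the C(a, b) trick (hedged; EXAMPLE_FLAGS and its
 * entries are made up, the real list lives in trace.h):
 *
 *	#define EXAMPLE_FLAGS	C(FOO, "foo"), C(BAR, "bar"),
 *
 *	#define C(a, b) EXAMPLE_ITER_##a##_BIT
 *	enum { EXAMPLE_FLAGS };			// ..._FOO_BIT = 0, ..._BAR_BIT = 1
 *	#undef C
 *
 *	#define C(a, b) b
 *	static const char *example_opts[] = { EXAMPLE_FLAGS NULL };	// "foo", "bar", NULL
 *	#undef C
 *
 * The single list thus defines both the bit positions and their strings,
 * so the two can never drift out of sync.
 */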
1376
1377 static struct {
1378         u64 (*func)(void);
1379         const char *name;
1380         int in_ns;              /* is this clock in nanoseconds? */
1381 } trace_clocks[] = {
1382         { trace_clock_local,            "local",        1 },
1383         { trace_clock_global,           "global",       1 },
1384         { trace_clock_counter,          "counter",      0 },
1385         { trace_clock_jiffies,          "uptime",       0 },
1386         { trace_clock,                  "perf",         1 },
1387         { ktime_get_mono_fast_ns,       "mono",         1 },
1388         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1389         { ktime_get_boot_fast_ns,       "boot",         1 },
1390         ARCH_TRACE_CLOCKS
1391 };
1392
1393 bool trace_clock_in_ns(struct trace_array *tr)
1394 {
1395         if (trace_clocks[tr->clock_id].in_ns)
1396                 return true;
1397
1398         return false;
1399 }
1400
1401 /*
1402  * trace_parser_get_init - gets the buffer for trace parser
1403  */
1404 int trace_parser_get_init(struct trace_parser *parser, int size)
1405 {
1406         memset(parser, 0, sizeof(*parser));
1407
1408         parser->buffer = kmalloc(size, GFP_KERNEL);
1409         if (!parser->buffer)
1410                 return 1;
1411
1412         parser->size = size;
1413         return 0;
1414 }
1415
1416 /*
1417  * trace_parser_put - frees the buffer for trace parser
1418  */
1419 void trace_parser_put(struct trace_parser *parser)
1420 {
1421         kfree(parser->buffer);
1422         parser->buffer = NULL;
1423 }
1424
1425 /*
1426  * trace_get_user - reads the user input string separated by space
1427  * (matched by isspace(ch))
1428  *
1429  * For each string found the 'struct trace_parser' is updated,
1430  * and the function returns.
1431  *
1432  * Returns number of bytes read.
1433  *
1434  * See kernel/trace/trace.h for 'struct trace_parser' details.
1435  */
1436 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1437         size_t cnt, loff_t *ppos)
1438 {
1439         char ch;
1440         size_t read = 0;
1441         ssize_t ret;
1442
1443         if (!*ppos)
1444                 trace_parser_clear(parser);
1445
1446         ret = get_user(ch, ubuf++);
1447         if (ret)
1448                 goto out;
1449
1450         read++;
1451         cnt--;
1452
1453         /*
1454          * The parser is not finished with the last write,
1455          * continue reading the user input without skipping spaces.
1456          */
1457         if (!parser->cont) {
1458                 /* skip white space */
1459                 while (cnt && isspace(ch)) {
1460                         ret = get_user(ch, ubuf++);
1461                         if (ret)
1462                                 goto out;
1463                         read++;
1464                         cnt--;
1465                 }
1466
1467                 parser->idx = 0;
1468
1469                 /* only spaces were written */
1470                 if (isspace(ch) || !ch) {
1471                         *ppos += read;
1472                         ret = read;
1473                         goto out;
1474                 }
1475         }
1476
1477         /* read the non-space input */
1478         while (cnt && !isspace(ch) && ch) {
1479                 if (parser->idx < parser->size - 1)
1480                         parser->buffer[parser->idx++] = ch;
1481                 else {
1482                         ret = -EINVAL;
1483                         goto out;
1484                 }
1485                 ret = get_user(ch, ubuf++);
1486                 if (ret)
1487                         goto out;
1488                 read++;
1489                 cnt--;
1490         }
1491
1492         /* We either got finished input or we have to wait for another call. */
1493         if (isspace(ch) || !ch) {
1494                 parser->buffer[parser->idx] = 0;
1495                 parser->cont = false;
1496         } else if (parser->idx < parser->size - 1) {
1497                 parser->cont = true;
1498                 parser->buffer[parser->idx++] = ch;
1499                 /* Make sure the parsed string always terminates with '\0'. */
1500                 parser->buffer[parser->idx] = 0;
1501         } else {
1502                 ret = -EINVAL;
1503                 goto out;
1504         }
1505
1506         *ppos += read;
1507         ret = read;
1508
1509 out:
1510         return ret;
1511 }
1512
1513 /* TODO add a seq_buf_to_buffer() */
1514 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1515 {
1516         int len;
1517
1518         if (trace_seq_used(s) <= s->seq.readpos)
1519                 return -EBUSY;
1520
1521         len = trace_seq_used(s) - s->seq.readpos;
1522         if (cnt > len)
1523                 cnt = len;
1524         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1525
1526         s->seq.readpos += cnt;
1527         return cnt;
1528 }
1529
1530 unsigned long __read_mostly     tracing_thresh;
1531 static const struct file_operations tracing_max_lat_fops;
1532
1533 #if (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1534         defined(CONFIG_FSNOTIFY)
1535
1536 static struct workqueue_struct *fsnotify_wq;
1537
1538 static void latency_fsnotify_workfn(struct work_struct *work)
1539 {
1540         struct trace_array *tr = container_of(work, struct trace_array,
1541                                               fsnotify_work);
1542         fsnotify(tr->d_max_latency->d_inode, FS_MODIFY,
1543                  tr->d_max_latency->d_inode, FSNOTIFY_EVENT_INODE, NULL, 0);
1544 }
1545
1546 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1547 {
1548         struct trace_array *tr = container_of(iwork, struct trace_array,
1549                                               fsnotify_irqwork);
1550         queue_work(fsnotify_wq, &tr->fsnotify_work);
1551 }
1552
1553 static void trace_create_maxlat_file(struct trace_array *tr,
1554                                      struct dentry *d_tracer)
1555 {
1556         INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1557         init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1558         tr->d_max_latency = trace_create_file("tracing_max_latency", 0644,
1559                                               d_tracer, &tr->max_latency,
1560                                               &tracing_max_lat_fops);
1561 }
1562
1563 __init static int latency_fsnotify_init(void)
1564 {
1565         fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1566                                       WQ_UNBOUND | WQ_HIGHPRI, 0);
1567         if (!fsnotify_wq) {
1568                 pr_err("Unable to allocate tr_max_lat_wq\n");
1569                 return -ENOMEM;
1570         }
1571         return 0;
1572 }
1573
1574 late_initcall_sync(latency_fsnotify_init);
1575
1576 void latency_fsnotify(struct trace_array *tr)
1577 {
1578         if (!fsnotify_wq)
1579                 return;
1580         /*
1581          * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1582          * possible that we are called from __schedule() or do_idle(), which
1583          * could cause a deadlock.
1584          */
1585         irq_work_queue(&tr->fsnotify_irqwork);
1586 }
1587
1588 /*
1589  * (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1590  *  defined(CONFIG_FSNOTIFY)
1591  */
1592 #else
1593
1594 #define trace_create_maxlat_file(tr, d_tracer)                          \
1595         trace_create_file("tracing_max_latency", 0644, d_tracer,        \
1596                           &tr->max_latency, &tracing_max_lat_fops)
1597
1598 #endif
1599
1600 #ifdef CONFIG_TRACER_MAX_TRACE
1601 /*
1602  * Copy the new maximum trace into the separate maximum-trace
1603  * structure. (this way the maximum trace is permanently saved,
1604  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1605  */
1606 static void
1607 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1608 {
1609         struct array_buffer *trace_buf = &tr->array_buffer;
1610         struct array_buffer *max_buf = &tr->max_buffer;
1611         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1612         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1613
1614         max_buf->cpu = cpu;
1615         max_buf->time_start = data->preempt_timestamp;
1616
1617         max_data->saved_latency = tr->max_latency;
1618         max_data->critical_start = data->critical_start;
1619         max_data->critical_end = data->critical_end;
1620
1621         strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1622         max_data->pid = tsk->pid;
1623         /*
1624          * If tsk == current, then use current_uid(), as that does not use
1625          * RCU. The irq tracer can be called out of RCU scope.
1626          */
1627         if (tsk == current)
1628                 max_data->uid = current_uid();
1629         else
1630                 max_data->uid = task_uid(tsk);
1631
1632         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1633         max_data->policy = tsk->policy;
1634         max_data->rt_priority = tsk->rt_priority;
1635
1636         /* record this task's comm */
1637         tracing_record_cmdline(tsk);
1638         latency_fsnotify(tr);
1639 }
1640
1641 /**
1642  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1643  * @tr: tracer
1644  * @tsk: the task with the latency
1645  * @cpu: The cpu that initiated the trace.
1646  * @cond_data: User data associated with a conditional snapshot
1647  *
1648  * Flip the buffers between the @tr and the max_tr and record information
1649  * about which task was the cause of this latency.
1650  */
1651 void
1652 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1653               void *cond_data)
1654 {
1655         if (tr->stop_count)
1656                 return;
1657
1658         WARN_ON_ONCE(!irqs_disabled());
1659
1660         if (!tr->allocated_snapshot) {
1661                 /* Only the nop tracer should hit this when disabling */
1662                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1663                 return;
1664         }
1665
1666         arch_spin_lock(&tr->max_lock);
1667
1668         /* Inherit the recordable setting from array_buffer */
1669         if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1670                 ring_buffer_record_on(tr->max_buffer.buffer);
1671         else
1672                 ring_buffer_record_off(tr->max_buffer.buffer);
1673
1674 #ifdef CONFIG_TRACER_SNAPSHOT
1675         if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1676                 goto out_unlock;
1677 #endif
1678         swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1679
1680         __update_max_tr(tr, tsk, cpu);
1681
1682  out_unlock:
1683         arch_spin_unlock(&tr->max_lock);
1684 }
1685
1686 /**
1687  * update_max_tr_single - only copy one trace over, and reset the rest
1688  * @tr: tracer
1689  * @tsk: task with the latency
1690  * @cpu: the cpu of the buffer to copy.
1691  *
1692  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1693  */
1694 void
1695 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1696 {
1697         int ret;
1698
1699         if (tr->stop_count)
1700                 return;
1701
1702         WARN_ON_ONCE(!irqs_disabled());
1703         if (!tr->allocated_snapshot) {
1704                 /* Only the nop tracer should hit this when disabling */
1705                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1706                 return;
1707         }
1708
1709         arch_spin_lock(&tr->max_lock);
1710
1711         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1712
1713         if (ret == -EBUSY) {
1714                 /*
1715                  * We failed to swap the buffer due to a commit taking
1716                  * place on this CPU. We fail to record, but we reset
1717                  * the max trace buffer (no one writes directly to it)
1718                  * and flag that it failed.
1719                  */
1720                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1721                         "Failed to swap buffers due to commit in progress\n");
1722         }
1723
1724         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1725
1726         __update_max_tr(tr, tsk, cpu);
1727         arch_spin_unlock(&tr->max_lock);
1728 }
1729 #endif /* CONFIG_TRACER_MAX_TRACE */
1730
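     /*
      * Block until the iterator's per-CPU ring buffer has something to
      * read. Iterators that have static buffer iterators are either
      * already filled or empty, so there is nothing to wait for.
      */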
1731 static int wait_on_pipe(struct trace_iterator *iter, int full)
1732 {
1733         /* Iterators are static, they should be filled or empty */
1734         if (trace_buffer_iter(iter, iter->cpu_file))
1735                 return 0;
1736
1737         return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1738                                 full);
1739 }
1740
1741 #ifdef CONFIG_FTRACE_STARTUP_TEST
1742 static bool selftests_can_run;
1743
1744 struct trace_selftests {
1745         struct list_head                list;
1746         struct tracer                   *type;
1747 };
1748
1749 static LIST_HEAD(postponed_selftests);
1750
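     /*
      * Remember a tracer whose selftest cannot run yet (it registered
      * before the system was far enough along in boot) so that
      * init_trace_selftests() can run it later.
      */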
1751 static int save_selftest(struct tracer *type)
1752 {
1753         struct trace_selftests *selftest;
1754
1755         selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1756         if (!selftest)
1757                 return -ENOMEM;
1758
1759         selftest->type = type;
1760         list_add(&selftest->list, &postponed_selftests);
1761         return 0;
1762 }
1763
1764 static int run_tracer_selftest(struct tracer *type)
1765 {
1766         struct trace_array *tr = &global_trace;
1767         struct tracer *saved_tracer = tr->current_trace;
1768         int ret;
1769
1770         if (!type->selftest || tracing_selftest_disabled)
1771                 return 0;
1772
1773         /*
1774          * If a tracer registers early in boot up (before scheduling is
1775          * initialized and such), then do not run its selftests yet.
1776          * Instead, run it a little later in the boot process.
1777          */
1778         if (!selftests_can_run)
1779                 return save_selftest(type);
1780
1781         /*
1782          * Run a selftest on this tracer.
1783          * Here we reset the trace buffer, and set the current
1784          * tracer to be this tracer. The tracer can then run some
1785          * internal tracing to verify that everything is in order.
1786          * If we fail, we do not register this tracer.
1787          */
1788         tracing_reset_online_cpus(&tr->array_buffer);
1789
1790         tr->current_trace = type;
1791
1792 #ifdef CONFIG_TRACER_MAX_TRACE
1793         if (type->use_max_tr) {
1794                 /* If we expanded the buffers, make sure the max is expanded too */
1795                 if (ring_buffer_expanded)
1796                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1797                                            RING_BUFFER_ALL_CPUS);
1798                 tr->allocated_snapshot = true;
1799         }
1800 #endif
1801
1802         /* the test is responsible for initializing and enabling */
1803         pr_info("Testing tracer %s: ", type->name);
1804         ret = type->selftest(type, tr);
1805         /* the test is responsible for resetting too */
1806         tr->current_trace = saved_tracer;
1807         if (ret) {
1808                 printk(KERN_CONT "FAILED!\n");
1809                 /* Add the warning after printing 'FAILED' */
1810                 WARN_ON(1);
1811                 return -1;
1812         }
1813         /* Only reset on passing, to avoid touching corrupted buffers */
1814         tracing_reset_online_cpus(&tr->array_buffer);
1815
1816 #ifdef CONFIG_TRACER_MAX_TRACE
1817         if (type->use_max_tr) {
1818                 tr->allocated_snapshot = false;
1819
1820                 /* Shrink the max buffer again */
1821                 if (ring_buffer_expanded)
1822                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1823                                            RING_BUFFER_ALL_CPUS);
1824         }
1825 #endif
1826
1827         printk(KERN_CONT "PASSED\n");
1828         return 0;
1829 }
1830
1831 static __init int init_trace_selftests(void)
1832 {
1833         struct trace_selftests *p, *n;
1834         struct tracer *t, **last;
1835         int ret;
1836
1837         selftests_can_run = true;
1838
1839         mutex_lock(&trace_types_lock);
1840
1841         if (list_empty(&postponed_selftests))
1842                 goto out;
1843
1844         pr_info("Running postponed tracer tests:\n");
1845
1846         tracing_selftest_running = true;
1847         list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1848                 /* This loop can take minutes when sanitizers are enabled, so
1849                  * let's make sure we allow RCU processing.
1850                  */
1851                 cond_resched();
1852                 ret = run_tracer_selftest(p->type);
1853                 /* If the test fails, then warn and remove from available_tracers */
1854                 if (ret < 0) {
1855                         WARN(1, "tracer: %s failed selftest, disabling\n",
1856                              p->type->name);
1857                         last = &trace_types;
1858                         for (t = trace_types; t; t = t->next) {
1859                                 if (t == p->type) {
1860                                         *last = t->next;
1861                                         break;
1862                                 }
1863                                 last = &t->next;
1864                         }
1865                 }
1866                 list_del(&p->list);
1867                 kfree(p);
1868         }
1869         tracing_selftest_running = false;
1870
1871  out:
1872         mutex_unlock(&trace_types_lock);
1873
1874         return 0;
1875 }
1876 core_initcall(init_trace_selftests);
1877 #else
1878 static inline int run_tracer_selftest(struct tracer *type)
1879 {
1880         return 0;
1881 }
1882 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1883
1884 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1885
1886 static void __init apply_trace_boot_options(void);
1887
1888 /**
1889  * register_tracer - register a tracer with the ftrace system.
1890  * @type: the plugin for the tracer
1891  *
1892  * Register a new plugin tracer.
1893  */
1894 int __init register_tracer(struct tracer *type)
1895 {
1896         struct tracer *t;
1897         int ret = 0;
1898
1899         if (!type->name) {
1900                 pr_info("Tracer must have a name\n");
1901                 return -1;
1902         }
1903
1904         if (strlen(type->name) >= MAX_TRACER_SIZE) {
1905                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1906                 return -1;
1907         }
1908
1909         if (security_locked_down(LOCKDOWN_TRACEFS)) {
1910                 pr_warn("Can not register tracer %s due to lockdown\n",
1911                            type->name);
1912                 return -EPERM;
1913         }
1914
1915         mutex_lock(&trace_types_lock);
1916
1917         tracing_selftest_running = true;
1918
1919         for (t = trace_types; t; t = t->next) {
1920                 if (strcmp(type->name, t->name) == 0) {
1921                         /* already found */
1922                         pr_info("Tracer %s already registered\n",
1923                                 type->name);
1924                         ret = -1;
1925                         goto out;
1926                 }
1927         }
1928
1929         if (!type->set_flag)
1930                 type->set_flag = &dummy_set_flag;
1931         if (!type->flags) {
1932                 /* allocate a dummy tracer_flags */
1933                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1934                 if (!type->flags) {
1935                         ret = -ENOMEM;
1936                         goto out;
1937                 }
1938                 type->flags->val = 0;
1939                 type->flags->opts = dummy_tracer_opt;
1940         } else
1941                 if (!type->flags->opts)
1942                         type->flags->opts = dummy_tracer_opt;
1943
1944         /* store the tracer for __set_tracer_option */
1945         type->flags->trace = type;
1946
1947         ret = run_tracer_selftest(type);
1948         if (ret < 0)
1949                 goto out;
1950
1951         type->next = trace_types;
1952         trace_types = type;
1953         add_tracer_options(&global_trace, type);
1954
1955  out:
1956         tracing_selftest_running = false;
1957         mutex_unlock(&trace_types_lock);
1958
1959         if (ret || !default_bootup_tracer)
1960                 goto out_unlock;
1961
1962         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1963                 goto out_unlock;
1964
1965         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1966         /* Do we want this tracer to start on bootup? */
1967         tracing_set_tracer(&global_trace, type->name);
1968         default_bootup_tracer = NULL;
1969
1970         apply_trace_boot_options();
1971
1972         /* disable other selftests, since running them would break the tracer we just started. */
1973         tracing_selftest_disabled = true;
1974 #ifdef CONFIG_FTRACE_STARTUP_TEST
1975         printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1976                type->name);
1977 #endif
1978
1979  out_unlock:
1980         return ret;
1981 }
1982
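     /*
      * Empty the ring buffer of a single CPU. Recording is disabled and
      * all pending commits are allowed to finish before the reset.
      */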
1983 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
1984 {
1985         struct trace_buffer *buffer = buf->buffer;
1986
1987         if (!buffer)
1988                 return;
1989
1990         ring_buffer_record_disable(buffer);
1991
1992         /* Make sure all commits have finished */
1993         synchronize_rcu();
1994         ring_buffer_reset_cpu(buffer, cpu);
1995
1996         ring_buffer_record_enable(buffer);
1997 }
1998
1999 void tracing_reset_online_cpus(struct array_buffer *buf)
2000 {
2001         struct trace_buffer *buffer = buf->buffer;
2002         int cpu;
2003
2004         if (!buffer)
2005                 return;
2006
2007         ring_buffer_record_disable(buffer);
2008
2009         /* Make sure all commits have finished */
2010         synchronize_rcu();
2011
2012         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2013
2014         for_each_online_cpu(cpu)
2015                 ring_buffer_reset_cpu(buffer, cpu);
2016
2017         ring_buffer_record_enable(buffer);
2018 }
2019
2020 /* Must have trace_types_lock held */
2021 void tracing_reset_all_online_cpus(void)
2022 {
2023         struct trace_array *tr;
2024
2025         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2026                 if (!tr->clear_trace)
2027                         continue;
2028                 tr->clear_trace = false;
2029                 tracing_reset_online_cpus(&tr->array_buffer);
2030 #ifdef CONFIG_TRACER_MAX_TRACE
2031                 tracing_reset_online_cpus(&tr->max_buffer);
2032 #endif
2033         }
2034 }
2035
2036 static int *tgid_map;
2037
2038 #define SAVED_CMDLINES_DEFAULT 128
2039 #define NO_CMDLINE_MAP UINT_MAX
2040 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
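     /*
      * Cache of recently recorded task comms. The pid -> cmdline slot and
      * cmdline slot -> pid maps are kept in sync so a slot can be reused
      * without leaving a stale pid pointing at the new comm.
      */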
2041 struct saved_cmdlines_buffer {
2042         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2043         unsigned *map_cmdline_to_pid;
2044         unsigned cmdline_num;
2045         int cmdline_idx;
2046         char *saved_cmdlines;
2047 };
2048 static struct saved_cmdlines_buffer *savedcmd;
2049
2050 /* temporary disable recording */
2051 static atomic_t trace_record_taskinfo_disabled __read_mostly;
2052
2053 static inline char *get_saved_cmdlines(int idx)
2054 {
2055         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2056 }
2057
2058 static inline void set_cmdline(int idx, const char *cmdline)
2059 {
2060         strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2061 }
2062
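     /*
      * Allocate storage for @val saved comms and initialize both pid
      * maps to NO_CMDLINE_MAP. Returns 0 on success or -ENOMEM.
      */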
2063 static int allocate_cmdlines_buffer(unsigned int val,
2064                                     struct saved_cmdlines_buffer *s)
2065 {
2066         s->map_cmdline_to_pid = kmalloc_array(val,
2067                                               sizeof(*s->map_cmdline_to_pid),
2068                                               GFP_KERNEL);
2069         if (!s->map_cmdline_to_pid)
2070                 return -ENOMEM;
2071
2072         s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2073         if (!s->saved_cmdlines) {
2074                 kfree(s->map_cmdline_to_pid);
2075                 return -ENOMEM;
2076         }
2077
2078         s->cmdline_idx = 0;
2079         s->cmdline_num = val;
2080         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2081                sizeof(s->map_pid_to_cmdline));
2082         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2083                val * sizeof(*s->map_cmdline_to_pid));
2084
2085         return 0;
2086 }
2087
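     /* Allocate the global savedcmd cache with SAVED_CMDLINES_DEFAULT entries. */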
2088 static int trace_create_savedcmd(void)
2089 {
2090         int ret;
2091
2092         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2093         if (!savedcmd)
2094                 return -ENOMEM;
2095
2096         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2097         if (ret < 0) {
2098                 kfree(savedcmd);
2099                 savedcmd = NULL;
2100                 return -ENOMEM;
2101         }
2102
2103         return 0;
2104 }
2105
2106 int is_tracing_stopped(void)
2107 {
2108         return global_trace.stop_count;
2109 }
2110
2111 /**
2112  * tracing_start - quick start of the tracer
2113  *
2114  * If tracing is enabled but was stopped by tracing_stop,
2115  * this will start the tracer back up.
2116  */
2117 void tracing_start(void)
2118 {
2119         struct trace_buffer *buffer;
2120         unsigned long flags;
2121
2122         if (tracing_disabled)
2123                 return;
2124
2125         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2126         if (--global_trace.stop_count) {
2127                 if (global_trace.stop_count < 0) {
2128                         /* Someone screwed up their debugging */
2129                         WARN_ON_ONCE(1);
2130                         global_trace.stop_count = 0;
2131                 }
2132                 goto out;
2133         }
2134
2135         /* Prevent the buffers from switching */
2136         arch_spin_lock(&global_trace.max_lock);
2137
2138         buffer = global_trace.array_buffer.buffer;
2139         if (buffer)
2140                 ring_buffer_record_enable(buffer);
2141
2142 #ifdef CONFIG_TRACER_MAX_TRACE
2143         buffer = global_trace.max_buffer.buffer;
2144         if (buffer)
2145                 ring_buffer_record_enable(buffer);
2146 #endif
2147
2148         arch_spin_unlock(&global_trace.max_lock);
2149
2150  out:
2151         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2152 }
2153
2154 static void tracing_start_tr(struct trace_array *tr)
2155 {
2156         struct trace_buffer *buffer;
2157         unsigned long flags;
2158
2159         if (tracing_disabled)
2160                 return;
2161
2162         /* If global, we need to also start the max tracer */
2163         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2164                 return tracing_start();
2165
2166         raw_spin_lock_irqsave(&tr->start_lock, flags);
2167
2168         if (--tr->stop_count) {
2169                 if (tr->stop_count < 0) {
2170                         /* Someone screwed up their debugging */
2171                         WARN_ON_ONCE(1);
2172                         tr->stop_count = 0;
2173                 }
2174                 goto out;
2175         }
2176
2177         buffer = tr->array_buffer.buffer;
2178         if (buffer)
2179                 ring_buffer_record_enable(buffer);
2180
2181  out:
2182         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2183 }
2184
2185 /**
2186  * tracing_stop - quick stop of the tracer
2187  *
2188  * Light weight way to stop tracing. Use in conjunction with
2189  * tracing_start.
2190  */
2191 void tracing_stop(void)
2192 {
2193         struct trace_buffer *buffer;
2194         unsigned long flags;
2195
2196         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2197         if (global_trace.stop_count++)
2198                 goto out;
2199
2200         /* Prevent the buffers from switching */
2201         arch_spin_lock(&global_trace.max_lock);
2202
2203         buffer = global_trace.array_buffer.buffer;
2204         if (buffer)
2205                 ring_buffer_record_disable(buffer);
2206
2207 #ifdef CONFIG_TRACER_MAX_TRACE
2208         buffer = global_trace.max_buffer.buffer;
2209         if (buffer)
2210                 ring_buffer_record_disable(buffer);
2211 #endif
2212
2213         arch_spin_unlock(&global_trace.max_lock);
2214
2215  out:
2216         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2217 }
2218
2219 static void tracing_stop_tr(struct trace_array *tr)
2220 {
2221         struct trace_buffer *buffer;
2222         unsigned long flags;
2223
2224         /* If global, we need to also stop the max tracer */
2225         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2226                 return tracing_stop();
2227
2228         raw_spin_lock_irqsave(&tr->start_lock, flags);
2229         if (tr->stop_count++)
2230                 goto out;
2231
2232         buffer = tr->array_buffer.buffer;
2233         if (buffer)
2234                 ring_buffer_record_disable(buffer);
2235
2236  out:
2237         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2238 }
2239
2240 static int trace_save_cmdline(struct task_struct *tsk)
2241 {
2242         unsigned pid, idx;
2243
2244         /* treat recording of idle task as a success */
2245         if (!tsk->pid)
2246                 return 1;
2247
2248         if (unlikely(tsk->pid > PID_MAX_DEFAULT))
2249                 return 0;
2250
2251         /*
2252          * It's not the end of the world if we don't get
2253          * the lock, but we also don't want to spin
2254          * nor do we want to disable interrupts,
2255          * so if we miss here, then better luck next time.
2256          */
2257         if (!arch_spin_trylock(&trace_cmdline_lock))
2258                 return 0;
2259
2260         idx = savedcmd->map_pid_to_cmdline[tsk->pid];
2261         if (idx == NO_CMDLINE_MAP) {
2262                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2263
2264                 /*
2265                  * Check whether the cmdline buffer at idx has a pid
2266                  * mapped. We are going to overwrite that entry so we
2267                  * need to clear the map_pid_to_cmdline. Otherwise we
2268                  * would read the new comm for the old pid.
2269                  */
2270                 pid = savedcmd->map_cmdline_to_pid[idx];
2271                 if (pid != NO_CMDLINE_MAP)
2272                         savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
2273
2274                 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2275                 savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
2276
2277                 savedcmd->cmdline_idx = idx;
2278         }
2279
2280         set_cmdline(idx, tsk->comm);
2281
2282         arch_spin_unlock(&trace_cmdline_lock);
2283
2284         return 1;
2285 }
2286
2287 static void __trace_find_cmdline(int pid, char comm[])
2288 {
2289         unsigned map;
2290
2291         if (!pid) {
2292                 strcpy(comm, "<idle>");
2293                 return;
2294         }
2295
2296         if (WARN_ON_ONCE(pid < 0)) {
2297                 strcpy(comm, "<XXX>");
2298                 return;
2299         }
2300
2301         if (pid > PID_MAX_DEFAULT) {
2302                 strcpy(comm, "<...>");
2303                 return;
2304         }
2305
2306         map = savedcmd->map_pid_to_cmdline[pid];
2307         if (map != NO_CMDLINE_MAP)
2308                 strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2309         else
2310                 strcpy(comm, "<...>");
2311 }
2312
2313 void trace_find_cmdline(int pid, char comm[])
2314 {
2315         preempt_disable();
2316         arch_spin_lock(&trace_cmdline_lock);
2317
2318         __trace_find_cmdline(pid, comm);
2319
2320         arch_spin_unlock(&trace_cmdline_lock);
2321         preempt_enable();
2322 }
2323
2324 int trace_find_tgid(int pid)
2325 {
2326         if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
2327                 return 0;
2328
2329         return tgid_map[pid];
2330 }
2331
2332 static int trace_save_tgid(struct task_struct *tsk)
2333 {
2334         /* treat recording of idle task as a success */
2335         if (!tsk->pid)
2336                 return 1;
2337
2338         if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT))
2339                 return 0;
2340
2341         tgid_map[tsk->pid] = tsk->tgid;
2342         return 1;
2343 }
2344
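     /*
      * Return true if task info should not be recorded right now: no
      * relevant flag was asked for, recording is temporarily disabled,
      * tracing is off, or this CPU has nothing new to save.
      */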
2345 static bool tracing_record_taskinfo_skip(int flags)
2346 {
2347         if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2348                 return true;
2349         if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
2350                 return true;
2351         if (!__this_cpu_read(trace_taskinfo_save))
2352                 return true;
2353         return false;
2354 }
2355
2356 /**
2357  * tracing_record_taskinfo - record the task info of a task
2358  *
2359  * @task:  task to record
2360  * @flags: TRACE_RECORD_CMDLINE for recording comm
2361  *         TRACE_RECORD_TGID for recording tgid
2362  */
2363 void tracing_record_taskinfo(struct task_struct *task, int flags)
2364 {
2365         bool done;
2366
2367         if (tracing_record_taskinfo_skip(flags))
2368                 return;
2369
2370         /*
2371          * Record as much task information as possible. If some fail, continue
2372          * to try to record the others.
2373          */
2374         done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2375         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2376
2377         /* If recording any information failed, retry again soon. */
2378         if (!done)
2379                 return;
2380
2381         __this_cpu_write(trace_taskinfo_save, false);
2382 }
2383
2384 /**
2385  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2386  *
2387  * @prev: previous task during sched_switch
2388  * @next: next task during sched_switch
2389  * @flags: TRACE_RECORD_CMDLINE for recording comm
2390  *         TRACE_RECORD_TGID for recording tgid
2391  */
2392 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2393                                           struct task_struct *next, int flags)
2394 {
2395         bool done;
2396
2397         if (tracing_record_taskinfo_skip(flags))
2398                 return;
2399
2400         /*
2401          * Record as much task information as possible. If some fail, continue
2402          * to try to record the others.
2403          */
2404         done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2405         done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2406         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2407         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2408
2409         /* If recording any information failed, retry again soon. */
2410         if (!done)
2411                 return;
2412
2413         __this_cpu_write(trace_taskinfo_save, false);
2414 }
2415
2416 /* Helpers to record a specific task information */
2417 void tracing_record_cmdline(struct task_struct *task)
2418 {
2419         tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2420 }
2421
2422 void tracing_record_tgid(struct task_struct *task)
2423 {
2424         tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2425 }
2426
2427 /*
2428  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2429  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2430  * simplifies those functions and keeps them in sync.
2431  */
2432 enum print_line_t trace_handle_return(struct trace_seq *s)
2433 {
2434         return trace_seq_has_overflowed(s) ?
2435                 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2436 }
2437 EXPORT_SYMBOL_GPL(trace_handle_return);
2438
2439 void
2440 tracing_generic_entry_update(struct trace_entry *entry, unsigned short type,
2441                              unsigned long flags, int pc)
2442 {
2443         struct task_struct *tsk = current;
2444
2445         entry->preempt_count            = pc & 0xff;
2446         entry->pid                      = (tsk) ? tsk->pid : 0;
2447         entry->type                     = type;
2448         entry->flags =
2449 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
2450                 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
2451 #else
2452                 TRACE_FLAG_IRQS_NOSUPPORT |
2453 #endif
2454                 ((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
2455                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
2456                 ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
2457                 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
2458                 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
2459 }
2460 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
2461
2462 struct ring_buffer_event *
2463 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2464                           int type,
2465                           unsigned long len,
2466                           unsigned long flags, int pc)
2467 {
2468         return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
2469 }
2470
2471 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2472 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2473 static int trace_buffered_event_ref;
2474
2475 /**
2476  * trace_buffered_event_enable - enable buffering events
2477  *
2478  * When events are being filtered, it is quicker to use a temporary
2479  * buffer to write the event data into if there's a likely chance
2480  * that it will not be committed. The discard of the ring buffer
2481  * is not as fast as committing, and is much slower than copying
2482  * a commit.
2483  *
2484  * When an event is to be filtered, allocate per cpu buffers to
2485  * write the event data into, and if the event is filtered and discarded
2486  * it is simply dropped, otherwise, the entire data is to be committed
2487  * in one shot.
2488  */
2489 void trace_buffered_event_enable(void)
2490 {
2491         struct ring_buffer_event *event;
2492         struct page *page;
2493         int cpu;
2494
2495         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2496
2497         if (trace_buffered_event_ref++)
2498                 return;
2499
2500         for_each_tracing_cpu(cpu) {
2501                 page = alloc_pages_node(cpu_to_node(cpu),
2502                                         GFP_KERNEL | __GFP_NORETRY, 0);
2503                 if (!page)
2504                         goto failed;
2505
2506                 event = page_address(page);
2507                 memset(event, 0, sizeof(*event));
2508
2509                 per_cpu(trace_buffered_event, cpu) = event;
2510
2511                 preempt_disable();
2512                 if (cpu == smp_processor_id() &&
2513                     this_cpu_read(trace_buffered_event) !=
2514                     per_cpu(trace_buffered_event, cpu))
2515                         WARN_ON_ONCE(1);
2516                 preempt_enable();
2517         }
2518
2519         return;
2520  failed:
2521         trace_buffered_event_disable();
2522 }
2523
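     /*
      * IPI callbacks used by trace_buffered_event_disable(). Incrementing
      * trace_buffered_event_cnt on a CPU makes writers there fall back to
      * the ring buffer instead of the soon-to-be-freed buffered event;
      * the later decrement restores the counter.
      */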
2524 static void enable_trace_buffered_event(void *data)
2525 {
2526         /* Probably not needed, but do it anyway */
2527         smp_rmb();
2528         this_cpu_dec(trace_buffered_event_cnt);
2529 }
2530
2531 static void disable_trace_buffered_event(void *data)
2532 {
2533         this_cpu_inc(trace_buffered_event_cnt);
2534 }
2535
2536 /**
2537  * trace_buffered_event_disable - disable buffering events
2538  *
2539  * When a filter is removed, it is faster to not use the buffered
2540  * events, and to commit directly into the ring buffer. Free up
2541  * the temp buffers when there are no more users. This requires
2542  * special synchronization with current events.
2543  */
2544 void trace_buffered_event_disable(void)
2545 {
2546         int cpu;
2547
2548         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2549
2550         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2551                 return;
2552
2553         if (--trace_buffered_event_ref)
2554                 return;
2555
2556         preempt_disable();
2557         /* For each CPU, set the buffer as used. */
2558         smp_call_function_many(tracing_buffer_mask,
2559                                disable_trace_buffered_event, NULL, 1);
2560         preempt_enable();
2561
2562         /* Wait for all current users to finish */
2563         synchronize_rcu();
2564
2565         for_each_tracing_cpu(cpu) {
2566                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2567                 per_cpu(trace_buffered_event, cpu) = NULL;
2568         }
2569         /*
2570          * Make sure trace_buffered_event is NULL before clearing
2571          * trace_buffered_event_cnt.
2572          */
2573         smp_wmb();
2574
2575         preempt_disable();
2576         /* Do the work on each cpu */
2577         smp_call_function_many(tracing_buffer_mask,
2578                                enable_trace_buffered_event, NULL, 1);
2579         preempt_enable();
2580 }
2581
2582 static struct trace_buffer *temp_buffer;
2583
2584 struct ring_buffer_event *
2585 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2586                           struct trace_event_file *trace_file,
2587                           int type, unsigned long len,
2588                           unsigned long flags, int pc)
2589 {
2590         struct ring_buffer_event *entry;
2591         int val;
2592
2593         *current_rb = trace_file->tr->array_buffer.buffer;
2594
2595         if (!ring_buffer_time_stamp_abs(*current_rb) && (trace_file->flags &
2596              (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2597             (entry = this_cpu_read(trace_buffered_event))) {
2598                 /* Try to use the per cpu buffer first */
2599                 val = this_cpu_inc_return(trace_buffered_event_cnt);
2600                 if (val == 1) {
2601                         trace_event_setup(entry, type, flags, pc);
2602                         entry->array[0] = len;
2603                         return entry;
2604                 }
2605                 this_cpu_dec(trace_buffered_event_cnt);
2606         }
2607
2608         entry = __trace_buffer_lock_reserve(*current_rb,
2609                                             type, len, flags, pc);
2610         /*
2611          * If tracing is off, but we have triggers enabled,
2612          * we still need to look at the event data. Use the temp_buffer
2613          * to store the trace event for the trigger to use. It's recursion
2614          * safe and will not be recorded anywhere.
2615          */
2616         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2617                 *current_rb = temp_buffer;
2618                 entry = __trace_buffer_lock_reserve(*current_rb,
2619                                                     type, len, flags, pc);
2620         }
2621         return entry;
2622 }
2623 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2624
2625 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2626 static DEFINE_MUTEX(tracepoint_printk_mutex);
2627
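     /*
      * Print a single trace event through printk() using the shared
      * tracepoint_print_iter. Soft-disabled and filtered events are
      * skipped, and tracepoint_iter_lock serializes use of the iterator.
      */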
2628 static void output_printk(struct trace_event_buffer *fbuffer)
2629 {
2630         struct trace_event_call *event_call;
2631         struct trace_event_file *file;
2632         struct trace_event *event;
2633         unsigned long flags;
2634         struct trace_iterator *iter = tracepoint_print_iter;
2635
2636         /* We should never get here if iter is NULL */
2637         if (WARN_ON_ONCE(!iter))
2638                 return;
2639
2640         event_call = fbuffer->trace_file->event_call;
2641         if (!event_call || !event_call->event.funcs ||
2642             !event_call->event.funcs->trace)
2643                 return;
2644
2645         file = fbuffer->trace_file;
2646         if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2647             (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2648              !filter_match_preds(file->filter, fbuffer->entry)))
2649                 return;
2650
2651         event = &fbuffer->trace_file->event_call->event;
2652
2653         spin_lock_irqsave(&tracepoint_iter_lock, flags);
2654         trace_seq_init(&iter->seq);
2655         iter->ent = fbuffer->entry;
2656         event_call->event.funcs->trace(iter, 0, event);
2657         trace_seq_putc(&iter->seq, 0);
2658         printk("%s", iter->seq.buffer);
2659
2660         spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2661 }
2662
2663 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2664                              void __user *buffer, size_t *lenp,
2665                              loff_t *ppos)
2666 {
2667         int save_tracepoint_printk;
2668         int ret;
2669
2670         mutex_lock(&tracepoint_printk_mutex);
2671         save_tracepoint_printk = tracepoint_printk;
2672
2673         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2674
2675         /*
2676          * This will force exiting early, as tracepoint_printk
2677          * is always zero when tracepoint_print_iter is not allocated.
2678          */
2679         if (!tracepoint_print_iter)
2680                 tracepoint_printk = 0;
2681
2682         if (save_tracepoint_printk == tracepoint_printk)
2683                 goto out;
2684
2685         if (tracepoint_printk)
2686                 static_key_enable(&tracepoint_printk_key.key);
2687         else
2688                 static_key_disable(&tracepoint_printk_key.key);
2689
2690  out:
2691         mutex_unlock(&tracepoint_printk_mutex);
2692
2693         return ret;
2694 }
2695
2696 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2697 {
2698         if (static_key_false(&tracepoint_printk_key.key))
2699                 output_printk(fbuffer);
2700
2701         event_trigger_unlock_commit_regs(fbuffer->trace_file, fbuffer->buffer,
2702                                     fbuffer->event, fbuffer->entry,
2703                                     fbuffer->flags, fbuffer->pc, fbuffer->regs);
2704 }
2705 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2706
2707 /*
2708  * Skip 3:
2709  *
2710  *   trace_buffer_unlock_commit_regs()
2711  *   trace_event_buffer_commit()
2712  *   trace_event_raw_event_xxx()
2713  */
2714 # define STACK_SKIP 3
2715
2716 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2717                                      struct trace_buffer *buffer,
2718                                      struct ring_buffer_event *event,
2719                                      unsigned long flags, int pc,
2720                                      struct pt_regs *regs)
2721 {
2722         __buffer_unlock_commit(buffer, event);
2723
2724         /*
2725          * If regs is not set, then skip the necessary functions.
2726          * Note, we can still get here via blktrace, wakeup tracer
2727          * and mmiotrace, but that's ok if they lose a function or
2728          * two. They are not that meaningful.
2729          */
2730         ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs);
2731         ftrace_trace_userstack(buffer, flags, pc);
2732 }
2733
2734 /*
2735  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2736  */
2737 void
2738 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2739                                    struct ring_buffer_event *event)
2740 {
2741         __buffer_unlock_commit(buffer, event);
2742 }
2743
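     /* Hand one ring buffer event (its data and length) to an export callback. */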
2744 static void
2745 trace_process_export(struct trace_export *export,
2746                struct ring_buffer_event *event)
2747 {
2748         struct trace_entry *entry;
2749         unsigned int size = 0;
2750
2751         entry = ring_buffer_event_data(event);
2752         size = ring_buffer_event_length(event);
2753         export->write(export, entry, size);
2754 }
2755
2756 static DEFINE_MUTEX(ftrace_export_lock);
2757
2758 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
2759
2760 static DEFINE_STATIC_KEY_FALSE(ftrace_exports_enabled);
2761
2762 static inline void ftrace_exports_enable(void)
2763 {
2764         static_branch_enable(&ftrace_exports_enabled);
2765 }
2766
2767 static inline void ftrace_exports_disable(void)
2768 {
2769         static_branch_disable(&ftrace_exports_enabled);
2770 }
2771
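     /*
      * Walk the RCU-protected list of registered exporters and pass the
      * event to each one. Preemption is disabled so the walk is safe
      * without taking ftrace_export_lock in the tracing fast path.
      */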
2772 static void ftrace_exports(struct ring_buffer_event *event)
2773 {
2774         struct trace_export *export;
2775
2776         preempt_disable_notrace();
2777
2778         export = rcu_dereference_raw_check(ftrace_exports_list);
2779         while (export) {
2780                 trace_process_export(export, event);
2781                 export = rcu_dereference_raw_check(export->next);
2782         }
2783
2784         preempt_enable_notrace();
2785 }
2786
2787 static inline void
2788 add_trace_export(struct trace_export **list, struct trace_export *export)
2789 {
2790         rcu_assign_pointer(export->next, *list);
2791         /*
2792          * We are adding export to the list, but another
2793          * CPU might be walking that list. We need to make sure
2794          * the export->next pointer is valid before another CPU sees
2795          * the export pointer added to the list.
2796          */
2797         rcu_assign_pointer(*list, export);
2798 }
2799
2800 static inline int
2801 rm_trace_export(struct trace_export **list, struct trace_export *export)
2802 {
2803         struct trace_export **p;
2804
2805         for (p = list; *p != NULL; p = &(*p)->next)
2806                 if (*p == export)
2807                         break;
2808
2809         if (*p != export)
2810                 return -1;
2811
2812         rcu_assign_pointer(*p, (*p)->next);
2813
2814         return 0;
2815 }
2816
2817 static inline void
2818 add_ftrace_export(struct trace_export **list, struct trace_export *export)
2819 {
2820         if (*list == NULL)
2821                 ftrace_exports_enable();
2822
2823         add_trace_export(list, export);
2824 }
2825
2826 static inline int
2827 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
2828 {
2829         int ret;
2830
2831         ret = rm_trace_export(list, export);
2832         if (*list == NULL)
2833                 ftrace_exports_disable();
2834
2835         return ret;
2836 }
2837
2838 int register_ftrace_export(struct trace_export *export)
2839 {
2840         if (WARN_ON_ONCE(!export->write))
2841                 return -1;
2842
2843         mutex_lock(&ftrace_export_lock);
2844
2845         add_ftrace_export(&ftrace_exports_list, export);
2846
2847         mutex_unlock(&ftrace_export_lock);
2848
2849         return 0;
2850 }
2851 EXPORT_SYMBOL_GPL(register_ftrace_export);
2852
2853 int unregister_ftrace_export(struct trace_export *export)
2854 {
2855         int ret;
2856
2857         mutex_lock(&ftrace_export_lock);
2858
2859         ret = rm_ftrace_export(&ftrace_exports_list, export);
2860
2861         mutex_unlock(&ftrace_export_lock);
2862
2863         return ret;
2864 }
2865 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
2866
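     /*
      * Record a function call (ip and parent_ip) as a TRACE_FN event,
      * handing it to any registered ftrace exporters before the commit.
      */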
2867 void
2868 trace_function(struct trace_array *tr,
2869                unsigned long ip, unsigned long parent_ip, unsigned long flags,
2870                int pc)
2871 {
2872         struct trace_event_call *call = &event_function;
2873         struct trace_buffer *buffer = tr->array_buffer.buffer;
2874         struct ring_buffer_event *event;
2875         struct ftrace_entry *entry;
2876
2877         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2878                                             flags, pc);
2879         if (!event)
2880                 return;
2881         entry   = ring_buffer_event_data(event);
2882         entry->ip                       = ip;
2883         entry->parent_ip                = parent_ip;
2884
2885         if (!call_filter_check_discard(call, entry, buffer, event)) {
2886                 if (static_branch_unlikely(&ftrace_exports_enabled))
2887                         ftrace_exports(event);
2888                 __buffer_unlock_commit(buffer, event);
2889         }
2890 }
2891
2892 #ifdef CONFIG_STACKTRACE
2893
2894 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2895 #define FTRACE_KSTACK_NESTING   4
2896
2897 #define FTRACE_KSTACK_ENTRIES   (PAGE_SIZE / FTRACE_KSTACK_NESTING)
2898
2899 struct ftrace_stack {
2900         unsigned long           calls[FTRACE_KSTACK_ENTRIES];
2901 };
2902
2903
2904 struct ftrace_stacks {
2905         struct ftrace_stack     stacks[FTRACE_KSTACK_NESTING];
2906 };
2907
2908 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
2909 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2910
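     /*
      * Save a kernel stack trace into a TRACE_STACK event. A per-CPU array
      * of stacks, one per nesting context, avoids allocating in the tracing
      * path and keeps irq/NMI nesting from corrupting an in-flight capture.
      */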
2911 static void __ftrace_trace_stack(struct trace_buffer *buffer,
2912                                  unsigned long flags,
2913                                  int skip, int pc, struct pt_regs *regs)
2914 {
2915         struct trace_event_call *call = &event_kernel_stack;
2916         struct ring_buffer_event *event;
2917         unsigned int size, nr_entries;
2918         struct ftrace_stack *fstack;
2919         struct stack_entry *entry;
2920         int stackidx;
2921
2922         /*
2923          * Add one, for this function and the call to stack_trace_save().
2924          * If regs is set, then these functions will not be in the way.
2925          */
2926 #ifndef CONFIG_UNWINDER_ORC
2927         if (!regs)
2928                 skip++;
2929 #endif
2930
2931         /*
2932          * Since events can happen in NMIs there's no safe way to
2933          * use the per cpu ftrace_stacks directly. We reserve a nesting
2934          * slot, and if an interrupt or NMI comes in it will simply use
2935          * the next slot of the per cpu ftrace_stacks array.
2936          */
2937         preempt_disable_notrace();
2938
2939         stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
2940
2941         /* This should never happen. If it does, yell once and skip */
2942         if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
2943                 goto out;
2944
2945         /*
2946          * The above __this_cpu_inc_return() is 'atomic' cpu local. An
2947          * interrupt will either see the value pre increment or post
2948          * increment. If the interrupt happens pre increment it will have
2949          * restored the counter when it returns.  We just need a barrier to
2950          * keep gcc from moving things around.
2951          */
2952         barrier();
2953
2954         fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
2955         size = ARRAY_SIZE(fstack->calls);
2956
2957         if (regs) {
2958                 nr_entries = stack_trace_save_regs(regs, fstack->calls,
2959                                                    size, skip);
2960         } else {
2961                 nr_entries = stack_trace_save(fstack->calls, size, skip);
2962         }
2963
2964         size = nr_entries * sizeof(unsigned long);
2965         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2966                                             sizeof(*entry) + size, flags, pc);
2967         if (!event)
2968                 goto out;
2969         entry = ring_buffer_event_data(event);
2970
2971         memcpy(&entry->caller, fstack->calls, size);
2972         entry->size = nr_entries;
2973
2974         if (!call_filter_check_discard(call, entry, buffer, event))
2975                 __buffer_unlock_commit(buffer, event);
2976
2977  out:
2978         /* Again, don't let gcc optimize things here */
2979         barrier();
2980         __this_cpu_dec(ftrace_stack_reserve);
2981         preempt_enable_notrace();
2982
2983 }
2984
2985 static inline void ftrace_trace_stack(struct trace_array *tr,
2986                                       struct trace_buffer *buffer,
2987                                       unsigned long flags,
2988                                       int skip, int pc, struct pt_regs *regs)
2989 {
2990         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2991                 return;
2992
2993         __ftrace_trace_stack(buffer, flags, skip, pc, regs);
2994 }
2995
2996 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2997                    int pc)
2998 {
2999         struct trace_buffer *buffer = tr->array_buffer.buffer;
3000
3001         if (rcu_is_watching()) {
3002                 __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
3003                 return;
3004         }
3005
3006         /*
3007          * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
3008          * but if the above rcu_is_watching() failed, then the NMI
3009          * triggered someplace critical, and rcu_irq_enter() should
3010          * not be called from NMI.
3011          */
3012         if (unlikely(in_nmi()))
3013                 return;
3014
3015         rcu_irq_enter_irqson();
3016         __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
3017         rcu_irq_exit_irqson();
3018 }
3019
3020 /**
3021  * trace_dump_stack - record a stack back trace in the trace buffer
3022  * @skip: Number of functions to skip (helper handlers)
3023  */
3024 void trace_dump_stack(int skip)
3025 {
3026         unsigned long flags;
3027
3028         if (tracing_disabled || tracing_selftest_running)
3029                 return;
3030
3031         local_save_flags(flags);
3032
3033 #ifndef CONFIG_UNWINDER_ORC
3034         /* Skip 1 to skip this function. */
3035         skip++;
3036 #endif
3037         __ftrace_trace_stack(global_trace.array_buffer.buffer,
3038                              flags, skip, preempt_count(), NULL);
3039 }
3040 EXPORT_SYMBOL_GPL(trace_dump_stack);
3041
3042 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3043 static DEFINE_PER_CPU(int, user_stack_count);
3044
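     /*
      * Record the current user-space stack as a TRACE_USER_STACK event.
      * Skipped in NMI context (user stack walks can fault) and guarded by
      * a per-CPU count to prevent recursion.
      */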
3045 static void
3046 ftrace_trace_userstack(struct trace_buffer *buffer, unsigned long flags, int pc)
3047 {
3048         struct trace_event_call *call = &event_user_stack;
3049         struct ring_buffer_event *event;
3050         struct userstack_entry *entry;
3051
3052         if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
3053                 return;
3054
3055         /*
3056          * NMIs can not handle page faults, even with fixups.
3057          * Saving the user stack can (and often does) fault.
3058          */
3059         if (unlikely(in_nmi()))
3060                 return;
3061
3062         /*
3063          * prevent recursion, since the user stack tracing may
3064          * trigger other kernel events.
3065          */
3066         preempt_disable();
3067         if (__this_cpu_read(user_stack_count))
3068                 goto out;
3069
3070         __this_cpu_inc(user_stack_count);
3071
3072         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3073                                             sizeof(*entry), flags, pc);
3074         if (!event)
3075                 goto out_drop_count;
3076         entry   = ring_buffer_event_data(event);
3077
3078         entry->tgid             = current->tgid;
3079         memset(&entry->caller, 0, sizeof(entry->caller));
3080
3081         stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3082         if (!call_filter_check_discard(call, entry, buffer, event))
3083                 __buffer_unlock_commit(buffer, event);
3084
3085  out_drop_count:
3086         __this_cpu_dec(user_stack_count);
3087  out:
3088         preempt_enable();
3089 }
3090 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3091 static void ftrace_trace_userstack(struct trace_buffer *buffer,
3092                                    unsigned long flags, int pc)
3093 {
3094 }
3095 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3096
3097 #endif /* CONFIG_STACKTRACE */
3098
3099 /* created for use with alloc_percpu */
3100 struct trace_buffer_struct {
3101         int nesting;
3102         char buffer[4][TRACE_BUF_SIZE];
3103 };
3104
3105 static struct trace_buffer_struct *trace_percpu_buffer;
3106
3107 /*
3108  * This allows for lockless recording.  If we're nested too deeply, then
3109  * this returns NULL.
3110  */
3111 static char *get_trace_buf(void)
3112 {
3113         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3114
3115         if (!buffer || buffer->nesting >= 4)
3116                 return NULL;
3117
3118         buffer->nesting++;
3119
3120         /* Interrupts must see nesting incremented before we use the buffer */
3121         barrier();
3122         return &buffer->buffer[buffer->nesting][0];
3123 }
3124
3125 static void put_trace_buf(void)
3126 {
3127         /* Don't let the decrement of nesting leak before this */
3128         barrier();
3129         this_cpu_dec(trace_percpu_buffer->nesting);
3130 }
3131
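     /* Allocate the per-CPU scratch buffers handed out by get_trace_buf(). */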
3132 static int alloc_percpu_trace_buffer(void)
3133 {
3134         struct trace_buffer_struct *buffers;
3135
3136         buffers = alloc_percpu(struct trace_buffer_struct);
3137         if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3138                 return -ENOMEM;
3139
3140         trace_percpu_buffer = buffers;
3141         return 0;
3142 }
3143
3144 static int buffers_allocated;
3145
3146 void trace_printk_init_buffers(void)
3147 {
3148         if (buffers_allocated)
3149                 return;
3150
3151         if (alloc_percpu_trace_buffer())
3152                 return;
3153
3154         /* trace_printk() is for debug use only. Don't use it in production. */
3155
3156         pr_warn("\n");
3157         pr_warn("**********************************************************\n");
3158         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3159         pr_warn("**                                                      **\n");
3160         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3161         pr_warn("**                                                      **\n");
3162         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3163         pr_warn("** unsafe for production use.                           **\n");
3164         pr_warn("**                                                      **\n");
3165         pr_warn("** If you see this message and you are not debugging    **\n");
3166         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3167         pr_warn("**                                                      **\n");
3168         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3169         pr_warn("**********************************************************\n");
3170
3171         /* Expand the buffers to set size */
3172         tracing_update_buffers();
3173
3174         buffers_allocated = 1;
3175
3176         /*
3177          * trace_printk_init_buffers() can be called by modules.
3178          * If that happens, then we need to start cmdline recording
3179          * directly here. If the global_trace.array_buffer.buffer is already
3180          * allocated here, then this was called by module code.
3181          */
3182         if (global_trace.array_buffer.buffer)
3183                 tracing_start_cmdline_record();
3184 }
3185 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3186
3187 void trace_printk_start_comm(void)
3188 {
3189         /* Start tracing comms if trace printk is set */
3190         if (!buffers_allocated)
3191                 return;
3192         tracing_start_cmdline_record();
3193 }
3194
3195 static void trace_printk_start_stop_comm(int enabled)
3196 {
3197         if (!buffers_allocated)
3198                 return;
3199
3200         if (enabled)
3201                 tracing_start_cmdline_record();
3202         else
3203                 tracing_stop_cmdline_record();
3204 }
3205
3206 /**
3207  * trace_vbprintk - write binary msg to tracing buffer
3208  * @ip:    The address of the caller
3209  * @fmt:   The string format to write to the buffer
3210  * @args:  Arguments for @fmt
3211  */
3212 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3213 {
3214         struct trace_event_call *call = &event_bprint;
3215         struct ring_buffer_event *event;
3216         struct trace_buffer *buffer;
3217         struct trace_array *tr = &global_trace;
3218         struct bprint_entry *entry;
3219         unsigned long flags;
3220         char *tbuffer;
3221         int len = 0, size, pc;
3222
3223         if (unlikely(tracing_selftest_running || tracing_disabled))
3224                 return 0;
3225
3226         /* Don't pollute graph traces with trace_vprintk internals */
3227         pause_graph_tracing();
3228
3229         pc = preempt_count();
3230         preempt_disable_notrace();
3231
3232         tbuffer = get_trace_buf();
3233         if (!tbuffer) {
3234                 len = 0;
3235                 goto out_nobuffer;
3236         }
3237
3238         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3239
3240         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3241                 goto out_put;
3242
3243         local_save_flags(flags);
3244         size = sizeof(*entry) + sizeof(u32) * len;
3245         buffer = tr->array_buffer.buffer;
3246         ring_buffer_nest_start(buffer);
3247         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3248                                             flags, pc);
3249         if (!event)
3250                 goto out;
3251         entry = ring_buffer_event_data(event);
3252         entry->ip                       = ip;
3253         entry->fmt                      = fmt;
3254
3255         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3256         if (!call_filter_check_discard(call, entry, buffer, event)) {
3257                 __buffer_unlock_commit(buffer, event);
3258                 ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
3259         }
3260
3261 out:
3262         ring_buffer_nest_end(buffer);
3263 out_put:
3264         put_trace_buf();
3265
3266 out_nobuffer:
3267         preempt_enable_notrace();
3268         unpause_graph_tracing();
3269
3270         return len;
3271 }
3272 EXPORT_SYMBOL_GPL(trace_vbprintk);
3273
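/*
 * __trace_array_vprintk - format a string and commit it to @buffer
 *
 * Formats @fmt/@args with vscnprintf() into a per-CPU scratch buffer and
 * commits the result as a TRACE_PRINT entry. Returns the number of
 * characters written, or 0 if tracing is disabled.
 */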
3274 __printf(3, 0)
3275 static int
3276 __trace_array_vprintk(struct trace_buffer *buffer,
3277                       unsigned long ip, const char *fmt, va_list args)
3278 {
3279         struct trace_event_call *call = &event_print;
3280         struct ring_buffer_event *event;
3281         int len = 0, size, pc;
3282         struct print_entry *entry;
3283         unsigned long flags;
3284         char *tbuffer;
3285
3286         if (tracing_disabled || tracing_selftest_running)
3287                 return 0;
3288
3289         /* Don't pollute graph traces with trace_vprintk internals */
3290         pause_graph_tracing();
3291
3292         pc = preempt_count();
3293         preempt_disable_notrace();
3294
3295
3296         tbuffer = get_trace_buf();
3297         if (!tbuffer) {
3298                 len = 0;
3299                 goto out_nobuffer;
3300         }
3301
3302         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3303
3304         local_save_flags(flags);
3305         size = sizeof(*entry) + len + 1;
3306         ring_buffer_nest_start(buffer);
3307         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3308                                             flags, pc);
3309         if (!event)
3310                 goto out;
3311         entry = ring_buffer_event_data(event);
3312         entry->ip = ip;
3313
3314         memcpy(&entry->buf, tbuffer, len + 1);
3315         if (!call_filter_check_discard(call, entry, buffer, event)) {
3316                 __buffer_unlock_commit(buffer, event);
3317                 ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
3318         }
3319
3320 out:
3321         ring_buffer_nest_end(buffer);
3322         put_trace_buf();
3323
3324 out_nobuffer:
3325         preempt_enable_notrace();
3326         unpause_graph_tracing();
3327
3328         return len;
3329 }
3330
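/**
 * trace_array_vprintk - write a formatted message into a trace instance
 * @tr:   The trace array (instance) to write to
 * @ip:   The address of the caller
 * @fmt:  The format string
 * @args: Arguments for @fmt
 *
 * Like trace_vprintk(), but writes into @tr's buffer instead of the
 * top-level (global) buffer.
 */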
3331 __printf(3, 0)
3332 int trace_array_vprintk(struct trace_array *tr,
3333                         unsigned long ip, const char *fmt, va_list args)
3334 {
3335         return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3336 }
3337
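/**
 * trace_array_printk - printk() style write into a trace instance buffer
 * @tr:  The trace array (instance) to write to
 * @ip:  The address of the caller
 * @fmt: The format string, followed by its arguments
 *
 * Returns the number of characters written, 0 if the top-level "printk"
 * trace option is cleared, or -ENOENT if @tr is NULL.
 *
 * Illustrative sketch (assumes the caller already owns a trace_array
 * instance, e.g. one obtained via trace_array_get_by_name()):
 *
 *	trace_array_printk(tr, _THIS_IP_, "state=%d\n", state);
 */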
3338 __printf(3, 0)
3339 int trace_array_printk(struct trace_array *tr,
3340                        unsigned long ip, const char *fmt, ...)
3341 {
3342         int ret;
3343         va_list ap;
3344
3345         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3346                 return 0;
3347
3348         if (!tr)
3349                 return -ENOENT;
3350
3351         va_start(ap, fmt);
3352         ret = trace_array_vprintk(tr, ip, fmt, ap);
3353         va_end(ap);
3354         return ret;
3355 }
3356 EXPORT_SYMBOL_GPL(trace_array_printk);
3357
3358 __printf(3, 4)
3359 int trace_array_printk_buf(struct trace_buffer *buffer,
3360                            unsigned long ip, const char *fmt, ...)
3361 {
3362         int ret;
3363         va_list ap;
3364
3365         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3366                 return 0;
3367
3368         va_start(ap, fmt);
3369         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3370         va_end(ap);
3371         return ret;
3372 }
3373
3374 __printf(2, 0)
3375 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3376 {
3377         return trace_array_vprintk(&global_trace, ip, fmt, args);
3378 }
3379 EXPORT_SYMBOL_GPL(trace_vprintk);
3380
3381 static void trace_iterator_increment(struct trace_iterator *iter)
3382 {
3383         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3384
3385         iter->idx++;
3386         if (buf_iter)
3387                 ring_buffer_iter_advance(buf_iter);
3388 }
3389
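/*
 * Peek at the next entry for @cpu without consuming it. Sets *ts to the
 * entry's timestamp, optionally reports dropped events via @lost_events,
 * and records the entry's size in iter->ent_size.
 */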
3390 static struct trace_entry *
3391 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3392                 unsigned long *lost_events)
3393 {
3394         struct ring_buffer_event *event;
3395         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3396
3397         if (buf_iter) {
3398                 event = ring_buffer_iter_peek(buf_iter, ts);
3399                 if (lost_events)
3400                         *lost_events = ring_buffer_iter_dropped(buf_iter) ?
3401                                 (unsigned long)-1 : 0;
3402         } else {
3403                 event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3404                                          lost_events);
3405         }
3406
3407         if (event) {
3408                 iter->ent_size = ring_buffer_event_length(event);
3409                 return ring_buffer_event_data(event);
3410         }
3411         iter->ent_size = 0;
3412         return NULL;
3413 }
3414
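/*
 * Scan the per-CPU buffers (or just @cpu_file for a per_cpu trace file)
 * and return the pending entry with the earliest timestamp, along with
 * its CPU, timestamp and lost-event count.
 */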
3415 static struct trace_entry *
3416 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3417                   unsigned long *missing_events, u64 *ent_ts)
3418 {
3419         struct trace_buffer *buffer = iter->array_buffer->buffer;
3420         struct trace_entry *ent, *next = NULL;
3421         unsigned long lost_events = 0, next_lost = 0;
3422         int cpu_file = iter->cpu_file;
3423         u64 next_ts = 0, ts;
3424         int next_cpu = -1;
3425         int next_size = 0;
3426         int cpu;
3427
3428         /*
3429          * If we are in a per_cpu trace file, don't bother iterating over
3430          * all CPUs; peek at that CPU directly.
3431          */
3432         if (cpu_file > RING_BUFFER_ALL_CPUS) {
3433                 if (ring_buffer_empty_cpu(buffer, cpu_file))
3434                         return NULL;
3435                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3436                 if (ent_cpu)
3437                         *ent_cpu = cpu_file;
3438
3439                 return ent;
3440         }
3441
3442         for_each_tracing_cpu(cpu) {
3443
3444                 if (ring_buffer_empty_cpu(buffer, cpu))
3445                         continue;
3446
3447                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3448
3449                 /*
3450                  * Pick the entry with the smallest timestamp:
3451                  */
3452                 if (ent && (!next || ts < next_ts)) {
3453                         next = ent;
3454                         next_cpu = cpu;
3455                         next_ts = ts;
3456                         next_lost = lost_events;
3457                         next_size = iter->ent_size;
3458                 }
3459         }
3460
3461         iter->ent_size = next_size;
3462
3463         if (ent_cpu)
3464                 *ent_cpu = next_cpu;
3465
3466         if (ent_ts)
3467                 *ent_ts = next_ts;
3468
3469         if (missing_events)
3470                 *missing_events = next_lost;
3471
3472         return next;
3473 }
3474
3475 #define STATIC_TEMP_BUF_SIZE    128
3476 static char static_temp_buf[STATIC_TEMP_BUF_SIZE];
3477
3478 /* Find the next real entry, without updating the iterator itself */
3479 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3480                                           int *ent_cpu, u64 *ent_ts)
3481 {
3482         /* __find_next_entry will reset ent_size */
3483         int ent_size = iter->ent_size;
3484         struct trace_entry *entry;
3485
3486         /*
3487          * If called from ftrace_dump(), then the iter->temp buffer
3488          * will be the static_temp_buf and not created from kmalloc.
3489          * If the entry size is greater than the buffer, we cannot
3490          * save it. Just return NULL in that case. This is only
3491          * used to add markers when two consecutive events' time
3492          * stamps have a large delta. See trace_print_lat_context().
3493          */
3494         if (iter->temp == static_temp_buf &&
3495             STATIC_TEMP_BUF_SIZE < ent_size)
3496                 return NULL;
3497
3498         /*
3499          * __find_next_entry() may call peek_next_entry(), which may
3500          * call ring_buffer_peek(), which can make the contents of iter->ent
3501          * undefined. Need to copy iter->ent now.
3502          */
3503         if (iter->ent && iter->ent != iter->temp) {
3504                 if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3505                     !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3506                         kfree(iter->temp);
3507                         iter->temp = kmalloc(iter->ent_size, GFP_KERNEL);
3508                         if (!iter->temp)
3509                                 return NULL;
3510                 }
3511                 memcpy(iter->temp, iter->ent, iter->ent_size);
3512                 iter->temp_size = iter->ent_size;
3513                 iter->ent = iter->temp;
3514         }
3515         entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3516         /* Put back the original ent_size */
3517         iter->ent_size = ent_size;
3518
3519         return entry;
3520 }
3521
3522 /* Find the next real entry, and increment the iterator to the next entry */
3523 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3524 {
3525         iter->ent = __find_next_entry(iter, &iter->cpu,
3526                                       &iter->lost_events, &iter->ts);
3527
3528         if (iter->ent)
3529                 trace_iterator_increment(iter);
3530
3531         return iter->ent ? iter : NULL;
3532 }
3533
3534 static void trace_consume(struct trace_iterator *iter)
3535 {
3536         ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3537                             &iter->lost_events);
3538 }
3539
3540 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3541 {
3542         struct trace_iterator *iter = m->private;
3543         int i = (int)*pos;
3544         void *ent;
3545
3546         WARN_ON_ONCE(iter->leftover);
3547
3548         (*pos)++;
3549
3550         /* can't go backwards */
3551         if (iter->idx > i)
3552                 return NULL;
3553
3554         if (iter->idx < 0)
3555                 ent = trace_find_next_entry_inc(iter);
3556         else
3557                 ent = iter;
3558
3559         while (ent && iter->idx < i)
3560                 ent = trace_find_next_entry_inc(iter);
3561
3562         iter->pos = *pos;
3563
3564         return ent;
3565 }
3566
3567 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3568 {
3569         struct ring_buffer_event *event;
3570         struct ring_buffer_iter *buf_iter;
3571         unsigned long entries = 0;
3572         u64 ts;
3573
3574         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
3575
3576         buf_iter = trace_buffer_iter(iter, cpu);
3577         if (!buf_iter)
3578                 return;
3579
3580         ring_buffer_iter_reset(buf_iter);
3581
3582         /*
3583          * With the max latency tracers, it can happen that a reset
3584          * never took place on a CPU. This is evident from the
3585          * timestamp being before the start of the buffer.
3586          */
3587         while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
3588                 if (ts >= iter->array_buffer->time_start)
3589                         break;
3590                 entries++;
3591                 ring_buffer_iter_advance(buf_iter);
3592         }
3593
3594         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
3595 }
3596
3597 /*
3598  * The current tracer is copied to avoid using a global lock
3599  * all around.
3600  */
3601 static void *s_start(struct seq_file *m, loff_t *pos)
3602 {
3603         struct trace_iterator *iter = m->private;
3604         struct trace_array *tr = iter->tr;
3605         int cpu_file = iter->cpu_file;
3606         void *p = NULL;
3607         loff_t l = 0;
3608         int cpu;
3609
3610         /*
3611          * Copy the tracer to avoid using a global lock all around.
3612          * iter->trace is a copy of current_trace, so the name pointer
3613          * may be compared instead of doing a strcmp(), as iter->trace->name
3614          * will point to the same string as current_trace->name.
3615          */
3616         mutex_lock(&trace_types_lock);
3617         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3618                 *iter->trace = *tr->current_trace;
3619         mutex_unlock(&trace_types_lock);
3620
3621 #ifdef CONFIG_TRACER_MAX_TRACE
3622         if (iter->snapshot && iter->trace->use_max_tr)
3623                 return ERR_PTR(-EBUSY);
3624 #endif
3625
3626         if (!iter->snapshot)
3627                 atomic_inc(&trace_record_taskinfo_disabled);
3628
3629         if (*pos != iter->pos) {
3630                 iter->ent = NULL;
3631                 iter->cpu = 0;
3632                 iter->idx = -1;
3633
3634                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
3635                         for_each_tracing_cpu(cpu)
3636                                 tracing_iter_reset(iter, cpu);
3637                 } else
3638                         tracing_iter_reset(iter, cpu_file);
3639
3640                 iter->leftover = 0;
3641                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3642                         ;
3643
3644         } else {
3645                 /*
3646                  * If we overflowed the seq_file before, then we want
3647                  * to just reuse the trace_seq buffer again.
3648                  */
3649                 if (iter->leftover)
3650                         p = iter;
3651                 else {
3652                         l = *pos - 1;
3653                         p = s_next(m, p, &l);
3654                 }
3655         }
3656
3657         trace_event_read_lock();
3658         trace_access_lock(cpu_file);
3659         return p;
3660 }
3661
3662 static void s_stop(struct seq_file *m, void *p)
3663 {
3664         struct trace_iterator *iter = m->private;
3665
3666 #ifdef CONFIG_TRACER_MAX_TRACE
3667         if (iter->snapshot && iter->trace->use_max_tr)
3668                 return;
3669 #endif
3670
3671         if (!iter->snapshot)
3672                 atomic_dec(&trace_record_taskinfo_disabled);
3673
3674         trace_access_unlock(iter->cpu_file);
3675         trace_event_read_unlock();
3676 }
3677
3678 static void
3679 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
3680                       unsigned long *entries, int cpu)
3681 {
3682         unsigned long count;
3683
3684         count = ring_buffer_entries_cpu(buf->buffer, cpu);
3685         /*
3686          * If this buffer has skipped entries, then we hold all
3687          * entries for the trace and we need to ignore the
3688          * ones before the time stamp.
3689          */
3690         if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3691                 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3692                 /* total is the same as the entries */
3693                 *total = count;
3694         } else
3695                 *total = count +
3696                         ring_buffer_overrun_cpu(buf->buffer, cpu);
3697         *entries = count;
3698 }
3699
3700 static void
3701 get_total_entries(struct array_buffer *buf,
3702                   unsigned long *total, unsigned long *entries)
3703 {
3704         unsigned long t, e;
3705         int cpu;
3706
3707         *total = 0;
3708         *entries = 0;
3709
3710         for_each_tracing_cpu(cpu) {
3711                 get_total_entries_cpu(buf, &t, &e, cpu);
3712                 *total += t;
3713                 *entries += e;
3714         }
3715 }
3716
3717 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
3718 {
3719         unsigned long total, entries;
3720
3721         if (!tr)
3722                 tr = &global_trace;
3723
3724         get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
3725
3726         return entries;
3727 }
3728
3729 unsigned long trace_total_entries(struct trace_array *tr)
3730 {
3731         unsigned long total, entries;
3732
3733         if (!tr)
3734                 tr = &global_trace;
3735
3736         get_total_entries(&tr->array_buffer, &total, &entries);
3737
3738         return entries;
3739 }
3740
3741 static void print_lat_help_header(struct seq_file *m)
3742 {
3743         seq_puts(m, "#                  _------=> CPU#            \n"
3744                     "#                 / _-----=> irqs-off        \n"
3745                     "#                | / _----=> need-resched    \n"
3746                     "#                || / _---=> hardirq/softirq \n"
3747                     "#                ||| / _--=> preempt-depth   \n"
3748                     "#                |||| /     delay            \n"
3749                     "#  cmd     pid   ||||| time  |   caller      \n"
3750                     "#     \\   /      |||||  \\    |   /         \n");
3751 }
3752
3753 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
3754 {
3755         unsigned long total;
3756         unsigned long entries;
3757
3758         get_total_entries(buf, &total, &entries);
3759         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
3760                    entries, total, num_online_cpus());
3761         seq_puts(m, "#\n");
3762 }
3763
3764 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
3765                                    unsigned int flags)
3766 {
3767         bool tgid = flags & TRACE_ITER_RECORD_TGID;
3768
3769         print_event_info(buf, m);
3770
3771         seq_printf(m, "#           TASK-PID   %s  CPU#   TIMESTAMP  FUNCTION\n", tgid ? "TGID     " : "");
3772         seq_printf(m, "#              | |     %s    |       |         |\n",      tgid ? "  |      " : "");
3773 }
3774
3775 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
3776                                        unsigned int flags)
3777 {
3778         bool tgid = flags & TRACE_ITER_RECORD_TGID;
3779         const char *space = "          ";
3780         int prec = tgid ? 10 : 2;
3781
3782         print_event_info(buf, m);
3783
3784         seq_printf(m, "#                          %.*s  _-----=> irqs-off\n", prec, space);
3785         seq_printf(m, "#                          %.*s / _----=> need-resched\n", prec, space);
3786         seq_printf(m, "#                          %.*s| / _---=> hardirq/softirq\n", prec, space);
3787         seq_printf(m, "#                          %.*s|| / _--=> preempt-depth\n", prec, space);
3788         seq_printf(m, "#                          %.*s||| /     delay\n", prec, space);
3789         seq_printf(m, "#           TASK-PID %.*sCPU#  ||||    TIMESTAMP  FUNCTION\n", prec, "   TGID   ");
3790         seq_printf(m, "#              | |   %.*s  |   ||||       |         |\n", prec, "     |    ");
3791 }
3792
3793 void
3794 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3795 {
3796         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3797         struct array_buffer *buf = iter->array_buffer;
3798         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3799         struct tracer *type = iter->trace;
3800         unsigned long entries;
3801         unsigned long total;
3802         const char *name = "preemption";
3803
3804         name = type->name;
3805
3806         get_total_entries(buf, &total, &entries);
3807
3808         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3809                    name, UTS_RELEASE);
3810         seq_puts(m, "# -----------------------------------"
3811                  "---------------------------------\n");
3812         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3813                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3814                    nsecs_to_usecs(data->saved_latency),
3815                    entries,
3816                    total,
3817                    buf->cpu,
3818 #if defined(CONFIG_PREEMPT_NONE)
3819                    "server",
3820 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3821                    "desktop",
3822 #elif defined(CONFIG_PREEMPT)
3823                    "preempt",
3824 #elif defined(CONFIG_PREEMPT_RT)
3825                    "preempt_rt",
3826 #else
3827                    "unknown",
3828 #endif
3829                    /* These are reserved for later use */
3830                    0, 0, 0, 0);
3831 #ifdef CONFIG_SMP
3832         seq_printf(m, " #P:%d)\n", num_online_cpus());
3833 #else
3834         seq_puts(m, ")\n");
3835 #endif
3836         seq_puts(m, "#    -----------------\n");
3837         seq_printf(m, "#    | task: %.16s-%d "
3838                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3839                    data->comm, data->pid,
3840                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3841                    data->policy, data->rt_priority);
3842         seq_puts(m, "#    -----------------\n");
3843
3844         if (data->critical_start) {
3845                 seq_puts(m, "#  => started at: ");
3846                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3847                 trace_print_seq(m, &iter->seq);
3848                 seq_puts(m, "\n#  => ended at:   ");
3849                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3850                 trace_print_seq(m, &iter->seq);
3851                 seq_puts(m, "\n#\n");
3852         }
3853
3854         seq_puts(m, "#\n");
3855 }
3856
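/*
 * When annotation is enabled (i.e. some buffers overran), note the first
 * time an entry from a given CPU buffer appears in the output, so readers
 * can tell where that CPU's data actually starts.
 */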
3857 static void test_cpu_buff_start(struct trace_iterator *iter)
3858 {
3859         struct trace_seq *s = &iter->seq;
3860         struct trace_array *tr = iter->tr;
3861
3862         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3863                 return;
3864
3865         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3866                 return;
3867
3868         if (cpumask_available(iter->started) &&
3869             cpumask_test_cpu(iter->cpu, iter->started))
3870                 return;
3871
3872         if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
3873                 return;
3874
3875         if (cpumask_available(iter->started))
3876                 cpumask_set_cpu(iter->cpu, iter->started);
3877
3878         /* Don't print started cpu buffer for the first entry of the trace */
3879         if (iter->idx > 1)
3880                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3881                                 iter->cpu);
3882 }
3883
3884 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3885 {
3886         struct trace_array *tr = iter->tr;
3887         struct trace_seq *s = &iter->seq;
3888         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3889         struct trace_entry *entry;
3890         struct trace_event *event;
3891
3892         entry = iter->ent;
3893
3894         test_cpu_buff_start(iter);
3895
3896         event = ftrace_find_event(entry->type);
3897
3898         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3899                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3900                         trace_print_lat_context(iter);
3901                 else
3902                         trace_print_context(iter);
3903         }
3904
3905         if (trace_seq_has_overflowed(s))
3906                 return TRACE_TYPE_PARTIAL_LINE;
3907
3908         if (event)
3909                 return event->funcs->trace(iter, sym_flags, event);
3910
3911         trace_seq_printf(s, "Unknown type %d\n", entry->type);
3912
3913         return trace_handle_return(s);
3914 }
3915
3916 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3917 {
3918         struct trace_array *tr = iter->tr;
3919         struct trace_seq *s = &iter->seq;
3920         struct trace_entry *entry;
3921         struct trace_event *event;
3922
3923         entry = iter->ent;
3924
3925         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3926                 trace_seq_printf(s, "%d %d %llu ",
3927                                  entry->pid, iter->cpu, iter->ts);
3928
3929         if (trace_seq_has_overflowed(s))
3930                 return TRACE_TYPE_PARTIAL_LINE;
3931
3932         event = ftrace_find_event(entry->type);
3933         if (event)
3934                 return event->funcs->raw(iter, 0, event);
3935
3936         trace_seq_printf(s, "%d ?\n", entry->type);
3937
3938         return trace_handle_return(s);
3939 }
3940
3941 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3942 {
3943         struct trace_array *tr = iter->tr;
3944         struct trace_seq *s = &iter->seq;
3945         unsigned char newline = '\n';
3946         struct trace_entry *entry;
3947         struct trace_event *event;
3948
3949         entry = iter->ent;
3950
3951         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3952                 SEQ_PUT_HEX_FIELD(s, entry->pid);
3953                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
3954                 SEQ_PUT_HEX_FIELD(s, iter->ts);
3955                 if (trace_seq_has_overflowed(s))
3956                         return TRACE_TYPE_PARTIAL_LINE;
3957         }
3958
3959         event = ftrace_find_event(entry->type);
3960         if (event) {
3961                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
3962                 if (ret != TRACE_TYPE_HANDLED)
3963                         return ret;
3964         }
3965
3966         SEQ_PUT_FIELD(s, newline);
3967
3968         return trace_handle_return(s);
3969 }
3970
3971 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3972 {
3973         struct trace_array *tr = iter->tr;
3974         struct trace_seq *s = &iter->seq;
3975         struct trace_entry *entry;
3976         struct trace_event *event;
3977
3978         entry = iter->ent;
3979
3980         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3981                 SEQ_PUT_FIELD(s, entry->pid);
3982                 SEQ_PUT_FIELD(s, iter->cpu);
3983                 SEQ_PUT_FIELD(s, iter->ts);
3984                 if (trace_seq_has_overflowed(s))
3985                         return TRACE_TYPE_PARTIAL_LINE;
3986         }
3987
3988         event = ftrace_find_event(entry->type);
3989         return event ? event->funcs->binary(iter, 0, event) :
3990                 TRACE_TYPE_HANDLED;
3991 }
3992
3993 int trace_empty(struct trace_iterator *iter)
3994 {
3995         struct ring_buffer_iter *buf_iter;
3996         int cpu;
3997
3998         /* If we are looking at one CPU buffer, only check that one */
3999         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4000                 cpu = iter->cpu_file;
4001                 buf_iter = trace_buffer_iter(iter, cpu);
4002                 if (buf_iter) {
4003                         if (!ring_buffer_iter_empty(buf_iter))
4004                                 return 0;
4005                 } else {
4006                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4007                                 return 0;
4008                 }
4009                 return 1;
4010         }
4011
4012         for_each_tracing_cpu(cpu) {
4013                 buf_iter = trace_buffer_iter(iter, cpu);
4014                 if (buf_iter) {
4015                         if (!ring_buffer_iter_empty(buf_iter))
4016                                 return 0;
4017                 } else {
4018                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4019                                 return 0;
4020                 }
4021         }
4022
4023         return 1;
4024 }
4025
4026 /*  Called with trace_event_read_lock() held. */
4027 enum print_line_t print_trace_line(struct trace_iterator *iter)
4028 {
4029         struct trace_array *tr = iter->tr;
4030         unsigned long trace_flags = tr->trace_flags;
4031         enum print_line_t ret;
4032
4033         if (iter->lost_events) {
4034                 if (iter->lost_events == (unsigned long)-1)
4035                         trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4036                                          iter->cpu);
4037                 else
4038                         trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4039                                          iter->cpu, iter->lost_events);
4040                 if (trace_seq_has_overflowed(&iter->seq))
4041                         return TRACE_TYPE_PARTIAL_LINE;
4042         }
4043
4044         if (iter->trace && iter->trace->print_line) {
4045                 ret = iter->trace->print_line(iter);
4046                 if (ret != TRACE_TYPE_UNHANDLED)
4047                         return ret;
4048         }
4049
4050         if (iter->ent->type == TRACE_BPUTS &&
4051                         trace_flags & TRACE_ITER_PRINTK &&
4052                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4053                 return trace_print_bputs_msg_only(iter);
4054
4055         if (iter->ent->type == TRACE_BPRINT &&
4056                         trace_flags & TRACE_ITER_PRINTK &&
4057                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4058                 return trace_print_bprintk_msg_only(iter);
4059
4060         if (iter->ent->type == TRACE_PRINT &&
4061                         trace_flags & TRACE_ITER_PRINTK &&
4062                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4063                 return trace_print_printk_msg_only(iter);
4064
4065         if (trace_flags & TRACE_ITER_BIN)
4066                 return print_bin_fmt(iter);
4067
4068         if (trace_flags & TRACE_ITER_HEX)
4069                 return print_hex_fmt(iter);
4070
4071         if (trace_flags & TRACE_ITER_RAW)
4072                 return print_raw_fmt(iter);
4073
4074         return print_trace_fmt(iter);
4075 }
4076
4077 void trace_latency_header(struct seq_file *m)
4078 {
4079         struct trace_iterator *iter = m->private;
4080         struct trace_array *tr = iter->tr;
4081
4082         /* print nothing if the buffers are empty */
4083         if (trace_empty(iter))
4084                 return;
4085
4086         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4087                 print_trace_header(m, iter);
4088
4089         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4090                 print_lat_help_header(m);
4091 }
4092
4093 void trace_default_header(struct seq_file *m)
4094 {
4095         struct trace_iterator *iter = m->private;
4096         struct trace_array *tr = iter->tr;
4097         unsigned long trace_flags = tr->trace_flags;
4098
4099         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4100                 return;
4101
4102         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4103                 /* print nothing if the buffers are empty */
4104                 if (trace_empty(iter))
4105                         return;
4106                 print_trace_header(m, iter);
4107                 if (!(trace_flags & TRACE_ITER_VERBOSE))
4108                         print_lat_help_header(m);
4109         } else {
4110                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4111                         if (trace_flags & TRACE_ITER_IRQ_INFO)
4112                                 print_func_help_header_irq(iter->array_buffer,
4113                                                            m, trace_flags);
4114                         else
4115                                 print_func_help_header(iter->array_buffer, m,
4116                                                        trace_flags);
4117                 }
4118         }
4119 }
4120
4121 static void test_ftrace_alive(struct seq_file *m)
4122 {
4123         if (!ftrace_is_dead())
4124                 return;
4125         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4126                     "#          MAY BE MISSING FUNCTION EVENTS\n");
4127 }
4128
4129 #ifdef CONFIG_TRACER_MAX_TRACE
4130 static void show_snapshot_main_help(struct seq_file *m)
4131 {
4132         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4133                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4134                     "#                      Takes a snapshot of the main buffer.\n"
4135                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4136                     "#                      (Doesn't have to be '2'; works with any number that\n"
4137                     "#                       is not a '0' or '1')\n");
4138 }
4139
4140 static void show_snapshot_percpu_help(struct seq_file *m)
4141 {
4142         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4143 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4144         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4145                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
4146 #else
4147         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4148                     "#                     Must use main snapshot file to allocate.\n");
4149 #endif
4150         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4151                     "#                      (Doesn't have to be '2'; works with any number that\n"
4152                     "#                       is not a '0' or '1')\n");
4153 }
4154
4155 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4156 {
4157         if (iter->tr->allocated_snapshot)
4158                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4159         else
4160                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4161
4162         seq_puts(m, "# Snapshot commands:\n");
4163         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4164                 show_snapshot_main_help(m);
4165         else
4166                 show_snapshot_percpu_help(m);
4167 }
4168 #else
4169 /* Should never be called */
4170 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4171 #endif
4172
4173 static int s_show(struct seq_file *m, void *v)
4174 {
4175         struct trace_iterator *iter = v;
4176         int ret;
4177
4178         if (iter->ent == NULL) {
4179                 if (iter->tr) {
4180                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
4181                         seq_puts(m, "#\n");
4182                         test_ftrace_alive(m);
4183                 }
4184                 if (iter->snapshot && trace_empty(iter))
4185                         print_snapshot_help(m, iter);
4186                 else if (iter->trace && iter->trace->print_header)
4187                         iter->trace->print_header(m);
4188                 else
4189                         trace_default_header(m);
4190
4191         } else if (iter->leftover) {
4192                 /*
4193                  * If we filled the seq_file buffer earlier, we
4194                  * want to just show it now.
4195                  */
4196                 ret = trace_print_seq(m, &iter->seq);
4197
4198                 /* ret should this time be zero, but you never know */
4199                 iter->leftover = ret;
4200
4201         } else {
4202                 print_trace_line(iter);
4203                 ret = trace_print_seq(m, &iter->seq);
4204                 /*
4205                  * If we overflow the seq_file buffer, then it will
4206                  * ask us for this data again at start up.
4207                  * Use that instead.
4208                  *  ret is 0 if seq_file write succeeded.
4209                  *        -1 otherwise.
4210                  */
4211                 iter->leftover = ret;
4212         }
4213
4214         return 0;
4215 }
4216
4217 /*
4218  * Should be used after trace_array_get(); trace_types_lock
4219  * ensures that i_cdev was already initialized.
4220  */
4221 static inline int tracing_get_cpu(struct inode *inode)
4222 {
4223         if (inode->i_cdev) /* See trace_create_cpu_file() */
4224                 return (long)inode->i_cdev - 1;
4225         return RING_BUFFER_ALL_CPUS;
4226 }
4227
4228 static const struct seq_operations tracer_seq_ops = {
4229         .start          = s_start,
4230         .next           = s_next,
4231         .stop           = s_stop,
4232         .show           = s_show,
4233 };
4234
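/*
 * Set up the trace_iterator used to read the "trace" file: allocate the
 * per-CPU ring buffer iterators, take a copy of the current tracer, and,
 * when pause-on-trace is set and this is not the snapshot file, stop
 * tracing while the file is open.
 */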
4235 static struct trace_iterator *
4236 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4237 {
4238         struct trace_array *tr = inode->i_private;
4239         struct trace_iterator *iter;
4240         int cpu;
4241
4242         if (tracing_disabled)
4243                 return ERR_PTR(-ENODEV);
4244
4245         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4246         if (!iter)
4247                 return ERR_PTR(-ENOMEM);
4248
4249         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4250                                     GFP_KERNEL);
4251         if (!iter->buffer_iter)
4252                 goto release;
4253
4254         /*
4255          * trace_find_next_entry() may need to save off iter->ent.
4256          * It will place it into the iter->temp buffer. As most
4257          * events are less than 128 bytes, allocate a buffer of that size.
4258          * If one is greater, then trace_find_next_entry() will
4259          * allocate a new buffer to adjust for the bigger iter->ent.
4260          * It's not critical if it fails to get allocated here.
4261          */
4262         iter->temp = kmalloc(128, GFP_KERNEL);
4263         if (iter->temp)
4264                 iter->temp_size = 128;
4265
4266         /*
4267          * We make a copy of the current tracer to avoid concurrent
4268          * changes to it while we are reading.
4269          */
4270         mutex_lock(&trace_types_lock);
4271         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4272         if (!iter->trace)
4273                 goto fail;
4274
4275         *iter->trace = *tr->current_trace;
4276
4277         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4278                 goto fail;
4279
4280         iter->tr = tr;
4281
4282 #ifdef CONFIG_TRACER_MAX_TRACE
4283         /* Currently only the top directory has a snapshot */
4284         if (tr->current_trace->print_max || snapshot)
4285                 iter->array_buffer = &tr->max_buffer;
4286         else
4287 #endif
4288                 iter->array_buffer = &tr->array_buffer;
4289         iter->snapshot = snapshot;
4290         iter->pos = -1;
4291         iter->cpu_file = tracing_get_cpu(inode);
4292         mutex_init(&iter->mutex);
4293
4294         /* Notify the tracer early, before we stop tracing. */
4295         if (iter->trace->open)
4296                 iter->trace->open(iter);
4297
4298         /* Annotate start of buffers if we had overruns */
4299         if (ring_buffer_overruns(iter->array_buffer->buffer))
4300                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4301
4302         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4303         if (trace_clocks[tr->clock_id].in_ns)
4304                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4305
4306         /*
4307          * If pause-on-trace is enabled, then stop the trace while
4308          * dumping, unless this is the "snapshot" file
4309          */
4310         if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4311                 tracing_stop_tr(tr);
4312
4313         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4314                 for_each_tracing_cpu(cpu) {
4315                         iter->buffer_iter[cpu] =
4316                                 ring_buffer_read_prepare(iter->array_buffer->buffer,
4317                                                          cpu, GFP_KERNEL);
4318                 }
4319                 ring_buffer_read_prepare_sync();
4320                 for_each_tracing_cpu(cpu) {
4321                         ring_buffer_read_start(iter->buffer_iter[cpu]);
4322                         tracing_iter_reset(iter, cpu);
4323                 }
4324         } else {
4325                 cpu = iter->cpu_file;
4326                 iter->buffer_iter[cpu] =
4327                         ring_buffer_read_prepare(iter->array_buffer->buffer,
4328                                                  cpu, GFP_KERNEL);
4329                 ring_buffer_read_prepare_sync();
4330                 ring_buffer_read_start(iter->buffer_iter[cpu]);
4331                 tracing_iter_reset(iter, cpu);
4332         }
4333
4334         mutex_unlock(&trace_types_lock);
4335
4336         return iter;
4337
4338  fail:
4339         mutex_unlock(&trace_types_lock);
4340         kfree(iter->trace);
4341         kfree(iter->temp);
4342         kfree(iter->buffer_iter);
4343 release:
4344         seq_release_private(inode, file);
4345         return ERR_PTR(-ENOMEM);
4346 }
4347
4348 int tracing_open_generic(struct inode *inode, struct file *filp)
4349 {
4350         int ret;
4351
4352         ret = tracing_check_open_get_tr(NULL);
4353         if (ret)
4354                 return ret;
4355
4356         filp->private_data = inode->i_private;
4357         return 0;
4358 }
4359
4360 bool tracing_is_disabled(void)
4361 {
4362         return (tracing_disabled) ? true : false;
4363 }
4364
4365 /*
4366  * Open and update trace_array ref count.
4367  * Must have the current trace_array passed to it.
4368  */
4369 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4370 {
4371         struct trace_array *tr = inode->i_private;
4372         int ret;
4373
4374         ret = tracing_check_open_get_tr(tr);
4375         if (ret)
4376                 return ret;
4377
4378         filp->private_data = inode->i_private;
4379
4380         return 0;
4381 }
4382
4383 static int tracing_release(struct inode *inode, struct file *file)
4384 {
4385         struct trace_array *tr = inode->i_private;
4386         struct seq_file *m = file->private_data;
4387         struct trace_iterator *iter;
4388         int cpu;
4389
4390         if (!(file->f_mode & FMODE_READ)) {
4391                 trace_array_put(tr);
4392                 return 0;
4393         }
4394
4395         /* Writes do not use seq_file */
4396         iter = m->private;
4397         mutex_lock(&trace_types_lock);
4398
4399         for_each_tracing_cpu(cpu) {
4400                 if (iter->buffer_iter[cpu])
4401                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
4402         }
4403
4404         if (iter->trace && iter->trace->close)
4405                 iter->trace->close(iter);
4406
4407         if (!iter->snapshot && tr->stop_count)
4408                 /* reenable tracing if it was previously enabled */
4409                 tracing_start_tr(tr);
4410
4411         __trace_array_put(tr);
4412
4413         mutex_unlock(&trace_types_lock);
4414
4415         mutex_destroy(&iter->mutex);
4416         free_cpumask_var(iter->started);
4417         kfree(iter->temp);
4418         kfree(iter->trace);
4419         kfree(iter->buffer_iter);
4420         seq_release_private(inode, file);
4421
4422         return 0;
4423 }
4424
4425 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4426 {
4427         struct trace_array *tr = inode->i_private;
4428
4429         trace_array_put(tr);
4430         return 0;
4431 }
4432
4433 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4434 {
4435         struct trace_array *tr = inode->i_private;
4436
4437         trace_array_put(tr);
4438
4439         return single_release(inode, file);
4440 }
4441
4442 static int tracing_open(struct inode *inode, struct file *file)
4443 {
4444         struct trace_array *tr = inode->i_private;
4445         struct trace_iterator *iter;
4446         int ret;
4447
4448         ret = tracing_check_open_get_tr(tr);
4449         if (ret)
4450                 return ret;
4451
4452         /* If this file was open for write, then erase contents */
4453         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4454                 int cpu = tracing_get_cpu(inode);
4455                 struct array_buffer *trace_buf = &tr->array_buffer;
4456
4457 #ifdef CONFIG_TRACER_MAX_TRACE
4458                 if (tr->current_trace->print_max)
4459                         trace_buf = &tr->max_buffer;
4460 #endif
4461
4462                 if (cpu == RING_BUFFER_ALL_CPUS)
4463                         tracing_reset_online_cpus(trace_buf);
4464                 else
4465                         tracing_reset_cpu(trace_buf, cpu);
4466         }
4467
4468         if (file->f_mode & FMODE_READ) {
4469                 iter = __tracing_open(inode, file, false);
4470                 if (IS_ERR(iter))
4471                         ret = PTR_ERR(iter);
4472                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4473                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
4474         }
4475
4476         if (ret < 0)
4477                 trace_array_put(tr);
4478
4479         return ret;
4480 }
4481
4482 /*
4483  * Some tracers are not suitable for instance buffers.
4484  * A tracer is always available for the global array (toplevel)
4485  * or if it explicitly states that it is.
4486  */
4487 static bool
4488 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4489 {
4490         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4491 }
4492
4493 /* Find the next tracer that this trace array may use */
4494 static struct tracer *
4495 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4496 {
4497         while (t && !trace_ok_for_array(t, tr))
4498                 t = t->next;
4499
4500         return t;
4501 }
4502
4503 static void *
4504 t_next(struct seq_file *m, void *v, loff_t *pos)
4505 {
4506         struct trace_array *tr = m->private;
4507         struct tracer *t = v;
4508
4509         (*pos)++;
4510
4511         if (t)
4512                 t = get_tracer_for_array(tr, t->next);
4513
4514         return t;
4515 }
4516
4517 static void *t_start(struct seq_file *m, loff_t *pos)
4518 {
4519         struct trace_array *tr = m->private;
4520         struct tracer *t;
4521         loff_t l = 0;
4522
4523         mutex_lock(&trace_types_lock);
4524
4525         t = get_tracer_for_array(tr, trace_types);
4526         for (; t && l < *pos; t = t_next(m, t, &l))
4527                         ;
4528
4529         return t;
4530 }
4531
4532 static void t_stop(struct seq_file *m, void *p)
4533 {
4534         mutex_unlock(&trace_types_lock);
4535 }
4536
4537 static int t_show(struct seq_file *m, void *v)
4538 {
4539         struct tracer *t = v;
4540
4541         if (!t)
4542                 return 0;
4543
4544         seq_puts(m, t->name);
4545         if (t->next)
4546                 seq_putc(m, ' ');
4547         else
4548                 seq_putc(m, '\n');
4549
4550         return 0;
4551 }
4552
4553 static const struct seq_operations show_traces_seq_ops = {
4554         .start          = t_start,
4555         .next           = t_next,
4556         .stop           = t_stop,
4557         .show           = t_show,
4558 };
4559
4560 static int show_traces_open(struct inode *inode, struct file *file)
4561 {
4562         struct trace_array *tr = inode->i_private;
4563         struct seq_file *m;
4564         int ret;
4565
4566         ret = tracing_check_open_get_tr(tr);
4567         if (ret)
4568                 return ret;
4569
4570         ret = seq_open(file, &show_traces_seq_ops);
4571         if (ret) {
4572                 trace_array_put(tr);
4573                 return ret;
4574         }
4575
4576         m = file->private_data;
4577         m->private = tr;
4578
4579         return 0;
4580 }
4581
4582 static int show_traces_release(struct inode *inode, struct file *file)
4583 {
4584         struct trace_array *tr = inode->i_private;
4585
4586         trace_array_put(tr);
4587         return seq_release(inode, file);
4588 }
4589
4590 static ssize_t
4591 tracing_write_stub(struct file *filp, const char __user *ubuf,
4592                    size_t count, loff_t *ppos)
4593 {
4594         return count;
4595 }
4596
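/*
 * Seek handler for tracing files: readers go through seq_lseek(), while
 * files opened write-only keep the position pinned at zero.
 */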
4597 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4598 {
4599         int ret;
4600
4601         if (file->f_mode & FMODE_READ)
4602                 ret = seq_lseek(file, offset, whence);
4603         else
4604                 file->f_pos = ret = 0;
4605
4606         return ret;
4607 }
4608
4609 static const struct file_operations tracing_fops = {
4610         .open           = tracing_open,
4611         .read           = seq_read,
4612         .write          = tracing_write_stub,
4613         .llseek         = tracing_lseek,
4614         .release        = tracing_release,
4615 };
4616
4617 static const struct file_operations show_traces_fops = {
4618         .open           = show_traces_open,
4619         .read           = seq_read,
4620         .llseek         = seq_lseek,
4621         .release        = show_traces_release,
4622 };
4623
4624 static ssize_t
4625 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4626                      size_t count, loff_t *ppos)
4627 {
4628         struct trace_array *tr = file_inode(filp)->i_private;
4629         char *mask_str;
4630         int len;
4631
4632         len = snprintf(NULL, 0, "%*pb\n",
4633                        cpumask_pr_args(tr->tracing_cpumask)) + 1;
4634         mask_str = kmalloc(len, GFP_KERNEL);
4635         if (!mask_str)
4636                 return -ENOMEM;
4637
4638         len = snprintf(mask_str, len, "%*pb\n",
4639                        cpumask_pr_args(tr->tracing_cpumask));
4640         if (len >= count) {
4641                 count = -EINVAL;
4642                 goto out_err;
4643         }
4644         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
4645
4646 out_err:
4647         kfree(mask_str);
4648
4649         return count;
4650 }
4651
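/*
 * Apply a new tracing cpumask to @tr: pause recording on CPUs being
 * removed from the mask, resume it on CPUs being added, then store the
 * new mask.
 */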
4652 int tracing_set_cpumask(struct trace_array *tr,
4653                         cpumask_var_t tracing_cpumask_new)
4654 {
4655         int cpu;
4656
4657         if (!tr)
4658                 return -EINVAL;
4659
4660         local_irq_disable();
4661         arch_spin_lock(&tr->max_lock);
4662         for_each_tracing_cpu(cpu) {
4663                 /*
4664                  * Increase/decrease the disabled counter if we are
4665                  * about to flip a bit in the cpumask:
4666                  */
4667                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4668                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4669                         atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
4670                         ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
4671                 }
4672                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4673                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4674                         atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
4675                         ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
4676                 }
4677         }
4678         arch_spin_unlock(&tr->max_lock);
4679         local_irq_enable();
4680
4681         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4682
4683         return 0;
4684 }
4685
4686 static ssize_t
4687 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4688                       size_t count, loff_t *ppos)
4689 {
4690         struct trace_array *tr = file_inode(filp)->i_private;
4691         cpumask_var_t tracing_cpumask_new;
4692         int err;
4693
4694         if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4695                 return -ENOMEM;
4696
4697         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4698         if (err)
4699                 goto err_free;
4700
4701         err = tracing_set_cpumask(tr, tracing_cpumask_new);
4702         if (err)
4703                 goto err_free;
4704
4705         free_cpumask_var(tracing_cpumask_new);
4706
4707         return count;
4708
4709 err_free:
4710         free_cpumask_var(tracing_cpumask_new);
4711
4712         return err;
4713 }
4714
4715 static const struct file_operations tracing_cpumask_fops = {
4716         .open           = tracing_open_generic_tr,
4717         .read           = tracing_cpumask_read,
4718         .write          = tracing_cpumask_write,
4719         .release        = tracing_release_generic_tr,
4720         .llseek         = generic_file_llseek,
4721 };
4722
4723 static int tracing_trace_options_show(struct seq_file *m, void *v)
4724 {
4725         struct tracer_opt *trace_opts;
4726         struct trace_array *tr = m->private;
4727         u32 tracer_flags;
4728         int i;
4729
4730         mutex_lock(&trace_types_lock);
4731         tracer_flags = tr->current_trace->flags->val;
4732         trace_opts = tr->current_trace->flags->opts;
4733
4734         for (i = 0; trace_options[i]; i++) {
4735                 if (tr->trace_flags & (1 << i))
4736                         seq_printf(m, "%s\n", trace_options[i]);
4737                 else
4738                         seq_printf(m, "no%s\n", trace_options[i]);
4739         }
4740
4741         for (i = 0; trace_opts[i].name; i++) {
4742                 if (tracer_flags & trace_opts[i].bit)
4743                         seq_printf(m, "%s\n", trace_opts[i].name);
4744                 else
4745                         seq_printf(m, "no%s\n", trace_opts[i].name);
4746         }
4747         mutex_unlock(&trace_types_lock);
4748
4749         return 0;
4750 }
4751
4752 static int __set_tracer_option(struct trace_array *tr,
4753                                struct tracer_flags *tracer_flags,
4754                                struct tracer_opt *opts, int neg)
4755 {
4756         struct tracer *trace = tracer_flags->trace;
4757         int ret;
4758
4759         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4760         if (ret)
4761                 return ret;
4762
4763         if (neg)
4764                 tracer_flags->val &= ~opts->bit;
4765         else
4766                 tracer_flags->val |= opts->bit;
4767         return 0;
4768 }
4769
4770 /* Try to assign a tracer specific option */
4771 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4772 {
4773         struct tracer *trace = tr->current_trace;
4774         struct tracer_flags *tracer_flags = trace->flags;
4775         struct tracer_opt *opts = NULL;
4776         int i;
4777
4778         for (i = 0; tracer_flags->opts[i].name; i++) {
4779                 opts = &tracer_flags->opts[i];
4780
4781                 if (strcmp(cmp, opts->name) == 0)
4782                         return __set_tracer_option(tr, trace->flags, opts, neg);
4783         }
4784
4785         return -EINVAL;
4786 }
4787
4788 /* Some tracers require overwrite to stay enabled */
4789 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4790 {
4791         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4792                 return -1;
4793
4794         return 0;
4795 }
4796
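/*
 * Set or clear one generic trace option flag on @tr. The current tracer
 * may veto the change, and side effects (cmdline/tgid recording, fork
 * following, ring-buffer overwrite, trace_printk) are propagated here.
 */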
4797 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4798 {
4799         if ((mask == TRACE_ITER_RECORD_TGID) ||
4800             (mask == TRACE_ITER_RECORD_CMD))
4801                 lockdep_assert_held(&event_mutex);
4802
4803         /* do nothing if flag is already set */
4804         if (!!(tr->trace_flags & mask) == !!enabled)
4805                 return 0;
4806
4807         /* Give the tracer a chance to approve the change */
4808         if (tr->current_trace->flag_changed)
4809                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4810                         return -EINVAL;
4811
4812         if (enabled)
4813                 tr->trace_flags |= mask;
4814         else
4815                 tr->trace_flags &= ~mask;
4816
4817         if (mask == TRACE_ITER_RECORD_CMD)
4818                 trace_event_enable_cmd_record(enabled);
4819
4820         if (mask == TRACE_ITER_RECORD_TGID) {
4821                 if (!tgid_map)
4822                         tgid_map = kvcalloc(PID_MAX_DEFAULT + 1,
4823                                            sizeof(*tgid_map),
4824                                            GFP_KERNEL);
4825                 if (!tgid_map) {
4826                         tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
4827                         return -ENOMEM;
4828                 }
4829
4830                 trace_event_enable_tgid_record(enabled);
4831         }
4832
4833         if (mask == TRACE_ITER_EVENT_FORK)
4834                 trace_event_follow_fork(tr, enabled);
4835
4836         if (mask == TRACE_ITER_FUNC_FORK)
4837                 ftrace_pid_follow_fork(tr, enabled);
4838
4839         if (mask == TRACE_ITER_OVERWRITE) {
4840                 ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
4841 #ifdef CONFIG_TRACER_MAX_TRACE
4842                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4843 #endif
4844         }
4845
4846         if (mask == TRACE_ITER_PRINTK) {
4847                 trace_printk_start_stop_comm(enabled);
4848                 trace_printk_control(enabled);
4849         }
4850
4851         return 0;
4852 }
4853
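     /*
      * Parse a single option token, optionally prefixed with "no", and
      * apply it: first as a global trace flag, and failing that as an
      * option of the current tracer.
      */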
4854 int trace_set_options(struct trace_array *tr, char *option)
4855 {
4856         char *cmp;
4857         int neg = 0;
4858         int ret;
4859         size_t orig_len = strlen(option);
4860         int len;
4861
4862         cmp = strstrip(option);
4863
4864         len = str_has_prefix(cmp, "no");
4865         if (len)
4866                 neg = 1;
4867
4868         cmp += len;
4869
4870         mutex_lock(&event_mutex);
4871         mutex_lock(&trace_types_lock);
4872
4873         ret = match_string(trace_options, -1, cmp);
4874         /* If no option could be set, test the specific tracer options */
4875         if (ret < 0)
4876                 ret = set_tracer_option(tr, cmp, neg);
4877         else
4878                 ret = set_tracer_flag(tr, 1 << ret, !neg);
4879
4880         mutex_unlock(&trace_types_lock);
4881         mutex_unlock(&event_mutex);
4882
4883         /*
4884          * If the first trailing whitespace is replaced with '\0' by strstrip,
4885          * turn it back into a space.
4886          */
4887         if (orig_len > strlen(option))
4888                 option[strlen(option)] = ' ';
4889
4890         return ret;
4891 }
4892
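     /*
      * Apply each comma-separated option stored in trace_boot_options_buf
      * to the global trace array at boot, restoring the commas so the
      * buffer can be parsed again later.
      */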
4893 static void __init apply_trace_boot_options(void)
4894 {
4895         char *buf = trace_boot_options_buf;
4896         char *option;
4897
4898         while (true) {
4899                 option = strsep(&buf, ",");
4900
4901                 if (!option)
4902                         break;
4903
4904                 if (*option)
4905                         trace_set_options(&global_trace, option);
4906
4907                 /* Put back the comma to allow this to be called again */
4908                 if (buf)
4909                         *(buf - 1) = ',';
4910         }
4911 }
4912
4913 static ssize_t
4914 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4915                         size_t cnt, loff_t *ppos)
4916 {
4917         struct seq_file *m = filp->private_data;
4918         struct trace_array *tr = m->private;
4919         char buf[64];
4920         int ret;
4921
4922         if (cnt >= sizeof(buf))
4923                 return -EINVAL;
4924
4925         if (copy_from_user(buf, ubuf, cnt))
4926                 return -EFAULT;
4927
4928         buf[cnt] = 0;
4929
4930         ret = trace_set_options(tr, buf);
4931         if (ret < 0)
4932                 return ret;
4933
4934         *ppos += cnt;
4935
4936         return cnt;
4937 }
4938
4939 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4940 {
4941         struct trace_array *tr = inode->i_private;
4942         int ret;
4943
4944         ret = tracing_check_open_get_tr(tr);
4945         if (ret)
4946                 return ret;
4947
4948         ret = single_open(file, tracing_trace_options_show, inode->i_private);
4949         if (ret < 0)
4950                 trace_array_put(tr);
4951
4952         return ret;
4953 }
4954
4955 static const struct file_operations tracing_iter_fops = {
4956         .open           = tracing_trace_options_open,
4957         .read           = seq_read,
4958         .llseek         = seq_lseek,
4959         .release        = tracing_single_release_tr,
4960         .write          = tracing_trace_options_write,
4961 };
4962
4963 static const char readme_msg[] =
4964         "tracing mini-HOWTO:\n\n"
4965         "# echo 0 > tracing_on : quick way to disable tracing\n"
4966         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4967         " Important files:\n"
4968         "  trace\t\t\t- The static contents of the buffer\n"
4969         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
4970         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4971         "  current_tracer\t- function and latency tracers\n"
4972         "  available_tracers\t- list of configured tracers for current_tracer\n"
4973         "  error_log\t- error log for failed commands (that support it)\n"
4974         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
4975         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
4976         "  trace_clock\t\t- change the clock used to order events\n"
4977         "       local:   Per cpu clock but may not be synced across CPUs\n"
4978         "      global:   Synced across CPUs but slows tracing down.\n"
4979         "     counter:   Not a clock, but just an increment\n"
4980         "      uptime:   Jiffy counter from time of boot\n"
4981         "        perf:   Same clock that perf events use\n"
4982 #ifdef CONFIG_X86_64
4983         "     x86-tsc:   TSC cycle counter\n"
4984 #endif
4985         "\n  timestamp_mode\t- view the mode used to timestamp events\n"
4986         "       delta:   Delta difference against a buffer-wide timestamp\n"
4987         "    absolute:   Absolute (standalone) timestamp\n"
4988         "\n  trace_marker\t\t- Writes into this file are written into the kernel buffer\n"
4989         "\n  trace_marker_raw\t\t- Writes into this file are written as binary data into the kernel buffer\n"
4990         "  tracing_cpumask\t- Limit which CPUs to trace\n"
4991         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4992         "\t\t\t  Remove sub-buffer with rmdir\n"
4993         "  trace_options\t\t- Set format or modify how tracing happens\n"
4994         "\t\t\t  Disable an option by prefixing 'no' to the\n"
4995         "\t\t\t  option name\n"
4996         "  saved_cmdlines_size\t- echo the number of comm-pid entries to keep in here\n"
4997 #ifdef CONFIG_DYNAMIC_FTRACE
4998         "\n  available_filter_functions - list of functions that can be filtered on\n"
4999         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
5000         "\t\t\t  functions\n"
5001         "\t     accepts: func_full_name or glob-matching-pattern\n"
5002         "\t     modules: Can select a group via module\n"
5003         "\t      Format: :mod:<module-name>\n"
5004         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5005         "\t    triggers: a command to perform when function is hit\n"
5006         "\t      Format: <function>:<trigger>[:count]\n"
5007         "\t     trigger: traceon, traceoff\n"
5008         "\t\t      enable_event:<system>:<event>\n"
5009         "\t\t      disable_event:<system>:<event>\n"
5010 #ifdef CONFIG_STACKTRACE
5011         "\t\t      stacktrace\n"
5012 #endif
5013 #ifdef CONFIG_TRACER_SNAPSHOT
5014         "\t\t      snapshot\n"
5015 #endif
5016         "\t\t      dump\n"
5017         "\t\t      cpudump\n"
5018         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5019         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5020         "\t     The first one will disable tracing every time do_fault is hit\n"
5021         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5022         "\t       The first time do_trap is hit and it disables tracing, the\n"
5023         "\t       counter will decrement to 2. If tracing is already disabled,\n"
5024         "\t       the counter will not decrement. It only decrements when the\n"
5025         "\t       trigger did work\n"
5026         "\t     To remove trigger without count:\n"
5027         "\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
5028         "\t     To remove trigger with a count:\n"
5029         "\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
5030         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5031         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5032         "\t    modules: Can select a group via module command :mod:\n"
5033         "\t    Does not accept triggers\n"
5034 #endif /* CONFIG_DYNAMIC_FTRACE */
5035 #ifdef CONFIG_FUNCTION_TRACER
5036         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5037         "\t\t    (function)\n"
5038         "  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5039         "\t\t    (function)\n"
5040 #endif
5041 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5042         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5043         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5044         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5045 #endif
5046 #ifdef CONFIG_TRACER_SNAPSHOT
5047         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5048         "\t\t\t  snapshot buffer. Read the contents for more\n"
5049         "\t\t\t  information\n"
5050 #endif
5051 #ifdef CONFIG_STACK_TRACER
5052         "  stack_trace\t\t- Shows the max stack trace when active\n"
5053         "  stack_max_size\t- Shows current max stack size that was traced\n"
5054         "\t\t\t  Write into this file to reset the max size (trigger a\n"
5055         "\t\t\t  new trace)\n"
5056 #ifdef CONFIG_DYNAMIC_FTRACE
5057         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5058         "\t\t\t  traces\n"
5059 #endif
5060 #endif /* CONFIG_STACK_TRACER */
5061 #ifdef CONFIG_DYNAMIC_EVENTS
5062         "  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5063         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5064 #endif
5065 #ifdef CONFIG_KPROBE_EVENTS
5066         "  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5067         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5068 #endif
5069 #ifdef CONFIG_UPROBE_EVENTS
5070         "  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5071         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5072 #endif
5073 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5074         "\t  accepts: event-definitions (one definition per line)\n"
5075         "\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
5076         "\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
5077 #ifdef CONFIG_HIST_TRIGGERS
5078         "\t           s:[synthetic/]<event> <field> [<field>]\n"
5079 #endif
5080         "\t           -:[<group>/]<event>\n"
5081 #ifdef CONFIG_KPROBE_EVENTS
5082         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5083   "place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5084 #endif
5085 #ifdef CONFIG_UPROBE_EVENTS
5086   "   place (uprobe): <path>:<offset>[(ref_ctr_offset)]\n"
5087 #endif
5088         "\t     args: <name>=fetcharg[:type]\n"
5089         "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
5090 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5091         "\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5092 #else
5093         "\t           $stack<index>, $stack, $retval, $comm,\n"
5094 #endif
5095         "\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5096         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
5097         "\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5098         "\t           <type>\\[<array-size>\\]\n"
5099 #ifdef CONFIG_HIST_TRIGGERS
5100         "\t    field: <stype> <name>;\n"
5101         "\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5102         "\t           [unsigned] char/int/long\n"
5103 #endif
5104 #endif
5105         "  events/\t\t- Directory containing all trace event subsystems:\n"
5106         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5107         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
5108         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5109         "\t\t\t  events\n"
5110         "      filter\t\t- If set, only events passing filter are traced\n"
5111         "  events/<system>/<event>/\t- Directory containing control files for\n"
5112         "\t\t\t  <event>:\n"
5113         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5114         "      filter\t\t- If set, only events passing filter are traced\n"
5115         "      trigger\t\t- If set, a command to perform when event is hit\n"
5116         "\t    Format: <trigger>[:count][if <filter>]\n"
5117         "\t   trigger: traceon, traceoff\n"
5118         "\t            enable_event:<system>:<event>\n"
5119         "\t            disable_event:<system>:<event>\n"
5120 #ifdef CONFIG_HIST_TRIGGERS
5121         "\t            enable_hist:<system>:<event>\n"
5122         "\t            disable_hist:<system>:<event>\n"
5123 #endif
5124 #ifdef CONFIG_STACKTRACE
5125         "\t\t    stacktrace\n"
5126 #endif
5127 #ifdef CONFIG_TRACER_SNAPSHOT
5128         "\t\t    snapshot\n"
5129 #endif
5130 #ifdef CONFIG_HIST_TRIGGERS
5131         "\t\t    hist (see below)\n"
5132 #endif
5133         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5134         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5135         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5136         "\t                  events/block/block_unplug/trigger\n"
5137         "\t   The first disables tracing every time block_unplug is hit.\n"
5138         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5139         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5140         "\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5141         "\t   Like function triggers, the counter is only decremented if it\n"
5142         "\t    enabled or disabled tracing.\n"
5143         "\t   To remove a trigger without a count:\n"
5144         "\t     echo '!<trigger>' > <system>/<event>/trigger\n"
5145         "\t   To remove a trigger with a count:\n"
5146         "\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
5147         "\t   Filters can be ignored when removing a trigger.\n"
5148 #ifdef CONFIG_HIST_TRIGGERS
5149         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5150         "\t    Format: hist:keys=<field1[,field2,...]>\n"
5151         "\t            [:values=<field1[,field2,...]>]\n"
5152         "\t            [:sort=<field1[,field2,...]>]\n"
5153         "\t            [:size=#entries]\n"
5154         "\t            [:pause][:continue][:clear]\n"
5155         "\t            [:name=histname1]\n"
5156         "\t            [:<handler>.<action>]\n"
5157         "\t            [if <filter>]\n\n"
5158         "\t    When a matching event is hit, an entry is added to a hash\n"
5159         "\t    table using the key(s) and value(s) named, and the value of a\n"
5160         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
5161         "\t    correspond to fields in the event's format description.  Keys\n"
5162         "\t    can be any field, or the special string 'stacktrace'.\n"
5163         "\t    Compound keys consisting of up to two fields can be specified\n"
5164         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5165         "\t    fields.  Sort keys consisting of up to two fields can be\n"
5166         "\t    specified using the 'sort' keyword.  The sort direction can\n"
5167         "\t    be modified by appending '.descending' or '.ascending' to a\n"
5168         "\t    sort field.  The 'size' parameter can be used to specify more\n"
5169         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
5170         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
5171         "\t    its histogram data will be shared with other triggers of the\n"
5172         "\t    same name, and trigger hits will update this common data.\n\n"
5173         "\t    Reading the 'hist' file for the event will dump the hash\n"
5174         "\t    table in its entirety to stdout.  If there are multiple hist\n"
5175         "\t    triggers attached to an event, there will be a table for each\n"
5176         "\t    trigger in the output.  The table displayed for a named\n"
5177         "\t    trigger will be the same as any other instance having the\n"
5178         "\t    same name.  The default format used to display a given field\n"
5179         "\t    can be modified by appending any of the following modifiers\n"
5180         "\t    to the field name, as applicable:\n\n"
5181         "\t            .hex        display a number as a hex value\n"
5182         "\t            .sym        display an address as a symbol\n"
5183         "\t            .sym-offset display an address as a symbol and offset\n"
5184         "\t            .execname   display a common_pid as a program name\n"
5185         "\t            .syscall    display a syscall id as a syscall name\n"
5186         "\t            .log2       display log2 value rather than raw number\n"
5187         "\t            .usecs      display a common_timestamp in microseconds\n\n"
5188         "\t    The 'pause' parameter can be used to pause an existing hist\n"
5189         "\t    trigger or to start a hist trigger but not log any events\n"
5190         "\t    until told to do so.  'continue' can be used to start or\n"
5191         "\t    restart a paused hist trigger.\n\n"
5192         "\t    The 'clear' parameter will clear the contents of a running\n"
5193         "\t    hist trigger and leave its current paused/active state\n"
5194         "\t    unchanged.\n\n"
5195         "\t    The enable_hist and disable_hist triggers can be used to\n"
5196         "\t    have one event conditionally start and stop another event's\n"
5197         "\t    already-attached hist trigger.  The syntax is analogous to\n"
5198         "\t    the enable_event and disable_event triggers.\n\n"
5199         "\t    Hist trigger handlers and actions are executed whenever a\n"
5200         "\t    histogram entry is added or updated.  They take the form:\n\n"
5201         "\t        <handler>.<action>\n\n"
5202         "\t    The available handlers are:\n\n"
5203         "\t        onmatch(matching.event)  - invoke on addition or update\n"
5204         "\t        onmax(var)               - invoke if var exceeds current max\n"
5205         "\t        onchange(var)            - invoke action if var changes\n\n"
5206         "\t    The available actions are:\n\n"
5207         "\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5208         "\t        save(field,...)                      - save current event fields\n"
5209 #ifdef CONFIG_TRACER_SNAPSHOT
5210         "\t        snapshot()                           - snapshot the trace buffer\n"
5211 #endif
5212 #endif
5213 ;
5214
5215 static ssize_t
5216 tracing_readme_read(struct file *filp, char __user *ubuf,
5217                        size_t cnt, loff_t *ppos)
5218 {
5219         return simple_read_from_buffer(ubuf, cnt, ppos,
5220                                         readme_msg, strlen(readme_msg));
5221 }
5222
5223 static const struct file_operations tracing_readme_fops = {
5224         .open           = tracing_open_generic,
5225         .read           = tracing_readme_read,
5226         .llseek         = generic_file_llseek,
5227 };
5228
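     /*
      * seq_file iterators for the saved_tgids file: walk the tgid_map
      * array and emit one "<pid> <tgid>" line per recorded entry.
      */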
5229 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5230 {
5231         int *ptr = v;
5232
5233         if (*pos || m->count)
5234                 ptr++;
5235
5236         (*pos)++;
5237
5238         for (; ptr <= &tgid_map[PID_MAX_DEFAULT]; ptr++) {
5239                 if (trace_find_tgid(*ptr))
5240                         return ptr;
5241         }
5242
5243         return NULL;
5244 }
5245
5246 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5247 {
5248         void *v;
5249         loff_t l = 0;
5250
5251         if (!tgid_map)
5252                 return NULL;
5253
5254         v = &tgid_map[0];
5255         while (l <= *pos) {
5256                 v = saved_tgids_next(m, v, &l);
5257                 if (!v)
5258                         return NULL;
5259         }
5260
5261         return v;
5262 }
5263
5264 static void saved_tgids_stop(struct seq_file *m, void *v)
5265 {
5266 }
5267
5268 static int saved_tgids_show(struct seq_file *m, void *v)
5269 {
5270         int pid = (int *)v - tgid_map;
5271
5272         seq_printf(m, "%d %d\n", pid, trace_find_tgid(pid));
5273         return 0;
5274 }
5275
5276 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5277         .start          = saved_tgids_start,
5278         .stop           = saved_tgids_stop,
5279         .next           = saved_tgids_next,
5280         .show           = saved_tgids_show,
5281 };
5282
5283 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5284 {
5285         int ret;
5286
5287         ret = tracing_check_open_get_tr(NULL);
5288         if (ret)
5289                 return ret;
5290
5291         return seq_open(filp, &tracing_saved_tgids_seq_ops);
5292 }
5293
5294
5295 static const struct file_operations tracing_saved_tgids_fops = {
5296         .open           = tracing_saved_tgids_open,
5297         .read           = seq_read,
5298         .llseek         = seq_lseek,
5299         .release        = seq_release,
5300 };
5301
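     /*
      * seq_file iterators for the saved_cmdlines file: walk the
      * map_cmdline_to_pid array under trace_cmdline_lock and emit one
      * "<pid> <comm>" line per saved entry.
      */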
5302 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5303 {
5304         unsigned int *ptr = v;
5305
5306         if (*pos || m->count)
5307                 ptr++;
5308
5309         (*pos)++;
5310
5311         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5312              ptr++) {
5313                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5314                         continue;
5315
5316                 return ptr;
5317         }
5318
5319         return NULL;
5320 }
5321
5322 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5323 {
5324         void *v;
5325         loff_t l = 0;
5326
5327         preempt_disable();
5328         arch_spin_lock(&trace_cmdline_lock);
5329
5330         v = &savedcmd->map_cmdline_to_pid[0];
5331         while (l <= *pos) {
5332                 v = saved_cmdlines_next(m, v, &l);
5333                 if (!v)
5334                         return NULL;
5335         }
5336
5337         return v;
5338 }
5339
5340 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5341 {
5342         arch_spin_unlock(&trace_cmdline_lock);
5343         preempt_enable();
5344 }
5345
5346 static int saved_cmdlines_show(struct seq_file *m, void *v)
5347 {
5348         char buf[TASK_COMM_LEN];
5349         unsigned int *pid = v;
5350
5351         __trace_find_cmdline(*pid, buf);
5352         seq_printf(m, "%d %s\n", *pid, buf);
5353         return 0;
5354 }
5355
5356 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5357         .start          = saved_cmdlines_start,
5358         .next           = saved_cmdlines_next,
5359         .stop           = saved_cmdlines_stop,
5360         .show           = saved_cmdlines_show,
5361 };
5362
5363 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5364 {
5365         int ret;
5366
5367         ret = tracing_check_open_get_tr(NULL);
5368         if (ret)
5369                 return ret;
5370
5371         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5372 }
5373
5374 static const struct file_operations tracing_saved_cmdlines_fops = {
5375         .open           = tracing_saved_cmdlines_open,
5376         .read           = seq_read,
5377         .llseek         = seq_lseek,
5378         .release        = seq_release,
5379 };
5380
5381 static ssize_t
5382 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5383                                  size_t cnt, loff_t *ppos)
5384 {
5385         char buf[64];
5386         int r;
5387
5388         arch_spin_lock(&trace_cmdline_lock);
5389         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5390         arch_spin_unlock(&trace_cmdline_lock);
5391
5392         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5393 }
5394
5395 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5396 {
5397         kfree(s->saved_cmdlines);
5398         kfree(s->map_cmdline_to_pid);
5399         kfree(s);
5400 }
5401
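     /*
      * Allocate a new saved_cmdlines buffer holding @val entries, swap it
      * in under trace_cmdline_lock and free the old one.
      */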
5402 static int tracing_resize_saved_cmdlines(unsigned int val)
5403 {
5404         struct saved_cmdlines_buffer *s, *savedcmd_temp;
5405
5406         s = kmalloc(sizeof(*s), GFP_KERNEL);
5407         if (!s)
5408                 return -ENOMEM;
5409
5410         if (allocate_cmdlines_buffer(val, s) < 0) {
5411                 kfree(s);
5412                 return -ENOMEM;
5413         }
5414
5415         arch_spin_lock(&trace_cmdline_lock);
5416         savedcmd_temp = savedcmd;
5417         savedcmd = s;
5418         arch_spin_unlock(&trace_cmdline_lock);
5419         free_saved_cmdlines_buffer(savedcmd_temp);
5420
5421         return 0;
5422 }
5423
5424 static ssize_t
5425 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5426                                   size_t cnt, loff_t *ppos)
5427 {
5428         unsigned long val;
5429         int ret;
5430
5431         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5432         if (ret)
5433                 return ret;
5434
5435         /* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
5436         if (!val || val > PID_MAX_DEFAULT)
5437                 return -EINVAL;
5438
5439         ret = tracing_resize_saved_cmdlines((unsigned int)val);
5440         if (ret < 0)
5441                 return ret;
5442
5443         *ppos += cnt;
5444
5445         return cnt;
5446 }
5447
5448 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5449         .open           = tracing_open_generic,
5450         .read           = tracing_saved_cmdlines_size_read,
5451         .write          = tracing_saved_cmdlines_size_write,
5452 };
5453
5454 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5455 static union trace_eval_map_item *
5456 update_eval_map(union trace_eval_map_item *ptr)
5457 {
5458         if (!ptr->map.eval_string) {
5459                 if (ptr->tail.next) {
5460                         ptr = ptr->tail.next;
5461                         /* Set ptr to the next real item (skip head) */
5462                         ptr++;
5463                 } else
5464                         return NULL;
5465         }
5466         return ptr;
5467 }
5468
5469 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5470 {
5471         union trace_eval_map_item *ptr = v;
5472
5473         /*
5474          * Paranoid! If ptr points to end, we don't want to increment past it.
5475          * This really should never happen.
5476          */
5477         (*pos)++;
5478         ptr = update_eval_map(ptr);
5479         if (WARN_ON_ONCE(!ptr))
5480                 return NULL;
5481
5482         ptr++;
5483         ptr = update_eval_map(ptr);
5484
5485         return ptr;
5486 }
5487
5488 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5489 {
5490         union trace_eval_map_item *v;
5491         loff_t l = 0;
5492
5493         mutex_lock(&trace_eval_mutex);
5494
5495         v = trace_eval_maps;
5496         if (v)
5497                 v++;
5498
5499         while (v && l < *pos) {
5500                 v = eval_map_next(m, v, &l);
5501         }
5502
5503         return v;
5504 }
5505
5506 static void eval_map_stop(struct seq_file *m, void *v)
5507 {
5508         mutex_unlock(&trace_eval_mutex);
5509 }
5510
5511 static int eval_map_show(struct seq_file *m, void *v)
5512 {
5513         union trace_eval_map_item *ptr = v;
5514
5515         seq_printf(m, "%s %ld (%s)\n",
5516                    ptr->map.eval_string, ptr->map.eval_value,
5517                    ptr->map.system);
5518
5519         return 0;
5520 }
5521
5522 static const struct seq_operations tracing_eval_map_seq_ops = {
5523         .start          = eval_map_start,
5524         .next           = eval_map_next,
5525         .stop           = eval_map_stop,
5526         .show           = eval_map_show,
5527 };
5528
5529 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5530 {
5531         int ret;
5532
5533         ret = tracing_check_open_get_tr(NULL);
5534         if (ret)
5535                 return ret;
5536
5537         return seq_open(filp, &tracing_eval_map_seq_ops);
5538 }
5539
5540 static const struct file_operations tracing_eval_map_fops = {
5541         .open           = tracing_eval_map_open,
5542         .read           = seq_read,
5543         .llseek         = seq_lseek,
5544         .release        = seq_release,
5545 };
5546
5547 static inline union trace_eval_map_item *
5548 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5549 {
5550         /* Return tail of array given the head */
5551         return ptr + ptr->head.length + 1;
5552 }
5553
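     /*
      * Copy @len eval maps from @start into a freshly allocated array that
      * is bracketed by a head item (module and length) and a zeroed tail
      * item, and link it onto the end of the trace_eval_maps chain.
      */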
5554 static void
5555 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5556                            int len)
5557 {
5558         struct trace_eval_map **stop;
5559         struct trace_eval_map **map;
5560         union trace_eval_map_item *map_array;
5561         union trace_eval_map_item *ptr;
5562
5563         stop = start + len;
5564
5565         /*
5566          * The trace_eval_maps contains the map plus a head and tail item,
5567          * where the head holds the module and length of array, and the
5568          * tail holds a pointer to the next list.
5569          */
5570         map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5571         if (!map_array) {
5572                 pr_warn("Unable to allocate trace eval mapping\n");
5573                 return;
5574         }
5575
5576         mutex_lock(&trace_eval_mutex);
5577
5578         if (!trace_eval_maps)
5579                 trace_eval_maps = map_array;
5580         else {
5581                 ptr = trace_eval_maps;
5582                 for (;;) {
5583                         ptr = trace_eval_jmp_to_tail(ptr);
5584                         if (!ptr->tail.next)
5585                                 break;
5586                         ptr = ptr->tail.next;
5587
5588                 }
5589                 ptr->tail.next = map_array;
5590         }
5591         map_array->head.mod = mod;
5592         map_array->head.length = len;
5593         map_array++;
5594
5595         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5596                 map_array->map = **map;
5597                 map_array++;
5598         }
5599         memset(map_array, 0, sizeof(*map_array));
5600
5601         mutex_unlock(&trace_eval_mutex);
5602 }
5603
5604 static void trace_create_eval_file(struct dentry *d_tracer)
5605 {
5606         trace_create_file("eval_map", 0444, d_tracer,
5607                           NULL, &tracing_eval_map_fops);
5608 }
5609
5610 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5611 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5612 static inline void trace_insert_eval_map_file(struct module *mod,
5613                               struct trace_eval_map **start, int len) { }
5614 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5615
5616 static void trace_insert_eval_map(struct module *mod,
5617                                   struct trace_eval_map **start, int len)
5618 {
5619         struct trace_eval_map **map;
5620
5621         if (len <= 0)
5622                 return;
5623
5624         map = start;
5625
5626         trace_event_eval_update(map, len);
5627
5628         trace_insert_eval_map_file(mod, start, len);
5629 }
5630
5631 static ssize_t
5632 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5633                        size_t cnt, loff_t *ppos)
5634 {
5635         struct trace_array *tr = filp->private_data;
5636         char buf[MAX_TRACER_SIZE+2];
5637         int r;
5638
5639         mutex_lock(&trace_types_lock);
5640         r = sprintf(buf, "%s\n", tr->current_trace->name);
5641         mutex_unlock(&trace_types_lock);
5642
5643         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5644 }
5645
5646 int tracer_init(struct tracer *t, struct trace_array *tr)
5647 {
5648         tracing_reset_online_cpus(&tr->array_buffer);
5649         return t->init(tr);
5650 }
5651
5652 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
5653 {
5654         int cpu;
5655
5656         for_each_tracing_cpu(cpu)
5657                 per_cpu_ptr(buf->data, cpu)->entries = val;
5658 }
5659
5660 #ifdef CONFIG_TRACER_MAX_TRACE
5661 /* resize @trace_buf's buffer to the size of @size_buf's entries */
5662 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
5663                                         struct array_buffer *size_buf, int cpu_id)
5664 {
5665         int cpu, ret = 0;
5666
5667         if (cpu_id == RING_BUFFER_ALL_CPUS) {
5668                 for_each_tracing_cpu(cpu) {
5669                         ret = ring_buffer_resize(trace_buf->buffer,
5670                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5671                         if (ret < 0)
5672                                 break;
5673                         per_cpu_ptr(trace_buf->data, cpu)->entries =
5674                                 per_cpu_ptr(size_buf->data, cpu)->entries;
5675                 }
5676         } else {
5677                 ret = ring_buffer_resize(trace_buf->buffer,
5678                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5679                 if (ret == 0)
5680                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5681                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
5682         }
5683
5684         return ret;
5685 }
5686 #endif /* CONFIG_TRACER_MAX_TRACE */
5687
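     /*
      * Resize @tr's ring buffer, for a single CPU or for all CPUs when
      * @cpu is RING_BUFFER_ALL_CPUS. For the global instance running a
      * tracer that uses the max buffer, the max/snapshot buffer is resized
      * to match, and a failure there is rolled back or, in the worst case,
      * disables tracing entirely.
      */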
5688 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5689                                         unsigned long size, int cpu)
5690 {
5691         int ret;
5692
5693         /*
5694          * If kernel or user changes the size of the ring buffer
5695          * we use the size that was given, and we can forget about
5696          * expanding it later.
5697          */
5698         ring_buffer_expanded = true;
5699
5700         /* May be called before buffers are initialized */
5701         if (!tr->array_buffer.buffer)
5702                 return 0;
5703
5704         ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
5705         if (ret < 0)
5706                 return ret;
5707
5708 #ifdef CONFIG_TRACER_MAX_TRACE
5709         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
5710             !tr->current_trace->use_max_tr)
5711                 goto out;
5712
5713         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5714         if (ret < 0) {
5715                 int r = resize_buffer_duplicate_size(&tr->array_buffer,
5716                                                      &tr->array_buffer, cpu);
5717                 if (r < 0) {
5718                         /*
5719                          * AARGH! We are left with different
5720                          * size max buffer!!!!
5721                          * The max buffer is our "snapshot" buffer.
5722                          * When a tracer needs a snapshot (one of the
5723                          * latency tracers), it swaps the max buffer
5724                          * with the saved snapshot. We succeeded in
5725                          * updating the size of the main buffer, but failed to
5726                          * update the size of the max buffer. But when we tried
5727                          * to reset the main buffer to the original size, we
5728                          * failed there too. This is very unlikely to
5729                          * happen, but if it does, warn and kill all
5730                          * tracing.
5731                          */
5732                         WARN_ON(1);
5733                         tracing_disabled = 1;
5734                 }
5735                 return ret;
5736         }
5737
5738         if (cpu == RING_BUFFER_ALL_CPUS)
5739                 set_buffer_entries(&tr->max_buffer, size);
5740         else
5741                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
5742
5743  out:
5744 #endif /* CONFIG_TRACER_MAX_TRACE */
5745
5746         if (cpu == RING_BUFFER_ALL_CPUS)
5747                 set_buffer_entries(&tr->array_buffer, size);
5748         else
5749                 per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size;
5750
5751         return ret;
5752 }
5753
5754 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5755                                   unsigned long size, int cpu_id)
5756 {
5757         int ret = size;
5758
5759         mutex_lock(&trace_types_lock);
5760
5761         if (cpu_id != RING_BUFFER_ALL_CPUS) {
5762                 /* make sure this CPU is enabled in the mask */
5763                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
5764                         ret = -EINVAL;
5765                         goto out;
5766                 }
5767         }
5768
5769         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5770         if (ret < 0)
5771                 ret = -ENOMEM;
5772
5773 out:
5774         mutex_unlock(&trace_types_lock);
5775
5776         return ret;
5777 }
5778
5779
5780 /**
5781  * tracing_update_buffers - used by tracing facility to expand ring buffers
5782  *
5783  * To save memory when tracing is never used on a system that has it
5784  * configured in, the ring buffers are set to a minimum size. Once
5785  * a user starts to use the tracing facility, the buffers need to grow
5786  * to their default size.
5787  *
5788  * This function is to be called when a tracer is about to be used.
5789  */
5790 int tracing_update_buffers(void)
5791 {
5792         int ret = 0;
5793
5794         mutex_lock(&trace_types_lock);
5795         if (!ring_buffer_expanded)
5796                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
5797                                                 RING_BUFFER_ALL_CPUS);
5798         mutex_unlock(&trace_types_lock);
5799
5800         return ret;
5801 }
5802
5803 struct trace_option_dentry;
5804
5805 static void
5806 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
5807
5808 /*
5809  * Used to clear out the tracer before deletion of an instance.
5810  * Must have trace_types_lock held.
5811  */
5812 static void tracing_set_nop(struct trace_array *tr)
5813 {
5814         if (tr->current_trace == &nop_trace)
5815                 return;
5816
5817         tr->current_trace->enabled--;
5818
5819         if (tr->current_trace->reset)
5820                 tr->current_trace->reset(tr);
5821
5822         tr->current_trace = &nop_trace;
5823 }
5824
5825 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5826 {
5827         /* Only enable if the directory has been created already. */
5828         if (!tr->dir)
5829                 return;
5830
5831         create_trace_option_files(tr, t);
5832 }
5833
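     /*
      * Switch @tr to the tracer named @buf: shut down the current tracer,
      * free or allocate the snapshot buffer as the new tracer requires and
      * run the new tracer's init() callback. Fails with -EBUSY while
      * trace_pipe readers still hold a reference to the current tracer.
      */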
5834 int tracing_set_tracer(struct trace_array *tr, const char *buf)
5835 {
5836         struct tracer *t;
5837 #ifdef CONFIG_TRACER_MAX_TRACE
5838         bool had_max_tr;
5839 #endif
5840         int ret = 0;
5841
5842         mutex_lock(&trace_types_lock);
5843
5844         if (!ring_buffer_expanded) {
5845                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5846                                                 RING_BUFFER_ALL_CPUS);
5847                 if (ret < 0)
5848                         goto out;
5849                 ret = 0;
5850         }
5851
5852         for (t = trace_types; t; t = t->next) {
5853                 if (strcmp(t->name, buf) == 0)
5854                         break;
5855         }
5856         if (!t) {
5857                 ret = -EINVAL;
5858                 goto out;
5859         }
5860         if (t == tr->current_trace)
5861                 goto out;
5862
5863 #ifdef CONFIG_TRACER_SNAPSHOT
5864         if (t->use_max_tr) {
5865                 arch_spin_lock(&tr->max_lock);
5866                 if (tr->cond_snapshot)
5867                         ret = -EBUSY;
5868                 arch_spin_unlock(&tr->max_lock);
5869                 if (ret)
5870                         goto out;
5871         }
5872 #endif
5873         /* Some tracers won't work on kernel command line */
5874         if (system_state < SYSTEM_RUNNING && t->noboot) {
5875                 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
5876                         t->name);
5877                 goto out;
5878         }
5879
5880         /* Some tracers are only allowed for the top level buffer */
5881         if (!trace_ok_for_array(t, tr)) {
5882                 ret = -EINVAL;
5883                 goto out;
5884         }
5885
5886         /* If trace pipe files are being read, we can't change the tracer */
5887         if (tr->current_trace->ref) {
5888                 ret = -EBUSY;
5889                 goto out;
5890         }
5891
5892         trace_branch_disable();
5893
5894         tr->current_trace->enabled--;
5895
5896         if (tr->current_trace->reset)
5897                 tr->current_trace->reset(tr);
5898
5899         /* Current trace needs to be nop_trace before synchronize_rcu */
5900         tr->current_trace = &nop_trace;
5901
5902 #ifdef CONFIG_TRACER_MAX_TRACE
5903         had_max_tr = tr->allocated_snapshot;
5904
5905         if (had_max_tr && !t->use_max_tr) {
5906                 /*
5907                  * We need to make sure that the update_max_tr sees that
5908                  * current_trace changed to nop_trace to keep it from
5909                  * swapping the buffers after we resize it.
5910                  * The update_max_tr is called with interrupts disabled,
5911                  * so a synchronize_rcu() is sufficient.
5912                  */
5913                 synchronize_rcu();
5914                 free_snapshot(tr);
5915         }
5916 #endif
5917
5918 #ifdef CONFIG_TRACER_MAX_TRACE
5919         if (t->use_max_tr && !had_max_tr) {
5920                 ret = tracing_alloc_snapshot_instance(tr);
5921                 if (ret < 0)
5922                         goto out;
5923         }
5924 #endif
5925
5926         if (t->init) {
5927                 ret = tracer_init(t, tr);
5928                 if (ret)
5929                         goto out;
5930         }
5931
5932         tr->current_trace = t;
5933         tr->current_trace->enabled++;
5934         trace_branch_enable(tr);
5935  out:
5936         mutex_unlock(&trace_types_lock);
5937
5938         return ret;
5939 }
5940
5941 static ssize_t
5942 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
5943                         size_t cnt, loff_t *ppos)
5944 {
5945         struct trace_array *tr = filp->private_data;
5946         char buf[MAX_TRACER_SIZE+1];
5947         int i;
5948         size_t ret;
5949         int err;
5950
5951         ret = cnt;
5952
5953         if (cnt > MAX_TRACER_SIZE)
5954                 cnt = MAX_TRACER_SIZE;
5955
5956         if (copy_from_user(buf, ubuf, cnt))
5957                 return -EFAULT;
5958
5959         buf[cnt] = 0;
5960
5961         /* strip ending whitespace. */
5962         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
5963                 buf[i] = 0;
5964
5965         err = tracing_set_tracer(tr, buf);
5966         if (err)
5967                 return err;
5968
5969         *ppos += ret;
5970
5971         return ret;
5972 }
5973
5974 static ssize_t
5975 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
5976                    size_t cnt, loff_t *ppos)
5977 {
5978         char buf[64];
5979         int r;
5980
5981         r = snprintf(buf, sizeof(buf), "%ld\n",
5982                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
5983         if (r > sizeof(buf))
5984                 r = sizeof(buf);
5985         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5986 }
5987
5988 static ssize_t
5989 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
5990                     size_t cnt, loff_t *ppos)
5991 {
5992         unsigned long val;
5993         int ret;
5994
5995         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5996         if (ret)
5997                 return ret;
5998
5999         *ptr = val * 1000;
6000
6001         return cnt;
6002 }
6003
6004 static ssize_t
6005 tracing_thresh_read(struct file *filp, char __user *ubuf,
6006                     size_t cnt, loff_t *ppos)
6007 {
6008         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6009 }
6010
6011 static ssize_t
6012 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6013                      size_t cnt, loff_t *ppos)
6014 {
6015         struct trace_array *tr = filp->private_data;
6016         int ret;
6017
6018         mutex_lock(&trace_types_lock);
6019         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6020         if (ret < 0)
6021                 goto out;
6022
6023         if (tr->current_trace->update_thresh) {
6024                 ret = tr->current_trace->update_thresh(tr);
6025                 if (ret < 0)
6026                         goto out;
6027         }
6028
6029         ret = cnt;
6030 out:
6031         mutex_unlock(&trace_types_lock);
6032
6033         return ret;
6034 }
6035
6036 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6037
6038 static ssize_t
6039 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6040                      size_t cnt, loff_t *ppos)
6041 {
6042         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
6043 }
6044
6045 static ssize_t
6046 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6047                       size_t cnt, loff_t *ppos)
6048 {
6049         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
6050 }
6051
6052 #endif
6053
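     /*
      * Open handler for trace_pipe: allocate a consuming trace_iterator
      * bound to the current tracer and to the CPU encoded in the inode,
      * and take a reference on the tracer so it cannot be switched while
      * the pipe is open.
      */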
6054 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6055 {
6056         struct trace_array *tr = inode->i_private;
6057         struct trace_iterator *iter;
6058         int ret;
6059
6060         ret = tracing_check_open_get_tr(tr);
6061         if (ret)
6062                 return ret;
6063
6064         mutex_lock(&trace_types_lock);
6065
6066         /* create a buffer to store the information to pass to userspace */
6067         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6068         if (!iter) {
6069                 ret = -ENOMEM;
6070                 __trace_array_put(tr);
6071                 goto out;
6072         }
6073
6074         trace_seq_init(&iter->seq);
6075         iter->trace = tr->current_trace;
6076
6077         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6078                 ret = -ENOMEM;
6079                 goto fail;
6080         }
6081
6082         /* trace pipe does not show start of buffer */
6083         cpumask_setall(iter->started);
6084
6085         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6086                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
6087
6088         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6089         if (trace_clocks[tr->clock_id].in_ns)
6090                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6091
6092         iter->tr = tr;
6093         iter->array_buffer = &tr->array_buffer;
6094         iter->cpu_file = tracing_get_cpu(inode);
6095         mutex_init(&iter->mutex);
6096         filp->private_data = iter;
6097
6098         if (iter->trace->pipe_open)
6099                 iter->trace->pipe_open(iter);
6100
6101         nonseekable_open(inode, filp);
6102
6103         tr->current_trace->ref++;
6104 out:
6105         mutex_unlock(&trace_types_lock);
6106         return ret;
6107
6108 fail:
6109         kfree(iter);
6110         __trace_array_put(tr);
6111         mutex_unlock(&trace_types_lock);
6112         return ret;
6113 }
6114
6115 static int tracing_release_pipe(struct inode *inode, struct file *file)
6116 {
6117         struct trace_iterator *iter = file->private_data;
6118         struct trace_array *tr = inode->i_private;
6119
6120         mutex_lock(&trace_types_lock);
6121
6122         tr->current_trace->ref--;
6123
6124         if (iter->trace->pipe_close)
6125                 iter->trace->pipe_close(iter);
6126
6127         mutex_unlock(&trace_types_lock);
6128
6129         free_cpumask_var(iter->started);
6130         mutex_destroy(&iter->mutex);
6131         kfree(iter);
6132
6133         trace_array_put(tr);
6134
6135         return 0;
6136 }
6137
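     /*
      * Poll helper shared by the pipe readers: a static iterator is always
      * readable, as is any iterator when the "block" trace option is set;
      * otherwise defer to the ring buffer's poll_wait.
      */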
6138 static __poll_t
6139 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6140 {
6141         struct trace_array *tr = iter->tr;
6142
6143         /* Iterators are static, they should be filled or empty */
6144         if (trace_buffer_iter(iter, iter->cpu_file))
6145                 return EPOLLIN | EPOLLRDNORM;
6146
6147         if (tr->trace_flags & TRACE_ITER_BLOCK)
6148                 /*
6149                  * Always select as readable when in blocking mode
6150                  */
6151                 return EPOLLIN | EPOLLRDNORM;
6152         else
6153                 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6154                                              filp, poll_table);
6155 }
6156
6157 static __poll_t
6158 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6159 {
6160         struct trace_iterator *iter = filp->private_data;
6161
6162         return trace_poll(iter, filp, poll_table);
6163 }
6164
6165 /* Must be called with iter->mutex held. */
6166 static int tracing_wait_pipe(struct file *filp)
6167 {
6168         struct trace_iterator *iter = filp->private_data;
6169         int ret;
6170
6171         while (trace_empty(iter)) {
6172
6173                 if ((filp->f_flags & O_NONBLOCK)) {
6174                         return -EAGAIN;
6175                 }
6176
6177                 /*
6178                  * We block until we read something and tracing is disabled.
6179                  * We still block if tracing is disabled, but we have never
6180                  * read anything. This allows a user to cat this file, and
6181                  * then enable tracing. But after we have read something,
6182                  * we give an EOF when tracing is again disabled.
6183                  *
6184                  * iter->pos will be 0 if we haven't read anything.
6185                  */
6186                 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6187                         break;
6188
6189                 mutex_unlock(&iter->mutex);
6190
6191                 ret = wait_on_pipe(iter, 0);
6192
6193                 mutex_lock(&iter->mutex);
6194
6195                 if (ret)
6196                         return ret;
6197         }
6198
6199         return 1;
6200 }
6201
6202 /*
6203  * Consumer reader.
6204  */
6205 static ssize_t
6206 tracing_read_pipe(struct file *filp, char __user *ubuf,
6207                   size_t cnt, loff_t *ppos)
6208 {
6209         struct trace_iterator *iter = filp->private_data;
6210         ssize_t sret;
6211
6212         /*
6213          * Avoid more than one consumer on a single file descriptor.
6214          * This is just a matter of trace coherency, the ring buffer itself
6215          * is protected.
6216          */
6217         mutex_lock(&iter->mutex);
6218
6219         /* return any leftover data */
6220         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6221         if (sret != -EBUSY)
6222                 goto out;
6223
6224         trace_seq_init(&iter->seq);
6225
6226         if (iter->trace->read) {
6227                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6228                 if (sret)
6229                         goto out;
6230         }
6231
6232 waitagain:
6233         sret = tracing_wait_pipe(filp);
6234         if (sret <= 0)
6235                 goto out;
6236
6237         /* stop when tracing is finished */
6238         if (trace_empty(iter)) {
6239                 sret = 0;
6240                 goto out;
6241         }
6242
6243         if (cnt >= PAGE_SIZE)
6244                 cnt = PAGE_SIZE - 1;
6245
6246         /* reset all but tr, trace, and overruns */
6247         memset(&iter->seq, 0,
6248                sizeof(struct trace_iterator) -
6249                offsetof(struct trace_iterator, seq));
6250         cpumask_clear(iter->started);
6251         trace_seq_init(&iter->seq);
6252         iter->pos = -1;
6253
6254         trace_event_read_lock();
6255         trace_access_lock(iter->cpu_file);
6256         while (trace_find_next_entry_inc(iter) != NULL) {
6257                 enum print_line_t ret;
6258                 int save_len = iter->seq.seq.len;
6259
6260                 ret = print_trace_line(iter);
6261                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6262                         /* don't print partial lines */
6263                         iter->seq.seq.len = save_len;
6264                         break;
6265                 }
6266                 if (ret != TRACE_TYPE_NO_CONSUME)
6267                         trace_consume(iter);
6268
6269                 if (trace_seq_used(&iter->seq) >= cnt)
6270                         break;
6271
6272                 /*
6273                  * Setting the full flag means we reached the trace_seq buffer
6274                  * size and we should leave by partial output condition above.
6275                  * One of the trace_seq_* functions is not used properly.
6276                  */
6277                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6278                           iter->ent->type);
6279         }
6280         trace_access_unlock(iter->cpu_file);
6281         trace_event_read_unlock();
6282
6283         /* Now copy what we have to the user */
6284         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6285         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6286                 trace_seq_init(&iter->seq);
6287
6288         /*
6289          * If there was nothing to send to user, in spite of consuming trace
6290          * entries, go back to wait for more entries.
6291          */
6292         if (sret == -EBUSY)
6293                 goto waitagain;
6294
6295 out:
6296         mutex_unlock(&iter->mutex);
6297
6298         return sret;
6299 }
6300
6301 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6302                                      unsigned int idx)
6303 {
6304         __free_page(spd->pages[idx]);
6305 }
6306
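     /*
      * Fill iter->seq with as many formatted trace lines as fit both in
      * the page-sized seq buffer and in the remaining splice length @rem,
      * consuming entries as they are printed. Returns how much of @rem is
      * still left to fill.
      */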
6307 static size_t
6308 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6309 {
6310         size_t count;
6311         int save_len;
6312         int ret;
6313
6314         /* Seq buffer is page-sized, exactly what we need. */
6315         for (;;) {
6316                 save_len = iter->seq.seq.len;
6317                 ret = print_trace_line(iter);
6318
6319                 if (trace_seq_has_overflowed(&iter->seq)) {
6320                         iter->seq.seq.len = save_len;
6321                         break;
6322                 }
6323
6324                 /*
6325                  * This should not be hit, because it should only
6326                  * be set if the iter->seq overflowed. But check it
6327                  * anyway to be safe.
6328                  */
6329                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6330                         iter->seq.seq.len = save_len;
6331                         break;
6332                 }
6333
6334                 count = trace_seq_used(&iter->seq) - save_len;
6335                 if (rem < count) {
6336                         rem = 0;
6337                         iter->seq.seq.len = save_len;
6338                         break;
6339                 }
6340
6341                 if (ret != TRACE_TYPE_NO_CONSUME)
6342                         trace_consume(iter);
6343                 rem -= count;
6344                 if (!trace_find_next_entry_inc(iter)) {
6345                         rem = 0;
6346                         iter->ent = NULL;
6347                         break;
6348                 }
6349         }
6350
6351         return rem;
6352 }
6353
6354 static ssize_t tracing_splice_read_pipe(struct file *filp,
6355                                         loff_t *ppos,
6356                                         struct pipe_inode_info *pipe,
6357                                         size_t len,
6358                                         unsigned int flags)
6359 {
6360         struct page *pages_def[PIPE_DEF_BUFFERS];
6361         struct partial_page partial_def[PIPE_DEF_BUFFERS];
6362         struct trace_iterator *iter = filp->private_data;
6363         struct splice_pipe_desc spd = {
6364                 .pages          = pages_def,
6365                 .partial        = partial_def,
6366                 .nr_pages       = 0, /* This gets updated below. */
6367                 .nr_pages_max   = PIPE_DEF_BUFFERS,
6368                 .ops            = &default_pipe_buf_ops,
6369                 .spd_release    = tracing_spd_release_pipe,
6370         };
6371         ssize_t ret;
6372         size_t rem;
6373         unsigned int i;
6374
6375         if (splice_grow_spd(pipe, &spd))
6376                 return -ENOMEM;
6377
6378         mutex_lock(&iter->mutex);
6379
6380         if (iter->trace->splice_read) {
6381                 ret = iter->trace->splice_read(iter, filp,
6382                                                ppos, pipe, len, flags);
6383                 if (ret)
6384                         goto out_err;
6385         }
6386
6387         ret = tracing_wait_pipe(filp);
6388         if (ret <= 0)
6389                 goto out_err;
6390
6391         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6392                 ret = -EFAULT;
6393                 goto out_err;
6394         }
6395
6396         trace_event_read_lock();
6397         trace_access_lock(iter->cpu_file);
6398
6399         /* Fill as many pages as possible. */
6400         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6401                 spd.pages[i] = alloc_page(GFP_KERNEL);
6402                 if (!spd.pages[i])
6403                         break;
6404
6405                 rem = tracing_fill_pipe_page(rem, iter);
6406
6407                 /* Copy the data into the page, so we can start over. */
6408                 ret = trace_seq_to_buffer(&iter->seq,
6409                                           page_address(spd.pages[i]),
6410                                           trace_seq_used(&iter->seq));
6411                 if (ret < 0) {
6412                         __free_page(spd.pages[i]);
6413                         break;
6414                 }
6415                 spd.partial[i].offset = 0;
6416                 spd.partial[i].len = trace_seq_used(&iter->seq);
6417
6418                 trace_seq_init(&iter->seq);
6419         }
6420
6421         trace_access_unlock(iter->cpu_file);
6422         trace_event_read_unlock();
6423         mutex_unlock(&iter->mutex);
6424
6425         spd.nr_pages = i;
6426
6427         if (i)
6428                 ret = splice_to_pipe(pipe, &spd);
6429         else
6430                 ret = 0;
6431 out:
6432         splice_shrink_spd(&spd);
6433         return ret;
6434
6435 out_err:
6436         mutex_unlock(&iter->mutex);
6437         goto out;
6438 }
6439
6440 static ssize_t
6441 tracing_entries_read(struct file *filp, char __user *ubuf,
6442                      size_t cnt, loff_t *ppos)
6443 {
6444         struct inode *inode = file_inode(filp);
6445         struct trace_array *tr = inode->i_private;
6446         int cpu = tracing_get_cpu(inode);
6447         char buf[64];
6448         int r = 0;
6449         ssize_t ret;
6450
6451         mutex_lock(&trace_types_lock);
6452
6453         if (cpu == RING_BUFFER_ALL_CPUS) {
6454                 int cpu, buf_size_same;
6455                 unsigned long size;
6456
6457                 size = 0;
6458                 buf_size_same = 1;
6459                 /* check if all CPU buffer sizes are the same */
6460                 for_each_tracing_cpu(cpu) {
6461                         /* fill in the size from the first enabled CPU */
6462                         if (size == 0)
6463                                 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6464                         if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6465                                 buf_size_same = 0;
6466                                 break;
6467                         }
6468                 }
6469
6470                 if (buf_size_same) {
6471                         if (!ring_buffer_expanded)
6472                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
6473                                             size >> 10,
6474                                             trace_buf_size >> 10);
6475                         else
6476                                 r = sprintf(buf, "%lu\n", size >> 10);
6477                 } else
6478                         r = sprintf(buf, "X\n");
6479         } else
6480                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6481
6482         mutex_unlock(&trace_types_lock);
6483
6484         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6485         return ret;
6486 }
6487
6488 static ssize_t
6489 tracing_entries_write(struct file *filp, const char __user *ubuf,
6490                       size_t cnt, loff_t *ppos)
6491 {
6492         struct inode *inode = file_inode(filp);
6493         struct trace_array *tr = inode->i_private;
6494         unsigned long val;
6495         int ret;
6496
6497         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6498         if (ret)
6499                 return ret;
6500
6501         /* must have at least 1 entry */
6502         if (!val)
6503                 return -EINVAL;
6504
6505         /* value is in KB */
6506         val <<= 10;
6507         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6508         if (ret < 0)
6509                 return ret;
6510
6511         *ppos += cnt;
6512
6513         return cnt;
6514 }
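/*
 * A rough usage sketch for the write handler above (assuming the usual
 * tracefs layout where these fops back buffer_size_kb; the mount point
 * may differ on a given system):
 *
 *   echo 4096 > /sys/kernel/tracing/buffer_size_kb             # all CPUs
 *   echo 1024 > /sys/kernel/tracing/per_cpu/cpu1/buffer_size_kb
 *
 * The value written is interpreted in KB (val <<= 10 above) and must be
 * at least 1.
 */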
6515
6516 static ssize_t
6517 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6518                                 size_t cnt, loff_t *ppos)
6519 {
6520         struct trace_array *tr = filp->private_data;
6521         char buf[64];
6522         int r, cpu;
6523         unsigned long size = 0, expanded_size = 0;
6524
6525         mutex_lock(&trace_types_lock);
6526         for_each_tracing_cpu(cpu) {
6527                 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
6528                 if (!ring_buffer_expanded)
6529                         expanded_size += trace_buf_size >> 10;
6530         }
6531         if (ring_buffer_expanded)
6532                 r = sprintf(buf, "%lu\n", size);
6533         else
6534                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6535         mutex_unlock(&trace_types_lock);
6536
6537         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6538 }
6539
6540 static ssize_t
6541 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6542                           size_t cnt, loff_t *ppos)
6543 {
6544         /*
6545          * There is no need to read what the user has written; this function
6546          * exists only so that using "echo" on this file does not return an error.
6547          */
6548
6549         *ppos += cnt;
6550
6551         return cnt;
6552 }
6553
6554 static int
6555 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6556 {
6557         struct trace_array *tr = inode->i_private;
6558
6559         /* disable tracing ? */
6560         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6561                 tracer_tracing_off(tr);
6562         /* resize the ring buffer to 0 */
6563         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6564
6565         trace_array_put(tr);
6566
6567         return 0;
6568 }
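/*
 * Sketch of the intended interaction with the file these handlers back
 * (conventionally free_buffer under tracefs; the path is an assumption):
 *
 *   echo > /sys/kernel/tracing/free_buffer
 *
 * On release, the ring buffer is resized to zero and, if the
 * TRACE_ITER_STOP_ON_FREE option is set, tracing is turned off first.
 */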
6569
6570 static ssize_t
6571 tracing_mark_write(struct file *filp, const char __user *ubuf,
6572                                         size_t cnt, loff_t *fpos)
6573 {
6574         struct trace_array *tr = filp->private_data;
6575         struct ring_buffer_event *event;
6576         enum event_trigger_type tt = ETT_NONE;
6577         struct trace_buffer *buffer;
6578         struct print_entry *entry;
6579         unsigned long irq_flags;
6580         ssize_t written;
6581         int size;
6582         int len;
6583
6584 /* Used in tracing_mark_raw_write() as well */
6585 #define FAULTED_STR "<faulted>"
6586 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
6587
6588         if (tracing_disabled)
6589                 return -EINVAL;
6590
6591         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6592                 return -EINVAL;
6593
6594         if (cnt > TRACE_BUF_SIZE)
6595                 cnt = TRACE_BUF_SIZE;
6596
6597         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6598
6599         local_save_flags(irq_flags);
6600         size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
6601
6602         /* If cnt is shorter than "<faulted>", make sure we can still store that string */
6603         if (cnt < FAULTED_SIZE)
6604                 size += FAULTED_SIZE - cnt;
6605
6606         buffer = tr->array_buffer.buffer;
6607         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6608                                             irq_flags, preempt_count());
6609         if (unlikely(!event))
6610                 /* Ring buffer disabled, return as if not open for write */
6611                 return -EBADF;
6612
6613         entry = ring_buffer_event_data(event);
6614         entry->ip = _THIS_IP_;
6615
6616         len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6617         if (len) {
6618                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6619                 cnt = FAULTED_SIZE;
6620                 written = -EFAULT;
6621         } else
6622                 written = cnt;
6623         len = cnt;
6624
6625         if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
6626                 /* do not add \n before testing triggers, but add \0 */
6627                 entry->buf[cnt] = '\0';
6628                 tt = event_triggers_call(tr->trace_marker_file, entry, event);
6629         }
6630
6631         if (entry->buf[cnt - 1] != '\n') {
6632                 entry->buf[cnt] = '\n';
6633                 entry->buf[cnt + 1] = '\0';
6634         } else
6635                 entry->buf[cnt] = '\0';
6636
6637         __buffer_unlock_commit(buffer, event);
6638
6639         if (tt)
6640                 event_triggers_post_call(tr->trace_marker_file, tt);
6641
6642         if (written > 0)
6643                 *fpos += written;
6644
6645         return written;
6646 }
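/*
 * Illustrative userspace sketch (not part of this file's build) of how a
 * program might use the write handler above through the file it is
 * conventionally wired to, trace_marker (the mount point is an assumption):
 *
 *      int fd = open("/sys/kernel/tracing/trace_marker", O_WRONLY);
 *      if (fd >= 0) {
 *              write(fd, "hit checkpoint A", 16);
 *              close(fd);
 *      }
 *
 * A trailing newline is appended above if the writer did not supply one,
 * and writes longer than TRACE_BUF_SIZE are truncated.
 */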
6647
6648 /* Limit it for now to 3K (including tag) */
6649 #define RAW_DATA_MAX_SIZE (1024*3)
6650
6651 static ssize_t
6652 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6653                                         size_t cnt, loff_t *fpos)
6654 {
6655         struct trace_array *tr = filp->private_data;
6656         struct ring_buffer_event *event;
6657         struct trace_buffer *buffer;
6658         struct raw_data_entry *entry;
6659         unsigned long irq_flags;
6660         ssize_t written;
6661         int size;
6662         int len;
6663
6664 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6665
6666         if (tracing_disabled)
6667                 return -EINVAL;
6668
6669         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6670                 return -EINVAL;
6671
6672         /* The marker must at least have a tag id */
6673         if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
6674                 return -EINVAL;
6675
6676         if (cnt > TRACE_BUF_SIZE)
6677                 cnt = TRACE_BUF_SIZE;
6678
6679         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6680
6681         local_save_flags(irq_flags);
6682         size = sizeof(*entry) + cnt;
6683         if (cnt < FAULT_SIZE_ID)
6684                 size += FAULT_SIZE_ID - cnt;
6685
6686         buffer = tr->array_buffer.buffer;
6687         event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
6688                                             irq_flags, preempt_count());
6689         if (!event)
6690                 /* Ring buffer disabled, return as if not open for write */
6691                 return -EBADF;
6692
6693         entry = ring_buffer_event_data(event);
6694
6695         len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
6696         if (len) {
6697                 entry->id = -1;
6698                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6699                 written = -EFAULT;
6700         } else
6701                 written = cnt;
6702
6703         __buffer_unlock_commit(buffer, event);
6704
6705         if (written > 0)
6706                 *fpos += written;
6707
6708         return written;
6709 }
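/*
 * Minimal userspace sketch for the raw marker write above (assuming these
 * fops back trace_marker_raw; the struct and values are illustrative):
 *
 *      struct { unsigned int id; char payload[8]; } rec = { 42, "rawdata" };
 *      int fd = open("/sys/kernel/tracing/trace_marker_raw", O_WRONLY);
 *      write(fd, &rec, sizeof(rec));
 *
 * The first sizeof(unsigned int) bytes are taken as the tag id and the
 * remainder is stored verbatim; writes larger than RAW_DATA_MAX_SIZE are
 * rejected with -EINVAL.
 */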
6710
6711 static int tracing_clock_show(struct seq_file *m, void *v)
6712 {
6713         struct trace_array *tr = m->private;
6714         int i;
6715
6716         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
6717                 seq_printf(m,
6718                         "%s%s%s%s", i ? " " : "",
6719                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
6720                         i == tr->clock_id ? "]" : "");
6721         seq_putc(m, '\n');
6722
6723         return 0;
6724 }
6725
6726 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
6727 {
6728         int i;
6729
6730         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
6731                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
6732                         break;
6733         }
6734         if (i == ARRAY_SIZE(trace_clocks))
6735                 return -EINVAL;
6736
6737         mutex_lock(&trace_types_lock);
6738
6739         tr->clock_id = i;
6740
6741         ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
6742
6743         /*
6744          * New clock may not be consistent with the previous clock.
6745          * Reset the buffer so that it doesn't have incomparable timestamps.
6746          */
6747         tracing_reset_online_cpus(&tr->array_buffer);
6748
6749 #ifdef CONFIG_TRACER_MAX_TRACE
6750         if (tr->max_buffer.buffer)
6751                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
6752         tracing_reset_online_cpus(&tr->max_buffer);
6753 #endif
6754
6755         mutex_unlock(&trace_types_lock);
6756
6757         return 0;
6758 }
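/*
 * Sketch of the usual interaction with the trace_clock file that
 * tracing_clock_show()/tracing_set_clock() implement (paths assume a
 * standard tracefs mount):
 *
 *   cat /sys/kernel/tracing/trace_clock      # e.g. "[local] global counter ..."
 *   echo global > /sys/kernel/tracing/trace_clock
 *
 * Switching clocks resets the buffers (see tracing_reset_online_cpus()
 * above) so that old and new timestamps are never mixed.
 */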
6759
6760 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6761                                    size_t cnt, loff_t *fpos)
6762 {
6763         struct seq_file *m = filp->private_data;
6764         struct trace_array *tr = m->private;
6765         char buf[64];
6766         const char *clockstr;
6767         int ret;
6768
6769         if (cnt >= sizeof(buf))
6770                 return -EINVAL;
6771
6772         if (copy_from_user(buf, ubuf, cnt))
6773                 return -EFAULT;
6774
6775         buf[cnt] = 0;
6776
6777         clockstr = strstrip(buf);
6778
6779         ret = tracing_set_clock(tr, clockstr);
6780         if (ret)
6781                 return ret;
6782
6783         *fpos += cnt;
6784
6785         return cnt;
6786 }
6787
6788 static int tracing_clock_open(struct inode *inode, struct file *file)
6789 {
6790         struct trace_array *tr = inode->i_private;
6791         int ret;
6792
6793         ret = tracing_check_open_get_tr(tr);
6794         if (ret)
6795                 return ret;
6796
6797         ret = single_open(file, tracing_clock_show, inode->i_private);
6798         if (ret < 0)
6799                 trace_array_put(tr);
6800
6801         return ret;
6802 }
6803
6804 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
6805 {
6806         struct trace_array *tr = m->private;
6807
6808         mutex_lock(&trace_types_lock);
6809
6810         if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
6811                 seq_puts(m, "delta [absolute]\n");
6812         else
6813                 seq_puts(m, "[delta] absolute\n");
6814
6815         mutex_unlock(&trace_types_lock);
6816
6817         return 0;
6818 }
6819
6820 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
6821 {
6822         struct trace_array *tr = inode->i_private;
6823         int ret;
6824
6825         ret = tracing_check_open_get_tr(tr);
6826         if (ret)
6827                 return ret;
6828
6829         ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
6830         if (ret < 0)
6831                 trace_array_put(tr);
6832
6833         return ret;
6834 }
6835
6836 int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs)
6837 {
6838         int ret = 0;
6839
6840         mutex_lock(&trace_types_lock);
6841
6842         if (abs && tr->time_stamp_abs_ref++)
6843                 goto out;
6844
6845         if (!abs) {
6846                 if (WARN_ON_ONCE(!tr->time_stamp_abs_ref)) {
6847                         ret = -EINVAL;
6848                         goto out;
6849                 }
6850
6851                 if (--tr->time_stamp_abs_ref)
6852                         goto out;
6853         }
6854
6855         ring_buffer_set_time_stamp_abs(tr->array_buffer.buffer, abs);
6856
6857 #ifdef CONFIG_TRACER_MAX_TRACE
6858         if (tr->max_buffer.buffer)
6859                 ring_buffer_set_time_stamp_abs(tr->max_buffer.buffer, abs);
6860 #endif
6861  out:
6862         mutex_unlock(&trace_types_lock);
6863
6864         return ret;
6865 }
6866
6867 struct ftrace_buffer_info {
6868         struct trace_iterator   iter;
6869         void                    *spare;
6870         unsigned int            spare_cpu;
6871         unsigned int            read;
6872 };
6873
6874 #ifdef CONFIG_TRACER_SNAPSHOT
6875 static int tracing_snapshot_open(struct inode *inode, struct file *file)
6876 {
6877         struct trace_array *tr = inode->i_private;
6878         struct trace_iterator *iter;
6879         struct seq_file *m;
6880         int ret;
6881
6882         ret = tracing_check_open_get_tr(tr);
6883         if (ret)
6884                 return ret;
6885
6886         if (file->f_mode & FMODE_READ) {
6887                 iter = __tracing_open(inode, file, true);
6888                 if (IS_ERR(iter))
6889                         ret = PTR_ERR(iter);
6890         } else {
6891                 /* Writes still need the seq_file to hold the private data */
6892                 ret = -ENOMEM;
6893                 m = kzalloc(sizeof(*m), GFP_KERNEL);
6894                 if (!m)
6895                         goto out;
6896                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6897                 if (!iter) {
6898                         kfree(m);
6899                         goto out;
6900                 }
6901                 ret = 0;
6902
6903                 iter->tr = tr;
6904                 iter->array_buffer = &tr->max_buffer;
6905                 iter->cpu_file = tracing_get_cpu(inode);
6906                 m->private = iter;
6907                 file->private_data = m;
6908         }
6909 out:
6910         if (ret < 0)
6911                 trace_array_put(tr);
6912
6913         return ret;
6914 }
6915
6916 static ssize_t
6917 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
6918                        loff_t *ppos)
6919 {
6920         struct seq_file *m = filp->private_data;
6921         struct trace_iterator *iter = m->private;
6922         struct trace_array *tr = iter->tr;
6923         unsigned long val;
6924         int ret;
6925
6926         ret = tracing_update_buffers();
6927         if (ret < 0)
6928                 return ret;
6929
6930         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6931         if (ret)
6932                 return ret;
6933
6934         mutex_lock(&trace_types_lock);
6935
6936         if (tr->current_trace->use_max_tr) {
6937                 ret = -EBUSY;
6938                 goto out;
6939         }
6940
6941         arch_spin_lock(&tr->max_lock);
6942         if (tr->cond_snapshot)
6943                 ret = -EBUSY;
6944         arch_spin_unlock(&tr->max_lock);
6945         if (ret)
6946                 goto out;
6947
6948         switch (val) {
6949         case 0:
6950                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6951                         ret = -EINVAL;
6952                         break;
6953                 }
6954                 if (tr->allocated_snapshot)
6955                         free_snapshot(tr);
6956                 break;
6957         case 1:
6958 /* Only allow per-cpu swap if the ring buffer supports it */
6959 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
6960                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6961                         ret = -EINVAL;
6962                         break;
6963                 }
6964 #endif
6965                 if (tr->allocated_snapshot)
6966                         ret = resize_buffer_duplicate_size(&tr->max_buffer,
6967                                         &tr->array_buffer, iter->cpu_file);
6968                 else
6969                         ret = tracing_alloc_snapshot_instance(tr);
6970                 if (ret < 0)
6971                         break;
6972                 local_irq_disable();
6973                 /* Now, we're going to swap */
6974                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6975                         update_max_tr(tr, current, smp_processor_id(), NULL);
6976                 else
6977                         update_max_tr_single(tr, current, iter->cpu_file);
6978                 local_irq_enable();
6979                 break;
6980         default:
6981                 if (tr->allocated_snapshot) {
6982                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6983                                 tracing_reset_online_cpus(&tr->max_buffer);
6984                         else
6985                                 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
6986                 }
6987                 break;
6988         }
6989
6990         if (ret >= 0) {
6991                 *ppos += cnt;
6992                 ret = cnt;
6993         }
6994 out:
6995         mutex_unlock(&trace_types_lock);
6996         return ret;
6997 }
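/*
 * The switch statement above gives writes to the snapshot file the
 * following rough semantics (illustration; paths assume a standard
 * tracefs mount):
 *
 *   echo 0 > /sys/kernel/tracing/snapshot   # free the snapshot buffer
 *   echo 1 > /sys/kernel/tracing/snapshot   # allocate if needed and take a snapshot
 *   echo 2 > /sys/kernel/tracing/snapshot   # any other value: clear the snapshot contents
 */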
6998
6999 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7000 {
7001         struct seq_file *m = file->private_data;
7002         int ret;
7003
7004         ret = tracing_release(inode, file);
7005
7006         if (file->f_mode & FMODE_READ)
7007                 return ret;
7008
7009         /* If write only, the seq_file is just a stub */
7010         if (m)
7011                 kfree(m->private);
7012         kfree(m);
7013
7014         return 0;
7015 }
7016
7017 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7018 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7019                                     size_t count, loff_t *ppos);
7020 static int tracing_buffers_release(struct inode *inode, struct file *file);
7021 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7022                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7023
7024 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7025 {
7026         struct ftrace_buffer_info *info;
7027         int ret;
7028
7029         /* The following checks for tracefs lockdown */
7030         ret = tracing_buffers_open(inode, filp);
7031         if (ret < 0)
7032                 return ret;
7033
7034         info = filp->private_data;
7035
7036         if (info->iter.trace->use_max_tr) {
7037                 tracing_buffers_release(inode, filp);
7038                 return -EBUSY;
7039         }
7040
7041         info->iter.snapshot = true;
7042         info->iter.array_buffer = &info->iter.tr->max_buffer;
7043
7044         return ret;
7045 }
7046
7047 #endif /* CONFIG_TRACER_SNAPSHOT */
7048
7049
7050 static const struct file_operations tracing_thresh_fops = {
7051         .open           = tracing_open_generic,
7052         .read           = tracing_thresh_read,
7053         .write          = tracing_thresh_write,
7054         .llseek         = generic_file_llseek,
7055 };
7056
7057 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7058 static const struct file_operations tracing_max_lat_fops = {
7059         .open           = tracing_open_generic,
7060         .read           = tracing_max_lat_read,
7061         .write          = tracing_max_lat_write,
7062         .llseek         = generic_file_llseek,
7063 };
7064 #endif
7065
7066 static const struct file_operations set_tracer_fops = {
7067         .open           = tracing_open_generic,
7068         .read           = tracing_set_trace_read,
7069         .write          = tracing_set_trace_write,
7070         .llseek         = generic_file_llseek,
7071 };
7072
7073 static const struct file_operations tracing_pipe_fops = {
7074         .open           = tracing_open_pipe,
7075         .poll           = tracing_poll_pipe,
7076         .read           = tracing_read_pipe,
7077         .splice_read    = tracing_splice_read_pipe,
7078         .release        = tracing_release_pipe,
7079         .llseek         = no_llseek,
7080 };
7081
7082 static const struct file_operations tracing_entries_fops = {
7083         .open           = tracing_open_generic_tr,
7084         .read           = tracing_entries_read,
7085         .write          = tracing_entries_write,
7086         .llseek         = generic_file_llseek,
7087         .release        = tracing_release_generic_tr,
7088 };
7089
7090 static const struct file_operations tracing_total_entries_fops = {
7091         .open           = tracing_open_generic_tr,
7092         .read           = tracing_total_entries_read,
7093         .llseek         = generic_file_llseek,
7094         .release        = tracing_release_generic_tr,
7095 };
7096
7097 static const struct file_operations tracing_free_buffer_fops = {
7098         .open           = tracing_open_generic_tr,
7099         .write          = tracing_free_buffer_write,
7100         .release        = tracing_free_buffer_release,
7101 };
7102
7103 static const struct file_operations tracing_mark_fops = {
7104         .open           = tracing_open_generic_tr,
7105         .write          = tracing_mark_write,
7106         .llseek         = generic_file_llseek,
7107         .release        = tracing_release_generic_tr,
7108 };
7109
7110 static const struct file_operations tracing_mark_raw_fops = {
7111         .open           = tracing_open_generic_tr,
7112         .write          = tracing_mark_raw_write,
7113         .llseek         = generic_file_llseek,
7114         .release        = tracing_release_generic_tr,
7115 };
7116
7117 static const struct file_operations trace_clock_fops = {
7118         .open           = tracing_clock_open,
7119         .read           = seq_read,
7120         .llseek         = seq_lseek,
7121         .release        = tracing_single_release_tr,
7122         .write          = tracing_clock_write,
7123 };
7124
7125 static const struct file_operations trace_time_stamp_mode_fops = {
7126         .open           = tracing_time_stamp_mode_open,
7127         .read           = seq_read,
7128         .llseek         = seq_lseek,
7129         .release        = tracing_single_release_tr,
7130 };
7131
7132 #ifdef CONFIG_TRACER_SNAPSHOT
7133 static const struct file_operations snapshot_fops = {
7134         .open           = tracing_snapshot_open,
7135         .read           = seq_read,
7136         .write          = tracing_snapshot_write,
7137         .llseek         = tracing_lseek,
7138         .release        = tracing_snapshot_release,
7139 };
7140
7141 static const struct file_operations snapshot_raw_fops = {
7142         .open           = snapshot_raw_open,
7143         .read           = tracing_buffers_read,
7144         .release        = tracing_buffers_release,
7145         .splice_read    = tracing_buffers_splice_read,
7146         .llseek         = no_llseek,
7147 };
7148
7149 #endif /* CONFIG_TRACER_SNAPSHOT */
7150
7151 #define TRACING_LOG_ERRS_MAX    8
7152 #define TRACING_LOG_LOC_MAX     128
7153
7154 #define CMD_PREFIX "  Command: "
7155
7156 struct err_info {
7157         const char      **errs; /* ptr to loc-specific array of err strings */
7158         u8              type;   /* index into errs -> specific err string */
7159         u8              pos;    /* caret position within cmd (MAX_FILTER_STR_VAL = 256) */
7160         u64             ts;
7161 };
7162
7163 struct tracing_log_err {
7164         struct list_head        list;
7165         struct err_info         info;
7166         char                    loc[TRACING_LOG_LOC_MAX]; /* err location */
7167         char                    cmd[MAX_FILTER_STR_VAL]; /* what caused err */
7168 };
7169
7170 static DEFINE_MUTEX(tracing_err_log_lock);
7171
7172 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
7173 {
7174         struct tracing_log_err *err;
7175
7176         if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7177                 err = kzalloc(sizeof(*err), GFP_KERNEL);
7178                 if (!err)
7179                         err = ERR_PTR(-ENOMEM);
7180                 tr->n_err_log_entries++;
7181
7182                 return err;
7183         }
7184
7185         err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7186         list_del(&err->list);
7187
7188         return err;
7189 }
7190
7191 /**
7192  * err_pos - find the position of a string within a command for error careting
7193  * @cmd: The tracing command that caused the error
7194  * @str: The string to position the caret at within @cmd
7195  *
7196  * Finds the position of the first occurrence of @str within @cmd.  The
7197  * return value can be passed to tracing_log_err() for caret placement
7198  * within @cmd.
7199  *
7200  * Returns the index within @cmd of the first occurrence of @str or 0
7201  * if @str was not found.
7202  */
7203 unsigned int err_pos(char *cmd, const char *str)
7204 {
7205         char *found;
7206
7207         if (WARN_ON(!strlen(cmd)))
7208                 return 0;
7209
7210         found = strstr(cmd, str);
7211         if (found)
7212                 return found - cmd;
7213
7214         return 0;
7215 }
7216
7217 /**
7218  * tracing_log_err - write an error to the tracing error log
7219  * @tr: The associated trace array for the error (NULL for top level array)
7220  * @loc: A string describing where the error occurred
7221  * @cmd: The tracing command that caused the error
7222  * @errs: The array of loc-specific static error strings
7223  * @type: The index into errs[], which produces the specific static err string
7224  * @pos: The position the caret should be placed in the cmd
7225  *
7226  * Writes an error into tracing/error_log of the form:
7227  *
7228  * <loc>: error: <text>
7229  *   Command: <cmd>
7230  *              ^
7231  *
7232  * tracing/error_log is a small log file containing the last
7233  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7234  * unless there has been a tracing error, and the error log can be
7235  * cleared and have its memory freed by writing the empty string in
7236  * truncation mode to it, i.e. echo > tracing/error_log.
7237  *
7238  * NOTE: the @errs array along with the @type param are used to
7239  * produce a static error string - this string is not copied and saved
7240  * when the error is logged - only a pointer to it is saved.  See
7241  * existing callers for examples of how static strings are typically
7242  * defined for use with tracing_log_err().
7243  */
7244 void tracing_log_err(struct trace_array *tr,
7245                      const char *loc, const char *cmd,
7246                      const char **errs, u8 type, u8 pos)
7247 {
7248         struct tracing_log_err *err;
7249
7250         if (!tr)
7251                 tr = &global_trace;
7252
7253         mutex_lock(&tracing_err_log_lock);
7254         err = get_tracing_log_err(tr);
7255         if (PTR_ERR(err) == -ENOMEM) {
7256                 mutex_unlock(&tracing_err_log_lock);
7257                 return;
7258         }
7259
7260         snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7261         snprintf(err->cmd, MAX_FILTER_STR_VAL, "\n" CMD_PREFIX "%s\n", cmd);
7262
7263         err->info.errs = errs;
7264         err->info.type = type;
7265         err->info.pos = pos;
7266         err->info.ts = local_clock();
7267
7268         list_add_tail(&err->list, &tr->err_log);
7269         mutex_unlock(&tracing_err_log_lock);
7270 }
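/*
 * Hypothetical caller sketch for tracing_log_err(), mirroring how existing
 * users pair a static error-string array with an enum of indices (all
 * identifiers below are made up for illustration):
 *
 *      static const char *my_cmd_errs[] = { "Invalid argument", "Duplicate name" };
 *      enum { MY_ERR_INVAL, MY_ERR_DUP };
 *
 *      tracing_log_err(tr, "my_cmd", cmd_buf, my_cmd_errs,
 *                      MY_ERR_INVAL, err_pos(cmd_buf, bad_token));
 *
 * which would show up in tracing/error_log roughly as:
 *
 *      [  123.456789] my_cmd: error: Invalid argument
 *        Command: <contents of cmd_buf>
 *                      ^
 */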
7271
7272 static void clear_tracing_err_log(struct trace_array *tr)
7273 {
7274         struct tracing_log_err *err, *next;
7275
7276         mutex_lock(&tracing_err_log_lock);
7277         list_for_each_entry_safe(err, next, &tr->err_log, list) {
7278                 list_del(&err->list);
7279                 kfree(err);
7280         }
7281
7282         tr->n_err_log_entries = 0;
7283         mutex_unlock(&tracing_err_log_lock);
7284 }
7285
7286 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7287 {
7288         struct trace_array *tr = m->private;
7289
7290         mutex_lock(&tracing_err_log_lock);
7291
7292         return seq_list_start(&tr->err_log, *pos);
7293 }
7294
7295 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7296 {
7297         struct trace_array *tr = m->private;
7298
7299         return seq_list_next(v, &tr->err_log, pos);
7300 }
7301
7302 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7303 {
7304         mutex_unlock(&tracing_err_log_lock);
7305 }
7306
7307 static void tracing_err_log_show_pos(struct seq_file *m, u8 pos)
7308 {
7309         u8 i;
7310
7311         for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7312                 seq_putc(m, ' ');
7313         for (i = 0; i < pos; i++)
7314                 seq_putc(m, ' ');
7315         seq_puts(m, "^\n");
7316 }
7317
7318 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7319 {
7320         struct tracing_log_err *err = v;
7321
7322         if (err) {
7323                 const char *err_text = err->info.errs[err->info.type];
7324                 u64 sec = err->info.ts;
7325                 u32 nsec;
7326
7327                 nsec = do_div(sec, NSEC_PER_SEC);
7328                 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7329                            err->loc, err_text);
7330                 seq_printf(m, "%s", err->cmd);
7331                 tracing_err_log_show_pos(m, err->info.pos);
7332         }
7333
7334         return 0;
7335 }
7336
7337 static const struct seq_operations tracing_err_log_seq_ops = {
7338         .start  = tracing_err_log_seq_start,
7339         .next   = tracing_err_log_seq_next,
7340         .stop   = tracing_err_log_seq_stop,
7341         .show   = tracing_err_log_seq_show
7342 };
7343
7344 static int tracing_err_log_open(struct inode *inode, struct file *file)
7345 {
7346         struct trace_array *tr = inode->i_private;
7347         int ret = 0;
7348
7349         ret = tracing_check_open_get_tr(tr);
7350         if (ret)
7351                 return ret;
7352
7353         /* If this file was opened for write, then erase contents */
7354         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7355                 clear_tracing_err_log(tr);
7356
7357         if (file->f_mode & FMODE_READ) {
7358                 ret = seq_open(file, &tracing_err_log_seq_ops);
7359                 if (!ret) {
7360                         struct seq_file *m = file->private_data;
7361                         m->private = tr;
7362                 } else {
7363                         trace_array_put(tr);
7364                 }
7365         }
7366         return ret;
7367 }
7368
7369 static ssize_t tracing_err_log_write(struct file *file,
7370                                      const char __user *buffer,
7371                                      size_t count, loff_t *ppos)
7372 {
7373         return count;
7374 }
7375
7376 static int tracing_err_log_release(struct inode *inode, struct file *file)
7377 {
7378         struct trace_array *tr = inode->i_private;
7379
7380         trace_array_put(tr);
7381
7382         if (file->f_mode & FMODE_READ)
7383                 seq_release(inode, file);
7384
7385         return 0;
7386 }
7387
7388 static const struct file_operations tracing_err_log_fops = {
7389         .open           = tracing_err_log_open,
7390         .write          = tracing_err_log_write,
7391         .read           = seq_read,
7392         .llseek         = seq_lseek,
7393         .release        = tracing_err_log_release,
7394 };
7395
7396 static int tracing_buffers_open(struct inode *inode, struct file *filp)
7397 {
7398         struct trace_array *tr = inode->i_private;
7399         struct ftrace_buffer_info *info;
7400         int ret;
7401
7402         ret = tracing_check_open_get_tr(tr);
7403         if (ret)
7404                 return ret;
7405
7406         info = kzalloc(sizeof(*info), GFP_KERNEL);
7407         if (!info) {
7408                 trace_array_put(tr);
7409                 return -ENOMEM;
7410         }
7411
7412         mutex_lock(&trace_types_lock);
7413
7414         info->iter.tr           = tr;
7415         info->iter.cpu_file     = tracing_get_cpu(inode);
7416         info->iter.trace        = tr->current_trace;
7417         info->iter.array_buffer = &tr->array_buffer;
7418         info->spare             = NULL;
7419         /* Force reading ring buffer for first read */
7420         info->read              = (unsigned int)-1;
7421
7422         filp->private_data = info;
7423
7424         tr->current_trace->ref++;
7425
7426         mutex_unlock(&trace_types_lock);
7427
7428         ret = nonseekable_open(inode, filp);
7429         if (ret < 0)
7430                 trace_array_put(tr);
7431
7432         return ret;
7433 }
7434
7435 static __poll_t
7436 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
7437 {
7438         struct ftrace_buffer_info *info = filp->private_data;
7439         struct trace_iterator *iter = &info->iter;
7440
7441         return trace_poll(iter, filp, poll_table);
7442 }
7443
7444 static ssize_t
7445 tracing_buffers_read(struct file *filp, char __user *ubuf,
7446                      size_t count, loff_t *ppos)
7447 {
7448         struct ftrace_buffer_info *info = filp->private_data;
7449         struct trace_iterator *iter = &info->iter;
7450         ssize_t ret = 0;
7451         ssize_t size;
7452
7453         if (!count)
7454                 return 0;
7455
7456 #ifdef CONFIG_TRACER_MAX_TRACE
7457         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7458                 return -EBUSY;
7459 #endif
7460
7461         if (!info->spare) {
7462                 info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
7463                                                           iter->cpu_file);
7464                 if (IS_ERR(info->spare)) {
7465                         ret = PTR_ERR(info->spare);
7466                         info->spare = NULL;
7467                 } else {
7468                         info->spare_cpu = iter->cpu_file;
7469                 }
7470         }
7471         if (!info->spare)
7472                 return ret;
7473
7474         /* Do we have previous read data to read? */
7475         if (info->read < PAGE_SIZE)
7476                 goto read;
7477
7478  again:
7479         trace_access_lock(iter->cpu_file);
7480         ret = ring_buffer_read_page(iter->array_buffer->buffer,
7481                                     &info->spare,
7482                                     count,
7483                                     iter->cpu_file, 0);
7484         trace_access_unlock(iter->cpu_file);
7485
7486         if (ret < 0) {
7487                 if (trace_empty(iter)) {
7488                         if ((filp->f_flags & O_NONBLOCK))
7489                                 return -EAGAIN;
7490
7491                         ret = wait_on_pipe(iter, 0);
7492                         if (ret)
7493                                 return ret;
7494
7495                         goto again;
7496                 }
7497                 return 0;
7498         }
7499
7500         info->read = 0;
7501  read:
7502         size = PAGE_SIZE - info->read;
7503         if (size > count)
7504                 size = count;
7505
7506         ret = copy_to_user(ubuf, info->spare + info->read, size);
7507         if (ret == size)
7508                 return -EFAULT;
7509
7510         size -= ret;
7511
7512         *ppos += size;
7513         info->read += size;
7514
7515         return size;
7516 }
7517
7518 static int tracing_buffers_release(struct inode *inode, struct file *file)
7519 {
7520         struct ftrace_buffer_info *info = file->private_data;
7521         struct trace_iterator *iter = &info->iter;
7522
7523         mutex_lock(&trace_types_lock);
7524
7525         iter->tr->current_trace->ref--;
7526
7527         __trace_array_put(iter->tr);
7528
7529         if (info->spare)
7530                 ring_buffer_free_read_page(iter->array_buffer->buffer,
7531                                            info->spare_cpu, info->spare);
7532         kfree(info);
7533
7534         mutex_unlock(&trace_types_lock);
7535
7536         return 0;
7537 }
7538
7539 struct buffer_ref {
7540         struct trace_buffer     *buffer;
7541         void                    *page;
7542         int                     cpu;
7543         refcount_t              refcount;
7544 };
7545
7546 static void buffer_ref_release(struct buffer_ref *ref)
7547 {
7548         if (!refcount_dec_and_test(&ref->refcount))
7549                 return;
7550         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
7551         kfree(ref);
7552 }
7553
7554 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
7555                                     struct pipe_buffer *buf)
7556 {
7557         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7558
7559         buffer_ref_release(ref);
7560         buf->private = 0;
7561 }
7562
7563 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
7564                                 struct pipe_buffer *buf)
7565 {
7566         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7567
7568         if (refcount_read(&ref->refcount) > INT_MAX/2)
7569                 return false;
7570
7571         refcount_inc(&ref->refcount);
7572         return true;
7573 }
7574
7575 /* Pipe buffer operations for a buffer. */
7576 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
7577         .confirm                = generic_pipe_buf_confirm,
7578         .release                = buffer_pipe_buf_release,
7579         .steal                  = generic_pipe_buf_nosteal,
7580         .get                    = buffer_pipe_buf_get,
7581 };
7582
7583 /*
7584  * Callback from splice_to_pipe(), if we need to release some pages
7585  * at the end of the spd in case we errored out while filling the pipe.
7586  */
7587 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
7588 {
7589         struct buffer_ref *ref =
7590                 (struct buffer_ref *)spd->partial[i].private;
7591
7592         buffer_ref_release(ref);
7593         spd->partial[i].private = 0;
7594 }
7595
7596 static ssize_t
7597 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7598                             struct pipe_inode_info *pipe, size_t len,
7599                             unsigned int flags)
7600 {
7601         struct ftrace_buffer_info *info = file->private_data;
7602         struct trace_iterator *iter = &info->iter;
7603         struct partial_page partial_def[PIPE_DEF_BUFFERS];
7604         struct page *pages_def[PIPE_DEF_BUFFERS];
7605         struct splice_pipe_desc spd = {
7606                 .pages          = pages_def,
7607                 .partial        = partial_def,
7608                 .nr_pages_max   = PIPE_DEF_BUFFERS,
7609                 .ops            = &buffer_pipe_buf_ops,
7610                 .spd_release    = buffer_spd_release,
7611         };
7612         struct buffer_ref *ref;
7613         int entries, i;
7614         ssize_t ret = 0;
7615
7616 #ifdef CONFIG_TRACER_MAX_TRACE
7617         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7618                 return -EBUSY;
7619 #endif
7620
7621         if (*ppos & (PAGE_SIZE - 1))
7622                 return -EINVAL;
7623
7624         if (len & (PAGE_SIZE - 1)) {
7625                 if (len < PAGE_SIZE)
7626                         return -EINVAL;
7627                 len &= PAGE_MASK;
7628         }
7629
7630         if (splice_grow_spd(pipe, &spd))
7631                 return -ENOMEM;
7632
7633  again:
7634         trace_access_lock(iter->cpu_file);
7635         entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
7636
7637         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
7638                 struct page *page;
7639                 int r;
7640
7641                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
7642                 if (!ref) {
7643                         ret = -ENOMEM;
7644                         break;
7645                 }
7646
7647                 refcount_set(&ref->refcount, 1);
7648                 ref->buffer = iter->array_buffer->buffer;
7649                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
7650                 if (IS_ERR(ref->page)) {
7651                         ret = PTR_ERR(ref->page);
7652                         ref->page = NULL;
7653                         kfree(ref);
7654                         break;
7655                 }
7656                 ref->cpu = iter->cpu_file;
7657
7658                 r = ring_buffer_read_page(ref->buffer, &ref->page,
7659                                           len, iter->cpu_file, 1);
7660                 if (r < 0) {
7661                         ring_buffer_free_read_page(ref->buffer, ref->cpu,
7662                                                    ref->page);
7663                         kfree(ref);
7664                         break;
7665                 }
7666
7667                 page = virt_to_page(ref->page);
7668
7669                 spd.pages[i] = page;
7670                 spd.partial[i].len = PAGE_SIZE;
7671                 spd.partial[i].offset = 0;
7672                 spd.partial[i].private = (unsigned long)ref;
7673                 spd.nr_pages++;
7674                 *ppos += PAGE_SIZE;
7675
7676                 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
7677         }
7678
7679         trace_access_unlock(iter->cpu_file);
7680         spd.nr_pages = i;
7681
7682         /* did we read anything? */
7683         if (!spd.nr_pages) {
7684                 if (ret)
7685                         goto out;
7686
7687                 ret = -EAGAIN;
7688                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
7689                         goto out;
7690
7691                 ret = wait_on_pipe(iter, iter->tr->buffer_percent);
7692                 if (ret)
7693                         goto out;
7694
7695                 goto again;
7696         }
7697
7698         ret = splice_to_pipe(pipe, &spd);
7699 out:
7700         splice_shrink_spd(&spd);
7701
7702         return ret;
7703 }
7704
7705 static const struct file_operations tracing_buffers_fops = {
7706         .open           = tracing_buffers_open,
7707         .read           = tracing_buffers_read,
7708         .poll           = tracing_buffers_poll,
7709         .release        = tracing_buffers_release,
7710         .splice_read    = tracing_buffers_splice_read,
7711         .llseek         = no_llseek,
7712 };
7713
7714 static ssize_t
7715 tracing_stats_read(struct file *filp, char __user *ubuf,
7716                    size_t count, loff_t *ppos)
7717 {
7718         struct inode *inode = file_inode(filp);
7719         struct trace_array *tr = inode->i_private;
7720         struct array_buffer *trace_buf = &tr->array_buffer;
7721         int cpu = tracing_get_cpu(inode);
7722         struct trace_seq *s;
7723         unsigned long cnt;
7724         unsigned long long t;
7725         unsigned long usec_rem;
7726
7727         s = kmalloc(sizeof(*s), GFP_KERNEL);
7728         if (!s)
7729                 return -ENOMEM;
7730
7731         trace_seq_init(s);
7732
7733         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
7734         trace_seq_printf(s, "entries: %ld\n", cnt);
7735
7736         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
7737         trace_seq_printf(s, "overrun: %ld\n", cnt);
7738
7739         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
7740         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
7741
7742         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
7743         trace_seq_printf(s, "bytes: %ld\n", cnt);
7744
7745         if (trace_clocks[tr->clock_id].in_ns) {
7746                 /* local or global for trace_clock */
7747                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7748                 usec_rem = do_div(t, USEC_PER_SEC);
7749                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
7750                                                                 t, usec_rem);
7751
7752                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
7753                 usec_rem = do_div(t, USEC_PER_SEC);
7754                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
7755         } else {
7756                 /* counter or tsc mode for trace_clock */
7757                 trace_seq_printf(s, "oldest event ts: %llu\n",
7758                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7759
7760                 trace_seq_printf(s, "now ts: %llu\n",
7761                                 ring_buffer_time_stamp(trace_buf->buffer, cpu));
7762         }
7763
7764         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
7765         trace_seq_printf(s, "dropped events: %ld\n", cnt);
7766
7767         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
7768         trace_seq_printf(s, "read events: %ld\n", cnt);
7769
7770         count = simple_read_from_buffer(ubuf, count, ppos,
7771                                         s->buffer, trace_seq_used(s));
7772
7773         kfree(s);
7774
7775         return count;
7776 }
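/*
 * Reading the per-CPU stats file this handler backs (per_cpu/cpu<N>/stats
 * in the usual tracefs layout) produces output of the form built above,
 * shown here with illustrative values:
 *
 *   entries: 1024
 *   overrun: 0
 *   commit overrun: 0
 *   bytes: 65536
 *   oldest event ts:  1234.567890
 *   now ts:  1234.678901
 *   dropped events: 0
 *   read events: 512
 */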
7777
7778 static const struct file_operations tracing_stats_fops = {
7779         .open           = tracing_open_generic_tr,
7780         .read           = tracing_stats_read,
7781         .llseek         = generic_file_llseek,
7782         .release        = tracing_release_generic_tr,
7783 };
7784
7785 #ifdef CONFIG_DYNAMIC_FTRACE
7786
7787 static ssize_t
7788 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
7789                   size_t cnt, loff_t *ppos)
7790 {
7791         ssize_t ret;
7792         char *buf;
7793         int r;
7794
7795         /* 256 should be plenty to hold the amount needed */
7796         buf = kmalloc(256, GFP_KERNEL);
7797         if (!buf)
7798                 return -ENOMEM;
7799
7800         r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
7801                       ftrace_update_tot_cnt,
7802                       ftrace_number_of_pages,
7803                       ftrace_number_of_groups);
7804
7805         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7806         kfree(buf);
7807         return ret;
7808 }
7809
7810 static const struct file_operations tracing_dyn_info_fops = {
7811         .open           = tracing_open_generic,
7812         .read           = tracing_read_dyn_info,
7813         .llseek         = generic_file_llseek,
7814 };
7815 #endif /* CONFIG_DYNAMIC_FTRACE */
7816
7817 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
7818 static void
7819 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
7820                 struct trace_array *tr, struct ftrace_probe_ops *ops,
7821                 void *data)
7822 {
7823         tracing_snapshot_instance(tr);
7824 }
7825
7826 static void
7827 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
7828                       struct trace_array *tr, struct ftrace_probe_ops *ops,
7829                       void *data)
7830 {
7831         struct ftrace_func_mapper *mapper = data;
7832         long *count = NULL;
7833
7834         if (mapper)
7835                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7836
7837         if (count) {
7838
7839                 if (*count <= 0)
7840                         return;
7841
7842                 (*count)--;
7843         }
7844
7845         tracing_snapshot_instance(tr);
7846 }
7847
7848 static int
7849 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
7850                       struct ftrace_probe_ops *ops, void *data)
7851 {
7852         struct ftrace_func_mapper *mapper = data;
7853         long *count = NULL;
7854
7855         seq_printf(m, "%ps:", (void *)ip);
7856
7857         seq_puts(m, "snapshot");
7858
7859         if (mapper)
7860                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7861
7862         if (count)
7863                 seq_printf(m, ":count=%ld\n", *count);
7864         else
7865                 seq_puts(m, ":unlimited\n");
7866
7867         return 0;
7868 }
7869
7870 static int
7871 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
7872                      unsigned long ip, void *init_data, void **data)
7873 {
7874         struct ftrace_func_mapper *mapper = *data;
7875
7876         if (!mapper) {
7877                 mapper = allocate_ftrace_func_mapper();
7878                 if (!mapper)
7879                         return -ENOMEM;
7880                 *data = mapper;
7881         }
7882
7883         return ftrace_func_mapper_add_ip(mapper, ip, init_data);
7884 }
7885
7886 static void
7887 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
7888                      unsigned long ip, void *data)
7889 {
7890         struct ftrace_func_mapper *mapper = data;
7891
7892         if (!ip) {
7893                 if (!mapper)
7894                         return;
7895                 free_ftrace_func_mapper(mapper, NULL);
7896                 return;
7897         }
7898
7899         ftrace_func_mapper_remove_ip(mapper, ip);
7900 }
7901
7902 static struct ftrace_probe_ops snapshot_probe_ops = {
7903         .func                   = ftrace_snapshot,
7904         .print                  = ftrace_snapshot_print,
7905 };
7906
7907 static struct ftrace_probe_ops snapshot_count_probe_ops = {
7908         .func                   = ftrace_count_snapshot,
7909         .print                  = ftrace_snapshot_print,
7910         .init                   = ftrace_snapshot_init,
7911         .free                   = ftrace_snapshot_free,
7912 };
7913
7914 static int
7915 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
7916                                char *glob, char *cmd, char *param, int enable)
7917 {
7918         struct ftrace_probe_ops *ops;
7919         void *count = (void *)-1;
7920         char *number;
7921         int ret;
7922
7923         if (!tr)
7924                 return -ENODEV;
7925
7926         /* hash funcs only work with set_ftrace_filter */
7927         if (!enable)
7928                 return -EINVAL;
7929
7930         ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
7931
7932         if (glob[0] == '!')
7933                 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
7934
7935         if (!param)
7936                 goto out_reg;
7937
7938         number = strsep(&param, ":");
7939
7940         if (!strlen(number))
7941                 goto out_reg;
7942
7943         /*
7944          * We use the callback data field (which is a pointer)
7945          * as our counter.
7946          */
7947         ret = kstrtoul(number, 0, (unsigned long *)&count);
7948         if (ret)
7949                 return ret;
7950
7951  out_reg:
7952         ret = tracing_alloc_snapshot_instance(tr);
7953         if (ret < 0)
7954                 goto out;
7955
7956         ret = register_ftrace_function_probe(glob, tr, ops, count);
7957
7958  out:
7959         return ret < 0 ? ret : 0;
7960 }
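/*
 * Example usage of the "snapshot" command handled above, written to
 * set_ftrace_filter (paths assume tracefs is mounted at
 * /sys/kernel/tracing; the traced function is only an illustration):
 *
 *   echo 'do_sys_open:snapshot'   > set_ftrace_filter   # snapshot on every hit
 *   echo 'do_sys_open:snapshot:3' > set_ftrace_filter   # at most 3 snapshots
 *   echo '!do_sys_open:snapshot'  > set_ftrace_filter   # remove the probe
 */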
7961
7962 static struct ftrace_func_command ftrace_snapshot_cmd = {
7963         .name                   = "snapshot",
7964         .func                   = ftrace_trace_snapshot_callback,
7965 };
7966
7967 static __init int register_snapshot_cmd(void)
7968 {
7969         return register_ftrace_command(&ftrace_snapshot_cmd);
7970 }
7971 #else
7972 static inline __init int register_snapshot_cmd(void) { return 0; }
7973 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
7974
7975 static struct dentry *tracing_get_dentry(struct trace_array *tr)
7976 {
7977         if (WARN_ON(!tr->dir))
7978                 return ERR_PTR(-ENODEV);
7979
7980         /* Top directory uses NULL as the parent */
7981         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
7982                 return NULL;
7983
7984         /* All sub buffers have a descriptor */
7985         return tr->dir;
7986 }
7987
7988 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
7989 {
7990         struct dentry *d_tracer;
7991
7992         if (tr->percpu_dir)
7993                 return tr->percpu_dir;
7994
7995         d_tracer = tracing_get_dentry(tr);
7996         if (IS_ERR(d_tracer))
7997                 return NULL;
7998
7999         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8000
8001         MEM_FAIL(!tr->percpu_dir,
8002                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8003
8004         return tr->percpu_dir;
8005 }
8006
8007 static struct dentry *
8008 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8009                       void *data, long cpu, const struct file_operations *fops)
8010 {
8011         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8012
8013         if (ret) /* See tracing_get_cpu() */
8014                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
8015         return ret;
8016 }
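/*
 * Note on the helper above: the cpu number is stored in i_cdev with a
 * +1 bias so that cpu 0 can be told apart from a NULL i_cdev;
 * tracing_get_cpu() is expected to undo the bias when one of the
 * per-cpu files is opened.
 */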
8017
8018 static void
8019 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8020 {
8021         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8022         struct dentry *d_cpu;
8023         char cpu_dir[30]; /* 30 characters should be more than enough */
8024
8025         if (!d_percpu)
8026                 return;
8027
8028         snprintf(cpu_dir, 30, "cpu%ld", cpu);
8029         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8030         if (!d_cpu) {
8031                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8032                 return;
8033         }
8034
8035         /* per cpu trace_pipe */
8036         trace_create_cpu_file("trace_pipe", 0444, d_cpu,
8037                                 tr, cpu, &tracing_pipe_fops);
8038
8039         /* per cpu trace */
8040         trace_create_cpu_file("trace", 0644, d_cpu,
8041                                 tr, cpu, &tracing_fops);
8042
8043         trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
8044                                 tr, cpu, &tracing_buffers_fops);
8045
8046         trace_create_cpu_file("stats", 0444, d_cpu,
8047                                 tr, cpu, &tracing_stats_fops);
8048
8049         trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
8050                                 tr, cpu, &tracing_entries_fops);
8051
8052 #ifdef CONFIG_TRACER_SNAPSHOT
8053         trace_create_cpu_file("snapshot", 0644, d_cpu,
8054                                 tr, cpu, &snapshot_fops);
8055
8056         trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
8057                                 tr, cpu, &snapshot_raw_fops);
8058 #endif
8059 }
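/*
 * The function above gives each instance a per-cpu layout roughly like
 * (snapshot files only with CONFIG_TRACER_SNAPSHOT):
 *
 *   per_cpu/cpu0/{trace,trace_pipe,trace_pipe_raw,stats,buffer_size_kb,
 *                 snapshot,snapshot_raw}
 */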
8060
8061 #ifdef CONFIG_FTRACE_SELFTEST
8062 /* Let selftest have access to static functions in this file */
8063 #include "trace_selftest.c"
8064 #endif
8065
8066 static ssize_t
8067 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8068                         loff_t *ppos)
8069 {
8070         struct trace_option_dentry *topt = filp->private_data;
8071         char *buf;
8072
8073         if (topt->flags->val & topt->opt->bit)
8074                 buf = "1\n";
8075         else
8076                 buf = "0\n";
8077
8078         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8079 }
8080
8081 static ssize_t
8082 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8083                          loff_t *ppos)
8084 {
8085         struct trace_option_dentry *topt = filp->private_data;
8086         unsigned long val;
8087         int ret;
8088
8089         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8090         if (ret)
8091                 return ret;
8092
8093         if (val != 0 && val != 1)
8094                 return -EINVAL;
8095
8096         if (!!(topt->flags->val & topt->opt->bit) != val) {
8097                 mutex_lock(&trace_types_lock);
8098                 ret = __set_tracer_option(topt->tr, topt->flags,
8099                                           topt->opt, !val);
8100                 mutex_unlock(&trace_types_lock);
8101                 if (ret)
8102                         return ret;
8103         }
8104
8105         *ppos += cnt;
8106
8107         return cnt;
8108 }
8109
8110
8111 static const struct file_operations trace_options_fops = {
8112         .open = tracing_open_generic,
8113         .read = trace_options_read,
8114         .write = trace_options_write,
8115         .llseek = generic_file_llseek,
8116 };
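/*
 * Tracer-specific flags handled by the fops above appear as files in the
 * instance's "options" directory. For example, with the function tracer
 * active (option name and mount point are illustrative and depend on the
 * current tracer and on where tracefs is mounted):
 *
 *   cat /sys/kernel/tracing/options/func_stack_trace
 *   echo 1 > /sys/kernel/tracing/options/func_stack_trace
 */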
8117
8118 /*
8119  * In order to pass in both the trace_array descriptor as well as the index
8120  * to the flag that the trace option file represents, the trace_array
8121  * has a character array of trace_flags_index[], which holds the index
8122  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8123  * The address of this character array is passed to the flag option file
8124  * read/write callbacks.
8125  *
8126  * In order to extract both the index and the trace_array descriptor,
8127  * get_tr_index() uses the following algorithm.
8128  *
8129  *   idx = *ptr;
8130  *
8131  * As the pointer points at its own entry in the index array, the value
8132  * it holds is that entry's offset (remember index[1] == 1).
8133  *
8134  * Then, to get the trace_array descriptor, subtracting that index from
8135  * the ptr brings us back to the start of the index array itself.
8136  *
8137  *   ptr - idx == &index[0]
8138  *
8139  * Then a simple container_of() from that pointer gets us to the
8140  * trace_array descriptor.
8141  */
8142 static void get_tr_index(void *data, struct trace_array **ptr,
8143                          unsigned int *pindex)
8144 {
8145         *pindex = *(unsigned char *)data;
8146
8147         *ptr = container_of(data - *pindex, struct trace_array,
8148                             trace_flags_index);
8149 }
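/*
 * Worked example: if data points at tr->trace_flags_index[3], then
 * *data == 3, data - 3 == &tr->trace_flags_index[0], and container_of()
 * on that address yields the enclosing trace_array tr.
 */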
8150
8151 static ssize_t
8152 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8153                         loff_t *ppos)
8154 {
8155         void *tr_index = filp->private_data;
8156         struct trace_array *tr;
8157         unsigned int index;
8158         char *buf;
8159
8160         get_tr_index(tr_index, &tr, &index);
8161
8162         if (tr->trace_flags & (1 << index))
8163                 buf = "1\n";
8164         else
8165                 buf = "0\n";
8166
8167         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8168 }
8169
8170 static ssize_t
8171 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8172                          loff_t *ppos)
8173 {
8174         void *tr_index = filp->private_data;
8175         struct trace_array *tr;
8176         unsigned int index;
8177         unsigned long val;
8178         int ret;
8179
8180         get_tr_index(tr_index, &tr, &index);
8181
8182         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8183         if (ret)
8184                 return ret;
8185
8186         if (val != 0 && val != 1)
8187                 return -EINVAL;
8188
8189         mutex_lock(&event_mutex);
8190         mutex_lock(&trace_types_lock);
8191         ret = set_tracer_flag(tr, 1 << index, val);
8192         mutex_unlock(&trace_types_lock);
8193         mutex_unlock(&event_mutex);
8194
8195         if (ret < 0)
8196                 return ret;
8197
8198         *ppos += cnt;
8199
8200         return cnt;
8201 }
8202
8203 static const struct file_operations trace_options_core_fops = {
8204         .open = tracing_open_generic,
8205         .read = trace_options_core_read,
8206         .write = trace_options_core_write,
8207         .llseek = generic_file_llseek,
8208 };
8209
8210 struct dentry *trace_create_file(const char *name,
8211                                  umode_t mode,
8212                                  struct dentry *parent,
8213                                  void *data,
8214                                  const struct file_operations *fops)
8215 {
8216         struct dentry *ret;
8217
8218         ret = tracefs_create_file(name, mode, parent, data, fops);
8219         if (!ret)
8220                 pr_warn("Could not create tracefs '%s' entry\n", name);
8221
8222         return ret;
8223 }
8224
8225
8226 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8227 {
8228         struct dentry *d_tracer;
8229
8230         if (tr->options)
8231                 return tr->options;
8232
8233         d_tracer = tracing_get_dentry(tr);
8234         if (IS_ERR(d_tracer))
8235                 return NULL;
8236
8237         tr->options = tracefs_create_dir("options", d_tracer);
8238         if (!tr->options) {
8239                 pr_warn("Could not create tracefs directory 'options'\n");
8240                 return NULL;
8241         }
8242
8243         return tr->options;
8244 }
8245
8246 static void
8247 create_trace_option_file(struct trace_array *tr,
8248                          struct trace_option_dentry *topt,
8249                          struct tracer_flags *flags,
8250                          struct tracer_opt *opt)
8251 {
8252         struct dentry *t_options;
8253
8254         t_options = trace_options_init_dentry(tr);
8255         if (!t_options)
8256                 return;
8257
8258         topt->flags = flags;
8259         topt->opt = opt;
8260         topt->tr = tr;
8261
8262         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
8263                                     &trace_options_fops);
8264
8265 }
8266
8267 static void
8268 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8269 {
8270         struct trace_option_dentry *topts;
8271         struct trace_options *tr_topts;
8272         struct tracer_flags *flags;
8273         struct tracer_opt *opts;
8274         int cnt;
8275         int i;
8276
8277         if (!tracer)
8278                 return;
8279
8280         flags = tracer->flags;
8281
8282         if (!flags || !flags->opts)
8283                 return;
8284
8285         /*
8286          * If this is an instance, only create flags for tracers
8287          * the instance may have.
8288          */
8289         if (!trace_ok_for_array(tracer, tr))
8290                 return;
8291
8292         for (i = 0; i < tr->nr_topts; i++) {
8293                 /* Make sure there are no duplicate flags. */
8294                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8295                         return;
8296         }
8297
8298         opts = flags->opts;
8299
8300         for (cnt = 0; opts[cnt].name; cnt++)
8301                 ;
8302
8303         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8304         if (!topts)
8305                 return;
8306
8307         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8308                             GFP_KERNEL);
8309         if (!tr_topts) {
8310                 kfree(topts);
8311                 return;
8312         }
8313
8314         tr->topts = tr_topts;
8315         tr->topts[tr->nr_topts].tracer = tracer;
8316         tr->topts[tr->nr_topts].topts = topts;
8317         tr->nr_topts++;
8318
8319         for (cnt = 0; opts[cnt].name; cnt++) {
8320                 create_trace_option_file(tr, &topts[cnt], flags,
8321                                          &opts[cnt]);
8322                 MEM_FAIL(topts[cnt].entry == NULL,
8323                           "Failed to create trace option: %s",
8324                           opts[cnt].name);
8325         }
8326 }
8327
8328 static struct dentry *
8329 create_trace_option_core_file(struct trace_array *tr,
8330                               const char *option, long index)
8331 {
8332         struct dentry *t_options;
8333
8334         t_options = trace_options_init_dentry(tr);
8335         if (!t_options)
8336                 return NULL;
8337
8338         return trace_create_file(option, 0644, t_options,
8339                                  (void *)&tr->trace_flags_index[index],
8340                                  &trace_options_core_fops);
8341 }
8342
8343 static void create_trace_options_dir(struct trace_array *tr)
8344 {
8345         struct dentry *t_options;
8346         bool top_level = tr == &global_trace;
8347         int i;
8348
8349         t_options = trace_options_init_dentry(tr);
8350         if (!t_options)
8351                 return;
8352
8353         for (i = 0; trace_options[i]; i++) {
8354                 if (top_level ||
8355                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
8356                         create_trace_option_core_file(tr, trace_options[i], i);
8357         }
8358 }
8359
8360 static ssize_t
8361 rb_simple_read(struct file *filp, char __user *ubuf,
8362                size_t cnt, loff_t *ppos)
8363 {
8364         struct trace_array *tr = filp->private_data;
8365         char buf[64];
8366         int r;
8367
8368         r = tracer_tracing_is_on(tr);
8369         r = sprintf(buf, "%d\n", r);
8370
8371         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8372 }
8373
8374 static ssize_t
8375 rb_simple_write(struct file *filp, const char __user *ubuf,
8376                 size_t cnt, loff_t *ppos)
8377 {
8378         struct trace_array *tr = filp->private_data;
8379         struct trace_buffer *buffer = tr->array_buffer.buffer;
8380         unsigned long val;
8381         int ret;
8382
8383         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8384         if (ret)
8385                 return ret;
8386
8387         if (buffer) {
8388                 mutex_lock(&trace_types_lock);
8389                 if (!!val == tracer_tracing_is_on(tr)) {
8390                         val = 0; /* do nothing */
8391                 } else if (val) {
8392                         tracer_tracing_on(tr);
8393                         if (tr->current_trace->start)
8394                                 tr->current_trace->start(tr);
8395                 } else {
8396                         tracer_tracing_off(tr);
8397                         if (tr->current_trace->stop)
8398                                 tr->current_trace->stop(tr);
8399                 }
8400                 mutex_unlock(&trace_types_lock);
8401         }
8402
8403         (*ppos)++;
8404
8405         return cnt;
8406 }
8407
8408 static const struct file_operations rb_simple_fops = {
8409         .open           = tracing_open_generic_tr,
8410         .read           = rb_simple_read,
8411         .write          = rb_simple_write,
8412         .release        = tracing_release_generic_tr,
8413         .llseek         = default_llseek,
8414 };
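/*
 * rb_simple_fops backs the "tracing_on" file, which pauses and resumes
 * recording without tearing down the buffers, e.g. (mount point is an
 * assumption; tracefs is also reachable via /sys/kernel/debug/tracing):
 *
 *   echo 0 > /sys/kernel/tracing/tracing_on   # stop writing to the ring buffer
 *   echo 1 > /sys/kernel/tracing/tracing_on   # resume recording
 */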
8415
8416 static ssize_t
8417 buffer_percent_read(struct file *filp, char __user *ubuf,
8418                     size_t cnt, loff_t *ppos)
8419 {
8420         struct trace_array *tr = filp->private_data;
8421         char buf[64];
8422         int r;
8423
8424         r = tr->buffer_percent;
8425         r = sprintf(buf, "%d\n", r);
8426
8427         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8428 }
8429
8430 static ssize_t
8431 buffer_percent_write(struct file *filp, const char __user *ubuf,
8432                      size_t cnt, loff_t *ppos)
8433 {
8434         struct trace_array *tr = filp->private_data;
8435         unsigned long val;
8436         int ret;
8437
8438         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8439         if (ret)
8440                 return ret;
8441
8442         if (val > 100)
8443                 return -EINVAL;
8444
8445         if (!val)
8446                 val = 1;
8447
8448         tr->buffer_percent = val;
8449
8450         (*ppos)++;
8451
8452         return cnt;
8453 }
8454
8455 static const struct file_operations buffer_percent_fops = {
8456         .open           = tracing_open_generic_tr,
8457         .read           = buffer_percent_read,
8458         .write          = buffer_percent_write,
8459         .release        = tracing_release_generic_tr,
8460         .llseek         = default_llseek,
8461 };
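/*
 * buffer_percent acts as a watermark for waking readers that block on the
 * ring buffer (such as trace_pipe_raw): wakeups are deferred until roughly
 * that percentage of the buffer has filled. The default is set to 50 in
 * init_tracer_tracefs() below, and the write handler above bumps 0 up to 1.
 */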
8462
8463 static struct dentry *trace_instance_dir;
8464
8465 static void
8466 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
8467
8468 static int
8469 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
8470 {
8471         enum ring_buffer_flags rb_flags;
8472
8473         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
8474
8475         buf->tr = tr;
8476
8477         buf->buffer = ring_buffer_alloc(size, rb_flags);
8478         if (!buf->buffer)
8479                 return -ENOMEM;
8480
8481         buf->data = alloc_percpu(struct trace_array_cpu);
8482         if (!buf->data) {
8483                 ring_buffer_free(buf->buffer);
8484                 buf->buffer = NULL;
8485                 return -ENOMEM;
8486         }
8487
8488         /* Allocate the first page for all buffers */
8489         set_buffer_entries(&tr->array_buffer,
8490                            ring_buffer_size(tr->array_buffer.buffer, 0));
8491
8492         return 0;
8493 }
8494
8495 static int allocate_trace_buffers(struct trace_array *tr, int size)
8496 {
8497         int ret;
8498
8499         ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
8500         if (ret)
8501                 return ret;
8502
8503 #ifdef CONFIG_TRACER_MAX_TRACE
8504         ret = allocate_trace_buffer(tr, &tr->max_buffer,
8505                                     allocate_snapshot ? size : 1);
8506         if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
8507                 ring_buffer_free(tr->array_buffer.buffer);
8508                 tr->array_buffer.buffer = NULL;
8509                 free_percpu(tr->array_buffer.data);
8510                 tr->array_buffer.data = NULL;
8511                 return -ENOMEM;
8512         }
8513         tr->allocated_snapshot = allocate_snapshot;
8514
8515         /*
8516          * Only the top level trace array gets its snapshot allocated
8517          * from the kernel command line.
8518          */
8519         allocate_snapshot = false;
8520 #endif
8521         return 0;
8522 }
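/*
 * Note: with CONFIG_TRACER_MAX_TRACE the shadow max_buffer is normally
 * allocated at a token size of 1 here and only grown to match the main
 * buffer when a snapshot is requested, unless allocate_snapshot was set
 * (e.g. by "alloc_snapshot" on the kernel command line) for the top
 * level trace array.
 */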
8523
8524 static void free_trace_buffer(struct array_buffer *buf)
8525 {
8526         if (buf->buffer) {
8527                 ring_buffer_free(buf->buffer);
8528                 buf->buffer = NULL;
8529                 free_percpu(buf->data);
8530                 buf->data = NULL;
8531         }
8532 }
8533
8534 static void free_trace_buffers(struct trace_array *tr)
8535 {
8536         if (!tr)
8537                 return;
8538
8539         free_trace_buffer(&tr->array_buffer);
8540
8541 #ifdef CONFIG_TRACER_MAX_TRACE
8542         free_trace_buffer(&tr->max_buffer);
8543 #endif
8544 }
8545
8546 static void init_trace_flags_index(struct trace_array *tr)
8547 {
8548         int i;
8549
8550         /* Used by the trace options files */
8551         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
8552                 tr->trace_flags_index[i] = i;
8553 }
8554
8555 static void __update_tracer_options(struct trace_array *tr)
8556 {
8557         struct tracer *t;
8558
8559         for (t = trace_types; t; t = t->next)
8560                 add_tracer_options(tr, t);
8561 }
8562
8563 static void update_tracer_options(struct trace_array *tr)
8564 {
8565         mutex_lock(&trace_types_lock);
8566         __update_tracer_options(tr);
8567         mutex_unlock(&trace_types_lock);
8568 }
8569
8570 /* Must have trace_types_lock held */
8571 struct trace_array *trace_array_find(const char *instance)
8572 {
8573         struct trace_array *tr, *found = NULL;
8574
8575         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8576                 if (tr->name && strcmp(tr->name, instance) == 0) {
8577                         found = tr;
8578                         break;
8579                 }
8580         }
8581
8582         return found;
8583 }
8584
8585 struct trace_array *trace_array_find_get(const char *instance)
8586 {
8587         struct trace_array *tr;
8588
8589         mutex_lock(&trace_types_lock);
8590         tr = trace_array_find(instance);
8591         if (tr)
8592                 tr->ref++;
8593         mutex_unlock(&trace_types_lock);
8594
8595         return tr;
8596 }
8597
8598 static struct trace_array *trace_array_create(const char *name)
8599 {
8600         struct trace_array *tr;
8601         int ret;
8602
8603         ret = -ENOMEM;
8604         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
8605         if (!tr)
8606                 return ERR_PTR(ret);
8607
8608         tr->name = kstrdup(name, GFP_KERNEL);
8609         if (!tr->name)
8610                 goto out_free_tr;
8611
8612         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
8613                 goto out_free_tr;
8614
8615         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
8616
8617         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
8618
8619         raw_spin_lock_init(&tr->start_lock);
8620
8621         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8622
8623         tr->current_trace = &nop_trace;
8624
8625         INIT_LIST_HEAD(&tr->systems);
8626         INIT_LIST_HEAD(&tr->events);
8627         INIT_LIST_HEAD(&tr->hist_vars);
8628         INIT_LIST_HEAD(&tr->err_log);
8629
8630         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
8631                 goto out_free_tr;
8632
8633         tr->dir = tracefs_create_dir(name, trace_instance_dir);
8634         if (!tr->dir)
8635                 goto out_free_tr;
8636
8637         ret = event_trace_add_tracer(tr->dir, tr);
8638         if (ret) {
8639                 tracefs_remove(tr->dir);
8640                 goto out_free_tr;
8641         }
8642
8643         ftrace_init_trace_array(tr);
8644
8645         init_tracer_tracefs(tr, tr->dir);
8646         init_trace_flags_index(tr);
8647         __update_tracer_options(tr);
8648
8649         list_add(&tr->list, &ftrace_trace_arrays);
8650
8651         tr->ref++;
8652
8653
8654         return tr;
8655
8656  out_free_tr:
8657         free_trace_buffers(tr);
8658         free_cpumask_var(tr->tracing_cpumask);
8659         kfree(tr->name);
8660         kfree(tr);
8661
8662         return ERR_PTR(ret);
8663 }
8664
8665 static int instance_mkdir(const char *name)
8666 {
8667         struct trace_array *tr;
8668         int ret;
8669
8670         mutex_lock(&event_mutex);
8671         mutex_lock(&trace_types_lock);
8672
8673         ret = -EEXIST;
8674         if (trace_array_find(name))
8675                 goto out_unlock;
8676
8677         tr = trace_array_create(name);
8678
8679         ret = PTR_ERR_OR_ZERO(tr);
8680
8681 out_unlock:
8682         mutex_unlock(&trace_types_lock);
8683         mutex_unlock(&event_mutex);
8684         return ret;
8685 }
8686
8687 /**
8688  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
8689  * @name: The name of the trace array to be looked up/created.
8690  *
8691  * Returns a pointer to the trace array with the given name, or
8692  * NULL if it cannot be created.
8693  *
8694  * NOTE: This function increments the reference counter associated with the
8695  * trace array returned. This makes sure it cannot be freed while in use.
8696  * Use trace_array_put() once the trace array is no longer needed.
8697  * If the trace_array is to be freed, trace_array_destroy() needs to
8698  * be called after the trace_array_put(), or simply let user space delete
8699  * it from the tracefs instances directory. But until the
8700  * trace_array_put() is called, user space cannot delete it.
8701  *
8702  */
8703 struct trace_array *trace_array_get_by_name(const char *name)
8704 {
8705         struct trace_array *tr;
8706
8707         mutex_lock(&event_mutex);
8708         mutex_lock(&trace_types_lock);
8709
8710         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8711                 if (tr->name && strcmp(tr->name, name) == 0)
8712                         goto out_unlock;
8713         }
8714
8715         tr = trace_array_create(name);
8716
8717         if (IS_ERR(tr))
8718                 tr = NULL;
8719 out_unlock:
8720         if (tr)
8721                 tr->ref++;
8722
8723         mutex_unlock(&trace_types_lock);
8724         mutex_unlock(&event_mutex);
8725         return tr;
8726 }
8727 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
8728
8729 static int __remove_instance(struct trace_array *tr)
8730 {
8731         int i;
8732
8733         /* A newly created trace array starts with a reference count of 1. */
8734         if (tr->ref > 1 || (tr->current_trace && tr->current_trace->ref))
8735                 return -EBUSY;
8736
8737         list_del(&tr->list);
8738
8739         /* Disable all the flags that were enabled coming in */
8740         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
8741                 if ((1 << i) & ZEROED_TRACE_FLAGS)
8742                         set_tracer_flag(tr, 1 << i, 0);
8743         }
8744
8745         tracing_set_nop(tr);
8746         clear_ftrace_function_probes(tr);
8747         event_trace_del_tracer(tr);
8748         ftrace_clear_pids(tr);
8749         ftrace_destroy_function_files(tr);
8750         tracefs_remove(tr->dir);
8751         free_trace_buffers(tr);
8752
8753         for (i = 0; i < tr->nr_topts; i++) {
8754                 kfree(tr->topts[i].topts);
8755         }
8756         kfree(tr->topts);
8757
8758         free_cpumask_var(tr->tracing_cpumask);
8759         kfree(tr->name);
8760         kfree(tr);
8761         tr = NULL;
8762
8763         return 0;
8764 }
8765
8766 int trace_array_destroy(struct trace_array *this_tr)
8767 {
8768         struct trace_array *tr;
8769         int ret;
8770
8771         if (!this_tr)
8772                 return -EINVAL;
8773
8774         mutex_lock(&event_mutex);
8775         mutex_lock(&trace_types_lock);
8776
8777         ret = -ENODEV;
8778
8779         /* Make sure the trace array exists before destroying it. */
8780         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8781                 if (tr == this_tr) {
8782                         ret = __remove_instance(tr);
8783                         break;
8784                 }
8785         }
8786
8787         mutex_unlock(&trace_types_lock);
8788         mutex_unlock(&event_mutex);
8789
8790         return ret;
8791 }
8792 EXPORT_SYMBOL_GPL(trace_array_destroy);
8793
8794 static int instance_rmdir(const char *name)
8795 {
8796         struct trace_array *tr;
8797         int ret;
8798
8799         mutex_lock(&event_mutex);
8800         mutex_lock(&trace_types_lock);
8801
8802         ret = -ENODEV;
8803         tr = trace_array_find(name);
8804         if (tr)
8805                 ret = __remove_instance(tr);
8806
8807         mutex_unlock(&trace_types_lock);
8808         mutex_unlock(&event_mutex);
8809
8810         return ret;
8811 }
8812
8813 static __init void create_trace_instances(struct dentry *d_tracer)
8814 {
8815         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
8816                                                          instance_mkdir,
8817                                                          instance_rmdir);
8818         if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
8819                 return;
8820 }
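/*
 * Instances are created and removed from user space with mkdir/rmdir in
 * the directory registered above, e.g. (mount point is an assumption):
 *
 *   mkdir /sys/kernel/tracing/instances/foo   # -> instance_mkdir("foo")
 *   rmdir /sys/kernel/tracing/instances/foo   # -> instance_rmdir("foo")
 */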
8821
8822 static void
8823 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
8824 {
8825         struct trace_event_file *file;
8826         int cpu;
8827
8828         trace_create_file("available_tracers", 0444, d_tracer,
8829                         tr, &show_traces_fops);
8830
8831         trace_create_file("current_tracer", 0644, d_tracer,
8832                         tr, &set_tracer_fops);
8833
8834         trace_create_file("tracing_cpumask", 0644, d_tracer,
8835                           tr, &tracing_cpumask_fops);
8836
8837         trace_create_file("trace_options", 0644, d_tracer,
8838                           tr, &tracing_iter_fops);
8839
8840         trace_create_file("trace", 0644, d_tracer,
8841                           tr, &tracing_fops);
8842
8843         trace_create_file("trace_pipe", 0444, d_tracer,
8844                           tr, &tracing_pipe_fops);
8845
8846         trace_create_file("buffer_size_kb", 0644, d_tracer,
8847                           tr, &tracing_entries_fops);
8848
8849         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
8850                           tr, &tracing_total_entries_fops);
8851
8852         trace_create_file("free_buffer", 0200, d_tracer,
8853                           tr, &tracing_free_buffer_fops);
8854
8855         trace_create_file("trace_marker", 0220, d_tracer,
8856                           tr, &tracing_mark_fops);
8857
8858         file = __find_event_file(tr, "ftrace", "print");
8859         if (file && file->dir)
8860                 trace_create_file("trigger", 0644, file->dir, file,
8861                                   &event_trigger_fops);
8862         tr->trace_marker_file = file;
8863
8864         trace_create_file("trace_marker_raw", 0220, d_tracer,
8865                           tr, &tracing_mark_raw_fops);
8866
8867         trace_create_file("trace_clock", 0644, d_tracer, tr,
8868                           &trace_clock_fops);
8869
8870         trace_create_file("tracing_on", 0644, d_tracer,
8871                           tr, &rb_simple_fops);
8872
8873         trace_create_file("timestamp_mode", 0444, d_tracer, tr,
8874                           &trace_time_stamp_mode_fops);
8875
8876         tr->buffer_percent = 50;
8877
8878         trace_create_file("buffer_percent", 0644, d_tracer,
8879                         tr, &buffer_percent_fops);
8880
8881         create_trace_options_dir(tr);
8882
8883 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
8884         trace_create_maxlat_file(tr, d_tracer);
8885 #endif
8886
8887         if (ftrace_create_function_files(tr, d_tracer))
8888                 MEM_FAIL(1, "Could not allocate function filter files");
8889
8890 #ifdef CONFIG_TRACER_SNAPSHOT
8891         trace_create_file("snapshot", 0644, d_tracer,
8892                           tr, &snapshot_fops);
8893 #endif
8894
8895         trace_create_file("error_log", 0644, d_tracer,
8896                           tr, &tracing_err_log_fops);
8897
8898         for_each_tracing_cpu(cpu)
8899                 tracing_init_tracefs_percpu(tr, cpu);
8900
8901         ftrace_init_tracefs(tr, d_tracer);
8902 }
8903
8904 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
8905 {
8906         struct vfsmount *mnt;
8907         struct file_system_type *type;
8908
8909         /*
8910          * To maintain backward compatibility for tools that mount
8911          * debugfs to get to the tracing facility, tracefs is automatically
8912          * mounted to the debugfs/tracing directory.
8913          */
8914         type = get_fs_type("tracefs");
8915         if (!type)
8916                 return NULL;
8917         mnt = vfs_submount(mntpt, type, "tracefs", NULL);
8918         put_filesystem(type);
8919         if (IS_ERR(mnt))
8920                 return NULL;
8921         mntget(mnt);
8922
8923         return mnt;
8924 }
8925
8926 /**
8927  * tracing_init_dentry - initialize top level trace array
8928  *
8929  * This is called when creating files or directories in the tracing
8930  * directory. It is called via fs_initcall() by any of the boot up code
8931  * and expects to return the dentry of the top level tracing directory.
8932  */
8933 struct dentry *tracing_init_dentry(void)
8934 {
8935         struct trace_array *tr = &global_trace;
8936
8937         if (security_locked_down(LOCKDOWN_TRACEFS)) {
8938                 pr_warn("Tracing disabled due to lockdown\n");
8939                 return ERR_PTR(-EPERM);
8940         }
8941
8942         /* The top level trace array uses NULL as parent */
8943         if (tr->dir)
8944                 return NULL;
8945
8946         if (WARN_ON(!tracefs_initialized()) ||
8947                 (IS_ENABLED(CONFIG_DEBUG_FS) &&
8948                  WARN_ON(!debugfs_initialized())))
8949                 return ERR_PTR(-ENODEV);
8950
8951         /*
8952          * As there may still be users that expect the tracing
8953          * files to exist in debugfs/tracing, we must automount
8954          * the tracefs file system there, so older tools still
8955          * work with the newer kernel.
8956          */
8957         tr->dir = debugfs_create_automount("tracing", NULL,
8958                                            trace_automount, NULL);
8959
8960         return NULL;
8961 }
8962
8963 extern struct trace_eval_map *__start_ftrace_eval_maps[];
8964 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
8965
8966 static void __init trace_eval_init(void)
8967 {
8968         int len;
8969
8970         len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
8971         trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
8972 }
8973
8974 #ifdef CONFIG_MODULES
8975 static void trace_module_add_evals(struct module *mod)
8976 {
8977         if (!mod->num_trace_evals)
8978                 return;
8979
8980         /*
8981          * Modules with bad taint do not have events created, do
8982          * not bother with enums either.
8983          */
8984         if (trace_module_has_bad_taint(mod))
8985                 return;
8986
8987         trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
8988 }
8989
8990 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
8991 static void trace_module_remove_evals(struct module *mod)
8992 {
8993         union trace_eval_map_item *map;
8994         union trace_eval_map_item **last = &trace_eval_maps;
8995
8996         if (!mod->num_trace_evals)
8997                 return;
8998
8999         mutex_lock(&trace_eval_mutex);
9000
9001         map = trace_eval_maps;
9002
9003         while (map) {
9004                 if (map->head.mod == mod)
9005                         break;
9006                 map = trace_eval_jmp_to_tail(map);
9007                 last = &map->tail.next;
9008                 map = map->tail.next;
9009         }
9010         if (!map)
9011                 goto out;
9012
9013         *last = trace_eval_jmp_to_tail(map)->tail.next;
9014         kfree(map);
9015  out:
9016         mutex_unlock(&trace_eval_mutex);
9017 }
9018 #else
9019 static inline void trace_module_remove_evals(struct module *mod) { }
9020 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9021
9022 static int trace_module_notify(struct notifier_block *self,
9023                                unsigned long val, void *data)
9024 {
9025         struct module *mod = data;
9026
9027         switch (val) {
9028         case MODULE_STATE_COMING:
9029                 trace_module_add_evals(mod);
9030                 break;
9031         case MODULE_STATE_GOING:
9032                 trace_module_remove_evals(mod);
9033                 break;
9034         }
9035
9036         return 0;
9037 }
9038
9039 static struct notifier_block trace_module_nb = {
9040         .notifier_call = trace_module_notify,
9041         .priority = 0,
9042 };
9043 #endif /* CONFIG_MODULES */
9044
9045 static __init int tracer_init_tracefs(void)
9046 {
9047         struct dentry *d_tracer;
9048
9049         trace_access_lock_init();
9050
9051         d_tracer = tracing_init_dentry();
9052         if (IS_ERR(d_tracer))
9053                 return 0;
9054
9055         event_trace_init();
9056
9057         init_tracer_tracefs(&global_trace, d_tracer);
9058         ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
9059
9060         trace_create_file("tracing_thresh", 0644, d_tracer,
9061                         &global_trace, &tracing_thresh_fops);
9062
9063         trace_create_file("README", 0444, d_tracer,
9064                         NULL, &tracing_readme_fops);
9065
9066         trace_create_file("saved_cmdlines", 0444, d_tracer,
9067                         NULL, &tracing_saved_cmdlines_fops);
9068
9069         trace_create_file("saved_cmdlines_size", 0644, d_tracer,
9070                           NULL, &tracing_saved_cmdlines_size_fops);
9071
9072         trace_create_file("saved_tgids", 0444, d_tracer,
9073                         NULL, &tracing_saved_tgids_fops);
9074
9075         trace_eval_init();
9076
9077         trace_create_eval_file(d_tracer);
9078
9079 #ifdef CONFIG_MODULES
9080         register_module_notifier(&trace_module_nb);
9081 #endif
9082
9083 #ifdef CONFIG_DYNAMIC_FTRACE
9084         trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
9085                         NULL, &tracing_dyn_info_fops);
9086 #endif
9087
9088         create_trace_instances(d_tracer);
9089
9090         update_tracer_options(&global_trace);
9091
9092         return 0;
9093 }
9094
9095 static int trace_panic_handler(struct notifier_block *this,
9096                                unsigned long event, void *unused)
9097 {
9098         if (ftrace_dump_on_oops)
9099                 ftrace_dump(ftrace_dump_on_oops);
9100         return NOTIFY_OK;
9101 }
9102
9103 static struct notifier_block trace_panic_notifier = {
9104         .notifier_call  = trace_panic_handler,
9105         .next           = NULL,
9106         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
9107 };
9108
9109 static int trace_die_handler(struct notifier_block *self,
9110                              unsigned long val,
9111                              void *data)
9112 {
9113         switch (val) {
9114         case DIE_OOPS:
9115                 if (ftrace_dump_on_oops)
9116                         ftrace_dump(ftrace_dump_on_oops);
9117                 break;
9118         default:
9119                 break;
9120         }
9121         return NOTIFY_OK;
9122 }
9123
9124 static struct notifier_block trace_die_notifier = {
9125         .notifier_call = trace_die_handler,
9126         .priority = 200
9127 };
9128
9129 /*
9130  * printk is set to a max of 1024, but we really don't need it that
9131  * big. Nothing should be printing 1000 characters anyway.
9132  */
9133 #define TRACE_MAX_PRINT         1000
9134
9135 /*
9136  * Define here KERN_TRACE so that we have one place to modify
9137  * it if we decide to change what log level the ftrace dump
9138  * should be at.
9139  */
9140 #define KERN_TRACE              KERN_EMERG
9141
9142 void
9143 trace_printk_seq(struct trace_seq *s)
9144 {
9145         /* Probably should print a warning here. */
9146         if (s->seq.len >= TRACE_MAX_PRINT)
9147                 s->seq.len = TRACE_MAX_PRINT;
9148
9149         /*
9150          * More paranoid code. Although the buffer size is set to
9151          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
9152          * an extra layer of protection.
9153          */
9154         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
9155                 s->seq.len = s->seq.size - 1;
9156
9157         /* Should already be NUL terminated, but we are paranoid. */
9158         s->buffer[s->seq.len] = 0;
9159
9160         printk(KERN_TRACE "%s", s->buffer);
9161
9162         trace_seq_init(s);
9163 }
9164
9165 void trace_init_global_iter(struct trace_iterator *iter)
9166 {
9167         iter->tr = &global_trace;
9168         iter->trace = iter->tr->current_trace;
9169         iter->cpu_file = RING_BUFFER_ALL_CPUS;
9170         iter->array_buffer = &global_trace.array_buffer;
9171
9172         if (iter->trace && iter->trace->open)
9173                 iter->trace->open(iter);
9174
9175         /* Annotate start of buffers if we had overruns */
9176         if (ring_buffer_overruns(iter->array_buffer->buffer))
9177                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
9178
9179         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
9180         if (trace_clocks[iter->tr->clock_id].in_ns)
9181                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
9182 }
9183
9184 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
9185 {
9186         /* use static because iter can be a bit big for the stack */
9187         static struct trace_iterator iter;
9188         static atomic_t dump_running;
9189         struct trace_array *tr = &global_trace;
9190         unsigned int old_userobj;
9191         unsigned long flags;
9192         int cnt = 0, cpu;
9193
9194         /* Only allow one dump user at a time. */
9195         if (atomic_inc_return(&dump_running) != 1) {
9196                 atomic_dec(&dump_running);
9197                 return;
9198         }
9199
9200         /*
9201          * Always turn off tracing when we dump.
9202          * We don't need to show trace output of what happens
9203          * between multiple crashes.
9204          *
9205          * If the user does a sysrq-z, then they can re-enable
9206          * tracing with echo 1 > tracing_on.
9207          */
9208         tracing_off();
9209
9210         local_irq_save(flags);
9211         printk_nmi_direct_enter();
9212
9213         /* Simulate the iterator */
9214         trace_init_global_iter(&iter);
9215         /* Can not use kmalloc for iter.temp */
9216         iter.temp = static_temp_buf;
9217         iter.temp_size = STATIC_TEMP_BUF_SIZE;
9218
9219         for_each_tracing_cpu(cpu) {
9220                 atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9221         }
9222
9223         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
9224
9225         /* don't look at user memory in panic mode */
9226         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
9227
9228         switch (oops_dump_mode) {
9229         case DUMP_ALL:
9230                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9231                 break;
9232         case DUMP_ORIG:
9233                 iter.cpu_file = raw_smp_processor_id();
9234                 break;
9235         case DUMP_NONE:
9236                 goto out_enable;
9237         default:
9238                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
9239                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9240         }
9241
9242         printk(KERN_TRACE "Dumping ftrace buffer:\n");
9243
9244         /* Did function tracer already get disabled? */
9245         if (ftrace_is_dead()) {
9246                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
9247                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
9248         }
9249
9250         /*
9251          * We need to stop all tracing on all CPUs to read
9252          * the next buffer. This is a bit expensive, but is
9253          * not done often. We fill in all that we can read,
9254          * and then release the locks again.
9255          */
9256
9257         while (!trace_empty(&iter)) {
9258
9259                 if (!cnt)
9260                         printk(KERN_TRACE "---------------------------------\n");
9261
9262                 cnt++;
9263
9264                 trace_iterator_reset(&iter);
9265                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
9266
9267                 if (trace_find_next_entry_inc(&iter) != NULL) {
9268                         int ret;
9269
9270                         ret = print_trace_line(&iter);
9271                         if (ret != TRACE_TYPE_NO_CONSUME)
9272                                 trace_consume(&iter);
9273                 }
9274                 touch_nmi_watchdog();
9275
9276                 trace_printk_seq(&iter.seq);
9277         }
9278
9279         if (!cnt)
9280                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
9281         else
9282                 printk(KERN_TRACE "---------------------------------\n");
9283
9284  out_enable:
9285         tr->trace_flags |= old_userobj;
9286
9287         for_each_tracing_cpu(cpu) {
9288                 atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9289         }
9290         atomic_dec(&dump_running);
9291         printk_nmi_direct_exit();
9292         local_irq_restore(flags);
9293 }
9294 EXPORT_SYMBOL_GPL(ftrace_dump);
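/*
 * ftrace_dump() is normally reached through the panic/die notifiers
 * registered in tracer_alloc_buffers() when ftrace_dump_on_oops is set
 * (e.g. by booting with "ftrace_dump_on_oops" for all CPUs or
 * "ftrace_dump_on_oops=orig_cpu" for the oopsing CPU), or directly via
 * sysrq-z.
 */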
9295
9296 int trace_run_command(const char *buf, int (*createfn)(int, char **))
9297 {
9298         char **argv;
9299         int argc, ret;
9300
9301         argc = 0;
9302         ret = 0;
9303         argv = argv_split(GFP_KERNEL, buf, &argc);
9304         if (!argv)
9305                 return -ENOMEM;
9306
9307         if (argc)
9308                 ret = createfn(argc, argv);
9309
9310         argv_free(argv);
9311
9312         return ret;
9313 }
9314
9315 #define WRITE_BUFSIZE  4096
9316
9317 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
9318                                 size_t count, loff_t *ppos,
9319                                 int (*createfn)(int, char **))
9320 {
9321         char *kbuf, *buf, *tmp;
9322         int ret = 0;
9323         size_t done = 0;
9324         size_t size;
9325
9326         kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
9327         if (!kbuf)
9328                 return -ENOMEM;
9329
9330         while (done < count) {
9331                 size = count - done;
9332
9333                 if (size >= WRITE_BUFSIZE)
9334                         size = WRITE_BUFSIZE - 1;
9335
9336                 if (copy_from_user(kbuf, buffer + done, size)) {
9337                         ret = -EFAULT;
9338                         goto out;
9339                 }
9340                 kbuf[size] = '\0';
9341                 buf = kbuf;
9342                 do {
9343                         tmp = strchr(buf, '\n');
9344                         if (tmp) {
9345                                 *tmp = '\0';
9346                                 size = tmp - buf + 1;
9347                         } else {
9348                                 size = strlen(buf);
9349                                 if (done + size < count) {
9350                                         if (buf != kbuf)
9351                                                 break;
9352                                         /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
9353                                         pr_warn("Line length is too long: Should be less than %d\n",
9354                                                 WRITE_BUFSIZE - 2);
9355                                         ret = -EINVAL;
9356                                         goto out;
9357                                 }
9358                         }
9359                         done += size;
9360
9361                         /* Remove comments */
9362                         tmp = strchr(buf, '#');
9363
9364                         if (tmp)
9365                                 *tmp = '\0';
9366
9367                         ret = trace_run_command(buf, createfn);
9368                         if (ret)
9369                                 goto out;
9370                         buf += size;
9371
9372                 } while (done < count);
9373         }
9374         ret = done;
9375
9376 out:
9377         kfree(kbuf);
9378
9379         return ret;
9380 }
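/*
 * trace_parse_run_command() is the shared write-side parser used by the
 * dynamic event files (for example the kprobe_events and uprobe_events
 * write handlers): each newline-separated chunk has any '#' comment
 * stripped, is split into an argv[], and is handed to createfn. A write
 * of "p:my_open do_sys_open" (probe name chosen only for illustration)
 * would arrive as createfn(2, {"p:my_open", "do_sys_open"}).
 */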
9381
9382 __init static int tracer_alloc_buffers(void)
9383 {
9384         int ring_buf_size;
9385         int ret = -ENOMEM;
9386
9387
9388         if (security_locked_down(LOCKDOWN_TRACEFS)) {
9389                 pr_warn("Tracing disabled due to lockdown\n");
9390                 return -EPERM;
9391         }
9392
9393         /*
9394          * Make sure we don't accidentally add more trace options
9395          * than we have bits for.
9396          */
9397         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
9398
9399         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
9400                 goto out;
9401
9402         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
9403                 goto out_free_buffer_mask;
9404
9405         /* Only allocate trace_printk buffers if a trace_printk exists */
9406         if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
9407                 /* Must be called before global_trace.buffer is allocated */
9408                 trace_printk_init_buffers();
9409
9410         /* To save memory, keep the ring buffer size to its minimum */
9411         if (ring_buffer_expanded)
9412                 ring_buf_size = trace_buf_size;
9413         else
9414                 ring_buf_size = 1;
9415
9416         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
9417         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
9418
9419         raw_spin_lock_init(&global_trace.start_lock);
9420
9421         /*
9422          * The prepare callback allocates some memory for the ring buffer. We
9423          * don't free the buffer if the CPU goes down. If we were to free
9424          * the buffer, then the user would lose any trace that was in the
9425          * buffer. The memory will be removed once the "instance" is removed.
9426          */
9427         ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
9428                                       "trace/RB:preapre", trace_rb_cpu_prepare,
9429                                       NULL);
9430         if (ret < 0)
9431                 goto out_free_cpumask;
9432         /* Used for event triggers */
9433         ret = -ENOMEM;
9434         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
9435         if (!temp_buffer)
9436                 goto out_rm_hp_state;
9437
9438         if (trace_create_savedcmd() < 0)
9439                 goto out_free_temp_buffer;
9440
9441         /* TODO: make the number of buffers hot pluggable with CPUs */
9442         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
9443                 MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
9444                 goto out_free_savedcmd;
9445         }
9446
9447         if (global_trace.buffer_disabled)
9448                 tracing_off();
9449
9450         if (trace_boot_clock) {
9451                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
9452                 if (ret < 0)
9453                         pr_warn("Trace clock %s not defined, going back to default\n",
9454                                 trace_boot_clock);
9455         }
9456
9457         /*
9458          * register_tracer() might reference current_trace, so it
9459          * needs to be set before we register anything. This is
9460          * just a bootstrap of current_trace anyway.
9461          */
9462         global_trace.current_trace = &nop_trace;
9463
9464         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9465
9466         ftrace_init_global_array_ops(&global_trace);
9467
9468         init_trace_flags_index(&global_trace);
9469
9470         register_tracer(&nop_trace);
9471
9472         /* Function tracing may start here (via kernel command line) */
9473         init_function_trace();
9474
9475         /* All seems OK, enable tracing */
9476         tracing_disabled = 0;
9477
9478         atomic_notifier_chain_register(&panic_notifier_list,
9479                                        &trace_panic_notifier);
9480
9481         register_die_notifier(&trace_die_notifier);
9482
9483         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
9484
9485         INIT_LIST_HEAD(&global_trace.systems);
9486         INIT_LIST_HEAD(&global_trace.events);
9487         INIT_LIST_HEAD(&global_trace.hist_vars);
9488         INIT_LIST_HEAD(&global_trace.err_log);
9489         list_add(&global_trace.list, &ftrace_trace_arrays);
9490
9491         apply_trace_boot_options();
9492
9493         register_snapshot_cmd();
9494
9495         return 0;
9496
9497 out_free_savedcmd:
9498         free_saved_cmdlines_buffer(savedcmd);
9499 out_free_temp_buffer:
9500         ring_buffer_free(temp_buffer);
9501 out_rm_hp_state:
9502         cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
9503 out_free_cpumask:
9504         free_cpumask_var(global_trace.tracing_cpumask);
9505 out_free_buffer_mask:
9506         free_cpumask_var(tracing_buffer_mask);
9507 out:
9508         return ret;
9509 }
9510
9511 void __init early_trace_init(void)
9512 {
9513         if (tracepoint_printk) {
9514                 tracepoint_print_iter =
9515                         kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
9516                 if (MEM_FAIL(!tracepoint_print_iter,
9517                              "Failed to allocate trace iterator\n"))
9518                         tracepoint_printk = 0;
9519                 else
9520                         static_key_enable(&tracepoint_printk_key.key);
9521         }
9522         tracer_alloc_buffers();
9523 }
9524
9525 void __init trace_init(void)
9526 {
9527         trace_event_init();
9528 }
9529
9530 __init static int clear_boot_tracer(void)
9531 {
9532         /*
9533          * The buffer holding the default bootup tracer name is in an init
9534          * section that is freed after boot. This function is called at late
9535          * init; if the boot tracer was never found by then, clear the pointer
9536          * so that a later registration cannot access the buffer that is
9537          * about to be freed.
9538          */
9539         if (!default_bootup_tracer)
9540                 return 0;
9541
9542         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
9543                default_bootup_tracer);
9544         default_bootup_tracer = NULL;
9545
9546         return 0;
9547 }
9548
9549 fs_initcall(tracer_init_tracefs);
9550 late_initcall_sync(clear_boot_tracer);
9551
9552 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
9553 __init static int tracing_set_default_clock(void)
9554 {
9555         /* sched_clock_stable() is determined in late_initcall */
9556         if (!trace_boot_clock && !sched_clock_stable()) {
9557                 if (security_locked_down(LOCKDOWN_TRACEFS)) {
9558                         pr_warn("Can not set tracing clock due to lockdown\n");
9559                         return -EPERM;
9560                 }
9561
9562                 printk(KERN_WARNING
9563                        "Unstable clock detected, switching default tracing clock to \"global\"\n"
9564                        "If you want to keep using the local clock, then add:\n"
9565                        "  \"trace_clock=local\"\n"
9566                        "on the kernel command line\n");
9567                 tracing_set_clock(&global_trace, "global");
9568         }
9569
9570         return 0;
9571 }
9572 late_initcall_sync(tracing_set_default_clock);
9573 #endif