1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/notifier.h>
23 #include <linux/irqflags.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/poll.h>
43 #include <linux/nmi.h>
44 #include <linux/fs.h>
45 #include <linux/trace.h>
46 #include <linux/sched/clock.h>
47 #include <linux/sched/rt.h>
48 #include <linux/fsnotify.h>
49 #include <linux/irq_work.h>
50 #include <linux/workqueue.h>
51
52 #include "trace.h"
53 #include "trace_output.h"
54
55 /*
56  * On boot up, the ring buffer is set to the minimum size, so that
57  * we do not waste memory on systems that are not using tracing.
58  */
59 bool ring_buffer_expanded;
60
61 /*
62  * We need to change this state when a selftest is running.
63  * A selftest will peek into the ring buffer to count the
64  * entries inserted during the selftest, although some concurrent
65  * insertions into the ring buffer, such as trace_printk(), could occur
66  * at the same time, giving false positive or negative results.
67  */
68 static bool __read_mostly tracing_selftest_running;
69
70 /*
71  * If a tracer is running, we do not want to run SELFTEST.
72  */
73 bool __read_mostly tracing_selftest_disabled;
74
75 /* Pipe tracepoints to printk */
76 struct trace_iterator *tracepoint_print_iter;
77 int tracepoint_printk;
78 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
79
80 /* For tracers that don't implement custom flags */
81 static struct tracer_opt dummy_tracer_opt[] = {
82         { }
83 };
84
85 static int
86 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
87 {
88         return 0;
89 }
90
91 /*
92  * To prevent the comm cache from being overwritten when no
93  * tracing is active, only save the comm when a trace event
94  * occurred.
95  */
96 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
97
98 /*
99  * Kill all tracing for good (never come back).
100  * It is initialized to 1 but will turn to zero if the initialization
101  * of the tracer is successful. But that is the only place that sets
102  * this back to zero.
103  */
104 static int tracing_disabled = 1;
105
106 cpumask_var_t __read_mostly     tracing_buffer_mask;
107
108 /*
109  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
110  *
111  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
112  * is set, then ftrace_dump is called. This will output the contents
113  * of the ftrace buffers to the console.  This is very useful for
114  * capturing traces that lead to crashes and outputting them to a
115  * serial console.
116  *
117  * It is off by default, but you can enable it either by specifying
118  * "ftrace_dump_on_oops" on the kernel command line, or by setting
119  * /proc/sys/kernel/ftrace_dump_on_oops
120  * Set it to 1 to dump the buffers of all CPUs
121  * Set it to 2 to dump only the buffer of the CPU that triggered the oops
122  */
123
124 enum ftrace_dump_mode ftrace_dump_on_oops;
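125 /*
126  * Illustrative ways to turn this on, based on the description above (a
127  * sketch, not an exhaustive list of accepted forms):
128  *
129  *	ftrace_dump_on_oops				(command line: dump all CPUs)
130  *	ftrace_dump_on_oops=orig_cpu			(command line: only the oopsing CPU)
131  *	echo 1 > /proc/sys/kernel/ftrace_dump_on_oops	(run time: all CPUs)
132  *	echo 2 > /proc/sys/kernel/ftrace_dump_on_oops	(run time: oopsing CPU)
133  */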
125
126 /* When set, tracing will stop when a WARN*() is hit */
127 int __disable_trace_on_warning;
128
129 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
130 /* Map of enums to their values, for "eval_map" file */
131 struct trace_eval_map_head {
132         struct module                   *mod;
133         unsigned long                   length;
134 };
135
136 union trace_eval_map_item;
137
138 struct trace_eval_map_tail {
139         /*
140          * "end" is first and points to NULL as it must be different
141          * than "mod" or "eval_string"
142          */
143         union trace_eval_map_item       *next;
144         const char                      *end;   /* points to NULL */
145 };
146
147 static DEFINE_MUTEX(trace_eval_mutex);
148
149 /*
150  * The trace_eval_maps are saved in an array with two extra elements,
151  * one at the beginning, and one at the end. The beginning item contains
152  * the count of the saved maps (head.length), and the module they
153  * belong to if not built in (head.mod). The ending item contains a
154  * pointer to the next array of saved eval_map items.
155  */
156 union trace_eval_map_item {
157         struct trace_eval_map           map;
158         struct trace_eval_map_head      head;
159         struct trace_eval_map_tail      tail;
160 };
161
162 static union trace_eval_map_item *trace_eval_maps;
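/*
 * A minimal sketch of how one saved array could be indexed, assuming only
 * the layout described above (a head element, then head.length map entries,
 * then the tail element). The helper name is hypothetical and is not the
 * walker used elsewhere in this file:
 *
 *	static struct trace_eval_map *
 *	eval_map_entry(union trace_eval_map_item *array, unsigned long idx)
 *	{
 *		if (idx >= array->head.length)
 *			return NULL;
 *		return &array[idx + 1].map;	// element 0 is the head
 *	}
 *
 * The tail element, &array[array->head.length + 1], carries tail.next,
 * which points to the next saved array (or NULL).
 */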
163 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
164
165 int tracing_set_tracer(struct trace_array *tr, const char *buf);
166 static void ftrace_trace_userstack(struct trace_buffer *buffer,
167                                    unsigned long flags, int pc);
168
169 #define MAX_TRACER_SIZE         100
170 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
171 static char *default_bootup_tracer;
172
173 static bool allocate_snapshot;
174
175 static int __init set_cmdline_ftrace(char *str)
176 {
177         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
178         default_bootup_tracer = bootup_tracer_buf;
179         /* We are using ftrace early, expand it */
180         ring_buffer_expanded = true;
181         return 1;
182 }
183 __setup("ftrace=", set_cmdline_ftrace);
184
185 static int __init set_ftrace_dump_on_oops(char *str)
186 {
187         if (*str++ != '=' || !*str) {
188                 ftrace_dump_on_oops = DUMP_ALL;
189                 return 1;
190         }
191
192         if (!strcmp("orig_cpu", str)) {
193                 ftrace_dump_on_oops = DUMP_ORIG;
194                 return 1;
195         }
196
197         return 0;
198 }
199 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
200
201 static int __init stop_trace_on_warning(char *str)
202 {
203         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
204                 __disable_trace_on_warning = 1;
205         return 1;
206 }
207 __setup("traceoff_on_warning", stop_trace_on_warning);
208
209 static int __init boot_alloc_snapshot(char *str)
210 {
211         allocate_snapshot = true;
212         /* We also need the main ring buffer expanded */
213         ring_buffer_expanded = true;
214         return 1;
215 }
216 __setup("alloc_snapshot", boot_alloc_snapshot);
217
218
219 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
220
221 static int __init set_trace_boot_options(char *str)
222 {
223         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
224         return 0;
225 }
226 __setup("trace_options=", set_trace_boot_options);
227
228 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
229 static char *trace_boot_clock __initdata;
230
231 static int __init set_trace_boot_clock(char *str)
232 {
233         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
234         trace_boot_clock = trace_boot_clock_buf;
235         return 0;
236 }
237 __setup("trace_clock=", set_trace_boot_clock);
238
239 static int __init set_tracepoint_printk(char *str)
240 {
241         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
242                 tracepoint_printk = 1;
243         return 1;
244 }
245 __setup("tp_printk", set_tracepoint_printk);
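/*
 * Putting the boot parameters handled above together, a kernel command
 * line could look like the following (the tracer, option and clock names
 * are examples and must exist in the running kernel):
 *
 *	ftrace=function_graph trace_options=sym-addr trace_clock=global \
 *	alloc_snapshot traceoff_on_warning tp_printk ftrace_dump_on_oops
 */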
246
247 unsigned long long ns2usecs(u64 nsec)
248 {
249         nsec += 500;
250         do_div(nsec, 1000);
251         return nsec;
252 }
253
254 /* trace_flags holds trace_options default values */
255 #define TRACE_DEFAULT_FLAGS                                             \
256         (FUNCTION_DEFAULT_FLAGS |                                       \
257          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
258          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
259          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
260          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
261
262 /* trace_options that are only supported by global_trace */
263 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
264                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
265
266 /* trace_flags that are default zero for instances */
267 #define ZEROED_TRACE_FLAGS \
268         (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
269
270 /*
271  * The global_trace is the descriptor that holds the top-level tracing
272  * buffers for the live tracing.
273  */
274 static struct trace_array global_trace = {
275         .trace_flags = TRACE_DEFAULT_FLAGS,
276 };
277
278 LIST_HEAD(ftrace_trace_arrays);
279
280 int trace_array_get(struct trace_array *this_tr)
281 {
282         struct trace_array *tr;
283         int ret = -ENODEV;
284
285         mutex_lock(&trace_types_lock);
286         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
287                 if (tr == this_tr) {
288                         tr->ref++;
289                         ret = 0;
290                         break;
291                 }
292         }
293         mutex_unlock(&trace_types_lock);
294
295         return ret;
296 }
297
298 static void __trace_array_put(struct trace_array *this_tr)
299 {
300         WARN_ON(!this_tr->ref);
301         this_tr->ref--;
302 }
303
304 /**
305  * trace_array_put - Decrement the reference counter for this trace array.
306  *
307  * NOTE: Use this when we no longer need the trace array returned by
308  * trace_array_get_by_name(). This ensures the trace array can be later
309  * destroyed.
310  *
311  */
312 void trace_array_put(struct trace_array *this_tr)
313 {
314         if (!this_tr)
315                 return;
316
317         mutex_lock(&trace_types_lock);
318         __trace_array_put(this_tr);
319         mutex_unlock(&trace_types_lock);
320 }
321 EXPORT_SYMBOL_GPL(trace_array_put);
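/*
 * A minimal sketch of the intended pairing, assuming the
 * trace_array_get_by_name() helper mentioned in the comment above and a
 * hypothetical instance name:
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("my_instance");
 *	if (tr) {
 *		// ... use the instance ...
 *		trace_array_put(tr);	// drop the reference when done
 *	}
 */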
322
323 int tracing_check_open_get_tr(struct trace_array *tr)
324 {
325         int ret;
326
327         ret = security_locked_down(LOCKDOWN_TRACEFS);
328         if (ret)
329                 return ret;
330
331         if (tracing_disabled)
332                 return -ENODEV;
333
334         if (tr && trace_array_get(tr) < 0)
335                 return -ENODEV;
336
337         return 0;
338 }
339
340 int call_filter_check_discard(struct trace_event_call *call, void *rec,
341                               struct trace_buffer *buffer,
342                               struct ring_buffer_event *event)
343 {
344         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
345             !filter_match_preds(call->filter, rec)) {
346                 __trace_event_discard_commit(buffer, event);
347                 return 1;
348         }
349
350         return 0;
351 }
352
353 void trace_free_pid_list(struct trace_pid_list *pid_list)
354 {
355         vfree(pid_list->pids);
356         kfree(pid_list);
357 }
358
359 /**
360  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
361  * @filtered_pids: The list of pids to check
362  * @search_pid: The PID to find in @filtered_pids
363  *
364  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
365  */
366 bool
367 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
368 {
369         /*
370          * If pid_max changed after filtered_pids was created, we
371          * by default ignore all pids greater than the previous pid_max.
372          */
373         if (search_pid >= filtered_pids->pid_max)
374                 return false;
375
376         return test_bit(search_pid, filtered_pids->pids);
377 }
378
379 /**
380  * trace_ignore_this_task - should a task be ignored for tracing
381  * @filtered_pids: The list of pids to check
382  * @task: The task that should be ignored if not filtered
383  *
384  * Checks if @task should be traced or not from @filtered_pids.
385  * Returns true if @task should *NOT* be traced.
386  * Returns false if @task should be traced.
387  */
388 bool
389 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
390                        struct trace_pid_list *filtered_no_pids,
391                        struct task_struct *task)
392 {
393         /*
394          * If filtered_no_pids is not empty, and the task's pid is listed
395          * in filtered_no_pids, then return true.
396          * Otherwise, if filtered_pids is empty, that means we can
397          * trace all tasks. If it has content, then only trace pids
398          * within filtered_pids.
399          */
400
401         return (filtered_pids &&
402                 !trace_find_filtered_pid(filtered_pids, task->pid)) ||
403                 (filtered_no_pids &&
404                  trace_find_filtered_pid(filtered_no_pids, task->pid));
405 }
406
407 /**
408  * trace_filter_add_remove_task - Add or remove a task from a pid_list
409  * @pid_list: The list to modify
410  * @self: The current task for fork or NULL for exit
411  * @task: The task to add or remove
412  *
413  * If adding a task, if @self is defined, the task is only added if @self
414  * is also included in @pid_list. This happens on fork and tasks should
415  * only be added when the parent is listed. If @self is NULL, then the
416  * @task pid will be removed from the list, which would happen on exit
417  * of a task.
418  */
419 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
420                                   struct task_struct *self,
421                                   struct task_struct *task)
422 {
423         if (!pid_list)
424                 return;
425
426         /* For forks, we only add if the forking task is listed */
427         if (self) {
428                 if (!trace_find_filtered_pid(pid_list, self->pid))
429                         return;
430         }
431
432         /* Sorry, but we don't support pid_max changing after setting */
433         if (task->pid >= pid_list->pid_max)
434                 return;
435
436         /* "self" is set for forks, and NULL for exits */
437         if (self)
438                 set_bit(task->pid, pid_list->pids);
439         else
440                 clear_bit(task->pid, pid_list->pids);
441 }
442
443 /**
444  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
445  * @pid_list: The pid list to show
446  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
447  * @pos: The position of the file
448  *
449  * This is used by the seq_file "next" operation to iterate the pids
450  * listed in a trace_pid_list structure.
451  *
452  * Returns the pid+1 as we want to display pid of zero, but NULL would
453  * stop the iteration.
454  */
455 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
456 {
457         unsigned long pid = (unsigned long)v;
458
459         (*pos)++;
460
461         /* pid is already +1 of the actual previous bit */
462         pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
463
464         /* Return pid + 1 to allow zero to be represented */
465         if (pid < pid_list->pid_max)
466                 return (void *)(pid + 1);
467
468         return NULL;
469 }
470
471 /**
472  * trace_pid_start - Used for seq_file to start reading pid lists
473  * @pid_list: The pid list to show
474  * @pos: The position of the file
475  *
476  * This is used by seq_file "start" operation to start the iteration
477  * of listing pids.
478  *
479  * Returns the pid+1 as we want to display pid of zero, but NULL would
480  * stop the iteration.
481  */
482 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
483 {
484         unsigned long pid;
485         loff_t l = 0;
486
487         pid = find_first_bit(pid_list->pids, pid_list->pid_max);
488         if (pid >= pid_list->pid_max)
489                 return NULL;
490
491         /* Return pid + 1 so that zero can be the exit value */
492         for (pid++; pid && l < *pos;
493              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
494                 ;
495         return (void *)pid;
496 }
497
498 /**
499  * trace_pid_show - show the current pid in seq_file processing
500  * @m: The seq_file structure to write into
501  * @v: A void pointer of the pid (+1) value to display
502  *
503  * Can be directly used by seq_file operations to display the current
504  * pid value.
505  */
506 int trace_pid_show(struct seq_file *m, void *v)
507 {
508         unsigned long pid = (unsigned long)v - 1;
509
510         seq_printf(m, "%lu\n", pid);
511         return 0;
512 }
513
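/*
 * trace_pid_start(), trace_pid_next() and trace_pid_show() are meant to be
 * wired into a seq_file. A minimal sketch of such a wiring (the my_pid_*
 * names and the my_pid_list source are hypothetical; a real user would
 * also take whatever lock protects its pid list in .start/.stop):
 *
 *	static void *my_pid_seq_start(struct seq_file *m, loff_t *pos)
 *	{
 *		return trace_pid_start(my_pid_list, pos);
 *	}
 *
 *	static void *my_pid_seq_next(struct seq_file *m, void *v, loff_t *pos)
 *	{
 *		return trace_pid_next(my_pid_list, v, pos);
 *	}
 *
 *	static void my_pid_seq_stop(struct seq_file *m, void *v)
 *	{
 *	}
 *
 *	static const struct seq_operations my_pid_seq_ops = {
 *		.start	= my_pid_seq_start,
 *		.next	= my_pid_seq_next,
 *		.stop	= my_pid_seq_stop,
 *		.show	= trace_pid_show,
 *	};
 */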
514 /* 128 should be much more than enough */
515 #define PID_BUF_SIZE            127
516
517 int trace_pid_write(struct trace_pid_list *filtered_pids,
518                     struct trace_pid_list **new_pid_list,
519                     const char __user *ubuf, size_t cnt)
520 {
521         struct trace_pid_list *pid_list;
522         struct trace_parser parser;
523         unsigned long val;
524         int nr_pids = 0;
525         ssize_t read = 0;
526         ssize_t ret = 0;
527         loff_t pos;
528         pid_t pid;
529
530         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
531                 return -ENOMEM;
532
533         /*
534          * Always create a new array. The write is an all-or-nothing
535          * operation: a new array is built from the pids written by the
536          * user, and if the operation fails the current list is left
537          * unmodified.
538          */
539         pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
540         if (!pid_list) {
541                 trace_parser_put(&parser);
542                 return -ENOMEM;
543         }
544
545         pid_list->pid_max = READ_ONCE(pid_max);
546
547         /* Only truncating will shrink pid_max */
548         if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
549                 pid_list->pid_max = filtered_pids->pid_max;
550
551         pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
552         if (!pid_list->pids) {
553                 trace_parser_put(&parser);
554                 kfree(pid_list);
555                 return -ENOMEM;
556         }
557
558         if (filtered_pids) {
559                 /* copy the current bits to the new max */
560                 for_each_set_bit(pid, filtered_pids->pids,
561                                  filtered_pids->pid_max) {
562                         set_bit(pid, pid_list->pids);
563                         nr_pids++;
564                 }
565         }
566
567         while (cnt > 0) {
568
569                 pos = 0;
570
571                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
572                 if (ret < 0 || !trace_parser_loaded(&parser))
573                         break;
574
575                 read += ret;
576                 ubuf += ret;
577                 cnt -= ret;
578
579                 ret = -EINVAL;
580                 if (kstrtoul(parser.buffer, 0, &val))
581                         break;
582                 if (val >= pid_list->pid_max)
583                         break;
584
585                 pid = (pid_t)val;
586
587                 set_bit(pid, pid_list->pids);
588                 nr_pids++;
589
590                 trace_parser_clear(&parser);
591                 ret = 0;
592         }
593         trace_parser_put(&parser);
594
595         if (ret < 0) {
596                 trace_free_pid_list(pid_list);
597                 return ret;
598         }
599
600         if (!nr_pids) {
601                 /* Cleared the list of pids */
602                 trace_free_pid_list(pid_list);
603                 read = ret;
604                 pid_list = NULL;
605         }
606
607         *new_pid_list = pid_list;
608
609         return read;
610 }
611
612 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
613 {
614         u64 ts;
615
616         /* Early boot up does not have a buffer yet */
617         if (!buf->buffer)
618                 return trace_clock_local();
619
620         ts = ring_buffer_time_stamp(buf->buffer, cpu);
621         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
622
623         return ts;
624 }
625
626 u64 ftrace_now(int cpu)
627 {
628         return buffer_ftrace_now(&global_trace.array_buffer, cpu);
629 }
630
631 /**
632  * tracing_is_enabled - Show if global_trace has been enabled
633  *
634  * Shows if the global trace has been enabled or not. It uses the
635  * mirror flag "buffer_disabled" to be used in fast paths such as for
636  * the irqsoff tracer. But it may be inaccurate due to races. If you
637  * need to know the accurate state, use tracing_is_on() which is a little
638  * slower, but accurate.
639  */
640 int tracing_is_enabled(void)
641 {
642         /*
643          * For quick access (irqsoff uses this in fast path), just
644          * return the mirror variable of the state of the ring buffer.
645          * It's a little racy, but we don't really care.
646          */
647         smp_rmb();
648         return !global_trace.buffer_disabled;
649 }
650
651 /*
652  * trace_buf_size is the size in bytes that is allocated
653  * for a buffer. Note, the number of bytes is always rounded
654  * to page size.
655  *
656  * This number is purposely set to a low number of 16384.
657  * If a dump on oops happens, it is much appreciated not to have
658  * to wait for all that output. In any case, this is configurable
659  * at both boot time and run time.
660  */
661 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
662
663 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
664
665 /* trace_types holds a link list of available tracers. */
666 static struct tracer            *trace_types __read_mostly;
667
668 /*
669  * trace_types_lock is used to protect the trace_types list.
670  */
671 DEFINE_MUTEX(trace_types_lock);
672
673 /*
674  * serialize the access of the ring buffer
675  *
676  * The ring buffer serializes readers, but that is only low level protection.
677  * The validity of the events (which are returned by ring_buffer_peek() etc.)
678  * is not protected by the ring buffer.
679  *
680  * The content of events may become garbage if we allow other processes to
681  * consume these events concurrently:
682  *   A) the page of the consumed events may become a normal page
683  *      (not a reader page) in the ring buffer, and this page will be
684  *      rewritten by the event producer.
685  *   B) the page of the consumed events may become a page for splice_read,
686  *      and this page will be returned to the system.
687  *
688  * These primitives allow multiple processes to access different cpu ring
689  * buffers concurrently.
690  *
691  * These primitives don't distinguish read-only and read-consume access.
692  * Multiple read-only accesses are also serialized.
693  */
694
695 #ifdef CONFIG_SMP
696 static DECLARE_RWSEM(all_cpu_access_lock);
697 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
698
699 static inline void trace_access_lock(int cpu)
700 {
701         if (cpu == RING_BUFFER_ALL_CPUS) {
702                 /* gain it for accessing the whole ring buffer. */
703                 down_write(&all_cpu_access_lock);
704         } else {
705                 /* gain it for accessing a cpu ring buffer. */
706
707                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
708                 down_read(&all_cpu_access_lock);
709
710                 /* Secondly block other access to this @cpu ring buffer. */
711                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
712         }
713 }
714
715 static inline void trace_access_unlock(int cpu)
716 {
717         if (cpu == RING_BUFFER_ALL_CPUS) {
718                 up_write(&all_cpu_access_lock);
719         } else {
720                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
721                 up_read(&all_cpu_access_lock);
722         }
723 }
724
725 static inline void trace_access_lock_init(void)
726 {
727         int cpu;
728
729         for_each_possible_cpu(cpu)
730                 mutex_init(&per_cpu(cpu_access_lock, cpu));
731 }
732
733 #else
734
735 static DEFINE_MUTEX(access_lock);
736
737 static inline void trace_access_lock(int cpu)
738 {
739         (void)cpu;
740         mutex_lock(&access_lock);
741 }
742
743 static inline void trace_access_unlock(int cpu)
744 {
745         (void)cpu;
746         mutex_unlock(&access_lock);
747 }
748
749 static inline void trace_access_lock_init(void)
750 {
751 }
752
753 #endif
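/*
 * Typical pattern for a reader using the primitives above (a sketch; the
 * real readers in this file also hold other locks and check iterator
 * state):
 *
 *	trace_access_lock(cpu);
 *	// ... consume events from the cpu's (or every) ring buffer ...
 *	trace_access_unlock(cpu);
 *
 * where cpu is either a CPU number or RING_BUFFER_ALL_CPUS.
 */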
754
755 #ifdef CONFIG_STACKTRACE
756 static void __ftrace_trace_stack(struct trace_buffer *buffer,
757                                  unsigned long flags,
758                                  int skip, int pc, struct pt_regs *regs);
759 static inline void ftrace_trace_stack(struct trace_array *tr,
760                                       struct trace_buffer *buffer,
761                                       unsigned long flags,
762                                       int skip, int pc, struct pt_regs *regs);
763
764 #else
765 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
766                                         unsigned long flags,
767                                         int skip, int pc, struct pt_regs *regs)
768 {
769 }
770 static inline void ftrace_trace_stack(struct trace_array *tr,
771                                       struct trace_buffer *buffer,
772                                       unsigned long flags,
773                                       int skip, int pc, struct pt_regs *regs)
774 {
775 }
776
777 #endif
778
779 static __always_inline void
780 trace_event_setup(struct ring_buffer_event *event,
781                   int type, unsigned long flags, int pc)
782 {
783         struct trace_entry *ent = ring_buffer_event_data(event);
784
785         tracing_generic_entry_update(ent, type, flags, pc);
786 }
787
788 static __always_inline struct ring_buffer_event *
789 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
790                           int type,
791                           unsigned long len,
792                           unsigned long flags, int pc)
793 {
794         struct ring_buffer_event *event;
795
796         event = ring_buffer_lock_reserve(buffer, len);
797         if (event != NULL)
798                 trace_event_setup(event, type, flags, pc);
799
800         return event;
801 }
802
803 void tracer_tracing_on(struct trace_array *tr)
804 {
805         if (tr->array_buffer.buffer)
806                 ring_buffer_record_on(tr->array_buffer.buffer);
807         /*
808          * This flag is looked at when buffers haven't been allocated
809          * yet, or by some tracers (like irqsoff), that just want to
810          * know if the ring buffer has been disabled, but it can handle
811          * races of where it gets disabled but we still do a record.
812          * As the check is in the fast path of the tracers, it is more
813          * important to be fast than accurate.
814          */
815         tr->buffer_disabled = 0;
816         /* Make the flag seen by readers */
817         smp_wmb();
818 }
819
820 /**
821  * tracing_on - enable tracing buffers
822  *
823  * This function enables tracing buffers that may have been
824  * disabled with tracing_off.
825  */
826 void tracing_on(void)
827 {
828         tracer_tracing_on(&global_trace);
829 }
830 EXPORT_SYMBOL_GPL(tracing_on);
831
832
833 static __always_inline void
834 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
835 {
836         __this_cpu_write(trace_taskinfo_save, true);
837
838         /* If this is the temp buffer, we need to commit fully */
839         if (this_cpu_read(trace_buffered_event) == event) {
840                 /* Length is in event->array[0] */
841                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
842                 /* Release the temp buffer */
843                 this_cpu_dec(trace_buffered_event_cnt);
844         } else
845                 ring_buffer_unlock_commit(buffer, event);
846 }
847
848 /**
849  * __trace_puts - write a constant string into the trace buffer.
850  * @ip:    The address of the caller
851  * @str:   The constant string to write
852  * @size:  The size of the string.
853  */
854 int __trace_puts(unsigned long ip, const char *str, int size)
855 {
856         struct ring_buffer_event *event;
857         struct trace_buffer *buffer;
858         struct print_entry *entry;
859         unsigned long irq_flags;
860         int alloc;
861         int pc;
862
863         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
864                 return 0;
865
866         pc = preempt_count();
867
868         if (unlikely(tracing_selftest_running || tracing_disabled))
869                 return 0;
870
871         alloc = sizeof(*entry) + size + 2; /* possible \n added */
872
873         local_save_flags(irq_flags);
874         buffer = global_trace.array_buffer.buffer;
875         ring_buffer_nest_start(buffer);
876         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
877                                             irq_flags, pc);
878         if (!event) {
879                 size = 0;
880                 goto out;
881         }
882
883         entry = ring_buffer_event_data(event);
884         entry->ip = ip;
885
886         memcpy(&entry->buf, str, size);
887
888         /* Add a newline if necessary */
889         if (entry->buf[size - 1] != '\n') {
890                 entry->buf[size] = '\n';
891                 entry->buf[size + 1] = '\0';
892         } else
893                 entry->buf[size] = '\0';
894
895         __buffer_unlock_commit(buffer, event);
896         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
897  out:
898         ring_buffer_nest_end(buffer);
899         return size;
900 }
901 EXPORT_SYMBOL_GPL(__trace_puts);
902
903 /**
904  * __trace_bputs - write the pointer to a constant string into trace buffer
905  * @ip:    The address of the caller
906  * @str:   The constant string to write to the buffer
907  */
908 int __trace_bputs(unsigned long ip, const char *str)
909 {
910         struct ring_buffer_event *event;
911         struct trace_buffer *buffer;
912         struct bputs_entry *entry;
913         unsigned long irq_flags;
914         int size = sizeof(struct bputs_entry);
915         int ret = 0;
916         int pc;
917
918         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
919                 return 0;
920
921         pc = preempt_count();
922
923         if (unlikely(tracing_selftest_running || tracing_disabled))
924                 return 0;
925
926         local_save_flags(irq_flags);
927         buffer = global_trace.array_buffer.buffer;
928
929         ring_buffer_nest_start(buffer);
930         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
931                                             irq_flags, pc);
932         if (!event)
933                 goto out;
934
935         entry = ring_buffer_event_data(event);
936         entry->ip                       = ip;
937         entry->str                      = str;
938
939         __buffer_unlock_commit(buffer, event);
940         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
941
942         ret = 1;
943  out:
944         ring_buffer_nest_end(buffer);
945         return ret;
946 }
947 EXPORT_SYMBOL_GPL(__trace_bputs);
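/*
 * Callers normally do not use __trace_puts()/__trace_bputs() directly but
 * go through the trace_puts() macro, e.g.:
 *
 *	trace_puts("reached the interesting spot\n");
 *
 * which, as far as the macro is concerned, picks __trace_bputs() for
 * build-time constant strings and __trace_puts() otherwise.
 */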
948
949 #ifdef CONFIG_TRACER_SNAPSHOT
950 static void tracing_snapshot_instance_cond(struct trace_array *tr,
951                                            void *cond_data)
952 {
953         struct tracer *tracer = tr->current_trace;
954         unsigned long flags;
955
956         if (in_nmi()) {
957                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
958                 internal_trace_puts("*** snapshot is being ignored        ***\n");
959                 return;
960         }
961
962         if (!tr->allocated_snapshot) {
963                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
964                 internal_trace_puts("*** stopping trace here!   ***\n");
965                 tracing_off();
966                 return;
967         }
968
969         /* Note, snapshot can not be used when the tracer uses it */
970         if (tracer->use_max_tr) {
971                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
972                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
973                 return;
974         }
975
976         local_irq_save(flags);
977         update_max_tr(tr, current, smp_processor_id(), cond_data);
978         local_irq_restore(flags);
979 }
980
981 void tracing_snapshot_instance(struct trace_array *tr)
982 {
983         tracing_snapshot_instance_cond(tr, NULL);
984 }
985
986 /**
987  * tracing_snapshot - take a snapshot of the current buffer.
988  *
989  * This causes a swap between the snapshot buffer and the current live
990  * tracing buffer. You can use this to take snapshots of the live
991  * trace when some condition is triggered, but continue to trace.
992  *
993  * Note, make sure to allocate the snapshot with either
994  * a tracing_snapshot_alloc(), or by doing it manually
995  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
996  *
997  * If the snapshot buffer is not allocated, it will stop tracing.
998  * Basically making a permanent snapshot.
999  */
1000 void tracing_snapshot(void)
1001 {
1002         struct trace_array *tr = &global_trace;
1003
1004         tracing_snapshot_instance(tr);
1005 }
1006 EXPORT_SYMBOL_GPL(tracing_snapshot);
1007
1008 /**
1009  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1010  * @tr:         The tracing instance to snapshot
1011  * @cond_data:  The data to be tested conditionally, and possibly saved
1012  *
1013  * This is the same as tracing_snapshot() except that the snapshot is
1014  * conditional - the snapshot will only happen if the
1015  * cond_snapshot.update() implementation receiving the cond_data
1016  * returns true, which means that the trace array's cond_snapshot
1017  * update() operation used the cond_data to determine whether the
1018  * snapshot should be taken, and if it was, presumably saved it along
1019  * with the snapshot.
1020  */
1021 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1022 {
1023         tracing_snapshot_instance_cond(tr, cond_data);
1024 }
1025 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1026
1027 /**
1028  * tracing_snapshot_cond_data - get the user data associated with a snapshot
1029  * @tr:         The tracing instance
1030  *
1031  * When the user enables a conditional snapshot using
1032  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1033  * with the snapshot.  This accessor is used to retrieve it.
1034  *
1035  * Should not be called from cond_snapshot.update(), since it takes
1036  * the tr->max_lock lock, which the code calling
1037  * cond_snapshot.update() has already done.
1038  *
1039  * Returns the cond_data associated with the trace array's snapshot.
1040  */
1041 void *tracing_cond_snapshot_data(struct trace_array *tr)
1042 {
1043         void *cond_data = NULL;
1044
1045         arch_spin_lock(&tr->max_lock);
1046
1047         if (tr->cond_snapshot)
1048                 cond_data = tr->cond_snapshot->cond_data;
1049
1050         arch_spin_unlock(&tr->max_lock);
1051
1052         return cond_data;
1053 }
1054 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1055
1056 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1057                                         struct array_buffer *size_buf, int cpu_id);
1058 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1059
1060 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1061 {
1062         int ret;
1063
1064         if (!tr->allocated_snapshot) {
1065
1066                 /* allocate spare buffer */
1067                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1068                                    &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1069                 if (ret < 0)
1070                         return ret;
1071
1072                 tr->allocated_snapshot = true;
1073         }
1074
1075         return 0;
1076 }
1077
1078 static void free_snapshot(struct trace_array *tr)
1079 {
1080         /*
1081          * We don't free the ring buffer; instead, we resize it because
1082          * the max_tr ring buffer has some state (e.g. ring->clock) and
1083          * we want to preserve it.
1084          */
1085         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1086         set_buffer_entries(&tr->max_buffer, 1);
1087         tracing_reset_online_cpus(&tr->max_buffer);
1088         tr->allocated_snapshot = false;
1089 }
1090
1091 /**
1092  * tracing_alloc_snapshot - allocate snapshot buffer.
1093  *
1094  * This only allocates the snapshot buffer if it isn't already
1095  * allocated - it doesn't also take a snapshot.
1096  *
1097  * This is meant to be used in cases where the snapshot buffer needs
1098  * to be set up for events that can't sleep but need to be able to
1099  * trigger a snapshot.
1100  */
1101 int tracing_alloc_snapshot(void)
1102 {
1103         struct trace_array *tr = &global_trace;
1104         int ret;
1105
1106         ret = tracing_alloc_snapshot_instance(tr);
1107         WARN_ON(ret < 0);
1108
1109         return ret;
1110 }
1111 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1112
1113 /**
1114  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1115  *
1116  * This is similar to tracing_snapshot(), but it will allocate the
1117  * snapshot buffer if it isn't already allocated. Use this only
1118  * where it is safe to sleep, as the allocation may sleep.
1119  *
1120  * This causes a swap between the snapshot buffer and the current live
1121  * tracing buffer. You can use this to take snapshots of the live
1122  * trace when some condition is triggered, but continue to trace.
1123  */
1124 void tracing_snapshot_alloc(void)
1125 {
1126         int ret;
1127
1128         ret = tracing_alloc_snapshot();
1129         if (ret < 0)
1130                 return;
1131
1132         tracing_snapshot();
1133 }
1134 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
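/*
 * A minimal sketch of how a kernel user might combine the two calls above
 * (purely illustrative; the condition is a stand-in):
 *
 *	// once, from a context that may sleep
 *	if (tracing_alloc_snapshot() < 0)
 *		return;
 *
 *	// later, possibly from atomic context, at the point of interest
 *	if (something_interesting_happened)
 *		tracing_snapshot();
 *
 * or simply call tracing_snapshot_alloc() from a sleepable context to do
 * both steps in one go.
 */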
1135
1136 /**
1137  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1138  * @tr:         The tracing instance
1139  * @cond_data:  User data to associate with the snapshot
1140  * @update:     Implementation of the cond_snapshot update function
1141  *
1142  * Check whether the conditional snapshot for the given instance has
1143  * already been enabled, or if the current tracer is already using a
1144  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1145  * save the cond_data and update function inside.
1146  *
1147  * Returns 0 if successful, error otherwise.
1148  */
1149 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1150                                  cond_update_fn_t update)
1151 {
1152         struct cond_snapshot *cond_snapshot;
1153         int ret = 0;
1154
1155         cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1156         if (!cond_snapshot)
1157                 return -ENOMEM;
1158
1159         cond_snapshot->cond_data = cond_data;
1160         cond_snapshot->update = update;
1161
1162         mutex_lock(&trace_types_lock);
1163
1164         ret = tracing_alloc_snapshot_instance(tr);
1165         if (ret)
1166                 goto fail_unlock;
1167
1168         if (tr->current_trace->use_max_tr) {
1169                 ret = -EBUSY;
1170                 goto fail_unlock;
1171         }
1172
1173         /*
1174          * The cond_snapshot can only change to NULL without the
1175          * trace_types_lock. We don't care if we race with it going
1176          * to NULL, but we want to make sure that it's not set to
1177          * something other than NULL when we get here, which we can
1178          * do safely with only holding the trace_types_lock and not
1179          * having to take the max_lock.
1180          */
1181         if (tr->cond_snapshot) {
1182                 ret = -EBUSY;
1183                 goto fail_unlock;
1184         }
1185
1186         arch_spin_lock(&tr->max_lock);
1187         tr->cond_snapshot = cond_snapshot;
1188         arch_spin_unlock(&tr->max_lock);
1189
1190         mutex_unlock(&trace_types_lock);
1191
1192         return ret;
1193
1194  fail_unlock:
1195         mutex_unlock(&trace_types_lock);
1196         kfree(cond_snapshot);
1197         return ret;
1198 }
1199 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
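/*
 * A sketch of a conditional snapshot user, assuming cond_update_fn_t has
 * the shape used by update_max_tr() below (return true to take the
 * snapshot); the my_* names are hypothetical:
 *
 *	static bool my_update(struct trace_array *tr, void *cond_data)
 *	{
 *		struct my_state *state = cond_data;
 *
 *		return state->hit_threshold;	// snapshot only when true
 *	}
 *
 *	// enable:  tracing_snapshot_cond_enable(tr, &my_state, my_update);
 *	// trigger: tracing_snapshot_cond(tr, &my_state);
 *	// disable: tracing_snapshot_cond_disable(tr);
 */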
1200
1201 /**
1202  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1203  * @tr:         The tracing instance
1204  *
1205  * Check whether the conditional snapshot for the given instance is
1206  * enabled; if so, free the cond_snapshot associated with it,
1207  * otherwise return -EINVAL.
1208  *
1209  * Returns 0 if successful, error otherwise.
1210  */
1211 int tracing_snapshot_cond_disable(struct trace_array *tr)
1212 {
1213         int ret = 0;
1214
1215         arch_spin_lock(&tr->max_lock);
1216
1217         if (!tr->cond_snapshot)
1218                 ret = -EINVAL;
1219         else {
1220                 kfree(tr->cond_snapshot);
1221                 tr->cond_snapshot = NULL;
1222         }
1223
1224         arch_spin_unlock(&tr->max_lock);
1225
1226         return ret;
1227 }
1228 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1229 #else
1230 void tracing_snapshot(void)
1231 {
1232         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1233 }
1234 EXPORT_SYMBOL_GPL(tracing_snapshot);
1235 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1236 {
1237         WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1238 }
1239 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1240 int tracing_alloc_snapshot(void)
1241 {
1242         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1243         return -ENODEV;
1244 }
1245 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1246 void tracing_snapshot_alloc(void)
1247 {
1248         /* Give warning */
1249         tracing_snapshot();
1250 }
1251 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1252 void *tracing_cond_snapshot_data(struct trace_array *tr)
1253 {
1254         return NULL;
1255 }
1256 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1257 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1258 {
1259         return -ENODEV;
1260 }
1261 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1262 int tracing_snapshot_cond_disable(struct trace_array *tr)
1263 {
1264         return false;
1265 }
1266 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1267 #endif /* CONFIG_TRACER_SNAPSHOT */
1268
1269 void tracer_tracing_off(struct trace_array *tr)
1270 {
1271         if (tr->array_buffer.buffer)
1272                 ring_buffer_record_off(tr->array_buffer.buffer);
1273         /*
1274          * This flag is looked at when buffers haven't been allocated
1275          * yet, or by some tracers (like irqsoff), that just want to
1276          * know if the ring buffer has been disabled, but it can handle
1277          * races of where it gets disabled but we still do a record.
1278          * As the check is in the fast path of the tracers, it is more
1279          * important to be fast than accurate.
1280          */
1281         tr->buffer_disabled = 1;
1282         /* Make the flag seen by readers */
1283         smp_wmb();
1284 }
1285
1286 /**
1287  * tracing_off - turn off tracing buffers
1288  *
1289  * This function stops the tracing buffers from recording data.
1290  * It does not disable any overhead the tracers themselves may
1291  * be causing. This function simply causes all recording to
1292  * the ring buffers to fail.
1293  */
1294 void tracing_off(void)
1295 {
1296         tracer_tracing_off(&global_trace);
1297 }
1298 EXPORT_SYMBOL_GPL(tracing_off);
1299
1300 void disable_trace_on_warning(void)
1301 {
1302         if (__disable_trace_on_warning) {
1303                 trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1304                         "Disabling tracing due to warning\n");
1305                 tracing_off();
1306         }
1307 }
1308
1309 /**
1310  * tracer_tracing_is_on - show real state of ring buffer enabled
1311  * @tr : the trace array to know if ring buffer is enabled
1312  *
1313  * Shows real state of the ring buffer if it is enabled or not.
1314  */
1315 bool tracer_tracing_is_on(struct trace_array *tr)
1316 {
1317         if (tr->array_buffer.buffer)
1318                 return ring_buffer_record_is_on(tr->array_buffer.buffer);
1319         return !tr->buffer_disabled;
1320 }
1321
1322 /**
1323  * tracing_is_on - show state of ring buffers enabled
1324  */
1325 int tracing_is_on(void)
1326 {
1327         return tracer_tracing_is_on(&global_trace);
1328 }
1329 EXPORT_SYMBOL_GPL(tracing_is_on);
1330
1331 static int __init set_buf_size(char *str)
1332 {
1333         unsigned long buf_size;
1334
1335         if (!str)
1336                 return 0;
1337         buf_size = memparse(str, &str);
1338         /* nr_entries can not be zero */
1339         if (buf_size == 0)
1340                 return 0;
1341         trace_buf_size = buf_size;
1342         return 1;
1343 }
1344 __setup("trace_buf_size=", set_buf_size);
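/*
 * set_buf_size() parses its argument with memparse(), so the usual K/M/G
 * suffixes are accepted on the kernel command line, e.g.:
 *
 *	trace_buf_size=1M
 *
 * (the value is rounded to page size, as noted in the comment above
 * trace_buf_size).
 */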
1345
1346 static int __init set_tracing_thresh(char *str)
1347 {
1348         unsigned long threshold;
1349         int ret;
1350
1351         if (!str)
1352                 return 0;
1353         ret = kstrtoul(str, 0, &threshold);
1354         if (ret < 0)
1355                 return 0;
1356         tracing_thresh = threshold * 1000;
1357         return 1;
1358 }
1359 __setup("tracing_thresh=", set_tracing_thresh);
1360
1361 unsigned long nsecs_to_usecs(unsigned long nsecs)
1362 {
1363         return nsecs / 1000;
1364 }
1365
1366 /*
1367  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1368  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1369  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1370  * of strings in the order that the evals (enum) were defined.
1371  */
1372 #undef C
1373 #define C(a, b) b
1374
1375 /* These must match the bit positions in trace_iterator_flags */
1376 static const char *trace_options[] = {
1377         TRACE_FLAGS
1378         NULL
1379 };
1380
1381 static struct {
1382         u64 (*func)(void);
1383         const char *name;
1384         int in_ns;              /* is this clock in nanoseconds? */
1385 } trace_clocks[] = {
1386         { trace_clock_local,            "local",        1 },
1387         { trace_clock_global,           "global",       1 },
1388         { trace_clock_counter,          "counter",      0 },
1389         { trace_clock_jiffies,          "uptime",       0 },
1390         { trace_clock,                  "perf",         1 },
1391         { ktime_get_mono_fast_ns,       "mono",         1 },
1392         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1393         { ktime_get_boot_fast_ns,       "boot",         1 },
1394         ARCH_TRACE_CLOCKS
1395 };
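/*
 * The names above (plus whatever ARCH_TRACE_CLOCKS provides) are what the
 * tracefs "trace_clock" file accepts; a sketch of selecting one at run
 * time:
 *
 *	echo global > /sys/kernel/tracing/trace_clock
 */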
1396
1397 bool trace_clock_in_ns(struct trace_array *tr)
1398 {
1399         if (trace_clocks[tr->clock_id].in_ns)
1400                 return true;
1401
1402         return false;
1403 }
1404
1405 /*
1406  * trace_parser_get_init - gets the buffer for trace parser
1407  */
1408 int trace_parser_get_init(struct trace_parser *parser, int size)
1409 {
1410         memset(parser, 0, sizeof(*parser));
1411
1412         parser->buffer = kmalloc(size, GFP_KERNEL);
1413         if (!parser->buffer)
1414                 return 1;
1415
1416         parser->size = size;
1417         return 0;
1418 }
1419
1420 /*
1421  * trace_parser_put - frees the buffer for trace parser
1422  */
1423 void trace_parser_put(struct trace_parser *parser)
1424 {
1425         kfree(parser->buffer);
1426         parser->buffer = NULL;
1427 }
1428
1429 /*
1430  * trace_get_user - reads the user input string separated by space
1431  * (matched by isspace(ch))
1432  *
1433  * For each string found the 'struct trace_parser' is updated,
1434  * and the function returns.
1435  *
1436  * Returns number of bytes read.
1437  *
1438  * See kernel/trace/trace.h for 'struct trace_parser' details.
1439  */
1440 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1441         size_t cnt, loff_t *ppos)
1442 {
1443         char ch;
1444         size_t read = 0;
1445         ssize_t ret;
1446
1447         if (!*ppos)
1448                 trace_parser_clear(parser);
1449
1450         ret = get_user(ch, ubuf++);
1451         if (ret)
1452                 goto out;
1453
1454         read++;
1455         cnt--;
1456
1457         /*
1458          * The parser is not finished with the last write,
1459          * continue reading the user input without skipping spaces.
1460          */
1461         if (!parser->cont) {
1462                 /* skip white space */
1463                 while (cnt && isspace(ch)) {
1464                         ret = get_user(ch, ubuf++);
1465                         if (ret)
1466                                 goto out;
1467                         read++;
1468                         cnt--;
1469                 }
1470
1471                 parser->idx = 0;
1472
1473                 /* only spaces were written */
1474                 if (isspace(ch) || !ch) {
1475                         *ppos += read;
1476                         ret = read;
1477                         goto out;
1478                 }
1479         }
1480
1481         /* read the non-space input */
1482         while (cnt && !isspace(ch) && ch) {
1483                 if (parser->idx < parser->size - 1)
1484                         parser->buffer[parser->idx++] = ch;
1485                 else {
1486                         ret = -EINVAL;
1487                         goto out;
1488                 }
1489                 ret = get_user(ch, ubuf++);
1490                 if (ret)
1491                         goto out;
1492                 read++;
1493                 cnt--;
1494         }
1495
1496         /* We either got finished input or we have to wait for another call. */
1497         if (isspace(ch) || !ch) {
1498                 parser->buffer[parser->idx] = 0;
1499                 parser->cont = false;
1500         } else if (parser->idx < parser->size - 1) {
1501                 parser->cont = true;
1502                 parser->buffer[parser->idx++] = ch;
1503                 /* Make sure the parsed string always terminates with '\0'. */
1504                 parser->buffer[parser->idx] = 0;
1505         } else {
1506                 ret = -EINVAL;
1507                 goto out;
1508         }
1509
1510         *ppos += read;
1511         ret = read;
1512
1513 out:
1514         return ret;
1515 }
1516
1517 /* TODO add a seq_buf_to_buffer() */
1518 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1519 {
1520         int len;
1521
1522         if (trace_seq_used(s) <= s->seq.readpos)
1523                 return -EBUSY;
1524
1525         len = trace_seq_used(s) - s->seq.readpos;
1526         if (cnt > len)
1527                 cnt = len;
1528         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1529
1530         s->seq.readpos += cnt;
1531         return cnt;
1532 }
1533
1534 unsigned long __read_mostly     tracing_thresh;
1535 static const struct file_operations tracing_max_lat_fops;
1536
1537 #if (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1538         defined(CONFIG_FSNOTIFY)
1539
1540 static struct workqueue_struct *fsnotify_wq;
1541
1542 static void latency_fsnotify_workfn(struct work_struct *work)
1543 {
1544         struct trace_array *tr = container_of(work, struct trace_array,
1545                                               fsnotify_work);
1546         fsnotify(tr->d_max_latency->d_inode, FS_MODIFY,
1547                  tr->d_max_latency->d_inode, FSNOTIFY_EVENT_INODE, NULL, 0);
1548 }
1549
1550 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1551 {
1552         struct trace_array *tr = container_of(iwork, struct trace_array,
1553                                               fsnotify_irqwork);
1554         queue_work(fsnotify_wq, &tr->fsnotify_work);
1555 }
1556
1557 static void trace_create_maxlat_file(struct trace_array *tr,
1558                                      struct dentry *d_tracer)
1559 {
1560         INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1561         init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1562         tr->d_max_latency = trace_create_file("tracing_max_latency", 0644,
1563                                               d_tracer, &tr->max_latency,
1564                                               &tracing_max_lat_fops);
1565 }
1566
1567 __init static int latency_fsnotify_init(void)
1568 {
1569         fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1570                                       WQ_UNBOUND | WQ_HIGHPRI, 0);
1571         if (!fsnotify_wq) {
1572                 pr_err("Unable to allocate tr_max_lat_wq\n");
1573                 return -ENOMEM;
1574         }
1575         return 0;
1576 }
1577
1578 late_initcall_sync(latency_fsnotify_init);
1579
1580 void latency_fsnotify(struct trace_array *tr)
1581 {
1582         if (!fsnotify_wq)
1583                 return;
1584         /*
1585          * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1586          * possible that we are called from __schedule() or do_idle(), which
1587          * could cause a deadlock.
1588          */
1589         irq_work_queue(&tr->fsnotify_irqwork);
1590 }
1591
1592 /*
1593  * (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1594  *  defined(CONFIG_FSNOTIFY)
1595  */
1596 #else
1597
1598 #define trace_create_maxlat_file(tr, d_tracer)                          \
1599         trace_create_file("tracing_max_latency", 0644, d_tracer,        \
1600                           &tr->max_latency, &tracing_max_lat_fops)
1601
1602 #endif
1603
1604 #ifdef CONFIG_TRACER_MAX_TRACE
1605 /*
1606  * Copy the new maximum trace into the separate maximum-trace
1607  * structure. (this way the maximum trace is permanently saved,
1608  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1609  */
1610 static void
1611 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1612 {
1613         struct array_buffer *trace_buf = &tr->array_buffer;
1614         struct array_buffer *max_buf = &tr->max_buffer;
1615         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1616         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1617
1618         max_buf->cpu = cpu;
1619         max_buf->time_start = data->preempt_timestamp;
1620
1621         max_data->saved_latency = tr->max_latency;
1622         max_data->critical_start = data->critical_start;
1623         max_data->critical_end = data->critical_end;
1624
1625         strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1626         max_data->pid = tsk->pid;
1627         /*
1628          * If tsk == current, then use current_uid(), as that does not use
1629          * RCU. The irq tracer can be called out of RCU scope.
1630          */
1631         if (tsk == current)
1632                 max_data->uid = current_uid();
1633         else
1634                 max_data->uid = task_uid(tsk);
1635
1636         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1637         max_data->policy = tsk->policy;
1638         max_data->rt_priority = tsk->rt_priority;
1639
1640         /* record this task's comm */
1641         tracing_record_cmdline(tsk);
1642         latency_fsnotify(tr);
1643 }
1644
1645 /**
1646  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1647  * @tr: tracer
1648  * @tsk: the task with the latency
1649  * @cpu: The cpu that initiated the trace.
1650  * @cond_data: User data associated with a conditional snapshot
1651  *
1652  * Flip the buffers between the @tr and the max_tr and record information
1653  * about which task was the cause of this latency.
1654  */
1655 void
1656 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1657               void *cond_data)
1658 {
1659         if (tr->stop_count)
1660                 return;
1661
1662         WARN_ON_ONCE(!irqs_disabled());
1663
1664         if (!tr->allocated_snapshot) {
1665                 /* Only the nop tracer should hit this when disabling */
1666                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1667                 return;
1668         }
1669
1670         arch_spin_lock(&tr->max_lock);
1671
1672         /* Inherit the recordable setting from array_buffer */
1673         if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1674                 ring_buffer_record_on(tr->max_buffer.buffer);
1675         else
1676                 ring_buffer_record_off(tr->max_buffer.buffer);
1677
1678 #ifdef CONFIG_TRACER_SNAPSHOT
1679         if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1680                 goto out_unlock;
1681 #endif
1682         swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1683
1684         __update_max_tr(tr, tsk, cpu);
1685
1686  out_unlock:
1687         arch_spin_unlock(&tr->max_lock);
1688 }
1689
1690 /**
1691  * update_max_tr_single - only copy one trace over, and reset the rest
1692  * @tr: tracer
1693  * @tsk: task with the latency
1694  * @cpu: the cpu of the buffer to copy.
1695  *
1696  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1697  */
1698 void
1699 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1700 {
1701         int ret;
1702
1703         if (tr->stop_count)
1704                 return;
1705
1706         WARN_ON_ONCE(!irqs_disabled());
1707         if (!tr->allocated_snapshot) {
1708                 /* Only the nop tracer should hit this when disabling */
1709                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1710                 return;
1711         }
1712
1713         arch_spin_lock(&tr->max_lock);
1714
1715         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1716
1717         if (ret == -EBUSY) {
1718                 /*
1719                  * We failed to swap the buffer due to a commit taking
1720                  * place on this CPU. We fail to record, but we reset
1721                  * the max trace buffer (no one writes directly to it)
1722                  * and flag that it failed.
1723                  */
1724                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1725                         "Failed to swap buffers due to commit in progress\n");
1726         }
1727
1728         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1729
1730         __update_max_tr(tr, tsk, cpu);
1731         arch_spin_unlock(&tr->max_lock);
1732 }
1733 #endif /* CONFIG_TRACER_MAX_TRACE */
1734
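/*
 * wait_on_pipe - helper for the trace_pipe style readers.  Iterators that
 * work on a static snapshot are either filled or empty and never wait;
 * otherwise this blocks in ring_buffer_wait() until data (or, with @full
 * set, enough data) is available in the per-cpu buffer being read.
 */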
1735 static int wait_on_pipe(struct trace_iterator *iter, int full)
1736 {
1737         /* Iterators are static, they should be filled or empty */
1738         if (trace_buffer_iter(iter, iter->cpu_file))
1739                 return 0;
1740
1741         return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1742                                 full);
1743 }
1744
1745 #ifdef CONFIG_FTRACE_STARTUP_TEST
1746 static bool selftests_can_run;
1747
1748 struct trace_selftests {
1749         struct list_head                list;
1750         struct tracer                   *type;
1751 };
1752
1753 static LIST_HEAD(postponed_selftests);
1754
1755 static int save_selftest(struct tracer *type)
1756 {
1757         struct trace_selftests *selftest;
1758
1759         selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1760         if (!selftest)
1761                 return -ENOMEM;
1762
1763         selftest->type = type;
1764         list_add(&selftest->list, &postponed_selftests);
1765         return 0;
1766 }
1767
1768 static int run_tracer_selftest(struct tracer *type)
1769 {
1770         struct trace_array *tr = &global_trace;
1771         struct tracer *saved_tracer = tr->current_trace;
1772         int ret;
1773
1774         if (!type->selftest || tracing_selftest_disabled)
1775                 return 0;
1776
1777         /*
1778          * If a tracer registers early in boot up (before scheduling is
1779          * initialized and such), then do not run its selftests yet.
1780          * Instead, run it a little later in the boot process.
1781          */
1782         if (!selftests_can_run)
1783                 return save_selftest(type);
1784
1785         /*
1786          * Run a selftest on this tracer.
1787          * Here we reset the trace buffer, and set the current
1788          * tracer to be this tracer. The tracer can then run some
1789          * internal tracing to verify that everything is in order.
1790          * If we fail, we do not register this tracer.
1791          */
1792         tracing_reset_online_cpus(&tr->array_buffer);
1793
1794         tr->current_trace = type;
1795
1796 #ifdef CONFIG_TRACER_MAX_TRACE
1797         if (type->use_max_tr) {
1798                 /* If we expanded the buffers, make sure the max is expanded too */
1799                 if (ring_buffer_expanded)
1800                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1801                                            RING_BUFFER_ALL_CPUS);
1802                 tr->allocated_snapshot = true;
1803         }
1804 #endif
1805
1806         /* the test is responsible for initializing and enabling */
1807         pr_info("Testing tracer %s: ", type->name);
1808         ret = type->selftest(type, tr);
1809         /* the test is responsible for resetting too */
1810         tr->current_trace = saved_tracer;
1811         if (ret) {
1812                 printk(KERN_CONT "FAILED!\n");
1813                 /* Add the warning after printing 'FAILED' */
1814                 WARN_ON(1);
1815                 return -1;
1816         }
1817         /* Only reset on passing, to avoid touching corrupted buffers */
1818         tracing_reset_online_cpus(&tr->array_buffer);
1819
1820 #ifdef CONFIG_TRACER_MAX_TRACE
1821         if (type->use_max_tr) {
1822                 tr->allocated_snapshot = false;
1823
1824                 /* Shrink the max buffer again */
1825                 if (ring_buffer_expanded)
1826                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1827                                            RING_BUFFER_ALL_CPUS);
1828         }
1829 #endif
1830
1831         printk(KERN_CONT "PASSED\n");
1832         return 0;
1833 }
1834
1835 static __init int init_trace_selftests(void)
1836 {
1837         struct trace_selftests *p, *n;
1838         struct tracer *t, **last;
1839         int ret;
1840
1841         selftests_can_run = true;
1842
1843         mutex_lock(&trace_types_lock);
1844
1845         if (list_empty(&postponed_selftests))
1846                 goto out;
1847
1848         pr_info("Running postponed tracer tests:\n");
1849
1850         tracing_selftest_running = true;
1851         list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1852                 /* This loop can take minutes when sanitizers are enabled, so
1853                  * let's make sure we allow RCU processing.
1854                  */
1855                 cond_resched();
1856                 ret = run_tracer_selftest(p->type);
1857                 /* If the test fails, then warn and remove from available_tracers */
1858                 if (ret < 0) {
1859                         WARN(1, "tracer: %s failed selftest, disabling\n",
1860                              p->type->name);
1861                         last = &trace_types;
1862                         for (t = trace_types; t; t = t->next) {
1863                                 if (t == p->type) {
1864                                         *last = t->next;
1865                                         break;
1866                                 }
1867                                 last = &t->next;
1868                         }
1869                 }
1870                 list_del(&p->list);
1871                 kfree(p);
1872         }
1873         tracing_selftest_running = false;
1874
1875  out:
1876         mutex_unlock(&trace_types_lock);
1877
1878         return 0;
1879 }
1880 core_initcall(init_trace_selftests);
1881 #else
1882 static inline int run_tracer_selftest(struct tracer *type)
1883 {
1884         return 0;
1885 }
1886 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1887
1888 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1889
1890 static void __init apply_trace_boot_options(void);
1891
1892 /**
1893  * register_tracer - register a tracer with the ftrace system.
1894  * @type: the plugin for the tracer
1895  *
1896  * Register a new plugin tracer.
1897  */
1898 int __init register_tracer(struct tracer *type)
1899 {
1900         struct tracer *t;
1901         int ret = 0;
1902
1903         if (!type->name) {
1904                 pr_info("Tracer must have a name\n");
1905                 return -1;
1906         }
1907
1908         if (strlen(type->name) >= MAX_TRACER_SIZE) {
1909                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1910                 return -1;
1911         }
1912
1913         if (security_locked_down(LOCKDOWN_TRACEFS)) {
1914                 pr_warn("Can not register tracer %s due to lockdown\n",
1915                            type->name);
1916                 return -EPERM;
1917         }
1918
1919         mutex_lock(&trace_types_lock);
1920
1921         tracing_selftest_running = true;
1922
1923         for (t = trace_types; t; t = t->next) {
1924                 if (strcmp(type->name, t->name) == 0) {
1925                         /* already found */
1926                         pr_info("Tracer %s already registered\n",
1927                                 type->name);
1928                         ret = -1;
1929                         goto out;
1930                 }
1931         }
1932
1933         if (!type->set_flag)
1934                 type->set_flag = &dummy_set_flag;
1935         if (!type->flags) {
1936                 /* Allocate a dummy tracer_flags */
1937                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1938                 if (!type->flags) {
1939                         ret = -ENOMEM;
1940                         goto out;
1941                 }
1942                 type->flags->val = 0;
1943                 type->flags->opts = dummy_tracer_opt;
1944         } else
1945                 if (!type->flags->opts)
1946                         type->flags->opts = dummy_tracer_opt;
1947
1948         /* store the tracer for __set_tracer_option */
1949         type->flags->trace = type;
1950
1951         ret = run_tracer_selftest(type);
1952         if (ret < 0)
1953                 goto out;
1954
1955         type->next = trace_types;
1956         trace_types = type;
1957         add_tracer_options(&global_trace, type);
1958
1959  out:
1960         tracing_selftest_running = false;
1961         mutex_unlock(&trace_types_lock);
1962
1963         if (ret || !default_bootup_tracer)
1964                 goto out_unlock;
1965
1966         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1967                 goto out_unlock;
1968
1969         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1970         /* Do we want this tracer to start on bootup? */
1971         tracing_set_tracer(&global_trace, type->name);
1972         default_bootup_tracer = NULL;
1973
1974         apply_trace_boot_options();
1975
1976         /* Disable other selftests, since this will break them. */
1977         tracing_selftest_disabled = true;
1978 #ifdef CONFIG_FTRACE_STARTUP_TEST
1979         printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1980                type->name);
1981 #endif
1982
1983  out_unlock:
1984         return ret;
1985 }
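/*
 * Illustrative sketch only (not taken from this file): a built-in tracer
 * normally registers itself from its own __init code, roughly:
 *
 *	static struct tracer my_tracer __read_mostly = {
 *		.name	= "my_tracer",
 *		.init	= my_tracer_init,
 *		.reset	= my_tracer_reset,
 *	};
 *
 *	static __init int init_my_tracer(void)
 *	{
 *		return register_tracer(&my_tracer);
 *	}
 *	core_initcall(init_my_tracer);
 *
 * The my_tracer names are hypothetical; see the nop or function tracers for
 * real users.  Since register_tracer() is __init, tracers cannot be
 * registered from loadable modules.
 */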
1986
1987 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
1988 {
1989         struct trace_buffer *buffer = buf->buffer;
1990
1991         if (!buffer)
1992                 return;
1993
1994         ring_buffer_record_disable(buffer);
1995
1996         /* Make sure all commits have finished */
1997         synchronize_rcu();
1998         ring_buffer_reset_cpu(buffer, cpu);
1999
2000         ring_buffer_record_enable(buffer);
2001 }
2002
2003 void tracing_reset_online_cpus(struct array_buffer *buf)
2004 {
2005         struct trace_buffer *buffer = buf->buffer;
2006         int cpu;
2007
2008         if (!buffer)
2009                 return;
2010
2011         ring_buffer_record_disable(buffer);
2012
2013         /* Make sure all commits have finished */
2014         synchronize_rcu();
2015
2016         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2017
2018         for_each_online_cpu(cpu)
2019                 ring_buffer_reset_cpu(buffer, cpu);
2020
2021         ring_buffer_record_enable(buffer);
2022 }
2023
2024 /* Must have trace_types_lock held */
2025 void tracing_reset_all_online_cpus(void)
2026 {
2027         struct trace_array *tr;
2028
2029         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2030                 if (!tr->clear_trace)
2031                         continue;
2032                 tr->clear_trace = false;
2033                 tracing_reset_online_cpus(&tr->array_buffer);
2034 #ifdef CONFIG_TRACER_MAX_TRACE
2035                 tracing_reset_online_cpus(&tr->max_buffer);
2036 #endif
2037         }
2038 }
2039
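/*
 * Map of pid -> tgid, indexed directly by pid (0 .. PID_MAX_DEFAULT).  It is
 * only allocated (elsewhere in this file) once tgid recording is enabled;
 * until then trace_find_tgid() simply returns 0.
 */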
2040 static int *tgid_map;
2041
2042 #define SAVED_CMDLINES_DEFAULT 128
2043 #define NO_CMDLINE_MAP UINT_MAX
2044 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
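/*
 * savedcmd caches the comm of recently seen tasks:
 *   map_pid_to_cmdline[pid]  -> slot in saved_cmdlines (or NO_CMDLINE_MAP)
 *   map_cmdline_to_pid[slot] -> pid currently owning that slot
 *   saved_cmdlines           -> cmdline_num entries of TASK_COMM_LEN bytes
 * Slots are recycled round-robin via cmdline_idx (see trace_save_cmdline()).
 */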
2045 struct saved_cmdlines_buffer {
2046         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2047         unsigned *map_cmdline_to_pid;
2048         unsigned cmdline_num;
2049         int cmdline_idx;
2050         char *saved_cmdlines;
2051 };
2052 static struct saved_cmdlines_buffer *savedcmd;
2053
2054 /* temporarily disable recording */
2055 static atomic_t trace_record_taskinfo_disabled __read_mostly;
2056
2057 static inline char *get_saved_cmdlines(int idx)
2058 {
2059         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2060 }
2061
2062 static inline void set_cmdline(int idx, const char *cmdline)
2063 {
2064         strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2065 }
2066
2067 static int allocate_cmdlines_buffer(unsigned int val,
2068                                     struct saved_cmdlines_buffer *s)
2069 {
2070         s->map_cmdline_to_pid = kmalloc_array(val,
2071                                               sizeof(*s->map_cmdline_to_pid),
2072                                               GFP_KERNEL);
2073         if (!s->map_cmdline_to_pid)
2074                 return -ENOMEM;
2075
2076         s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2077         if (!s->saved_cmdlines) {
2078                 kfree(s->map_cmdline_to_pid);
2079                 return -ENOMEM;
2080         }
2081
2082         s->cmdline_idx = 0;
2083         s->cmdline_num = val;
2084         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2085                sizeof(s->map_pid_to_cmdline));
2086         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2087                val * sizeof(*s->map_cmdline_to_pid));
2088
2089         return 0;
2090 }
2091
2092 static int trace_create_savedcmd(void)
2093 {
2094         int ret;
2095
2096         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2097         if (!savedcmd)
2098                 return -ENOMEM;
2099
2100         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2101         if (ret < 0) {
2102                 kfree(savedcmd);
2103                 savedcmd = NULL;
2104                 return -ENOMEM;
2105         }
2106
2107         return 0;
2108 }
2109
2110 int is_tracing_stopped(void)
2111 {
2112         return global_trace.stop_count;
2113 }
2114
2115 /**
2116  * tracing_start - quick start of the tracer
2117  *
2118  * If tracing is enabled but was stopped by tracing_stop,
2119  * this will start the tracer back up.
2120  */
2121 void tracing_start(void)
2122 {
2123         struct trace_buffer *buffer;
2124         unsigned long flags;
2125
2126         if (tracing_disabled)
2127                 return;
2128
2129         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2130         if (--global_trace.stop_count) {
2131                 if (global_trace.stop_count < 0) {
2132                         /* Someone screwed up their debugging */
2133                         WARN_ON_ONCE(1);
2134                         global_trace.stop_count = 0;
2135                 }
2136                 goto out;
2137         }
2138
2139         /* Prevent the buffers from switching */
2140         arch_spin_lock(&global_trace.max_lock);
2141
2142         buffer = global_trace.array_buffer.buffer;
2143         if (buffer)
2144                 ring_buffer_record_enable(buffer);
2145
2146 #ifdef CONFIG_TRACER_MAX_TRACE
2147         buffer = global_trace.max_buffer.buffer;
2148         if (buffer)
2149                 ring_buffer_record_enable(buffer);
2150 #endif
2151
2152         arch_spin_unlock(&global_trace.max_lock);
2153
2154  out:
2155         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2156 }
2157
2158 static void tracing_start_tr(struct trace_array *tr)
2159 {
2160         struct trace_buffer *buffer;
2161         unsigned long flags;
2162
2163         if (tracing_disabled)
2164                 return;
2165
2166         /* If global, we need to also start the max tracer */
2167         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2168                 return tracing_start();
2169
2170         raw_spin_lock_irqsave(&tr->start_lock, flags);
2171
2172         if (--tr->stop_count) {
2173                 if (tr->stop_count < 0) {
2174                         /* Someone screwed up their debugging */
2175                         WARN_ON_ONCE(1);
2176                         tr->stop_count = 0;
2177                 }
2178                 goto out;
2179         }
2180
2181         buffer = tr->array_buffer.buffer;
2182         if (buffer)
2183                 ring_buffer_record_enable(buffer);
2184
2185  out:
2186         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2187 }
2188
2189 /**
2190  * tracing_stop - quick stop of the tracer
2191  *
2192  * Lightweight way to stop tracing. Use in conjunction with
2193  * tracing_start.
2194  */
2195 void tracing_stop(void)
2196 {
2197         struct trace_buffer *buffer;
2198         unsigned long flags;
2199
2200         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2201         if (global_trace.stop_count++)
2202                 goto out;
2203
2204         /* Prevent the buffers from switching */
2205         arch_spin_lock(&global_trace.max_lock);
2206
2207         buffer = global_trace.array_buffer.buffer;
2208         if (buffer)
2209                 ring_buffer_record_disable(buffer);
2210
2211 #ifdef CONFIG_TRACER_MAX_TRACE
2212         buffer = global_trace.max_buffer.buffer;
2213         if (buffer)
2214                 ring_buffer_record_disable(buffer);
2215 #endif
2216
2217         arch_spin_unlock(&global_trace.max_lock);
2218
2219  out:
2220         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2221 }
2222
2223 static void tracing_stop_tr(struct trace_array *tr)
2224 {
2225         struct trace_buffer *buffer;
2226         unsigned long flags;
2227
2228         /* If global, we need to also stop the max tracer */
2229         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2230                 return tracing_stop();
2231
2232         raw_spin_lock_irqsave(&tr->start_lock, flags);
2233         if (tr->stop_count++)
2234                 goto out;
2235
2236         buffer = tr->array_buffer.buffer;
2237         if (buffer)
2238                 ring_buffer_record_disable(buffer);
2239
2240  out:
2241         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2242 }
2243
2244 static int trace_save_cmdline(struct task_struct *tsk)
2245 {
2246         unsigned pid, idx;
2247
2248         /* treat recording of idle task as a success */
2249         if (!tsk->pid)
2250                 return 1;
2251
2252         if (unlikely(tsk->pid > PID_MAX_DEFAULT))
2253                 return 0;
2254
2255         /*
2256          * It's not the end of the world if we don't get
2257          * the lock, but we also don't want to spin
2258          * nor do we want to disable interrupts,
2259          * so if we miss here, then better luck next time.
2260          */
2261         if (!arch_spin_trylock(&trace_cmdline_lock))
2262                 return 0;
2263
2264         idx = savedcmd->map_pid_to_cmdline[tsk->pid];
2265         if (idx == NO_CMDLINE_MAP) {
2266                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2267
2268                 /*
2269                  * Check whether the cmdline buffer at idx has a pid
2270                  * mapped. We are going to overwrite that entry so we
2271                  * need to clear the map_pid_to_cmdline. Otherwise we
2272                  * would read the new comm for the old pid.
2273                  */
2274                 pid = savedcmd->map_cmdline_to_pid[idx];
2275                 if (pid != NO_CMDLINE_MAP)
2276                         savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
2277
2278                 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2279                 savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
2280
2281                 savedcmd->cmdline_idx = idx;
2282         }
2283
2284         set_cmdline(idx, tsk->comm);
2285
2286         arch_spin_unlock(&trace_cmdline_lock);
2287
2288         return 1;
2289 }
2290
2291 static void __trace_find_cmdline(int pid, char comm[])
2292 {
2293         unsigned map;
2294
2295         if (!pid) {
2296                 strcpy(comm, "<idle>");
2297                 return;
2298         }
2299
2300         if (WARN_ON_ONCE(pid < 0)) {
2301                 strcpy(comm, "<XXX>");
2302                 return;
2303         }
2304
2305         if (pid > PID_MAX_DEFAULT) {
2306                 strcpy(comm, "<...>");
2307                 return;
2308         }
2309
2310         map = savedcmd->map_pid_to_cmdline[pid];
2311         if (map != NO_CMDLINE_MAP)
2312                 strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2313         else
2314                 strcpy(comm, "<...>");
2315 }
2316
2317 void trace_find_cmdline(int pid, char comm[])
2318 {
2319         preempt_disable();
2320         arch_spin_lock(&trace_cmdline_lock);
2321
2322         __trace_find_cmdline(pid, comm);
2323
2324         arch_spin_unlock(&trace_cmdline_lock);
2325         preempt_enable();
2326 }
2327
2328 int trace_find_tgid(int pid)
2329 {
2330         if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
2331                 return 0;
2332
2333         return tgid_map[pid];
2334 }
2335
2336 static int trace_save_tgid(struct task_struct *tsk)
2337 {
2338         /* treat recording of idle task as a success */
2339         if (!tsk->pid)
2340                 return 1;
2341
2342         if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT))
2343                 return 0;
2344
2345         tgid_map[tsk->pid] = tsk->tgid;
2346         return 1;
2347 }
2348
2349 static bool tracing_record_taskinfo_skip(int flags)
2350 {
2351         if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2352                 return true;
2353         if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
2354                 return true;
2355         if (!__this_cpu_read(trace_taskinfo_save))
2356                 return true;
2357         return false;
2358 }
2359
2360 /**
2361  * tracing_record_taskinfo - record the task info of a task
2362  *
2363  * @task:  task to record
2364  * @flags: TRACE_RECORD_CMDLINE for recording comm
2365  *         TRACE_RECORD_TGID for recording tgid
2366  */
2367 void tracing_record_taskinfo(struct task_struct *task, int flags)
2368 {
2369         bool done;
2370
2371         if (tracing_record_taskinfo_skip(flags))
2372                 return;
2373
2374         /*
2375          * Record as much task information as possible. If some fail, continue
2376          * to try to record the others.
2377          */
2378         done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2379         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2380
2381         /* If recording any information failed, retry again soon. */
2382         if (!done)
2383                 return;
2384
2385         __this_cpu_write(trace_taskinfo_save, false);
2386 }
2387
2388 /**
2389  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2390  *
2391  * @prev: previous task during sched_switch
2392  * @next: next task during sched_switch
2393  * @flags: TRACE_RECORD_CMDLINE for recording comm
2394  *         TRACE_RECORD_TGID for recording tgid
2395  */
2396 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2397                                           struct task_struct *next, int flags)
2398 {
2399         bool done;
2400
2401         if (tracing_record_taskinfo_skip(flags))
2402                 return;
2403
2404         /*
2405          * Record as much task information as possible. If some fail, continue
2406          * to try to record the others.
2407          */
2408         done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2409         done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2410         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2411         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2412
2413         /* If recording any information failed, retry again soon. */
2414         if (!done)
2415                 return;
2416
2417         __this_cpu_write(trace_taskinfo_save, false);
2418 }
2419
2420 /* Helpers to record a specific task information */
2421 void tracing_record_cmdline(struct task_struct *task)
2422 {
2423         tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2424 }
2425
2426 void tracing_record_tgid(struct task_struct *task)
2427 {
2428         tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2429 }
2430
2431 /*
2432  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2433  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2434  * simplifies those functions and keeps them in sync.
2435  */
2436 enum print_line_t trace_handle_return(struct trace_seq *s)
2437 {
2438         return trace_seq_has_overflowed(s) ?
2439                 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2440 }
2441 EXPORT_SYMBOL_GPL(trace_handle_return);
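/*
 * Illustrative use (not from this file, field names made up): an event's
 * output callback can end with
 *
 *	trace_seq_printf(&iter->seq, "irq=%d ret=%d\n",
 *			 field->irq, field->ret);
 *	return trace_handle_return(&iter->seq);
 *
 * so that a trace_seq overflow is reported as TRACE_TYPE_PARTIAL_LINE
 * without checking the return of every trace_seq_*() call.
 */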
2442
2443 void
2444 tracing_generic_entry_update(struct trace_entry *entry, unsigned short type,
2445                              unsigned long flags, int pc)
2446 {
2447         struct task_struct *tsk = current;
2448
2449         entry->preempt_count            = pc & 0xff;
2450         entry->pid                      = (tsk) ? tsk->pid : 0;
2451         entry->type                     = type;
2452         entry->flags =
2453 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
2454                 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
2455 #else
2456                 TRACE_FLAG_IRQS_NOSUPPORT |
2457 #endif
2458                 ((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
2459                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
2460                 ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
2461                 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
2462                 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
2463 }
2464 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
2465
2466 struct ring_buffer_event *
2467 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2468                           int type,
2469                           unsigned long len,
2470                           unsigned long flags, int pc)
2471 {
2472         return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
2473 }
2474
2475 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2476 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2477 static int trace_buffered_event_ref;
2478
2479 /**
2480  * trace_buffered_event_enable - enable buffering events
2481  *
2482  * When events are being filtered, it is quicker to use a temporary
2483  * buffer to write the event data into if there's a likely chance
2484  * that it will not be committed. The discard of the ring buffer
2485  * is not as fast as committing, and is much slower than copying
2486  * a commit.
2487  *
2488  * When an event is to be filtered, allocate per cpu buffers to
2489  * write the event data into, and if the event is filtered and discarded
2490  * it is simply dropped; otherwise the entire data is committed
2491  * in one shot.
2492  */
2493 void trace_buffered_event_enable(void)
2494 {
2495         struct ring_buffer_event *event;
2496         struct page *page;
2497         int cpu;
2498
2499         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2500
2501         if (trace_buffered_event_ref++)
2502                 return;
2503
2504         for_each_tracing_cpu(cpu) {
2505                 page = alloc_pages_node(cpu_to_node(cpu),
2506                                         GFP_KERNEL | __GFP_NORETRY, 0);
2507                 if (!page)
2508                         goto failed;
2509
2510                 event = page_address(page);
2511                 memset(event, 0, sizeof(*event));
2512
2513                 per_cpu(trace_buffered_event, cpu) = event;
2514
2515                 preempt_disable();
2516                 if (cpu == smp_processor_id() &&
2517                     this_cpu_read(trace_buffered_event) !=
2518                     per_cpu(trace_buffered_event, cpu))
2519                         WARN_ON_ONCE(1);
2520                 preempt_enable();
2521         }
2522
2523         return;
2524  failed:
2525         trace_buffered_event_disable();
2526 }
2527
2528 static void enable_trace_buffered_event(void *data)
2529 {
2530         /* Probably not needed, but do it anyway */
2531         smp_rmb();
2532         this_cpu_dec(trace_buffered_event_cnt);
2533 }
2534
2535 static void disable_trace_buffered_event(void *data)
2536 {
2537         this_cpu_inc(trace_buffered_event_cnt);
2538 }
2539
2540 /**
2541  * trace_buffered_event_disable - disable buffering events
2542  *
2543  * When a filter is removed, it is faster to not use the buffered
2544  * events, and to commit directly into the ring buffer. Free up
2545  * the temp buffers when there are no more users. This requires
2546  * special synchronization with current events.
2547  */
2548 void trace_buffered_event_disable(void)
2549 {
2550         int cpu;
2551
2552         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2553
2554         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2555                 return;
2556
2557         if (--trace_buffered_event_ref)
2558                 return;
2559
2560         preempt_disable();
2561         /* For each CPU, set the buffer as used. */
2562         smp_call_function_many(tracing_buffer_mask,
2563                                disable_trace_buffered_event, NULL, 1);
2564         preempt_enable();
2565
2566         /* Wait for all current users to finish */
2567         synchronize_rcu();
2568
2569         for_each_tracing_cpu(cpu) {
2570                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2571                 per_cpu(trace_buffered_event, cpu) = NULL;
2572         }
2573         /*
2574          * Make sure trace_buffered_event is NULL before clearing
2575          * trace_buffered_event_cnt.
2576          */
2577         smp_wmb();
2578
2579         preempt_disable();
2580         /* Do the work on each cpu */
2581         smp_call_function_many(tracing_buffer_mask,
2582                                enable_trace_buffered_event, NULL, 1);
2583         preempt_enable();
2584 }
2585
2586 static struct trace_buffer *temp_buffer;
2587
2588 struct ring_buffer_event *
2589 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2590                           struct trace_event_file *trace_file,
2591                           int type, unsigned long len,
2592                           unsigned long flags, int pc)
2593 {
2594         struct ring_buffer_event *entry;
2595         int val;
2596
2597         *current_rb = trace_file->tr->array_buffer.buffer;
2598
2599         if (!ring_buffer_time_stamp_abs(*current_rb) && (trace_file->flags &
2600              (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2601             (entry = this_cpu_read(trace_buffered_event))) {
2602                 /* Try to use the per cpu buffer first */
2603                 val = this_cpu_inc_return(trace_buffered_event_cnt);
2604                 if (val == 1) {
2605                         trace_event_setup(entry, type, flags, pc);
2606                         entry->array[0] = len;
2607                         return entry;
2608                 }
2609                 this_cpu_dec(trace_buffered_event_cnt);
2610         }
2611
2612         entry = __trace_buffer_lock_reserve(*current_rb,
2613                                             type, len, flags, pc);
2614         /*
2615          * If tracing is off, but we have triggers enabled
2616          * we still need to look at the event data. Use the temp_buffer
2617          * to store the trace event for the trigger to use. It's recursion
2618          * safe and will not be recorded anywhere.
2619          */
2620         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2621                 *current_rb = temp_buffer;
2622                 entry = __trace_buffer_lock_reserve(*current_rb,
2623                                                     type, len, flags, pc);
2624         }
2625         return entry;
2626 }
2627 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2628
2629 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2630 static DEFINE_MUTEX(tracepoint_printk_mutex);
2631
2632 static void output_printk(struct trace_event_buffer *fbuffer)
2633 {
2634         struct trace_event_call *event_call;
2635         struct trace_event_file *file;
2636         struct trace_event *event;
2637         unsigned long flags;
2638         struct trace_iterator *iter = tracepoint_print_iter;
2639
2640         /* We should never get here if iter is NULL */
2641         if (WARN_ON_ONCE(!iter))
2642                 return;
2643
2644         event_call = fbuffer->trace_file->event_call;
2645         if (!event_call || !event_call->event.funcs ||
2646             !event_call->event.funcs->trace)
2647                 return;
2648
2649         file = fbuffer->trace_file;
2650         if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2651             (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2652              !filter_match_preds(file->filter, fbuffer->entry)))
2653                 return;
2654
2655         event = &fbuffer->trace_file->event_call->event;
2656
2657         spin_lock_irqsave(&tracepoint_iter_lock, flags);
2658         trace_seq_init(&iter->seq);
2659         iter->ent = fbuffer->entry;
2660         event_call->event.funcs->trace(iter, 0, event);
2661         trace_seq_putc(&iter->seq, 0);
2662         printk("%s", iter->seq.buffer);
2663
2664         spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2665 }
2666
2667 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2668                              void *buffer, size_t *lenp,
2669                              loff_t *ppos)
2670 {
2671         int save_tracepoint_printk;
2672         int ret;
2673
2674         mutex_lock(&tracepoint_printk_mutex);
2675         save_tracepoint_printk = tracepoint_printk;
2676
2677         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2678
2679         /*
2680          * This will force exiting early, as tracepoint_printk
2681          * is always zero when tracepoint_printk_iter is not allocated
2682          * is always zero when tracepoint_print_iter is not allocated
2683         if (!tracepoint_print_iter)
2684                 tracepoint_printk = 0;
2685
2686         if (save_tracepoint_printk == tracepoint_printk)
2687                 goto out;
2688
2689         if (tracepoint_printk)
2690                 static_key_enable(&tracepoint_printk_key.key);
2691         else
2692                 static_key_disable(&tracepoint_printk_key.key);
2693
2694  out:
2695         mutex_unlock(&tracepoint_printk_mutex);
2696
2697         return ret;
2698 }
2699
2700 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2701 {
2702         if (static_key_false(&tracepoint_printk_key.key))
2703                 output_printk(fbuffer);
2704
2705         event_trigger_unlock_commit_regs(fbuffer->trace_file, fbuffer->buffer,
2706                                     fbuffer->event, fbuffer->entry,
2707                                     fbuffer->flags, fbuffer->pc, fbuffer->regs);
2708 }
2709 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2710
2711 /*
2712  * Skip 3:
2713  *
2714  *   trace_buffer_unlock_commit_regs()
2715  *   trace_event_buffer_commit()
2716  *   trace_event_raw_event_xxx()
2717  */
2718 # define STACK_SKIP 3
2719
2720 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2721                                      struct trace_buffer *buffer,
2722                                      struct ring_buffer_event *event,
2723                                      unsigned long flags, int pc,
2724                                      struct pt_regs *regs)
2725 {
2726         __buffer_unlock_commit(buffer, event);
2727
2728         /*
2729          * If regs is not set, then skip the necessary functions.
2730          * Note, we can still get here via blktrace, wakeup tracer
2731          * and mmiotrace, but that's ok if they lose a function or
2732          * two. They are not that meaningful.
2733          */
2734         ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs);
2735         ftrace_trace_userstack(buffer, flags, pc);
2736 }
2737
2738 /*
2739  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2740  */
2741 void
2742 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2743                                    struct ring_buffer_event *event)
2744 {
2745         __buffer_unlock_commit(buffer, event);
2746 }
2747
2748 static void
2749 trace_process_export(struct trace_export *export,
2750                struct ring_buffer_event *event)
2751 {
2752         struct trace_entry *entry;
2753         unsigned int size = 0;
2754
2755         entry = ring_buffer_event_data(event);
2756         size = ring_buffer_event_length(event);
2757         export->write(export, entry, size);
2758 }
2759
2760 static DEFINE_MUTEX(ftrace_export_lock);
2761
2762 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
2763
2764 static DEFINE_STATIC_KEY_FALSE(ftrace_exports_enabled);
2765
2766 static inline void ftrace_exports_enable(void)
2767 {
2768         static_branch_enable(&ftrace_exports_enabled);
2769 }
2770
2771 static inline void ftrace_exports_disable(void)
2772 {
2773         static_branch_disable(&ftrace_exports_enabled);
2774 }
2775
2776 static void ftrace_exports(struct ring_buffer_event *event)
2777 {
2778         struct trace_export *export;
2779
2780         preempt_disable_notrace();
2781
2782         export = rcu_dereference_raw_check(ftrace_exports_list);
2783         while (export) {
2784                 trace_process_export(export, event);
2785                 export = rcu_dereference_raw_check(export->next);
2786         }
2787
2788         preempt_enable_notrace();
2789 }
2790
2791 static inline void
2792 add_trace_export(struct trace_export **list, struct trace_export *export)
2793 {
2794         rcu_assign_pointer(export->next, *list);
2795         /*
2796          * We are entering export into the list but another
2797          * CPU might be walking that list. We need to make sure
2798          * the export->next pointer is valid before another CPU sees
2799          * the export pointer included into the list.
2800          */
2801         rcu_assign_pointer(*list, export);
2802 }
2803
2804 static inline int
2805 rm_trace_export(struct trace_export **list, struct trace_export *export)
2806 {
2807         struct trace_export **p;
2808
2809         for (p = list; *p != NULL; p = &(*p)->next)
2810                 if (*p == export)
2811                         break;
2812
2813         if (*p != export)
2814                 return -1;
2815
2816         rcu_assign_pointer(*p, (*p)->next);
2817
2818         return 0;
2819 }
2820
2821 static inline void
2822 add_ftrace_export(struct trace_export **list, struct trace_export *export)
2823 {
2824         if (*list == NULL)
2825                 ftrace_exports_enable();
2826
2827         add_trace_export(list, export);
2828 }
2829
2830 static inline int
2831 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
2832 {
2833         int ret;
2834
2835         ret = rm_trace_export(list, export);
2836         if (*list == NULL)
2837                 ftrace_exports_disable();
2838
2839         return ret;
2840 }
2841
2842 int register_ftrace_export(struct trace_export *export)
2843 {
2844         if (WARN_ON_ONCE(!export->write))
2845                 return -1;
2846
2847         mutex_lock(&ftrace_export_lock);
2848
2849         add_ftrace_export(&ftrace_exports_list, export);
2850
2851         mutex_unlock(&ftrace_export_lock);
2852
2853         return 0;
2854 }
2855 EXPORT_SYMBOL_GPL(register_ftrace_export);
2856
2857 int unregister_ftrace_export(struct trace_export *export)
2858 {
2859         int ret;
2860
2861         mutex_lock(&ftrace_export_lock);
2862
2863         ret = rm_ftrace_export(&ftrace_exports_list, export);
2864
2865         mutex_unlock(&ftrace_export_lock);
2866
2867         return ret;
2868 }
2869 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
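/*
 * Illustrative sketch (not from this file) of an ftrace export user: the
 * exporter supplies a write() callback and registers itself, roughly:
 *
 *	static void my_export_write(struct trace_export *export,
 *				    const void *entry, unsigned int size)
 *	{
 *		// forward the raw trace entry bytes to some transport
 *	}
 *
 *	static struct trace_export my_export = {
 *		.write	= my_export_write,
 *	};
 *
 *	register_ftrace_export(&my_export);
 *
 * The my_export names are hypothetical.  Note that the callback is invoked
 * from ftrace_exports() with preemption disabled, so it must not sleep.
 */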
2870
2871 void
2872 trace_function(struct trace_array *tr,
2873                unsigned long ip, unsigned long parent_ip, unsigned long flags,
2874                int pc)
2875 {
2876         struct trace_event_call *call = &event_function;
2877         struct trace_buffer *buffer = tr->array_buffer.buffer;
2878         struct ring_buffer_event *event;
2879         struct ftrace_entry *entry;
2880
2881         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2882                                             flags, pc);
2883         if (!event)
2884                 return;
2885         entry   = ring_buffer_event_data(event);
2886         entry->ip                       = ip;
2887         entry->parent_ip                = parent_ip;
2888
2889         if (!call_filter_check_discard(call, entry, buffer, event)) {
2890                 if (static_branch_unlikely(&ftrace_exports_enabled))
2891                         ftrace_exports(event);
2892                 __buffer_unlock_commit(buffer, event);
2893         }
2894 }
2895
2896 #ifdef CONFIG_STACKTRACE
2897
2898 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2899 #define FTRACE_KSTACK_NESTING   4
2900
2901 #define FTRACE_KSTACK_ENTRIES   (PAGE_SIZE / FTRACE_KSTACK_NESTING)
2902
2903 struct ftrace_stack {
2904         unsigned long           calls[FTRACE_KSTACK_ENTRIES];
2905 };
2906
2907
2908 struct ftrace_stacks {
2909         struct ftrace_stack     stacks[FTRACE_KSTACK_NESTING];
2910 };
2911
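/*
 * Per-cpu scratch space for kernel stack traces.  ftrace_stack_reserve
 * counts how many nesting levels are currently in use on this CPU, so an
 * interrupt or NMI arriving while a stack trace is being recorded simply
 * takes the next slot in ftrace_stacks (see __ftrace_trace_stack()).
 */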
2912 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
2913 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2914
2915 static void __ftrace_trace_stack(struct trace_buffer *buffer,
2916                                  unsigned long flags,
2917                                  int skip, int pc, struct pt_regs *regs)
2918 {
2919         struct trace_event_call *call = &event_kernel_stack;
2920         struct ring_buffer_event *event;
2921         unsigned int size, nr_entries;
2922         struct ftrace_stack *fstack;
2923         struct stack_entry *entry;
2924         int stackidx;
2925
2926         /*
2927          * Add one, for this function and the call to stack_trace_save()
2928          * If regs is set, then these functions will not be in the way.
2929          */
2930 #ifndef CONFIG_UNWINDER_ORC
2931         if (!regs)
2932                 skip++;
2933 #endif
2934
2935         /*
2936          * Since events can happen in NMIs there's no safe way to
2937          * use a single per-cpu stack. Instead we reserve one of the
2938          * FTRACE_KSTACK_NESTING per-cpu stacks, and an interrupt or NMI
2939          * that comes in while one is in use simply takes the next slot.
2940          */
2941         preempt_disable_notrace();
2942
2943         stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
2944
2945         /* This should never happen. If it does, yell once and skip */
2946         if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
2947                 goto out;
2948
2949         /*
2950          * The above __this_cpu_inc_return() is 'atomic' cpu local. An
2951          * interrupt will either see the value pre increment or post
2952          * increment. If the interrupt happens pre increment it will have
2953          * restored the counter when it returns.  We just need a barrier to
2954          * keep gcc from moving things around.
2955          */
2956         barrier();
2957
2958         fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
2959         size = ARRAY_SIZE(fstack->calls);
2960
2961         if (regs) {
2962                 nr_entries = stack_trace_save_regs(regs, fstack->calls,
2963                                                    size, skip);
2964         } else {
2965                 nr_entries = stack_trace_save(fstack->calls, size, skip);
2966         }
2967
2968         size = nr_entries * sizeof(unsigned long);
2969         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2970                                             sizeof(*entry) + size, flags, pc);
2971         if (!event)
2972                 goto out;
2973         entry = ring_buffer_event_data(event);
2974
2975         memcpy(&entry->caller, fstack->calls, size);
2976         entry->size = nr_entries;
2977
2978         if (!call_filter_check_discard(call, entry, buffer, event))
2979                 __buffer_unlock_commit(buffer, event);
2980
2981  out:
2982         /* Again, don't let gcc optimize things here */
2983         barrier();
2984         __this_cpu_dec(ftrace_stack_reserve);
2985         preempt_enable_notrace();
2986
2987 }
2988
2989 static inline void ftrace_trace_stack(struct trace_array *tr,
2990                                       struct trace_buffer *buffer,
2991                                       unsigned long flags,
2992                                       int skip, int pc, struct pt_regs *regs)
2993 {
2994         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2995                 return;
2996
2997         __ftrace_trace_stack(buffer, flags, skip, pc, regs);
2998 }
2999
3000 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
3001                    int pc)
3002 {
3003         struct trace_buffer *buffer = tr->array_buffer.buffer;
3004
3005         if (rcu_is_watching()) {
3006                 __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
3007                 return;
3008         }
3009
3010         /*
3011          * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
3012          * but if the above rcu_is_watching() failed, then the NMI
3013          * triggered someplace critical, and rcu_irq_enter() should
3014          * not be called from NMI.
3015          */
3016         if (unlikely(in_nmi()))
3017                 return;
3018
3019         rcu_irq_enter_irqson();
3020         __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
3021         rcu_irq_exit_irqson();
3022 }
3023
3024 /**
3025  * trace_dump_stack - record a stack back trace in the trace buffer
3026  * @skip: Number of functions to skip (helper handlers)
3027  */
3028 void trace_dump_stack(int skip)
3029 {
3030         unsigned long flags;
3031
3032         if (tracing_disabled || tracing_selftest_running)
3033                 return;
3034
3035         local_save_flags(flags);
3036
3037 #ifndef CONFIG_UNWINDER_ORC
3038         /* Skip 1 to skip this function. */
3039         skip++;
3040 #endif
3041         __ftrace_trace_stack(global_trace.array_buffer.buffer,
3042                              flags, skip, preempt_count(), NULL);
3043 }
3044 EXPORT_SYMBOL_GPL(trace_dump_stack);
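/*
 * Illustrative use (not from this file): sprinkling trace_dump_stack(0)
 * into a code path under investigation records the current backtrace in
 * the trace buffer, which is usually far less disruptive than dumping it
 * to the console.
 */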
3045
3046 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3047 static DEFINE_PER_CPU(int, user_stack_count);
3048
3049 static void
3050 ftrace_trace_userstack(struct trace_buffer *buffer, unsigned long flags, int pc)
3051 {
3052         struct trace_event_call *call = &event_user_stack;
3053         struct ring_buffer_event *event;
3054         struct userstack_entry *entry;
3055
3056         if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
3057                 return;
3058
3059         /*
3060          * NMIs cannot handle page faults, even with fixups.
3061          * Saving the user stack can (and often does) fault.
3062          */
3063         if (unlikely(in_nmi()))
3064                 return;
3065
3066         /*
3067          * prevent recursion, since the user stack tracing may
3068          * trigger other kernel events.
3069          */
3070         preempt_disable();
3071         if (__this_cpu_read(user_stack_count))
3072                 goto out;
3073
3074         __this_cpu_inc(user_stack_count);
3075
3076         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3077                                             sizeof(*entry), flags, pc);
3078         if (!event)
3079                 goto out_drop_count;
3080         entry   = ring_buffer_event_data(event);
3081
3082         entry->tgid             = current->tgid;
3083         memset(&entry->caller, 0, sizeof(entry->caller));
3084
3085         stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3086         if (!call_filter_check_discard(call, entry, buffer, event))
3087                 __buffer_unlock_commit(buffer, event);
3088
3089  out_drop_count:
3090         __this_cpu_dec(user_stack_count);
3091  out:
3092         preempt_enable();
3093 }
3094 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3095 static void ftrace_trace_userstack(struct trace_buffer *buffer,
3096                                    unsigned long flags, int pc)
3097 {
3098 }
3099 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3100
3101 #endif /* CONFIG_STACKTRACE */
3102
3103 /* created for use with alloc_percpu */
3104 struct trace_buffer_struct {
3105         int nesting;
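        /* One scratch buffer per nesting level (see get_trace_buf()) */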
3106         char buffer[4][TRACE_BUF_SIZE];
3107 };
3108
3109 static struct trace_buffer_struct *trace_percpu_buffer;
3110
3111 /*
3112  * This allows for lockless recording.  If we're nested too deeply, then
3113  * this returns NULL.
3114  */
3115 static char *get_trace_buf(void)
3116 {
3117         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3118
3119         if (!buffer || buffer->nesting >= 4)
3120                 return NULL;
3121
3122         buffer->nesting++;
3123
3124         /* Interrupts must see nesting incremented before we use the buffer */
3125         barrier();
3126         return &buffer->buffer[buffer->nesting - 1][0];
3127 }
3128
3129 static void put_trace_buf(void)
3130 {
3131         /* Don't let the decrement of nesting leak before this */
3132         barrier();
3133         this_cpu_dec(trace_percpu_buffer->nesting);
3134 }
3135
3136 static int alloc_percpu_trace_buffer(void)
3137 {
3138         struct trace_buffer_struct *buffers;
3139
3140         buffers = alloc_percpu(struct trace_buffer_struct);
3141         if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3142                 return -ENOMEM;
3143
3144         trace_percpu_buffer = buffers;
3145         return 0;
3146 }
3147
3148 static int buffers_allocated;
3149
3150 void trace_printk_init_buffers(void)
3151 {
3152         if (buffers_allocated)
3153                 return;
3154
3155         if (alloc_percpu_trace_buffer())
3156                 return;
3157
3158         /* trace_printk() is for debug use only. Don't use it in production. */
3159
3160         pr_warn("\n");
3161         pr_warn("**********************************************************\n");
3162         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3163         pr_warn("**                                                      **\n");
3164         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3165         pr_warn("**                                                      **\n");
3166         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3167         pr_warn("** unsafe for production use.                           **\n");
3168         pr_warn("**                                                      **\n");
3169         pr_warn("** If you see this message and you are not debugging    **\n");
3170         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3171         pr_warn("**                                                      **\n");
3172         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3173         pr_warn("**********************************************************\n");
3174
3175         /* Expand the buffers to set size */
3176         tracing_update_buffers();
3177
3178         buffers_allocated = 1;
3179
3180         /*
3181          * trace_printk_init_buffers() can be called by modules.
3182          * If that happens, then we need to start cmdline recording
3183          * directly here. If the global_trace.array_buffer.buffer is already
3184          * allocated here, then this was called by module code.
3185          */
3186         if (global_trace.array_buffer.buffer)
3187                 tracing_start_cmdline_record();
3188 }
3189 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3190
3191 void trace_printk_start_comm(void)
3192 {
3193         /* Start tracing comms if trace printk is set */
3194         if (!buffers_allocated)
3195                 return;
3196         tracing_start_cmdline_record();
3197 }
3198
3199 static void trace_printk_start_stop_comm(int enabled)
3200 {
3201         if (!buffers_allocated)
3202                 return;
3203
3204         if (enabled)
3205                 tracing_start_cmdline_record();
3206         else
3207                 tracing_stop_cmdline_record();
3208 }
3209
3210 /**
3211  * trace_vbprintk - write binary msg to tracing buffer
3212  * @ip:    The address of the caller
3213  * @fmt:   The string format to write to the buffer
3214  * @args:  Arguments for @fmt
3215  */
3216 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3217 {
3218         struct trace_event_call *call = &event_bprint;
3219         struct ring_buffer_event *event;
3220         struct trace_buffer *buffer;
3221         struct trace_array *tr = &global_trace;
3222         struct bprint_entry *entry;
3223         unsigned long flags;
3224         char *tbuffer;
3225         int len = 0, size, pc;
3226
3227         if (unlikely(tracing_selftest_running || tracing_disabled))
3228                 return 0;
3229
3230         /* Don't pollute graph traces with trace_vprintk internals */
3231         pause_graph_tracing();
3232
3233         pc = preempt_count();
3234         preempt_disable_notrace();
3235
3236         tbuffer = get_trace_buf();
3237         if (!tbuffer) {
3238                 len = 0;
3239                 goto out_nobuffer;
3240         }
3241
3242         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3243
3244         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3245                 goto out_put;
3246
3247         local_save_flags(flags);
3248         size = sizeof(*entry) + sizeof(u32) * len;
3249         buffer = tr->array_buffer.buffer;
3250         ring_buffer_nest_start(buffer);
3251         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3252                                             flags, pc);
3253         if (!event)
3254                 goto out;
3255         entry = ring_buffer_event_data(event);
3256         entry->ip                       = ip;
3257         entry->fmt                      = fmt;
3258
3259         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3260         if (!call_filter_check_discard(call, entry, buffer, event)) {
3261                 __buffer_unlock_commit(buffer, event);
3262                 ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
3263         }
3264
3265 out:
3266         ring_buffer_nest_end(buffer);
3267 out_put:
3268         put_trace_buf();
3269
3270 out_nobuffer:
3271         preempt_enable_notrace();
3272         unpause_graph_tracing();
3273
3274         return len;
3275 }
3276 EXPORT_SYMBOL_GPL(trace_vbprintk);
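
/*
 * A minimal sketch of a varargs front end to trace_vbprintk(), in the same
 * spirit as __trace_bprintk() in trace_printk.c. The name example_bprintk()
 * is hypothetical.
 */
static __maybe_unused int example_bprintk(unsigned long ip, const char *fmt, ...)
{
        va_list ap;
        int ret;

        va_start(ap, fmt);
        ret = trace_vbprintk(ip, fmt, ap);
        va_end(ap);

        return ret;
}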
3277
3278 __printf(3, 0)
3279 static int
3280 __trace_array_vprintk(struct trace_buffer *buffer,
3281                       unsigned long ip, const char *fmt, va_list args)
3282 {
3283         struct trace_event_call *call = &event_print;
3284         struct ring_buffer_event *event;
3285         int len = 0, size, pc;
3286         struct print_entry *entry;
3287         unsigned long flags;
3288         char *tbuffer;
3289
3290         if (tracing_disabled || tracing_selftest_running)
3291                 return 0;
3292
3293         /* Don't pollute graph traces with trace_vprintk internals */
3294         pause_graph_tracing();
3295
3296         pc = preempt_count();
3297         preempt_disable_notrace();
3298
3299
3300         tbuffer = get_trace_buf();
3301         if (!tbuffer) {
3302                 len = 0;
3303                 goto out_nobuffer;
3304         }
3305
3306         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3307
3308         local_save_flags(flags);
3309         size = sizeof(*entry) + len + 1;
3310         ring_buffer_nest_start(buffer);
3311         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3312                                             flags, pc);
3313         if (!event)
3314                 goto out;
3315         entry = ring_buffer_event_data(event);
3316         entry->ip = ip;
3317
3318         memcpy(&entry->buf, tbuffer, len + 1);
3319         if (!call_filter_check_discard(call, entry, buffer, event)) {
3320                 __buffer_unlock_commit(buffer, event);
3321                 ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
3322         }
3323
3324 out:
3325         ring_buffer_nest_end(buffer);
3326         put_trace_buf();
3327
3328 out_nobuffer:
3329         preempt_enable_notrace();
3330         unpause_graph_tracing();
3331
3332         return len;
3333 }
3334
3335 __printf(3, 0)
3336 int trace_array_vprintk(struct trace_array *tr,
3337                         unsigned long ip, const char *fmt, va_list args)
3338 {
3339         return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3340 }
3341
3342 __printf(3, 0)
3343 int trace_array_printk(struct trace_array *tr,
3344                        unsigned long ip, const char *fmt, ...)
3345 {
3346         int ret;
3347         va_list ap;
3348
3349         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3350                 return 0;
3351
3352         if (!tr)
3353                 return -ENOENT;
3354
3355         va_start(ap, fmt);
3356         ret = trace_array_vprintk(tr, ip, fmt, ap);
3357         va_end(ap);
3358         return ret;
3359 }
3360 EXPORT_SYMBOL_GPL(trace_array_printk);
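
/*
 * A minimal sketch of writing into a named trace instance with
 * trace_array_printk(). The instance name "example" is hypothetical;
 * trace_array_get_by_name() creates the instance if needed and takes a
 * reference that trace_array_put() drops. Note that, as checked above,
 * output still depends on the global "printk" trace option being set.
 */
static __maybe_unused void example_instance_printk(void)
{
        struct trace_array *tr;

        tr = trace_array_get_by_name("example");
        if (!tr)
                return;

        trace_array_printk(tr, _THIS_IP_, "hello from %s\n", __func__);

        trace_array_put(tr);
}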
3361
3362 __printf(3, 4)
3363 int trace_array_printk_buf(struct trace_buffer *buffer,
3364                            unsigned long ip, const char *fmt, ...)
3365 {
3366         int ret;
3367         va_list ap;
3368
3369         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3370                 return 0;
3371
3372         va_start(ap, fmt);
3373         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3374         va_end(ap);
3375         return ret;
3376 }
3377
3378 __printf(2, 0)
3379 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3380 {
3381         return trace_array_vprintk(&global_trace, ip, fmt, args);
3382 }
3383 EXPORT_SYMBOL_GPL(trace_vprintk);
3384
3385 static void trace_iterator_increment(struct trace_iterator *iter)
3386 {
3387         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3388
3389         iter->idx++;
3390         if (buf_iter)
3391                 ring_buffer_iter_advance(buf_iter);
3392 }
3393
3394 static struct trace_entry *
3395 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3396                 unsigned long *lost_events)
3397 {
3398         struct ring_buffer_event *event;
3399         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3400
3401         if (buf_iter) {
3402                 event = ring_buffer_iter_peek(buf_iter, ts);
3403                 if (lost_events)
3404                         *lost_events = ring_buffer_iter_dropped(buf_iter) ?
3405                                 (unsigned long)-1 : 0;
3406         } else {
3407                 event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3408                                          lost_events);
3409         }
3410
3411         if (event) {
3412                 iter->ent_size = ring_buffer_event_length(event);
3413                 return ring_buffer_event_data(event);
3414         }
3415         iter->ent_size = 0;
3416         return NULL;
3417 }
3418
3419 static struct trace_entry *
3420 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3421                   unsigned long *missing_events, u64 *ent_ts)
3422 {
3423         struct trace_buffer *buffer = iter->array_buffer->buffer;
3424         struct trace_entry *ent, *next = NULL;
3425         unsigned long lost_events = 0, next_lost = 0;
3426         int cpu_file = iter->cpu_file;
3427         u64 next_ts = 0, ts;
3428         int next_cpu = -1;
3429         int next_size = 0;
3430         int cpu;
3431
3432         /*
3433          * If we are in a per_cpu trace file, don't bother iterating over
3434          * all CPUs; peek at that CPU directly.
3435          */
3436         if (cpu_file > RING_BUFFER_ALL_CPUS) {
3437                 if (ring_buffer_empty_cpu(buffer, cpu_file))
3438                         return NULL;
3439                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3440                 if (ent_cpu)
3441                         *ent_cpu = cpu_file;
3442
3443                 return ent;
3444         }
3445
3446         for_each_tracing_cpu(cpu) {
3447
3448                 if (ring_buffer_empty_cpu(buffer, cpu))
3449                         continue;
3450
3451                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3452
3453                 /*
3454                  * Pick the entry with the smallest timestamp:
3455                  */
3456                 if (ent && (!next || ts < next_ts)) {
3457                         next = ent;
3458                         next_cpu = cpu;
3459                         next_ts = ts;
3460                         next_lost = lost_events;
3461                         next_size = iter->ent_size;
3462                 }
3463         }
3464
3465         iter->ent_size = next_size;
3466
3467         if (ent_cpu)
3468                 *ent_cpu = next_cpu;
3469
3470         if (ent_ts)
3471                 *ent_ts = next_ts;
3472
3473         if (missing_events)
3474                 *missing_events = next_lost;
3475
3476         return next;
3477 }
3478
3479 #define STATIC_TEMP_BUF_SIZE    128
3480 static char static_temp_buf[STATIC_TEMP_BUF_SIZE];
3481
3482 /* Find the next real entry, without updating the iterator itself */
3483 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3484                                           int *ent_cpu, u64 *ent_ts)
3485 {
3486         /* __find_next_entry will reset ent_size */
3487         int ent_size = iter->ent_size;
3488         struct trace_entry *entry;
3489
3490         /*
3491          * If called from ftrace_dump(), then the iter->temp buffer
3492          * will be the static_temp_buf and not created from kmalloc.
3493          * If the entry size is greater than the buffer, we cannot
3494          * save it. Just return NULL in that case. This is only
3495          * used to add markers when two consecutive events' time
3496          * stamps have a large delta. See trace_print_lat_context().
3497          */
3498         if (iter->temp == static_temp_buf &&
3499             STATIC_TEMP_BUF_SIZE < ent_size)
3500                 return NULL;
3501
3502         /*
3503          * __find_next_entry() may call peek_next_entry(), which may
3504          * call ring_buffer_peek(), which in turn may leave the contents of
3505          * iter->ent undefined. Copy iter->ent now.
3506          */
3507         if (iter->ent && iter->ent != iter->temp) {
3508                 if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3509                     !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3510                         kfree(iter->temp);
3511                         iter->temp = kmalloc(iter->ent_size, GFP_KERNEL);
3512                         if (!iter->temp)
3513                                 return NULL;
3514                 }
3515                 memcpy(iter->temp, iter->ent, iter->ent_size);
3516                 iter->temp_size = iter->ent_size;
3517                 iter->ent = iter->temp;
3518         }
3519         entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3520         /* Put back the original ent_size */
3521         iter->ent_size = ent_size;
3522
3523         return entry;
3524 }
3525
3526 /* Find the next real entry, and increment the iterator to the next entry */
3527 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3528 {
3529         iter->ent = __find_next_entry(iter, &iter->cpu,
3530                                       &iter->lost_events, &iter->ts);
3531
3532         if (iter->ent)
3533                 trace_iterator_increment(iter);
3534
3535         return iter->ent ? iter : NULL;
3536 }
3537
3538 static void trace_consume(struct trace_iterator *iter)
3539 {
3540         ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3541                             &iter->lost_events);
3542 }
3543
3544 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3545 {
3546         struct trace_iterator *iter = m->private;
3547         int i = (int)*pos;
3548         void *ent;
3549
3550         WARN_ON_ONCE(iter->leftover);
3551
3552         (*pos)++;
3553
3554         /* can't go backwards */
3555         if (iter->idx > i)
3556                 return NULL;
3557
3558         if (iter->idx < 0)
3559                 ent = trace_find_next_entry_inc(iter);
3560         else
3561                 ent = iter;
3562
3563         while (ent && iter->idx < i)
3564                 ent = trace_find_next_entry_inc(iter);
3565
3566         iter->pos = *pos;
3567
3568         return ent;
3569 }
3570
3571 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3572 {
3573         struct ring_buffer_event *event;
3574         struct ring_buffer_iter *buf_iter;
3575         unsigned long entries = 0;
3576         u64 ts;
3577
3578         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
3579
3580         buf_iter = trace_buffer_iter(iter, cpu);
3581         if (!buf_iter)
3582                 return;
3583
3584         ring_buffer_iter_reset(buf_iter);
3585
3586         /*
3587          * With the max latency tracers, a reset may never have taken
3588          * place on a CPU. This is evident when the timestamp is
3589          * before the start of the buffer.
3590          */
3591         while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
3592                 if (ts >= iter->array_buffer->time_start)
3593                         break;
3594                 entries++;
3595                 ring_buffer_iter_advance(buf_iter);
3596         }
3597
3598         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
3599 }
3600
3601 /*
3602  * The current tracer is copied to avoid holding a global lock
3603  * all around.
3604  */
3605 static void *s_start(struct seq_file *m, loff_t *pos)
3606 {
3607         struct trace_iterator *iter = m->private;
3608         struct trace_array *tr = iter->tr;
3609         int cpu_file = iter->cpu_file;
3610         void *p = NULL;
3611         loff_t l = 0;
3612         int cpu;
3613
3614         /*
3615          * Copy the tracer to avoid using a global lock all around.
3616          * iter->trace is a copy of current_trace; the pointer to the
3617          * name may be used instead of a strcmp(), as iter->trace->name
3618          * will point to the same string as current_trace->name.
3619          */
3620         mutex_lock(&trace_types_lock);
3621         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3622                 *iter->trace = *tr->current_trace;
3623         mutex_unlock(&trace_types_lock);
3624
3625 #ifdef CONFIG_TRACER_MAX_TRACE
3626         if (iter->snapshot && iter->trace->use_max_tr)
3627                 return ERR_PTR(-EBUSY);
3628 #endif
3629
3630         if (!iter->snapshot)
3631                 atomic_inc(&trace_record_taskinfo_disabled);
3632
3633         if (*pos != iter->pos) {
3634                 iter->ent = NULL;
3635                 iter->cpu = 0;
3636                 iter->idx = -1;
3637
3638                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
3639                         for_each_tracing_cpu(cpu)
3640                                 tracing_iter_reset(iter, cpu);
3641                 } else
3642                         tracing_iter_reset(iter, cpu_file);
3643
3644                 iter->leftover = 0;
3645                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3646                         ;
3647
3648         } else {
3649                 /*
3650                  * If we overflowed the seq_file before, then we want
3651                  * to just reuse the trace_seq buffer again.
3652                  * to just reuse the trace_seq buffer.
3653                 if (iter->leftover)
3654                         p = iter;
3655                 else {
3656                         l = *pos - 1;
3657                         p = s_next(m, p, &l);
3658                 }
3659         }
3660
3661         trace_event_read_lock();
3662         trace_access_lock(cpu_file);
3663         return p;
3664 }
3665
3666 static void s_stop(struct seq_file *m, void *p)
3667 {
3668         struct trace_iterator *iter = m->private;
3669
3670 #ifdef CONFIG_TRACER_MAX_TRACE
3671         if (iter->snapshot && iter->trace->use_max_tr)
3672                 return;
3673 #endif
3674
3675         if (!iter->snapshot)
3676                 atomic_dec(&trace_record_taskinfo_disabled);
3677
3678         trace_access_unlock(iter->cpu_file);
3679         trace_event_read_unlock();
3680 }
3681
3682 static void
3683 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
3684                       unsigned long *entries, int cpu)
3685 {
3686         unsigned long count;
3687
3688         count = ring_buffer_entries_cpu(buf->buffer, cpu);
3689         /*
3690          * If this buffer has skipped entries, then we hold all
3691          * entries for the trace and we need to ignore the
3692          * ones before the buffer's start time stamp.
3693          */
3694         if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3695                 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3696                 /* total is the same as the entries */
3697                 *total = count;
3698         } else
3699                 *total = count +
3700                         ring_buffer_overrun_cpu(buf->buffer, cpu);
3701         *entries = count;
3702 }
3703
3704 static void
3705 get_total_entries(struct array_buffer *buf,
3706                   unsigned long *total, unsigned long *entries)
3707 {
3708         unsigned long t, e;
3709         int cpu;
3710
3711         *total = 0;
3712         *entries = 0;
3713
3714         for_each_tracing_cpu(cpu) {
3715                 get_total_entries_cpu(buf, &t, &e, cpu);
3716                 *total += t;
3717                 *entries += e;
3718         }
3719 }
3720
3721 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
3722 {
3723         unsigned long total, entries;
3724
3725         if (!tr)
3726                 tr = &global_trace;
3727
3728         get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
3729
3730         return entries;
3731 }
3732
3733 unsigned long trace_total_entries(struct trace_array *tr)
3734 {
3735         unsigned long total, entries;
3736
3737         if (!tr)
3738                 tr = &global_trace;
3739
3740         get_total_entries(&tr->array_buffer, &total, &entries);
3741
3742         return entries;
3743 }
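
/*
 * A minimal sketch of using the entry counters above; passing NULL selects
 * the top-level (global) trace array. example_buffer_has_entries() is a
 * hypothetical helper.
 */
static __maybe_unused bool example_buffer_has_entries(void)
{
        return trace_total_entries(NULL) != 0;
}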
3744
3745 static void print_lat_help_header(struct seq_file *m)
3746 {
3747         seq_puts(m, "#                  _------=> CPU#            \n"
3748                     "#                 / _-----=> irqs-off        \n"
3749                     "#                | / _----=> need-resched    \n"
3750                     "#                || / _---=> hardirq/softirq \n"
3751                     "#                ||| / _--=> preempt-depth   \n"
3752                     "#                |||| /     delay            \n"
3753                     "#  cmd     pid   ||||| time  |   caller      \n"
3754                     "#     \\   /      |||||  \\    |   /         \n");
3755 }
3756
3757 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
3758 {
3759         unsigned long total;
3760         unsigned long entries;
3761
3762         get_total_entries(buf, &total, &entries);
3763         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
3764                    entries, total, num_online_cpus());
3765         seq_puts(m, "#\n");
3766 }
3767
3768 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
3769                                    unsigned int flags)
3770 {
3771         bool tgid = flags & TRACE_ITER_RECORD_TGID;
3772
3773         print_event_info(buf, m);
3774
3775         seq_printf(m, "#           TASK-PID   %s  CPU#   TIMESTAMP  FUNCTION\n", tgid ? "TGID     " : "");
3776         seq_printf(m, "#              | |     %s    |       |         |\n",      tgid ? "  |      " : "");
3777 }
3778
3779 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
3780                                        unsigned int flags)
3781 {
3782         bool tgid = flags & TRACE_ITER_RECORD_TGID;
3783         const char *space = "          ";
3784         int prec = tgid ? 10 : 2;
3785
3786         print_event_info(buf, m);
3787
3788         seq_printf(m, "#                          %.*s  _-----=> irqs-off\n", prec, space);
3789         seq_printf(m, "#                          %.*s / _----=> need-resched\n", prec, space);
3790         seq_printf(m, "#                          %.*s| / _---=> hardirq/softirq\n", prec, space);
3791         seq_printf(m, "#                          %.*s|| / _--=> preempt-depth\n", prec, space);
3792         seq_printf(m, "#                          %.*s||| /     delay\n", prec, space);
3793         seq_printf(m, "#           TASK-PID %.*sCPU#  ||||    TIMESTAMP  FUNCTION\n", prec, "   TGID   ");
3794         seq_printf(m, "#              | |   %.*s  |   ||||       |         |\n", prec, "     |    ");
3795 }
3796
3797 void
3798 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3799 {
3800         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3801         struct array_buffer *buf = iter->array_buffer;
3802         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3803         struct tracer *type = iter->trace;
3804         unsigned long entries;
3805         unsigned long total;
3806         const char *name;
3807
3808         name = type->name;
3809
3810         get_total_entries(buf, &total, &entries);
3811
3812         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3813                    name, UTS_RELEASE);
3814         seq_puts(m, "# -----------------------------------"
3815                  "---------------------------------\n");
3816         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3817                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3818                    nsecs_to_usecs(data->saved_latency),
3819                    entries,
3820                    total,
3821                    buf->cpu,
3822 #if defined(CONFIG_PREEMPT_NONE)
3823                    "server",
3824 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3825                    "desktop",
3826 #elif defined(CONFIG_PREEMPT)
3827                    "preempt",
3828 #elif defined(CONFIG_PREEMPT_RT)
3829                    "preempt_rt",
3830 #else
3831                    "unknown",
3832 #endif
3833                    /* These are reserved for later use */
3834                    0, 0, 0, 0);
3835 #ifdef CONFIG_SMP
3836         seq_printf(m, " #P:%d)\n", num_online_cpus());
3837 #else
3838         seq_puts(m, ")\n");
3839 #endif
3840         seq_puts(m, "#    -----------------\n");
3841         seq_printf(m, "#    | task: %.16s-%d "
3842                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3843                    data->comm, data->pid,
3844                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3845                    data->policy, data->rt_priority);
3846         seq_puts(m, "#    -----------------\n");
3847
3848         if (data->critical_start) {
3849                 seq_puts(m, "#  => started at: ");
3850                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3851                 trace_print_seq(m, &iter->seq);
3852                 seq_puts(m, "\n#  => ended at:   ");
3853                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3854                 trace_print_seq(m, &iter->seq);
3855                 seq_puts(m, "\n#\n");
3856         }
3857
3858         seq_puts(m, "#\n");
3859 }
3860
3861 static void test_cpu_buff_start(struct trace_iterator *iter)
3862 {
3863         struct trace_seq *s = &iter->seq;
3864         struct trace_array *tr = iter->tr;
3865
3866         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3867                 return;
3868
3869         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3870                 return;
3871
3872         if (cpumask_available(iter->started) &&
3873             cpumask_test_cpu(iter->cpu, iter->started))
3874                 return;
3875
3876         if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
3877                 return;
3878
3879         if (cpumask_available(iter->started))
3880                 cpumask_set_cpu(iter->cpu, iter->started);
3881
3882         /* Don't print the "CPU buffer started" banner for the first entry of the trace */
3883         if (iter->idx > 1)
3884                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3885                                 iter->cpu);
3886 }
3887
3888 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3889 {
3890         struct trace_array *tr = iter->tr;
3891         struct trace_seq *s = &iter->seq;
3892         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3893         struct trace_entry *entry;
3894         struct trace_event *event;
3895
3896         entry = iter->ent;
3897
3898         test_cpu_buff_start(iter);
3899
3900         event = ftrace_find_event(entry->type);
3901
3902         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3903                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3904                         trace_print_lat_context(iter);
3905                 else
3906                         trace_print_context(iter);
3907         }
3908
3909         if (trace_seq_has_overflowed(s))
3910                 return TRACE_TYPE_PARTIAL_LINE;
3911
3912         if (event)
3913                 return event->funcs->trace(iter, sym_flags, event);
3914
3915         trace_seq_printf(s, "Unknown type %d\n", entry->type);
3916
3917         return trace_handle_return(s);
3918 }
3919
3920 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3921 {
3922         struct trace_array *tr = iter->tr;
3923         struct trace_seq *s = &iter->seq;
3924         struct trace_entry *entry;
3925         struct trace_event *event;
3926
3927         entry = iter->ent;
3928
3929         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3930                 trace_seq_printf(s, "%d %d %llu ",
3931                                  entry->pid, iter->cpu, iter->ts);
3932
3933         if (trace_seq_has_overflowed(s))
3934                 return TRACE_TYPE_PARTIAL_LINE;
3935
3936         event = ftrace_find_event(entry->type);
3937         if (event)
3938                 return event->funcs->raw(iter, 0, event);
3939
3940         trace_seq_printf(s, "%d ?\n", entry->type);
3941
3942         return trace_handle_return(s);
3943 }
3944
3945 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3946 {
3947         struct trace_array *tr = iter->tr;
3948         struct trace_seq *s = &iter->seq;
3949         unsigned char newline = '\n';
3950         struct trace_entry *entry;
3951         struct trace_event *event;
3952
3953         entry = iter->ent;
3954
3955         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3956                 SEQ_PUT_HEX_FIELD(s, entry->pid);
3957                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
3958                 SEQ_PUT_HEX_FIELD(s, iter->ts);
3959                 if (trace_seq_has_overflowed(s))
3960                         return TRACE_TYPE_PARTIAL_LINE;
3961         }
3962
3963         event = ftrace_find_event(entry->type);
3964         if (event) {
3965                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
3966                 if (ret != TRACE_TYPE_HANDLED)
3967                         return ret;
3968         }
3969
3970         SEQ_PUT_FIELD(s, newline);
3971
3972         return trace_handle_return(s);
3973 }
3974
3975 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3976 {
3977         struct trace_array *tr = iter->tr;
3978         struct trace_seq *s = &iter->seq;
3979         struct trace_entry *entry;
3980         struct trace_event *event;
3981
3982         entry = iter->ent;
3983
3984         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3985                 SEQ_PUT_FIELD(s, entry->pid);
3986                 SEQ_PUT_FIELD(s, iter->cpu);
3987                 SEQ_PUT_FIELD(s, iter->ts);
3988                 if (trace_seq_has_overflowed(s))
3989                         return TRACE_TYPE_PARTIAL_LINE;
3990         }
3991
3992         event = ftrace_find_event(entry->type);
3993         return event ? event->funcs->binary(iter, 0, event) :
3994                 TRACE_TYPE_HANDLED;
3995 }
3996
3997 int trace_empty(struct trace_iterator *iter)
3998 {
3999         struct ring_buffer_iter *buf_iter;
4000         int cpu;
4001
4002         /* If we are looking at one CPU buffer, only check that one */
4003         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4004                 cpu = iter->cpu_file;
4005                 buf_iter = trace_buffer_iter(iter, cpu);
4006                 if (buf_iter) {
4007                         if (!ring_buffer_iter_empty(buf_iter))
4008                                 return 0;
4009                 } else {
4010                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4011                                 return 0;
4012                 }
4013                 return 1;
4014         }
4015
4016         for_each_tracing_cpu(cpu) {
4017                 buf_iter = trace_buffer_iter(iter, cpu);
4018                 if (buf_iter) {
4019                         if (!ring_buffer_iter_empty(buf_iter))
4020                                 return 0;
4021                 } else {
4022                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4023                                 return 0;
4024                 }
4025         }
4026
4027         return 1;
4028 }
4029
4030 /*  Called with trace_event_read_lock() held. */
4031 enum print_line_t print_trace_line(struct trace_iterator *iter)
4032 {
4033         struct trace_array *tr = iter->tr;
4034         unsigned long trace_flags = tr->trace_flags;
4035         enum print_line_t ret;
4036
4037         if (iter->lost_events) {
4038                 if (iter->lost_events == (unsigned long)-1)
4039                         trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4040                                          iter->cpu);
4041                 else
4042                         trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4043                                          iter->cpu, iter->lost_events);
4044                 if (trace_seq_has_overflowed(&iter->seq))
4045                         return TRACE_TYPE_PARTIAL_LINE;
4046         }
4047
4048         if (iter->trace && iter->trace->print_line) {
4049                 ret = iter->trace->print_line(iter);
4050                 if (ret != TRACE_TYPE_UNHANDLED)
4051                         return ret;
4052         }
4053
4054         if (iter->ent->type == TRACE_BPUTS &&
4055                         trace_flags & TRACE_ITER_PRINTK &&
4056                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4057                 return trace_print_bputs_msg_only(iter);
4058
4059         if (iter->ent->type == TRACE_BPRINT &&
4060                         trace_flags & TRACE_ITER_PRINTK &&
4061                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4062                 return trace_print_bprintk_msg_only(iter);
4063
4064         if (iter->ent->type == TRACE_PRINT &&
4065                         trace_flags & TRACE_ITER_PRINTK &&
4066                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4067                 return trace_print_printk_msg_only(iter);
4068
4069         if (trace_flags & TRACE_ITER_BIN)
4070                 return print_bin_fmt(iter);
4071
4072         if (trace_flags & TRACE_ITER_HEX)
4073                 return print_hex_fmt(iter);
4074
4075         if (trace_flags & TRACE_ITER_RAW)
4076                 return print_raw_fmt(iter);
4077
4078         return print_trace_fmt(iter);
4079 }
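
/*
 * A minimal sketch of consuming the iterator helpers above: walk every
 * remaining entry, render it, and discard the text. Real callers such as
 * s_show() and ftrace_dump() flush iter->seq somewhere instead. The
 * iterator is assumed to be fully set up (e.g. by __tracing_open()) and,
 * per the comment above, trace_event_read_lock() is assumed to be held.
 */
static __maybe_unused void example_walk_trace(struct trace_iterator *iter)
{
        while (trace_find_next_entry_inc(iter)) {
                /* iter->ent, iter->cpu and iter->ts now describe the entry */
                print_trace_line(iter);
                trace_seq_init(&iter->seq);     /* discard the rendered line */
        }
}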
4080
4081 void trace_latency_header(struct seq_file *m)
4082 {
4083         struct trace_iterator *iter = m->private;
4084         struct trace_array *tr = iter->tr;
4085
4086         /* print nothing if the buffers are empty */
4087         if (trace_empty(iter))
4088                 return;
4089
4090         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4091                 print_trace_header(m, iter);
4092
4093         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4094                 print_lat_help_header(m);
4095 }
4096
4097 void trace_default_header(struct seq_file *m)
4098 {
4099         struct trace_iterator *iter = m->private;
4100         struct trace_array *tr = iter->tr;
4101         unsigned long trace_flags = tr->trace_flags;
4102
4103         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4104                 return;
4105
4106         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4107                 /* print nothing if the buffers are empty */
4108                 if (trace_empty(iter))
4109                         return;
4110                 print_trace_header(m, iter);
4111                 if (!(trace_flags & TRACE_ITER_VERBOSE))
4112                         print_lat_help_header(m);
4113         } else {
4114                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4115                         if (trace_flags & TRACE_ITER_IRQ_INFO)
4116                                 print_func_help_header_irq(iter->array_buffer,
4117                                                            m, trace_flags);
4118                         else
4119                                 print_func_help_header(iter->array_buffer, m,
4120                                                        trace_flags);
4121                 }
4122         }
4123 }
4124
4125 static void test_ftrace_alive(struct seq_file *m)
4126 {
4127         if (!ftrace_is_dead())
4128                 return;
4129         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4130                     "#          MAY BE MISSING FUNCTION EVENTS\n");
4131 }
4132
4133 #ifdef CONFIG_TRACER_MAX_TRACE
4134 static void show_snapshot_main_help(struct seq_file *m)
4135 {
4136         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4137                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4138                     "#                      Takes a snapshot of the main buffer.\n"
4139                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4140                     "#                      (Doesn't have to be '2'; works with any number that\n"
4141                     "#                       is not a '0' or '1')\n");
4142 }
4143
4144 static void show_snapshot_percpu_help(struct seq_file *m)
4145 {
4146         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4147 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4148         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4149                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
4150 #else
4151         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4152                     "#                     Must use main snapshot file to allocate.\n");
4153 #endif
4154         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4155                     "#                      (Doesn't have to be '2'; works with any number that\n"
4156                     "#                       is not a '0' or '1')\n");
4157 }
4158
4159 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4160 {
4161         if (iter->tr->allocated_snapshot)
4162                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4163         else
4164                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4165
4166         seq_puts(m, "# Snapshot commands:\n");
4167         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4168                 show_snapshot_main_help(m);
4169         else
4170                 show_snapshot_percpu_help(m);
4171 }
4172 #else
4173 /* Should never be called */
4174 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4175 #endif
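
/*
 * A minimal sketch of the in-kernel counterpart to the snapshot file
 * documented above: tracing_alloc_snapshot() roughly corresponds to the
 * allocation done by "echo 1 > snapshot", and tracing_snapshot() takes the
 * actual snapshot of the main buffer.
 */
static __maybe_unused void example_take_snapshot(void)
{
        if (tracing_alloc_snapshot() == 0)
                tracing_snapshot();
}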
4176
4177 static int s_show(struct seq_file *m, void *v)
4178 {
4179         struct trace_iterator *iter = v;
4180         int ret;
4181
4182         if (iter->ent == NULL) {
4183                 if (iter->tr) {
4184                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
4185                         seq_puts(m, "#\n");
4186                         test_ftrace_alive(m);
4187                 }
4188                 if (iter->snapshot && trace_empty(iter))
4189                         print_snapshot_help(m, iter);
4190                 else if (iter->trace && iter->trace->print_header)
4191                         iter->trace->print_header(m);
4192                 else
4193                         trace_default_header(m);
4194
4195         } else if (iter->leftover) {
4196                 /*
4197                  * If we filled the seq_file buffer earlier, we
4198                  * want to just show it now.
4199                  */
4200                 ret = trace_print_seq(m, &iter->seq);
4201
4202                 /* ret should this time be zero, but you never know */
4203                 iter->leftover = ret;
4204
4205         } else {
4206                 print_trace_line(iter);
4207                 ret = trace_print_seq(m, &iter->seq);
4208                 /*
4209                  * If we overflow the seq_file buffer, then it will
4210                  * ask us for this data again at start up.
4211                  * Use that instead.
4212                  *  ret is 0 if seq_file write succeeded.
4213                  *        -1 otherwise.
4214                  */
4215                 iter->leftover = ret;
4216         }
4217
4218         return 0;
4219 }
4220
4221 /*
4222  * Should be used after trace_array_get(); trace_types_lock
4223  * ensures that i_cdev was already initialized.
4224  */
4225 static inline int tracing_get_cpu(struct inode *inode)
4226 {
4227         if (inode->i_cdev) /* See trace_create_cpu_file() */
4228                 return (long)inode->i_cdev - 1;
4229         return RING_BUFFER_ALL_CPUS;
4230 }
4231
4232 static const struct seq_operations tracer_seq_ops = {
4233         .start          = s_start,
4234         .next           = s_next,
4235         .stop           = s_stop,
4236         .show           = s_show,
4237 };
4238
4239 static struct trace_iterator *
4240 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4241 {
4242         struct trace_array *tr = inode->i_private;
4243         struct trace_iterator *iter;
4244         int cpu;
4245
4246         if (tracing_disabled)
4247                 return ERR_PTR(-ENODEV);
4248
4249         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4250         if (!iter)
4251                 return ERR_PTR(-ENOMEM);
4252
4253         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4254                                     GFP_KERNEL);
4255         if (!iter->buffer_iter)
4256                 goto release;
4257
4258         /*
4259          * trace_find_next_entry() may need to save off iter->ent.
4260          * It will place it into the iter->temp buffer. As most
4261          * events are smaller than 128 bytes, allocate a buffer of that size.
4262          * If one is bigger, then trace_find_next_entry() will
4263          * allocate a new buffer to adjust for the bigger iter->ent.
4264          * It's not critical if it fails to get allocated here.
4265          */
4266         iter->temp = kmalloc(128, GFP_KERNEL);
4267         if (iter->temp)
4268                 iter->temp_size = 128;
4269
4270         /*
4271          * We make a copy of the current tracer to avoid concurrent
4272          * changes on it while we are reading.
4273          */
4274         mutex_lock(&trace_types_lock);
4275         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4276         if (!iter->trace)
4277                 goto fail;
4278
4279         *iter->trace = *tr->current_trace;
4280
4281         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4282                 goto fail;
4283
4284         iter->tr = tr;
4285
4286 #ifdef CONFIG_TRACER_MAX_TRACE
4287         /* Currently only the top directory has a snapshot */
4288         if (tr->current_trace->print_max || snapshot)
4289                 iter->array_buffer = &tr->max_buffer;
4290         else
4291 #endif
4292                 iter->array_buffer = &tr->array_buffer;
4293         iter->snapshot = snapshot;
4294         iter->pos = -1;
4295         iter->cpu_file = tracing_get_cpu(inode);
4296         mutex_init(&iter->mutex);
4297
4298         /* Notify the tracer early, before we stop tracing. */
4299         if (iter->trace->open)
4300                 iter->trace->open(iter);
4301
4302         /* Annotate start of buffers if we had overruns */
4303         if (ring_buffer_overruns(iter->array_buffer->buffer))
4304                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4305
4306         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4307         if (trace_clocks[tr->clock_id].in_ns)
4308                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4309
4310         /*
4311          * If pause-on-trace is enabled, then stop the trace while
4312          * dumping, unless this is the "snapshot" file.
4313          */
4314         if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4315                 tracing_stop_tr(tr);
4316
4317         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4318                 for_each_tracing_cpu(cpu) {
4319                         iter->buffer_iter[cpu] =
4320                                 ring_buffer_read_prepare(iter->array_buffer->buffer,
4321                                                          cpu, GFP_KERNEL);
4322                 }
4323                 ring_buffer_read_prepare_sync();
4324                 for_each_tracing_cpu(cpu) {
4325                         ring_buffer_read_start(iter->buffer_iter[cpu]);
4326                         tracing_iter_reset(iter, cpu);
4327                 }
4328         } else {
4329                 cpu = iter->cpu_file;
4330                 iter->buffer_iter[cpu] =
4331                         ring_buffer_read_prepare(iter->array_buffer->buffer,
4332                                                  cpu, GFP_KERNEL);
4333                 ring_buffer_read_prepare_sync();
4334                 ring_buffer_read_start(iter->buffer_iter[cpu]);
4335                 tracing_iter_reset(iter, cpu);
4336         }
4337
4338         mutex_unlock(&trace_types_lock);
4339
4340         return iter;
4341
4342  fail:
4343         mutex_unlock(&trace_types_lock);
4344         kfree(iter->trace);
4345         kfree(iter->temp);
4346         kfree(iter->buffer_iter);
4347 release:
4348         seq_release_private(inode, file);
4349         return ERR_PTR(-ENOMEM);
4350 }
4351
4352 int tracing_open_generic(struct inode *inode, struct file *filp)
4353 {
4354         int ret;
4355
4356         ret = tracing_check_open_get_tr(NULL);
4357         if (ret)
4358                 return ret;
4359
4360         filp->private_data = inode->i_private;
4361         return 0;
4362 }
4363
4364 bool tracing_is_disabled(void)
4365 {
4366         return tracing_disabled ? true : false;
4367 }
4368
4369 /*
4370  * Open and update trace_array ref count.
4371  * Must have the current trace_array passed to it.
4372  */
4373 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4374 {
4375         struct trace_array *tr = inode->i_private;
4376         int ret;
4377
4378         ret = tracing_check_open_get_tr(tr);
4379         if (ret)
4380                 return ret;
4381
4382         filp->private_data = inode->i_private;
4383
4384         return 0;
4385 }
4386
4387 static int tracing_release(struct inode *inode, struct file *file)
4388 {
4389         struct trace_array *tr = inode->i_private;
4390         struct seq_file *m = file->private_data;
4391         struct trace_iterator *iter;
4392         int cpu;
4393
4394         if (!(file->f_mode & FMODE_READ)) {
4395                 trace_array_put(tr);
4396                 return 0;
4397         }
4398
4399         /* Writes do not use seq_file */
4400         iter = m->private;
4401         mutex_lock(&trace_types_lock);
4402
4403         for_each_tracing_cpu(cpu) {
4404                 if (iter->buffer_iter[cpu])
4405                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
4406         }
4407
4408         if (iter->trace && iter->trace->close)
4409                 iter->trace->close(iter);
4410
4411         if (!iter->snapshot && tr->stop_count)
4412                 /* re-enable tracing if it was previously enabled */
4413                 tracing_start_tr(tr);
4414
4415         __trace_array_put(tr);
4416
4417         mutex_unlock(&trace_types_lock);
4418
4419         mutex_destroy(&iter->mutex);
4420         free_cpumask_var(iter->started);
4421         kfree(iter->temp);
4422         kfree(iter->trace);
4423         kfree(iter->buffer_iter);
4424         seq_release_private(inode, file);
4425
4426         return 0;
4427 }
4428
4429 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4430 {
4431         struct trace_array *tr = inode->i_private;
4432
4433         trace_array_put(tr);
4434         return 0;
4435 }
4436
4437 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4438 {
4439         struct trace_array *tr = inode->i_private;
4440
4441         trace_array_put(tr);
4442
4443         return single_release(inode, file);
4444 }
4445
4446 static int tracing_open(struct inode *inode, struct file *file)
4447 {
4448         struct trace_array *tr = inode->i_private;
4449         struct trace_iterator *iter;
4450         int ret;
4451
4452         ret = tracing_check_open_get_tr(tr);
4453         if (ret)
4454                 return ret;
4455
4456         /* If this file was open for write, then erase contents */
4457         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4458                 int cpu = tracing_get_cpu(inode);
4459                 struct array_buffer *trace_buf = &tr->array_buffer;
4460
4461 #ifdef CONFIG_TRACER_MAX_TRACE
4462                 if (tr->current_trace->print_max)
4463                         trace_buf = &tr->max_buffer;
4464 #endif
4465
4466                 if (cpu == RING_BUFFER_ALL_CPUS)
4467                         tracing_reset_online_cpus(trace_buf);
4468                 else
4469                         tracing_reset_cpu(trace_buf, cpu);
4470         }
4471
4472         if (file->f_mode & FMODE_READ) {
4473                 iter = __tracing_open(inode, file, false);
4474                 if (IS_ERR(iter))
4475                         ret = PTR_ERR(iter);
4476                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4477                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
4478         }
4479
4480         if (ret < 0)
4481                 trace_array_put(tr);
4482
4483         return ret;
4484 }
4485
4486 /*
4487  * Some tracers are not suitable for instance buffers.
4488  * A tracer is always available for the global array (top level)
4489  * or if it explicitly states that it is.
4490  */
4491 static bool
4492 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4493 {
4494         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4495 }
4496
4497 /* Find the next tracer that this trace array may use */
4498 static struct tracer *
4499 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4500 {
4501         while (t && !trace_ok_for_array(t, tr))
4502                 t = t->next;
4503
4504         return t;
4505 }
4506
4507 static void *
4508 t_next(struct seq_file *m, void *v, loff_t *pos)
4509 {
4510         struct trace_array *tr = m->private;
4511         struct tracer *t = v;
4512
4513         (*pos)++;
4514
4515         if (t)
4516                 t = get_tracer_for_array(tr, t->next);
4517
4518         return t;
4519 }
4520
4521 static void *t_start(struct seq_file *m, loff_t *pos)
4522 {
4523         struct trace_array *tr = m->private;
4524         struct tracer *t;
4525         loff_t l = 0;
4526
4527         mutex_lock(&trace_types_lock);
4528
4529         t = get_tracer_for_array(tr, trace_types);
4530         for (; t && l < *pos; t = t_next(m, t, &l))
4531                         ;
4532                 ;
4533         return t;
4534 }
4535
4536 static void t_stop(struct seq_file *m, void *p)
4537 {
4538         mutex_unlock(&trace_types_lock);
4539 }
4540
4541 static int t_show(struct seq_file *m, void *v)
4542 {
4543         struct tracer *t = v;
4544
4545         if (!t)
4546                 return 0;
4547
4548         seq_puts(m, t->name);
4549         if (t->next)
4550                 seq_putc(m, ' ');
4551         else
4552                 seq_putc(m, '\n');
4553
4554         return 0;
4555 }
4556
4557 static const struct seq_operations show_traces_seq_ops = {
4558         .start          = t_start,
4559         .next           = t_next,
4560         .stop           = t_stop,
4561         .show           = t_show,
4562 };
4563
4564 static int show_traces_open(struct inode *inode, struct file *file)
4565 {
4566         struct trace_array *tr = inode->i_private;
4567         struct seq_file *m;
4568         int ret;
4569
4570         ret = tracing_check_open_get_tr(tr);
4571         if (ret)
4572                 return ret;
4573
4574         ret = seq_open(file, &show_traces_seq_ops);
4575         if (ret) {
4576                 trace_array_put(tr);
4577                 return ret;
4578         }
4579
4580         m = file->private_data;
4581         m->private = tr;
4582
4583         return 0;
4584 }
4585
4586 static int show_traces_release(struct inode *inode, struct file *file)
4587 {
4588         struct trace_array *tr = inode->i_private;
4589
4590         trace_array_put(tr);
4591         return seq_release(inode, file);
4592 }
4593
4594 static ssize_t
4595 tracing_write_stub(struct file *filp, const char __user *ubuf,
4596                    size_t count, loff_t *ppos)
4597 {
4598         return count;
4599 }
4600
4601 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4602 {
4603         int ret;
4604
4605         if (file->f_mode & FMODE_READ)
4606                 ret = seq_lseek(file, offset, whence);
4607         else
4608                 file->f_pos = ret = 0;
4609
4610         return ret;
4611 }
4612
4613 static const struct file_operations tracing_fops = {
4614         .open           = tracing_open,
4615         .read           = seq_read,
4616         .write          = tracing_write_stub,
4617         .llseek         = tracing_lseek,
4618         .release        = tracing_release,
4619 };
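
/*
 * A minimal sketch of how fops like tracing_fops are wired into tracefs
 * via trace_create_file(); the real "trace" file is created this way in
 * init_tracer_tracefs() later in this file. The file name "example_trace"
 * is hypothetical.
 */
static __maybe_unused void example_create_trace_file(struct trace_array *tr,
                                                     struct dentry *d_tracer)
{
        trace_create_file("example_trace", 0644, d_tracer,
                          tr, &tracing_fops);
}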
4620
4621 static const struct file_operations show_traces_fops = {
4622         .open           = show_traces_open,
4623         .read           = seq_read,
4624         .llseek         = seq_lseek,
4625         .release        = show_traces_release,
4626 };
4627
4628 static ssize_t
4629 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4630                      size_t count, loff_t *ppos)
4631 {
4632         struct trace_array *tr = file_inode(filp)->i_private;
4633         char *mask_str;
4634         int len;
4635
4636         len = snprintf(NULL, 0, "%*pb\n",
4637                        cpumask_pr_args(tr->tracing_cpumask)) + 1;
4638         mask_str = kmalloc(len, GFP_KERNEL);
4639         if (!mask_str)
4640                 return -ENOMEM;
4641
4642         len = snprintf(mask_str, len, "%*pb\n",
4643                        cpumask_pr_args(tr->tracing_cpumask));
4644         if (len >= count) {
4645                 count = -EINVAL;
4646                 goto out_err;
4647         }
4648         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
4649
4650 out_err:
4651         kfree(mask_str);
4652
4653         return count;
4654 }
4655
4656 int tracing_set_cpumask(struct trace_array *tr,
4657                         cpumask_var_t tracing_cpumask_new)
4658 {
4659         int cpu;
4660
4661         if (!tr)
4662                 return -EINVAL;
4663
4664         local_irq_disable();
4665         arch_spin_lock(&tr->max_lock);
4666         for_each_tracing_cpu(cpu) {
4667                 /*
4668                  * Increase/decrease the disabled counter if we are
4669                  * about to flip a bit in the cpumask:
4670                  */
4671                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4672                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4673                         atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
4674                         ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
4675                 }
4676                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4677                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4678                         atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
4679                         ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
4680                 }
4681         }
4682         arch_spin_unlock(&tr->max_lock);
4683         local_irq_enable();
4684
4685         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4686
4687         return 0;
4688 }
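
/*
 * A minimal sketch of driving tracing_set_cpumask() from kernel code,
 * mirroring what tracing_cpumask_write() does for the tracefs file. The
 * CPU list "0-1" is just an example; the mask is copied internally, so it
 * can be freed right away.
 */
static __maybe_unused int example_limit_tracing_cpus(struct trace_array *tr)
{
        cpumask_var_t new_mask;
        int err;

        if (!alloc_cpumask_var(&new_mask, GFP_KERNEL))
                return -ENOMEM;

        err = cpulist_parse("0-1", new_mask);
        if (!err)
                err = tracing_set_cpumask(tr, new_mask);

        free_cpumask_var(new_mask);
        return err;
}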
4689
4690 static ssize_t
4691 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4692                       size_t count, loff_t *ppos)
4693 {
4694         struct trace_array *tr = file_inode(filp)->i_private;
4695         cpumask_var_t tracing_cpumask_new;
4696         int err;
4697
4698         if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4699                 return -ENOMEM;
4700
4701         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4702         if (err)
4703                 goto err_free;
4704
4705         err = tracing_set_cpumask(tr, tracing_cpumask_new);
4706         if (err)
4707                 goto err_free;
4708
4709         free_cpumask_var(tracing_cpumask_new);
4710
4711         return count;
4712
4713 err_free:
4714         free_cpumask_var(tracing_cpumask_new);
4715
4716         return err;
4717 }
4718
4719 static const struct file_operations tracing_cpumask_fops = {
4720         .open           = tracing_open_generic_tr,
4721         .read           = tracing_cpumask_read,
4722         .write          = tracing_cpumask_write,
4723         .release        = tracing_release_generic_tr,
4724         .llseek         = generic_file_llseek,
4725 };
4726
4727 static int tracing_trace_options_show(struct seq_file *m, void *v)
4728 {
4729         struct tracer_opt *trace_opts;
4730         struct trace_array *tr = m->private;
4731         u32 tracer_flags;
4732         int i;
4733
4734         mutex_lock(&trace_types_lock);
4735         tracer_flags = tr->current_trace->flags->val;
4736         trace_opts = tr->current_trace->flags->opts;
4737
4738         for (i = 0; trace_options[i]; i++) {
4739                 if (tr->trace_flags & (1 << i))
4740                         seq_printf(m, "%s\n", trace_options[i]);
4741                 else
4742                         seq_printf(m, "no%s\n", trace_options[i]);
4743         }
4744
4745         for (i = 0; trace_opts[i].name; i++) {
4746                 if (tracer_flags & trace_opts[i].bit)
4747                         seq_printf(m, "%s\n", trace_opts[i].name);
4748                 else
4749                         seq_printf(m, "no%s\n", trace_opts[i].name);
4750         }
4751         mutex_unlock(&trace_types_lock);
4752
4753         return 0;
4754 }
4755
4756 static int __set_tracer_option(struct trace_array *tr,
4757                                struct tracer_flags *tracer_flags,
4758                                struct tracer_opt *opts, int neg)
4759 {
4760         struct tracer *trace = tracer_flags->trace;
4761         int ret;
4762
4763         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4764         if (ret)
4765                 return ret;
4766
4767         if (neg)
4768                 tracer_flags->val &= ~opts->bit;
4769         else
4770                 tracer_flags->val |= opts->bit;
4771         return 0;
4772 }
4773
4774 /* Try to assign a tracer specific option */
4775 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4776 {
4777         struct tracer *trace = tr->current_trace;
4778         struct tracer_flags *tracer_flags = trace->flags;
4779         struct tracer_opt *opts = NULL;
4780         int i;
4781
4782         for (i = 0; tracer_flags->opts[i].name; i++) {
4783                 opts = &tracer_flags->opts[i];
4784
4785                 if (strcmp(cmp, opts->name) == 0)
4786                         return __set_tracer_option(tr, trace->flags, opts, neg);
4787         }
4788
4789         return -EINVAL;
4790 }
4791
4792 /* Some tracers require overwrite to stay enabled */
4793 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4794 {
4795         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4796                 return -1;
4797
4798         return 0;
4799 }
4800
4801 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4802 {
4803         if ((mask == TRACE_ITER_RECORD_TGID) ||
4804             (mask == TRACE_ITER_RECORD_CMD))
4805                 lockdep_assert_held(&event_mutex);
4806
4807         /* do nothing if flag is already set */
4808         if (!!(tr->trace_flags & mask) == !!enabled)
4809                 return 0;
4810
4811         /* Give the tracer a chance to approve the change */
4812         if (tr->current_trace->flag_changed)
4813                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4814                         return -EINVAL;
4815
4816         if (enabled)
4817                 tr->trace_flags |= mask;
4818         else
4819                 tr->trace_flags &= ~mask;
4820
4821         if (mask == TRACE_ITER_RECORD_CMD)
4822                 trace_event_enable_cmd_record(enabled);
4823
4824         if (mask == TRACE_ITER_RECORD_TGID) {
4825                 if (!tgid_map)
4826                         tgid_map = kvcalloc(PID_MAX_DEFAULT + 1,
4827                                            sizeof(*tgid_map),
4828                                            GFP_KERNEL);
4829                 if (!tgid_map) {
4830                         tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
4831                         return -ENOMEM;
4832                 }
4833
4834                 trace_event_enable_tgid_record(enabled);
4835         }
4836
4837         if (mask == TRACE_ITER_EVENT_FORK)
4838                 trace_event_follow_fork(tr, enabled);
4839
4840         if (mask == TRACE_ITER_FUNC_FORK)
4841                 ftrace_pid_follow_fork(tr, enabled);
4842
4843         if (mask == TRACE_ITER_OVERWRITE) {
4844                 ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
4845 #ifdef CONFIG_TRACER_MAX_TRACE
4846                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4847 #endif
4848         }
4849
4850         if (mask == TRACE_ITER_PRINTK) {
4851                 trace_printk_start_stop_comm(enabled);
4852                 trace_printk_control(enabled);
4853         }
4854
4855         return 0;
4856 }
4857
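/*
 * Parse a single option token, as written to the trace_options file or
 * passed on the boot command line. A leading "no" clears the flag
 * (e.g. "overwrite" sets it, "nooverwrite" clears it). The token is
 * matched against the core trace_options first, then against the
 * current tracer's private options.
 */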
4858 int trace_set_options(struct trace_array *tr, char *option)
4859 {
4860         char *cmp;
4861         int neg = 0;
4862         int ret;
4863         size_t orig_len = strlen(option);
4864         int len;
4865
4866         cmp = strstrip(option);
4867
4868         len = str_has_prefix(cmp, "no");
4869         if (len)
4870                 neg = 1;
4871
4872         cmp += len;
4873
4874         mutex_lock(&event_mutex);
4875         mutex_lock(&trace_types_lock);
4876
4877         ret = match_string(trace_options, -1, cmp);
4878         /* If no option could be set, test the specific tracer options */
4879         if (ret < 0)
4880                 ret = set_tracer_option(tr, cmp, neg);
4881         else
4882                 ret = set_tracer_flag(tr, 1 << ret, !neg);
4883
4884         mutex_unlock(&trace_types_lock);
4885         mutex_unlock(&event_mutex);
4886
4887         /*
4888          * If the first trailing whitespace is replaced with '\0' by strstrip,
4889          * turn it back into a space.
4890          */
4891         if (orig_len > strlen(option))
4892                 option[strlen(option)] = ' ';
4893
4894         return ret;
4895 }
4896
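/*
 * Apply the comma-separated option list saved from the kernel command
 * line (trace_boot_options_buf) to the global trace array, one option
 * at a time.
 */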
4897 static void __init apply_trace_boot_options(void)
4898 {
4899         char *buf = trace_boot_options_buf;
4900         char *option;
4901
4902         while (true) {
4903                 option = strsep(&buf, ",");
4904
4905                 if (!option)
4906                         break;
4907
4908                 if (*option)
4909                         trace_set_options(&global_trace, option);
4910
4911                 /* Put back the comma to allow this to be called again */
4912                 if (buf)
4913                         *(buf - 1) = ',';
4914         }
4915 }
4916
4917 static ssize_t
4918 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4919                         size_t cnt, loff_t *ppos)
4920 {
4921         struct seq_file *m = filp->private_data;
4922         struct trace_array *tr = m->private;
4923         char buf[64];
4924         int ret;
4925
4926         if (cnt >= sizeof(buf))
4927                 return -EINVAL;
4928
4929         if (copy_from_user(buf, ubuf, cnt))
4930                 return -EFAULT;
4931
4932         buf[cnt] = 0;
4933
4934         ret = trace_set_options(tr, buf);
4935         if (ret < 0)
4936                 return ret;
4937
4938         *ppos += cnt;
4939
4940         return cnt;
4941 }
4942
4943 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4944 {
4945         struct trace_array *tr = inode->i_private;
4946         int ret;
4947
4948         ret = tracing_check_open_get_tr(tr);
4949         if (ret)
4950                 return ret;
4951
4952         ret = single_open(file, tracing_trace_options_show, inode->i_private);
4953         if (ret < 0)
4954                 trace_array_put(tr);
4955
4956         return ret;
4957 }
4958
4959 static const struct file_operations tracing_iter_fops = {
4960         .open           = tracing_trace_options_open,
4961         .read           = seq_read,
4962         .llseek         = seq_lseek,
4963         .release        = tracing_single_release_tr,
4964         .write          = tracing_trace_options_write,
4965 };
4966
4967 static const char readme_msg[] =
4968         "tracing mini-HOWTO:\n\n"
4969         "# echo 0 > tracing_on : quick way to disable tracing\n"
4970         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4971         " Important files:\n"
4972         "  trace\t\t\t- The static contents of the buffer\n"
4973         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
4974         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4975         "  current_tracer\t- function and latency tracers\n"
4976         "  available_tracers\t- list of configured tracers for current_tracer\n"
4977         "  error_log\t- error log for failed commands (that support it)\n"
4978         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
4979         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
4980         "  trace_clock\t\t- change the clock used to order events\n"
4981         "       local:   Per cpu clock but may not be synced across CPUs\n"
4982         "      global:   Synced across CPUs but slows tracing down.\n"
4983         "     counter:   Not a clock, but just an increment\n"
4984         "      uptime:   Jiffy counter from time of boot\n"
4985         "        perf:   Same clock that perf events use\n"
4986 #ifdef CONFIG_X86_64
4987         "     x86-tsc:   TSC cycle counter\n"
4988 #endif
4989         "\n  timestamp_mode\t- view the mode used to timestamp events\n"
4990         "       delta:   Delta difference against a buffer-wide timestamp\n"
4991         "    absolute:   Absolute (standalone) timestamp\n"
4992         "\n  trace_marker\t\t- Writes into this file are written into the kernel buffer\n"
4993         "\n  trace_marker_raw\t\t- Writes into this file are written as binary data into the kernel buffer\n"
4994         "  tracing_cpumask\t- Limit which CPUs to trace\n"
4995         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4996         "\t\t\t  Remove sub-buffer with rmdir\n"
4997         "  trace_options\t\t- Set format or modify how tracing happens\n"
4998         "\t\t\t  Disable an option by prefixing 'no' to the\n"
4999         "\t\t\t  option name\n"
5000         "  saved_cmdlines_size\t- echo the number of commands to store in the comm-pid list\n"
5001 #ifdef CONFIG_DYNAMIC_FTRACE
5002         "\n  available_filter_functions - list of functions that can be filtered on\n"
5003         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
5004         "\t\t\t  functions\n"
5005         "\t     accepts: func_full_name or glob-matching-pattern\n"
5006         "\t     modules: Can select a group via module\n"
5007         "\t      Format: :mod:<module-name>\n"
5008         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5009         "\t    triggers: a command to perform when function is hit\n"
5010         "\t      Format: <function>:<trigger>[:count]\n"
5011         "\t     trigger: traceon, traceoff\n"
5012         "\t\t      enable_event:<system>:<event>\n"
5013         "\t\t      disable_event:<system>:<event>\n"
5014 #ifdef CONFIG_STACKTRACE
5015         "\t\t      stacktrace\n"
5016 #endif
5017 #ifdef CONFIG_TRACER_SNAPSHOT
5018         "\t\t      snapshot\n"
5019 #endif
5020         "\t\t      dump\n"
5021         "\t\t      cpudump\n"
5022         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5023         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5024         "\t     The first one will disable tracing every time do_fault is hit\n"
5025         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5026         "\t       The first time do_trap is hit and it disables tracing, the\n"
5027         "\t       counter will decrement to 2. If tracing is already disabled,\n"
5028         "\t       the counter will not decrement. It only decrements when the\n"
5029         "\t       trigger did work\n"
5030         "\t     To remove trigger without count:\n"
5031         "\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
5032         "\t     To remove trigger with a count:\n"
5033         "\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
5034         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5035         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5036         "\t    modules: Can select a group via module command :mod:\n"
5037         "\t    Does not accept triggers\n"
5038 #endif /* CONFIG_DYNAMIC_FTRACE */
5039 #ifdef CONFIG_FUNCTION_TRACER
5040         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5041         "\t\t    (function)\n"
5042         "  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5043         "\t\t    (function)\n"
5044 #endif
5045 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5046         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5047         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5048         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5049 #endif
5050 #ifdef CONFIG_TRACER_SNAPSHOT
5051         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5052         "\t\t\t  snapshot buffer. Read the contents for more\n"
5053         "\t\t\t  information\n"
5054 #endif
5055 #ifdef CONFIG_STACK_TRACER
5056         "  stack_trace\t\t- Shows the max stack trace when active\n"
5057         "  stack_max_size\t- Shows current max stack size that was traced\n"
5058         "\t\t\t  Write into this file to reset the max size (trigger a\n"
5059         "\t\t\t  new trace)\n"
5060 #ifdef CONFIG_DYNAMIC_FTRACE
5061         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5062         "\t\t\t  traces\n"
5063 #endif
5064 #endif /* CONFIG_STACK_TRACER */
5065 #ifdef CONFIG_DYNAMIC_EVENTS
5066         "  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5067         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5068 #endif
5069 #ifdef CONFIG_KPROBE_EVENTS
5070         "  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5071         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5072 #endif
5073 #ifdef CONFIG_UPROBE_EVENTS
5074         "  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5075         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5076 #endif
5077 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5078         "\t  accepts: event-definitions (one definition per line)\n"
5079         "\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
5080         "\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
5081 #ifdef CONFIG_HIST_TRIGGERS
5082         "\t           s:[synthetic/]<event> <field> [<field>]\n"
5083 #endif
5084         "\t           -:[<group>/]<event>\n"
5085 #ifdef CONFIG_KPROBE_EVENTS
5086         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5087   "place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5088 #endif
5089 #ifdef CONFIG_UPROBE_EVENTS
5090   "   place (uprobe): <path>:<offset>[(ref_ctr_offset)]\n"
5091 #endif
5092         "\t     args: <name>=fetcharg[:type]\n"
5093         "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
5094 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5095         "\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5096 #else
5097         "\t           $stack<index>, $stack, $retval, $comm,\n"
5098 #endif
5099         "\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5100         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
5101         "\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5102         "\t           <type>\\[<array-size>\\]\n"
5103 #ifdef CONFIG_HIST_TRIGGERS
5104         "\t    field: <stype> <name>;\n"
5105         "\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5106         "\t           [unsigned] char/int/long\n"
5107 #endif
5108 #endif
5109         "  events/\t\t- Directory containing all trace event subsystems:\n"
5110         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5111         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
5112         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5113         "\t\t\t  events\n"
5114         "      filter\t\t- If set, only events passing filter are traced\n"
5115         "  events/<system>/<event>/\t- Directory containing control files for\n"
5116         "\t\t\t  <event>:\n"
5117         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5118         "      filter\t\t- If set, only events passing filter are traced\n"
5119         "      trigger\t\t- If set, a command to perform when event is hit\n"
5120         "\t    Format: <trigger>[:count][if <filter>]\n"
5121         "\t   trigger: traceon, traceoff\n"
5122         "\t            enable_event:<system>:<event>\n"
5123         "\t            disable_event:<system>:<event>\n"
5124 #ifdef CONFIG_HIST_TRIGGERS
5125         "\t            enable_hist:<system>:<event>\n"
5126         "\t            disable_hist:<system>:<event>\n"
5127 #endif
5128 #ifdef CONFIG_STACKTRACE
5129         "\t\t    stacktrace\n"
5130 #endif
5131 #ifdef CONFIG_TRACER_SNAPSHOT
5132         "\t\t    snapshot\n"
5133 #endif
5134 #ifdef CONFIG_HIST_TRIGGERS
5135         "\t\t    hist (see below)\n"
5136 #endif
5137         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5138         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5139         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5140         "\t                  events/block/block_unplug/trigger\n"
5141         "\t   The first disables tracing every time block_unplug is hit.\n"
5142         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5143         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5144         "\t     is hit and has a value greater than 1 for the 'nr_rq' event field.\n"
5145         "\t   Like function triggers, the counter is only decremented if it\n"
5146         "\t    enabled or disabled tracing.\n"
5147         "\t   To remove a trigger without a count:\n"
5148         "\t     echo '!<trigger>' > <system>/<event>/trigger\n"
5149         "\t   To remove a trigger with a count:\n"
5150         "\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
5151         "\t   Filters can be ignored when removing a trigger.\n"
5152 #ifdef CONFIG_HIST_TRIGGERS
5153         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5154         "\t    Format: hist:keys=<field1[,field2,...]>\n"
5155         "\t            [:values=<field1[,field2,...]>]\n"
5156         "\t            [:sort=<field1[,field2,...]>]\n"
5157         "\t            [:size=#entries]\n"
5158         "\t            [:pause][:continue][:clear]\n"
5159         "\t            [:name=histname1]\n"
5160         "\t            [:<handler>.<action>]\n"
5161         "\t            [if <filter>]\n\n"
5162         "\t    When a matching event is hit, an entry is added to a hash\n"
5163         "\t    table using the key(s) and value(s) named, and the value of a\n"
5164         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
5165         "\t    correspond to fields in the event's format description.  Keys\n"
5166         "\t    can be any field, or the special string 'stacktrace'.\n"
5167         "\t    Compound keys consisting of up to two fields can be specified\n"
5168         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5169         "\t    fields.  Sort keys consisting of up to two fields can be\n"
5170         "\t    specified using the 'sort' keyword.  The sort direction can\n"
5171         "\t    be modified by appending '.descending' or '.ascending' to a\n"
5172         "\t    sort field.  The 'size' parameter can be used to specify more\n"
5173         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
5174         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
5175         "\t    its histogram data will be shared with other triggers of the\n"
5176         "\t    same name, and trigger hits will update this common data.\n\n"
5177         "\t    Reading the 'hist' file for the event will dump the hash\n"
5178         "\t    table in its entirety to stdout.  If there are multiple hist\n"
5179         "\t    triggers attached to an event, there will be a table for each\n"
5180         "\t    trigger in the output.  The table displayed for a named\n"
5181         "\t    trigger will be the same as any other instance having the\n"
5182         "\t    same name.  The default format used to display a given field\n"
5183         "\t    can be modified by appending any of the following modifiers\n"
5184         "\t    to the field name, as applicable:\n\n"
5185         "\t            .hex        display a number as a hex value\n"
5186         "\t            .sym        display an address as a symbol\n"
5187         "\t            .sym-offset display an address as a symbol and offset\n"
5188         "\t            .execname   display a common_pid as a program name\n"
5189         "\t            .syscall    display a syscall id as a syscall name\n"
5190         "\t            .log2       display log2 value rather than raw number\n"
5191         "\t            .usecs      display a common_timestamp in microseconds\n\n"
5192         "\t    The 'pause' parameter can be used to pause an existing hist\n"
5193         "\t    trigger or to start a hist trigger but not log any events\n"
5194         "\t    until told to do so.  'continue' can be used to start or\n"
5195         "\t    restart a paused hist trigger.\n\n"
5196         "\t    The 'clear' parameter will clear the contents of a running\n"
5197         "\t    hist trigger and leave its current paused/active state\n"
5198         "\t    unchanged.\n\n"
5199         "\t    The enable_hist and disable_hist triggers can be used to\n"
5200         "\t    have one event conditionally start and stop another event's\n"
5201         "\t    already-attached hist trigger.  The syntax is analogous to\n"
5202         "\t    the enable_event and disable_event triggers.\n\n"
5203         "\t    Hist trigger handlers and actions are executed whenever a\n"
5204         "\t    histogram entry is added or updated.  They take the form:\n\n"
5205         "\t        <handler>.<action>\n\n"
5206         "\t    The available handlers are:\n\n"
5207         "\t        onmatch(matching.event)  - invoke on addition or update\n"
5208         "\t        onmax(var)               - invoke if var exceeds current max\n"
5209         "\t        onchange(var)            - invoke action if var changes\n\n"
5210         "\t    The available actions are:\n\n"
5211         "\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5212         "\t        save(field,...)                      - save current event fields\n"
5213 #ifdef CONFIG_TRACER_SNAPSHOT
5214         "\t        snapshot()                           - snapshot the trace buffer\n"
5215 #endif
5216 #endif
5217 ;
5218
5219 static ssize_t
5220 tracing_readme_read(struct file *filp, char __user *ubuf,
5221                        size_t cnt, loff_t *ppos)
5222 {
5223         return simple_read_from_buffer(ubuf, cnt, ppos,
5224                                         readme_msg, strlen(readme_msg));
5225 }
5226
5227 static const struct file_operations tracing_readme_fops = {
5228         .open           = tracing_open_generic,
5229         .read           = tracing_readme_read,
5230         .llseek         = generic_file_llseek,
5231 };
5232
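/*
 * seq_file iterator over tgid_map. The map is indexed by PID, so the
 * iterator walks the array and returns only the entries that have a
 * recorded TGID.
 */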
5233 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5234 {
5235         int *ptr = v;
5236
5237         if (*pos || m->count)
5238                 ptr++;
5239
5240         (*pos)++;
5241
5242         for (; ptr <= &tgid_map[PID_MAX_DEFAULT]; ptr++) {
5243                 if (trace_find_tgid(*ptr))
5244                         return ptr;
5245         }
5246
5247         return NULL;
5248 }
5249
5250 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5251 {
5252         void *v;
5253         loff_t l = 0;
5254
5255         if (!tgid_map)
5256                 return NULL;
5257
5258         v = &tgid_map[0];
5259         while (l <= *pos) {
5260                 v = saved_tgids_next(m, v, &l);
5261                 if (!v)
5262                         return NULL;
5263         }
5264
5265         return v;
5266 }
5267
5268 static void saved_tgids_stop(struct seq_file *m, void *v)
5269 {
5270 }
5271
5272 static int saved_tgids_show(struct seq_file *m, void *v)
5273 {
5274         int pid = (int *)v - tgid_map;
5275
5276         seq_printf(m, "%d %d\n", pid, trace_find_tgid(pid));
5277         return 0;
5278 }
5279
5280 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5281         .start          = saved_tgids_start,
5282         .stop           = saved_tgids_stop,
5283         .next           = saved_tgids_next,
5284         .show           = saved_tgids_show,
5285 };
5286
5287 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5288 {
5289         int ret;
5290
5291         ret = tracing_check_open_get_tr(NULL);
5292         if (ret)
5293                 return ret;
5294
5295         return seq_open(filp, &tracing_saved_tgids_seq_ops);
5296 }
5297
5298
5299 static const struct file_operations tracing_saved_tgids_fops = {
5300         .open           = tracing_saved_tgids_open,
5301         .read           = seq_read,
5302         .llseek         = seq_lseek,
5303         .release        = seq_release,
5304 };
5305
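/*
 * seq_file iterator over the saved cmdlines. Walks map_cmdline_to_pid
 * and skips slots that never recorded a PID (NO_CMDLINE_MAP).
 */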
5306 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5307 {
5308         unsigned int *ptr = v;
5309
5310         if (*pos || m->count)
5311                 ptr++;
5312
5313         (*pos)++;
5314
5315         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5316              ptr++) {
5317                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5318                         continue;
5319
5320                 return ptr;
5321         }
5322
5323         return NULL;
5324 }
5325
5326 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5327 {
5328         void *v;
5329         loff_t l = 0;
5330
5331         preempt_disable();
5332         arch_spin_lock(&trace_cmdline_lock);
5333
5334         v = &savedcmd->map_cmdline_to_pid[0];
5335         while (l <= *pos) {
5336                 v = saved_cmdlines_next(m, v, &l);
5337                 if (!v)
5338                         return NULL;
5339         }
5340
5341         return v;
5342 }
5343
5344 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5345 {
5346         arch_spin_unlock(&trace_cmdline_lock);
5347         preempt_enable();
5348 }
5349
5350 static int saved_cmdlines_show(struct seq_file *m, void *v)
5351 {
5352         char buf[TASK_COMM_LEN];
5353         unsigned int *pid = v;
5354
5355         __trace_find_cmdline(*pid, buf);
5356         seq_printf(m, "%d %s\n", *pid, buf);
5357         return 0;
5358 }
5359
5360 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5361         .start          = saved_cmdlines_start,
5362         .next           = saved_cmdlines_next,
5363         .stop           = saved_cmdlines_stop,
5364         .show           = saved_cmdlines_show,
5365 };
5366
5367 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5368 {
5369         int ret;
5370
5371         ret = tracing_check_open_get_tr(NULL);
5372         if (ret)
5373                 return ret;
5374
5375         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5376 }
5377
5378 static const struct file_operations tracing_saved_cmdlines_fops = {
5379         .open           = tracing_saved_cmdlines_open,
5380         .read           = seq_read,
5381         .llseek         = seq_lseek,
5382         .release        = seq_release,
5383 };
5384
5385 static ssize_t
5386 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5387                                  size_t cnt, loff_t *ppos)
5388 {
5389         char buf[64];
5390         int r;
5391
5392         arch_spin_lock(&trace_cmdline_lock);
5393         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5394         arch_spin_unlock(&trace_cmdline_lock);
5395
5396         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5397 }
5398
5399 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5400 {
5401         kfree(s->saved_cmdlines);
5402         kfree(s->map_cmdline_to_pid);
5403         kfree(s);
5404 }
5405
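/*
 * Resize the saved-cmdlines buffer to @val entries: allocate a new
 * buffer, swap it in under trace_cmdline_lock, then free the old one.
 * Reached from user space by writing the new size, for example
 * (assuming the usual tracefs mount point):
 *
 *        echo 1024 > /sys/kernel/tracing/saved_cmdlines_size
 */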
5406 static int tracing_resize_saved_cmdlines(unsigned int val)
5407 {
5408         struct saved_cmdlines_buffer *s, *savedcmd_temp;
5409
5410         s = kmalloc(sizeof(*s), GFP_KERNEL);
5411         if (!s)
5412                 return -ENOMEM;
5413
5414         if (allocate_cmdlines_buffer(val, s) < 0) {
5415                 kfree(s);
5416                 return -ENOMEM;
5417         }
5418
5419         arch_spin_lock(&trace_cmdline_lock);
5420         savedcmd_temp = savedcmd;
5421         savedcmd = s;
5422         arch_spin_unlock(&trace_cmdline_lock);
5423         free_saved_cmdlines_buffer(savedcmd_temp);
5424
5425         return 0;
5426 }
5427
5428 static ssize_t
5429 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5430                                   size_t cnt, loff_t *ppos)
5431 {
5432         unsigned long val;
5433         int ret;
5434
5435         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5436         if (ret)
5437                 return ret;
5438
5439         /* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
5440         if (!val || val > PID_MAX_DEFAULT)
5441                 return -EINVAL;
5442
5443         ret = tracing_resize_saved_cmdlines((unsigned int)val);
5444         if (ret < 0)
5445                 return ret;
5446
5447         *ppos += cnt;
5448
5449         return cnt;
5450 }
5451
5452 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5453         .open           = tracing_open_generic,
5454         .read           = tracing_saved_cmdlines_size_read,
5455         .write          = tracing_saved_cmdlines_size_write,
5456 };
5457
5458 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
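/*
 * trace_eval_maps is a list of arrays, each made of a head item, the
 * real eval map entries and a tail item linking to the next array (see
 * trace_insert_eval_map_file() below). update_eval_map() skips the
 * head/tail bookkeeping items so the seq_file only shows real maps.
 */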
5459 static union trace_eval_map_item *
5460 update_eval_map(union trace_eval_map_item *ptr)
5461 {
5462         if (!ptr->map.eval_string) {
5463                 if (ptr->tail.next) {
5464                         ptr = ptr->tail.next;
5465                         /* Set ptr to the next real item (skip head) */
5466                         ptr++;
5467                 } else
5468                         return NULL;
5469         }
5470         return ptr;
5471 }
5472
5473 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5474 {
5475         union trace_eval_map_item *ptr = v;
5476
5477         /*
5478          * Paranoid! If ptr points to end, we don't want to increment past it.
5479          * This really should never happen.
5480          */
5481         (*pos)++;
5482         ptr = update_eval_map(ptr);
5483         if (WARN_ON_ONCE(!ptr))
5484                 return NULL;
5485
5486         ptr++;
5487         ptr = update_eval_map(ptr);
5488
5489         return ptr;
5490 }
5491
5492 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5493 {
5494         union trace_eval_map_item *v;
5495         loff_t l = 0;
5496
5497         mutex_lock(&trace_eval_mutex);
5498
5499         v = trace_eval_maps;
5500         if (v)
5501                 v++;
5502
5503         while (v && l < *pos) {
5504                 v = eval_map_next(m, v, &l);
5505         }
5506
5507         return v;
5508 }
5509
5510 static void eval_map_stop(struct seq_file *m, void *v)
5511 {
5512         mutex_unlock(&trace_eval_mutex);
5513 }
5514
5515 static int eval_map_show(struct seq_file *m, void *v)
5516 {
5517         union trace_eval_map_item *ptr = v;
5518
5519         seq_printf(m, "%s %ld (%s)\n",
5520                    ptr->map.eval_string, ptr->map.eval_value,
5521                    ptr->map.system);
5522
5523         return 0;
5524 }
5525
5526 static const struct seq_operations tracing_eval_map_seq_ops = {
5527         .start          = eval_map_start,
5528         .next           = eval_map_next,
5529         .stop           = eval_map_stop,
5530         .show           = eval_map_show,
5531 };
5532
5533 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5534 {
5535         int ret;
5536
5537         ret = tracing_check_open_get_tr(NULL);
5538         if (ret)
5539                 return ret;
5540
5541         return seq_open(filp, &tracing_eval_map_seq_ops);
5542 }
5543
5544 static const struct file_operations tracing_eval_map_fops = {
5545         .open           = tracing_eval_map_open,
5546         .read           = seq_read,
5547         .llseek         = seq_lseek,
5548         .release        = seq_release,
5549 };
5550
5551 static inline union trace_eval_map_item *
5552 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5553 {
5554         /* Return tail of array given the head */
5555         return ptr + ptr->head.length + 1;
5556 }
5557
5558 static void
5559 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5560                            int len)
5561 {
5562         struct trace_eval_map **stop;
5563         struct trace_eval_map **map;
5564         union trace_eval_map_item *map_array;
5565         union trace_eval_map_item *ptr;
5566
5567         stop = start + len;
5568
5569         /*
5570          * The trace_eval_maps contains the map plus a head and tail item,
5571          * where the head holds the module and length of array, and the
5572          * tail holds a pointer to the next list.
5573          */
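        /*
         * Layout of one map_array allocation (len + 2 items):
         *
         *   [ head: mod, length ][ map 0 ] ... [ map len-1 ][ tail ]
         *
         * The last element is zeroed below and acts as the tail; its
         * ->tail.next may later point at the next module's array.
         */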
5574         map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5575         if (!map_array) {
5576                 pr_warn("Unable to allocate trace eval mapping\n");
5577                 return;
5578         }
5579
5580         mutex_lock(&trace_eval_mutex);
5581
5582         if (!trace_eval_maps)
5583                 trace_eval_maps = map_array;
5584         else {
5585                 ptr = trace_eval_maps;
5586                 for (;;) {
5587                         ptr = trace_eval_jmp_to_tail(ptr);
5588                         if (!ptr->tail.next)
5589                                 break;
5590                         ptr = ptr->tail.next;
5591
5592                 }
5593                 ptr->tail.next = map_array;
5594         }
5595         map_array->head.mod = mod;
5596         map_array->head.length = len;
5597         map_array++;
5598
5599         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5600                 map_array->map = **map;
5601                 map_array++;
5602         }
5603         memset(map_array, 0, sizeof(*map_array));
5604
5605         mutex_unlock(&trace_eval_mutex);
5606 }
5607
5608 static void trace_create_eval_file(struct dentry *d_tracer)
5609 {
5610         trace_create_file("eval_map", 0444, d_tracer,
5611                           NULL, &tracing_eval_map_fops);
5612 }
5613
5614 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5615 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5616 static inline void trace_insert_eval_map_file(struct module *mod,
5617                               struct trace_eval_map **start, int len) { }
5618 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5619
5620 static void trace_insert_eval_map(struct module *mod,
5621                                   struct trace_eval_map **start, int len)
5622 {
5623         struct trace_eval_map **map;
5624
5625         if (len <= 0)
5626                 return;
5627
5628         map = start;
5629
5630         trace_event_eval_update(map, len);
5631
5632         trace_insert_eval_map_file(mod, start, len);
5633 }
5634
5635 static ssize_t
5636 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5637                        size_t cnt, loff_t *ppos)
5638 {
5639         struct trace_array *tr = filp->private_data;
5640         char buf[MAX_TRACER_SIZE+2];
5641         int r;
5642
5643         mutex_lock(&trace_types_lock);
5644         r = sprintf(buf, "%s\n", tr->current_trace->name);
5645         mutex_unlock(&trace_types_lock);
5646
5647         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5648 }
5649
5650 int tracer_init(struct tracer *t, struct trace_array *tr)
5651 {
5652         tracing_reset_online_cpus(&tr->array_buffer);
5653         return t->init(tr);
5654 }
5655
5656 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
5657 {
5658         int cpu;
5659
5660         for_each_tracing_cpu(cpu)
5661                 per_cpu_ptr(buf->data, cpu)->entries = val;
5662 }
5663
5664 #ifdef CONFIG_TRACER_MAX_TRACE
5665 /* resize @trace_buf's buffer to the size of @size_buf's entries */
5666 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
5667                                         struct array_buffer *size_buf, int cpu_id)
5668 {
5669         int cpu, ret = 0;
5670
5671         if (cpu_id == RING_BUFFER_ALL_CPUS) {
5672                 for_each_tracing_cpu(cpu) {
5673                         ret = ring_buffer_resize(trace_buf->buffer,
5674                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5675                         if (ret < 0)
5676                                 break;
5677                         per_cpu_ptr(trace_buf->data, cpu)->entries =
5678                                 per_cpu_ptr(size_buf->data, cpu)->entries;
5679                 }
5680         } else {
5681                 ret = ring_buffer_resize(trace_buf->buffer,
5682                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5683                 if (ret == 0)
5684                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5685                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
5686         }
5687
5688         return ret;
5689 }
5690 #endif /* CONFIG_TRACER_MAX_TRACE */
5691
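/*
 * Resize the main ring buffer, and the max/snapshot buffer when the
 * current tracer uses it, either for a single CPU or for all CPUs when
 * @cpu is RING_BUFFER_ALL_CPUS.
 */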
5692 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5693                                         unsigned long size, int cpu)
5694 {
5695         int ret;
5696
5697         /*
5698          * If kernel or user changes the size of the ring buffer
5699          * we use the size that was given, and we can forget about
5700          * expanding it later.
5701          */
5702         ring_buffer_expanded = true;
5703
5704         /* May be called before buffers are initialized */
5705         if (!tr->array_buffer.buffer)
5706                 return 0;
5707
5708         ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
5709         if (ret < 0)
5710                 return ret;
5711
5712 #ifdef CONFIG_TRACER_MAX_TRACE
5713         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
5714             !tr->current_trace->use_max_tr)
5715                 goto out;
5716
5717         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5718         if (ret < 0) {
5719                 int r = resize_buffer_duplicate_size(&tr->array_buffer,
5720                                                      &tr->array_buffer, cpu);
5721                 if (r < 0) {
5722                         /*
5723                          * AARGH! We are left with different
5724                          * size max buffer!!!!
5725                          * The max buffer is our "snapshot" buffer.
5726                          * When a tracer needs a snapshot (one of the
5727                          * latency tracers), it swaps the max buffer
5728                          * with the saved snap shot. We succeeded to
5729                          * with the saved snapshot. We succeeded in
5730                          * updating the size of the main buffer, but failed to
5731                          * update the size of the max buffer. But when we tried
5732                          * failed there too. This is very unlikely to
5733                          * happen, but if it does, warn and kill all
5734                          * tracing.
5735                          */
5736                         WARN_ON(1);
5737                         tracing_disabled = 1;
5738                 }
5739                 return ret;
5740         }
5741
5742         if (cpu == RING_BUFFER_ALL_CPUS)
5743                 set_buffer_entries(&tr->max_buffer, size);
5744         else
5745                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
5746
5747  out:
5748 #endif /* CONFIG_TRACER_MAX_TRACE */
5749
5750         if (cpu == RING_BUFFER_ALL_CPUS)
5751                 set_buffer_entries(&tr->array_buffer, size);
5752         else
5753                 per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size;
5754
5755         return ret;
5756 }
5757
5758 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5759                                   unsigned long size, int cpu_id)
5760 {
5761         int ret = size;
5762
5763         mutex_lock(&trace_types_lock);
5764
5765         if (cpu_id != RING_BUFFER_ALL_CPUS) {
5766                 /* make sure this CPU is enabled in the mask */
5767                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
5768                         ret = -EINVAL;
5769                         goto out;
5770                 }
5771         }
5772
5773         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5774         if (ret < 0)
5775                 ret = -ENOMEM;
5776
5777 out:
5778         mutex_unlock(&trace_types_lock);
5779
5780         return ret;
5781 }
5782
5783
5784 /**
5785  * tracing_update_buffers - used by tracing facility to expand ring buffers
5786  *
5787  * To save memory when tracing is never used on a system that has it
5788  * configured in, the ring buffers are set to a minimum size. But once
5789  * a user starts to use the tracing facility, they need to grow
5790  * to their default size.
5791  *
5792  * This function is to be called when a tracer is about to be used.
5793  */
5794 int tracing_update_buffers(void)
5795 {
5796         int ret = 0;
5797
5798         mutex_lock(&trace_types_lock);
5799         if (!ring_buffer_expanded)
5800                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
5801                                                 RING_BUFFER_ALL_CPUS);
5802         mutex_unlock(&trace_types_lock);
5803
5804         return ret;
5805 }
5806
5807 struct trace_option_dentry;
5808
5809 static void
5810 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
5811
5812 /*
5813  * Used to clear out the tracer before deletion of an instance.
5814  * Must have trace_types_lock held.
5815  */
5816 static void tracing_set_nop(struct trace_array *tr)
5817 {
5818         if (tr->current_trace == &nop_trace)
5819                 return;
5820
5821         tr->current_trace->enabled--;
5822
5823         if (tr->current_trace->reset)
5824                 tr->current_trace->reset(tr);
5825
5826         tr->current_trace = &nop_trace;
5827 }
5828
5829 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5830 {
5831         /* Only enable if the directory has been created already. */
5832         if (!tr->dir)
5833                 return;
5834
5835         create_trace_option_files(tr, t);
5836 }
5837
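/*
 * Switch @tr to the tracer named @buf: look the name up in trace_types,
 * tear down the current tracer, allocate or free the snapshot buffer as
 * the new tracer requires, then call the new tracer's ->init(). Fails
 * with -EBUSY while trace_pipe readers hold a reference.
 */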
5838 int tracing_set_tracer(struct trace_array *tr, const char *buf)
5839 {
5840         struct tracer *t;
5841 #ifdef CONFIG_TRACER_MAX_TRACE
5842         bool had_max_tr;
5843 #endif
5844         int ret = 0;
5845
5846         mutex_lock(&trace_types_lock);
5847
5848         if (!ring_buffer_expanded) {
5849                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5850                                                 RING_BUFFER_ALL_CPUS);
5851                 if (ret < 0)
5852                         goto out;
5853                 ret = 0;
5854         }
5855
5856         for (t = trace_types; t; t = t->next) {
5857                 if (strcmp(t->name, buf) == 0)
5858                         break;
5859         }
5860         if (!t) {
5861                 ret = -EINVAL;
5862                 goto out;
5863         }
5864         if (t == tr->current_trace)
5865                 goto out;
5866
5867 #ifdef CONFIG_TRACER_SNAPSHOT
5868         if (t->use_max_tr) {
5869                 arch_spin_lock(&tr->max_lock);
5870                 if (tr->cond_snapshot)
5871                         ret = -EBUSY;
5872                 arch_spin_unlock(&tr->max_lock);
5873                 if (ret)
5874                         goto out;
5875         }
5876 #endif
5877         /* Some tracers won't work on kernel command line */
5878         if (system_state < SYSTEM_RUNNING && t->noboot) {
5879                 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
5880                         t->name);
5881                 goto out;
5882         }
5883
5884         /* Some tracers are only allowed for the top level buffer */
5885         if (!trace_ok_for_array(t, tr)) {
5886                 ret = -EINVAL;
5887                 goto out;
5888         }
5889
5890         /* If trace pipe files are being read, we can't change the tracer */
5891         if (tr->current_trace->ref) {
5892                 ret = -EBUSY;
5893                 goto out;
5894         }
5895
5896         trace_branch_disable();
5897
5898         tr->current_trace->enabled--;
5899
5900         if (tr->current_trace->reset)
5901                 tr->current_trace->reset(tr);
5902
5903         /* Current trace needs to be nop_trace before synchronize_rcu */
5904         tr->current_trace = &nop_trace;
5905
5906 #ifdef CONFIG_TRACER_MAX_TRACE
5907         had_max_tr = tr->allocated_snapshot;
5908
5909         if (had_max_tr && !t->use_max_tr) {
5910                 /*
5911                  * We need to make sure that the update_max_tr sees that
5912                  * current_trace changed to nop_trace to keep it from
5913                  * swapping the buffers after we resize it.
5914                  * The update_max_tr is called with interrupts disabled,
5915                  * so a synchronize_rcu() is sufficient.
5916                  */
5917                 synchronize_rcu();
5918                 free_snapshot(tr);
5919         }
5920 #endif
5921
5922 #ifdef CONFIG_TRACER_MAX_TRACE
5923         if (t->use_max_tr && !had_max_tr) {
5924                 ret = tracing_alloc_snapshot_instance(tr);
5925                 if (ret < 0)
5926                         goto out;
5927         }
5928 #endif
5929
5930         if (t->init) {
5931                 ret = tracer_init(t, tr);
5932                 if (ret)
5933                         goto out;
5934         }
5935
5936         tr->current_trace = t;
5937         tr->current_trace->enabled++;
5938         trace_branch_enable(tr);
5939  out:
5940         mutex_unlock(&trace_types_lock);
5941
5942         return ret;
5943 }
5944
5945 static ssize_t
5946 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
5947                         size_t cnt, loff_t *ppos)
5948 {
5949         struct trace_array *tr = filp->private_data;
5950         char buf[MAX_TRACER_SIZE+1];
5951         int i;
5952         size_t ret;
5953         int err;
5954
5955         ret = cnt;
5956
5957         if (cnt > MAX_TRACER_SIZE)
5958                 cnt = MAX_TRACER_SIZE;
5959
5960         if (copy_from_user(buf, ubuf, cnt))
5961                 return -EFAULT;
5962
5963         buf[cnt] = 0;
5964
5965         /* strip ending whitespace. */
5966         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
5967                 buf[i] = 0;
5968
5969         err = tracing_set_tracer(tr, buf);
5970         if (err)
5971                 return err;
5972
5973         *ppos += ret;
5974
5975         return ret;
5976 }
5977
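/*
 * Helpers for latency files that store nanoseconds internally but are
 * read and written in microseconds: the read side converts with
 * nsecs_to_usecs(), the write side multiplies the user value by 1000.
 */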
5978 static ssize_t
5979 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
5980                    size_t cnt, loff_t *ppos)
5981 {
5982         char buf[64];
5983         int r;
5984
5985         r = snprintf(buf, sizeof(buf), "%ld\n",
5986                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
5987         if (r > sizeof(buf))
5988                 r = sizeof(buf);
5989         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5990 }
5991
5992 static ssize_t
5993 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
5994                     size_t cnt, loff_t *ppos)
5995 {
5996         unsigned long val;
5997         int ret;
5998
5999         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6000         if (ret)
6001                 return ret;
6002
6003         *ptr = val * 1000;
6004
6005         return cnt;
6006 }
6007
6008 static ssize_t
6009 tracing_thresh_read(struct file *filp, char __user *ubuf,
6010                     size_t cnt, loff_t *ppos)
6011 {
6012         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6013 }
6014
6015 static ssize_t
6016 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6017                      size_t cnt, loff_t *ppos)
6018 {
6019         struct trace_array *tr = filp->private_data;
6020         int ret;
6021
6022         mutex_lock(&trace_types_lock);
6023         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6024         if (ret < 0)
6025                 goto out;
6026
6027         if (tr->current_trace->update_thresh) {
6028                 ret = tr->current_trace->update_thresh(tr);
6029                 if (ret < 0)
6030                         goto out;
6031         }
6032
6033         ret = cnt;
6034 out:
6035         mutex_unlock(&trace_types_lock);
6036
6037         return ret;
6038 }
6039
6040 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6041
6042 static ssize_t
6043 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6044                      size_t cnt, loff_t *ppos)
6045 {
6046         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
6047 }
6048
6049 static ssize_t
6050 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6051                       size_t cnt, loff_t *ppos)
6052 {
6053         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
6054 }
6055
6056 #endif
6057
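/*
 * Open handler for trace_pipe: allocate a trace_iterator for consuming
 * reads and bump current_trace->ref so the tracer cannot be switched
 * while the pipe is open.
 */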
6058 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6059 {
6060         struct trace_array *tr = inode->i_private;
6061         struct trace_iterator *iter;
6062         int ret;
6063
6064         ret = tracing_check_open_get_tr(tr);
6065         if (ret)
6066                 return ret;
6067
6068         mutex_lock(&trace_types_lock);
6069
6070         /* create a buffer to store the information to pass to userspace */
6071         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6072         if (!iter) {
6073                 ret = -ENOMEM;
6074                 __trace_array_put(tr);
6075                 goto out;
6076         }
6077
6078         trace_seq_init(&iter->seq);
6079         iter->trace = tr->current_trace;
6080
6081         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6082                 ret = -ENOMEM;
6083                 goto fail;
6084         }
6085
6086         /* trace pipe does not show start of buffer */
6087         cpumask_setall(iter->started);
6088
6089         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6090                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
6091
6092         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6093         if (trace_clocks[tr->clock_id].in_ns)
6094                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6095
6096         iter->tr = tr;
6097         iter->array_buffer = &tr->array_buffer;
6098         iter->cpu_file = tracing_get_cpu(inode);
6099         mutex_init(&iter->mutex);
6100         filp->private_data = iter;
6101
6102         if (iter->trace->pipe_open)
6103                 iter->trace->pipe_open(iter);
6104
6105         nonseekable_open(inode, filp);
6106
6107         tr->current_trace->ref++;
6108 out:
6109         mutex_unlock(&trace_types_lock);
6110         return ret;
6111
6112 fail:
6113         kfree(iter);
6114         __trace_array_put(tr);
6115         mutex_unlock(&trace_types_lock);
6116         return ret;
6117 }
6118
6119 static int tracing_release_pipe(struct inode *inode, struct file *file)
6120 {
6121         struct trace_iterator *iter = file->private_data;
6122         struct trace_array *tr = inode->i_private;
6123
6124         mutex_lock(&trace_types_lock);
6125
6126         tr->current_trace->ref--;
6127
6128         if (iter->trace->pipe_close)
6129                 iter->trace->pipe_close(iter);
6130
6131         mutex_unlock(&trace_types_lock);
6132
6133         free_cpumask_var(iter->started);
6134         mutex_destroy(&iter->mutex);
6135         kfree(iter);
6136
6137         trace_array_put(tr);
6138
6139         return 0;
6140 }
6141
6142 static __poll_t
6143 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6144 {
6145         struct trace_array *tr = iter->tr;
6146
6147         /* Iterators are static; they should be either filled or empty */
6148         if (trace_buffer_iter(iter, iter->cpu_file))
6149                 return EPOLLIN | EPOLLRDNORM;
6150
6151         if (tr->trace_flags & TRACE_ITER_BLOCK)
6152                 /*
6153                  * Always select as readable when in blocking mode
6154                  */
6155                 return EPOLLIN | EPOLLRDNORM;
6156         else
6157                 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6158                                              filp, poll_table);
6159 }
6160
6161 static __poll_t
6162 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6163 {
6164         struct trace_iterator *iter = filp->private_data;
6165
6166         return trace_poll(iter, filp, poll_table);
6167 }
6168
6169 /* Must be called with iter->mutex held. */
6170 static int tracing_wait_pipe(struct file *filp)
6171 {
6172         struct trace_iterator *iter = filp->private_data;
6173         int ret;
6174
6175         while (trace_empty(iter)) {
6176
6177                 if ((filp->f_flags & O_NONBLOCK)) {
6178                         return -EAGAIN;
6179                 }
6180
6181                 /*
6182                  * We block until we read something and tracing is disabled.
6183                  * We still block if tracing is disabled, but we have never
6184                  * read anything. This allows a user to cat this file, and
6185                  * then enable tracing. But after we have read something,
6186                  * we give an EOF when tracing is again disabled.
6187                  *
6188                  * iter->pos will be 0 if we haven't read anything.
6189                  */
6190                 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6191                         break;
6192
6193                 mutex_unlock(&iter->mutex);
6194
6195                 ret = wait_on_pipe(iter, 0);
6196
6197                 mutex_lock(&iter->mutex);
6198
6199                 if (ret)
6200                         return ret;
6201         }
6202
6203         return 1;
6204 }
6205
6206 /*
6207  * Consumer reader.
6208  */
6209 static ssize_t
6210 tracing_read_pipe(struct file *filp, char __user *ubuf,
6211                   size_t cnt, loff_t *ppos)
6212 {
6213         struct trace_iterator *iter = filp->private_data;
6214         ssize_t sret;
6215
6216         /*
6217          * Avoid more than one consumer on a single file descriptor.
6218          * This is just a matter of trace coherency; the ring buffer itself
6219          * is protected.
6220          */
6221         mutex_lock(&iter->mutex);
6222
6223         /* return any leftover data */
6224         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6225         if (sret != -EBUSY)
6226                 goto out;
6227
6228         trace_seq_init(&iter->seq);
6229
6230         if (iter->trace->read) {
6231                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6232                 if (sret)
6233                         goto out;
6234         }
6235
6236 waitagain:
6237         sret = tracing_wait_pipe(filp);
6238         if (sret <= 0)
6239                 goto out;
6240
6241         /* stop when tracing is finished */
6242         if (trace_empty(iter)) {
6243                 sret = 0;
6244                 goto out;
6245         }
6246
6247         if (cnt >= PAGE_SIZE)
6248                 cnt = PAGE_SIZE - 1;
6249
6250         /* reset all but tr, trace, and overruns */
6251         memset(&iter->seq, 0,
6252                sizeof(struct trace_iterator) -
6253                offsetof(struct trace_iterator, seq));
6254         cpumask_clear(iter->started);
6255         trace_seq_init(&iter->seq);
6256         iter->pos = -1;
6257
6258         trace_event_read_lock();
6259         trace_access_lock(iter->cpu_file);
6260         while (trace_find_next_entry_inc(iter) != NULL) {
6261                 enum print_line_t ret;
6262                 int save_len = iter->seq.seq.len;
6263
6264                 ret = print_trace_line(iter);
6265                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6266                         /* don't print partial lines */
6267                         iter->seq.seq.len = save_len;
6268                         break;
6269                 }
6270                 if (ret != TRACE_TYPE_NO_CONSUME)
6271                         trace_consume(iter);
6272
6273                 if (trace_seq_used(&iter->seq) >= cnt)
6274                         break;
6275
6276                 /*
6277                  * Setting the full flag means we reached the trace_seq buffer
6278                  * size and we should leave by partial output condition above.
6279                  * One of the trace_seq_* functions is not used properly.
6280                  */
6281                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6282                           iter->ent->type);
6283         }
6284         trace_access_unlock(iter->cpu_file);
6285         trace_event_read_unlock();
6286
6287         /* Now copy what we have to the user */
6288         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6289         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6290                 trace_seq_init(&iter->seq);
6291
6292         /*
6293          * If there was nothing to send to user, in spite of consuming trace
6294          * entries, go back to wait for more entries.
6295          */
6296         if (sret == -EBUSY)
6297                 goto waitagain;
6298
6299 out:
6300         mutex_unlock(&iter->mutex);
6301
6302         return sret;
6303 }
6304
6305 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6306                                      unsigned int idx)
6307 {
6308         __free_page(spd->pages[idx]);
6309 }
6310
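/*
 * Format trace entries into the current page-sized seq buffer for
 * tracing_splice_read_pipe(), consuming entries as they are printed.
 * Returns how much of @rem (the splice length budget) is left.
 */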
6311 static size_t
6312 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6313 {
6314         size_t count;
6315         int save_len;
6316         int ret;
6317
6318         /* Seq buffer is page-sized, exactly what we need. */
6319         for (;;) {
6320                 save_len = iter->seq.seq.len;
6321                 ret = print_trace_line(iter);
6322
6323                 if (trace_seq_has_overflowed(&iter->seq)) {
6324                         iter->seq.seq.len = save_len;
6325                         break;
6326                 }
6327
6328                 /*
6329                  * This should not be hit, because PARTIAL_LINE is only
6330                  * returned when iter->seq has overflowed, which is handled
6331                  * above. But check it anyway to be safe.
6332                  */
6333                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6334                         iter->seq.seq.len = save_len;
6335                         break;
6336                 }
6337
6338                 count = trace_seq_used(&iter->seq) - save_len;
6339                 if (rem < count) {
6340                         rem = 0;
6341                         iter->seq.seq.len = save_len;
6342                         break;
6343                 }
6344
6345                 if (ret != TRACE_TYPE_NO_CONSUME)
6346                         trace_consume(iter);
6347                 rem -= count;
6348                 if (!trace_find_next_entry_inc(iter)) {
6349                         rem = 0;
6350                         iter->ent = NULL;
6351                         break;
6352                 }
6353         }
6354
6355         return rem;
6356 }
6357
6358 static ssize_t tracing_splice_read_pipe(struct file *filp,
6359                                         loff_t *ppos,
6360                                         struct pipe_inode_info *pipe,
6361                                         size_t len,
6362                                         unsigned int flags)
6363 {
6364         struct page *pages_def[PIPE_DEF_BUFFERS];
6365         struct partial_page partial_def[PIPE_DEF_BUFFERS];
6366         struct trace_iterator *iter = filp->private_data;
6367         struct splice_pipe_desc spd = {
6368                 .pages          = pages_def,
6369                 .partial        = partial_def,
6370                 .nr_pages       = 0, /* This gets updated below. */
6371                 .nr_pages_max   = PIPE_DEF_BUFFERS,
6372                 .ops            = &default_pipe_buf_ops,
6373                 .spd_release    = tracing_spd_release_pipe,
6374         };
6375         ssize_t ret;
6376         size_t rem;
6377         unsigned int i;
6378
6379         if (splice_grow_spd(pipe, &spd))
6380                 return -ENOMEM;
6381
6382         mutex_lock(&iter->mutex);
6383
6384         if (iter->trace->splice_read) {
6385                 ret = iter->trace->splice_read(iter, filp,
6386                                                ppos, pipe, len, flags);
6387                 if (ret)
6388                         goto out_err;
6389         }
6390
6391         ret = tracing_wait_pipe(filp);
6392         if (ret <= 0)
6393                 goto out_err;
6394
6395         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6396                 ret = -EFAULT;
6397                 goto out_err;
6398         }
6399
6400         trace_event_read_lock();
6401         trace_access_lock(iter->cpu_file);
6402
6403         /* Fill as many pages as possible. */
6404         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6405                 spd.pages[i] = alloc_page(GFP_KERNEL);
6406                 if (!spd.pages[i])
6407                         break;
6408
6409                 rem = tracing_fill_pipe_page(rem, iter);
6410
6411                 /* Copy the data into the page, so we can start over. */
6412                 ret = trace_seq_to_buffer(&iter->seq,
6413                                           page_address(spd.pages[i]),
6414                                           trace_seq_used(&iter->seq));
6415                 if (ret < 0) {
6416                         __free_page(spd.pages[i]);
6417                         break;
6418                 }
6419                 spd.partial[i].offset = 0;
6420                 spd.partial[i].len = trace_seq_used(&iter->seq);
6421
6422                 trace_seq_init(&iter->seq);
6423         }
6424
6425         trace_access_unlock(iter->cpu_file);
6426         trace_event_read_unlock();
6427         mutex_unlock(&iter->mutex);
6428
6429         spd.nr_pages = i;
6430
6431         if (i)
6432                 ret = splice_to_pipe(pipe, &spd);
6433         else
6434                 ret = 0;
6435 out:
6436         splice_shrink_spd(&spd);
6437         return ret;
6438
6439 out_err:
6440         mutex_unlock(&iter->mutex);
6441         goto out;
6442 }
6443
6444 static ssize_t
6445 tracing_entries_read(struct file *filp, char __user *ubuf,
6446                      size_t cnt, loff_t *ppos)
6447 {
6448         struct inode *inode = file_inode(filp);
6449         struct trace_array *tr = inode->i_private;
6450         int cpu = tracing_get_cpu(inode);
6451         char buf[64];
6452         int r = 0;
6453         ssize_t ret;
6454
6455         mutex_lock(&trace_types_lock);
6456
6457         if (cpu == RING_BUFFER_ALL_CPUS) {
6458                 int cpu, buf_size_same;
6459                 unsigned long size;
6460
6461                 size = 0;
6462                 buf_size_same = 1;
6463                 /* check if all cpu sizes are same */
6464                 for_each_tracing_cpu(cpu) {
6465                         /* fill in the size from first enabled cpu */
6466                         if (size == 0)
6467                                 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6468                         if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6469                                 buf_size_same = 0;
6470                                 break;
6471                         }
6472                 }
6473
6474                 if (buf_size_same) {
6475                         if (!ring_buffer_expanded)
6476                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
6477                                             size >> 10,
6478                                             trace_buf_size >> 10);
6479                         else
6480                                 r = sprintf(buf, "%lu\n", size >> 10);
6481                 } else
6482                         r = sprintf(buf, "X\n");
6483         } else
6484                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6485
6486         mutex_unlock(&trace_types_lock);
6487
6488         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6489         return ret;
6490 }
6491
6492 static ssize_t
6493 tracing_entries_write(struct file *filp, const char __user *ubuf,
6494                       size_t cnt, loff_t *ppos)
6495 {
6496         struct inode *inode = file_inode(filp);
6497         struct trace_array *tr = inode->i_private;
6498         unsigned long val;
6499         int ret;
6500
6501         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6502         if (ret)
6503                 return ret;
6504
6505         /* must have at least 1 entry */
6506         if (!val)
6507                 return -EINVAL;
6508
6509         /* value is in KB */
6510         val <<= 10;
6511         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6512         if (ret < 0)
6513                 return ret;
6514
6515         *ppos += cnt;
6516
6517         return cnt;
6518 }
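
/*
 * Illustrative sketch (comment only): tracing_entries_write() backs the
 * "buffer_size_kb" tracefs file, so resizing the ring buffer from user space
 * is a plain write of a decimal value in KB.  A hypothetical helper,
 * assuming tracefs is mounted at /sys/kernel/tracing:
 *
 *	#include <fcntl.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *
 *	static int set_buffer_size_kb(const char *kb)
 *	{
 *		int fd = open("/sys/kernel/tracing/buffer_size_kb", O_WRONLY);
 *		int ret;
 *
 *		if (fd < 0)
 *			return -1;
 *		ret = write(fd, kb, strlen(kb)) < 0 ? -1 : 0;
 *		close(fd);
 *		return ret;
 *	}
 *
 * e.g. set_buffer_size_kb("4096") gives every per-CPU buffer 4 MB, matching
 * the "value is in KB" shift above.
 */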
6519
6520 static ssize_t
6521 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6522                                 size_t cnt, loff_t *ppos)
6523 {
6524         struct trace_array *tr = filp->private_data;
6525         char buf[64];
6526         int r, cpu;
6527         unsigned long size = 0, expanded_size = 0;
6528
6529         mutex_lock(&trace_types_lock);
6530         for_each_tracing_cpu(cpu) {
6531                 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
6532                 if (!ring_buffer_expanded)
6533                         expanded_size += trace_buf_size >> 10;
6534         }
6535         if (ring_buffer_expanded)
6536                 r = sprintf(buf, "%lu\n", size);
6537         else
6538                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6539         mutex_unlock(&trace_types_lock);
6540
6541         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6542 }
6543
6544 static ssize_t
6545 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6546                           size_t cnt, loff_t *ppos)
6547 {
6548         /*
6549          * There is no need to read what the user has written; this function
6550          * only exists so that writing to it (e.g. via "echo") does not fail.
6551          */
6552
6553         *ppos += cnt;
6554
6555         return cnt;
6556 }
6557
6558 static int
6559 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6560 {
6561         struct trace_array *tr = inode->i_private;
6562
6563         /* disable tracing? */
6564         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6565                 tracer_tracing_off(tr);
6566         /* resize the ring buffer to 0 */
6567         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6568
6569         trace_array_put(tr);
6570
6571         return 0;
6572 }
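
/*
 * Illustrative sketch (comment only): the two handlers above implement the
 * "free_buffer" file.  Any write is accepted, and the actual work happens on
 * release: the buffers are shrunk to zero and, if the stop-on-free option is
 * set, tracing is turned off.  From user space (path assumed to be the
 * default tracefs mount):
 *
 *	int fd = open("/sys/kernel/tracing/free_buffer", O_WRONLY);
 *
 *	if (fd >= 0) {
 *		write(fd, "1", 1);	// contents are ignored
 *		close(fd);		// this is what frees the buffers
 *	}
 */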
6573
6574 static ssize_t
6575 tracing_mark_write(struct file *filp, const char __user *ubuf,
6576                                         size_t cnt, loff_t *fpos)
6577 {
6578         struct trace_array *tr = filp->private_data;
6579         struct ring_buffer_event *event;
6580         enum event_trigger_type tt = ETT_NONE;
6581         struct trace_buffer *buffer;
6582         struct print_entry *entry;
6583         unsigned long irq_flags;
6584         ssize_t written;
6585         int size;
6586         int len;
6587
6588 /* Used in tracing_mark_raw_write() as well */
6589 #define FAULTED_STR "<faulted>"
6590 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
6591
6592         if (tracing_disabled)
6593                 return -EINVAL;
6594
6595         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6596                 return -EINVAL;
6597
6598         if (cnt > TRACE_BUF_SIZE)
6599                 cnt = TRACE_BUF_SIZE;
6600
6601         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6602
6603         local_save_flags(irq_flags);
6604         size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
6605
6606         /* If less than "<faulted>", then make sure we can still add that */
6607         if (cnt < FAULTED_SIZE)
6608                 size += FAULTED_SIZE - cnt;
6609
6610         buffer = tr->array_buffer.buffer;
6611         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6612                                             irq_flags, preempt_count());
6613         if (unlikely(!event))
6614                 /* Ring buffer disabled, return as if not open for write */
6615                 return -EBADF;
6616
6617         entry = ring_buffer_event_data(event);
6618         entry->ip = _THIS_IP_;
6619
6620         len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6621         if (len) {
6622                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6623                 cnt = FAULTED_SIZE;
6624                 written = -EFAULT;
6625         } else
6626                 written = cnt;
6627         len = cnt;
6628
6629         if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
6630                 /* do not add \n before testing triggers, but add \0 */
6631                 entry->buf[cnt] = '\0';
6632                 tt = event_triggers_call(tr->trace_marker_file, entry, event);
6633         }
6634
6635         if (entry->buf[cnt - 1] != '\n') {
6636                 entry->buf[cnt] = '\n';
6637                 entry->buf[cnt + 1] = '\0';
6638         } else
6639                 entry->buf[cnt] = '\0';
6640
6641         __buffer_unlock_commit(buffer, event);
6642
6643         if (tt)
6644                 event_triggers_post_call(tr->trace_marker_file, tt);
6645
6646         if (written > 0)
6647                 *fpos += written;
6648
6649         return written;
6650 }
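
/*
 * Illustrative sketch (comment only): tracing_mark_write() implements the
 * "trace_marker" file, letting user space annotate the trace by writing a
 * string.  A hypothetical helper, assuming tracefs at /sys/kernel/tracing:
 *
 *	#include <fcntl.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *
 *	static int trace_mark(const char *msg)
 *	{
 *		int fd = open("/sys/kernel/tracing/trace_marker", O_WRONLY);
 *		int ret;
 *
 *		if (fd < 0)
 *			return -1;
 *		ret = write(fd, msg, strlen(msg));	// one TRACE_PRINT event
 *		close(fd);
 *		return ret < 0 ? -1 : 0;
 *	}
 *
 * Each write becomes a single event, truncated to TRACE_BUF_SIZE bytes as
 * enforced above, with a '\n' appended if the string lacks one.
 */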
6651
6652 /* Limit it for now to 3K (including tag) */
6653 #define RAW_DATA_MAX_SIZE (1024*3)
6654
6655 static ssize_t
6656 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6657                                         size_t cnt, loff_t *fpos)
6658 {
6659         struct trace_array *tr = filp->private_data;
6660         struct ring_buffer_event *event;
6661         struct trace_buffer *buffer;
6662         struct raw_data_entry *entry;
6663         unsigned long irq_flags;
6664         ssize_t written;
6665         int size;
6666         int len;
6667
6668 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6669
6670         if (tracing_disabled)
6671                 return -EINVAL;
6672
6673         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6674                 return -EINVAL;
6675
6676         /* The marker must at least have a tag id */
6677         if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
6678                 return -EINVAL;
6679
6680         if (cnt > TRACE_BUF_SIZE)
6681                 cnt = TRACE_BUF_SIZE;
6682
6683         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6684
6685         local_save_flags(irq_flags);
6686         size = sizeof(*entry) + cnt;
6687         if (cnt < FAULT_SIZE_ID)
6688                 size += FAULT_SIZE_ID - cnt;
6689
6690         buffer = tr->array_buffer.buffer;
6691         event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
6692                                             irq_flags, preempt_count());
6693         if (!event)
6694                 /* Ring buffer disabled, return as if not open for write */
6695                 return -EBADF;
6696
6697         entry = ring_buffer_event_data(event);
6698
6699         len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
6700         if (len) {
6701                 entry->id = -1;
6702                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6703                 written = -EFAULT;
6704         } else
6705                 written = cnt;
6706
6707         __buffer_unlock_commit(buffer, event);
6708
6709         if (written > 0)
6710                 *fpos += written;
6711
6712         return written;
6713 }
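
/*
 * Illustrative sketch (comment only): tracing_mark_raw_write() backs the
 * "trace_marker_raw" file.  The payload must begin with a 4-byte tag id,
 * followed by arbitrary binary data, e.g. (field values are made up):
 *
 *	struct {
 *		unsigned int	id;
 *		char		data[16];
 *	} raw = { .id = 0x1234, .data = "payload" };
 *
 *	write(fd, &raw, sizeof(raw));	// fd open on trace_marker_raw
 *
 * Writes shorter than sizeof(unsigned int) or larger than RAW_DATA_MAX_SIZE
 * are rejected with -EINVAL, as checked above.
 */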
6714
6715 static int tracing_clock_show(struct seq_file *m, void *v)
6716 {
6717         struct trace_array *tr = m->private;
6718         int i;
6719
6720         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
6721                 seq_printf(m,
6722                         "%s%s%s%s", i ? " " : "",
6723                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
6724                         i == tr->clock_id ? "]" : "");
6725         seq_putc(m, '\n');
6726
6727         return 0;
6728 }
6729
6730 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
6731 {
6732         int i;
6733
6734         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
6735                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
6736                         break;
6737         }
6738         if (i == ARRAY_SIZE(trace_clocks))
6739                 return -EINVAL;
6740
6741         mutex_lock(&trace_types_lock);
6742
6743         tr->clock_id = i;
6744
6745         ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
6746
6747         /*
6748          * The new clock may not be consistent with the previous clock.
6749          * Reset the buffer so that it doesn't contain incomparable timestamps.
6750          */
6751         tracing_reset_online_cpus(&tr->array_buffer);
6752
6753 #ifdef CONFIG_TRACER_MAX_TRACE
6754         if (tr->max_buffer.buffer)
6755                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
6756         tracing_reset_online_cpus(&tr->max_buffer);
6757 #endif
6758
6759         mutex_unlock(&trace_types_lock);
6760
6761         return 0;
6762 }
6763
6764 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6765                                    size_t cnt, loff_t *fpos)
6766 {
6767         struct seq_file *m = filp->private_data;
6768         struct trace_array *tr = m->private;
6769         char buf[64];
6770         const char *clockstr;
6771         int ret;
6772
6773         if (cnt >= sizeof(buf))
6774                 return -EINVAL;
6775
6776         if (copy_from_user(buf, ubuf, cnt))
6777                 return -EFAULT;
6778
6779         buf[cnt] = 0;
6780
6781         clockstr = strstrip(buf);
6782
6783         ret = tracing_set_clock(tr, clockstr);
6784         if (ret)
6785                 return ret;
6786
6787         *fpos += cnt;
6788
6789         return cnt;
6790 }
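
/*
 * Illustrative sketch (comment only): the "trace_clock" file accepts one of
 * the clock names listed by tracing_clock_show(), e.g. "local", "global" or
 * "mono".  Assuming the default tracefs mount point:
 *
 *	int fd = open("/sys/kernel/tracing/trace_clock", O_WRONLY);
 *
 *	if (fd >= 0) {
 *		write(fd, "global", 6);		// switch to the global clock
 *		close(fd);
 *	}
 *
 * Switching clocks resets the ring buffer (see tracing_set_clock() above),
 * so previously recorded events are discarded.
 */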
6791
6792 static int tracing_clock_open(struct inode *inode, struct file *file)
6793 {
6794         struct trace_array *tr = inode->i_private;
6795         int ret;
6796
6797         ret = tracing_check_open_get_tr(tr);
6798         if (ret)
6799                 return ret;
6800
6801         ret = single_open(file, tracing_clock_show, inode->i_private);
6802         if (ret < 0)
6803                 trace_array_put(tr);
6804
6805         return ret;
6806 }
6807
6808 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
6809 {
6810         struct trace_array *tr = m->private;
6811
6812         mutex_lock(&trace_types_lock);
6813
6814         if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
6815                 seq_puts(m, "delta [absolute]\n");
6816         else
6817                 seq_puts(m, "[delta] absolute\n");
6818
6819         mutex_unlock(&trace_types_lock);
6820
6821         return 0;
6822 }
6823
6824 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
6825 {
6826         struct trace_array *tr = inode->i_private;
6827         int ret;
6828
6829         ret = tracing_check_open_get_tr(tr);
6830         if (ret)
6831                 return ret;
6832
6833         ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
6834         if (ret < 0)
6835                 trace_array_put(tr);
6836
6837         return ret;
6838 }
6839
6840 int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs)
6841 {
6842         int ret = 0;
6843
6844         mutex_lock(&trace_types_lock);
6845
6846         if (abs && tr->time_stamp_abs_ref++)
6847                 goto out;
6848
6849         if (!abs) {
6850                 if (WARN_ON_ONCE(!tr->time_stamp_abs_ref)) {
6851                         ret = -EINVAL;
6852                         goto out;
6853                 }
6854
6855                 if (--tr->time_stamp_abs_ref)
6856                         goto out;
6857         }
6858
6859         ring_buffer_set_time_stamp_abs(tr->array_buffer.buffer, abs);
6860
6861 #ifdef CONFIG_TRACER_MAX_TRACE
6862         if (tr->max_buffer.buffer)
6863                 ring_buffer_set_time_stamp_abs(tr->max_buffer.buffer, abs);
6864 #endif
6865  out:
6866         mutex_unlock(&trace_types_lock);
6867
6868         return ret;
6869 }
6870
6871 struct ftrace_buffer_info {
6872         struct trace_iterator   iter;
6873         void                    *spare;
6874         unsigned int            spare_cpu;
6875         unsigned int            read;
6876 };
6877
6878 #ifdef CONFIG_TRACER_SNAPSHOT
6879 static int tracing_snapshot_open(struct inode *inode, struct file *file)
6880 {
6881         struct trace_array *tr = inode->i_private;
6882         struct trace_iterator *iter;
6883         struct seq_file *m;
6884         int ret;
6885
6886         ret = tracing_check_open_get_tr(tr);
6887         if (ret)
6888                 return ret;
6889
6890         if (file->f_mode & FMODE_READ) {
6891                 iter = __tracing_open(inode, file, true);
6892                 if (IS_ERR(iter))
6893                         ret = PTR_ERR(iter);
6894         } else {
6895                 /* Writes still need the seq_file to hold the private data */
6896                 ret = -ENOMEM;
6897                 m = kzalloc(sizeof(*m), GFP_KERNEL);
6898                 if (!m)
6899                         goto out;
6900                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6901                 if (!iter) {
6902                         kfree(m);
6903                         goto out;
6904                 }
6905                 ret = 0;
6906
6907                 iter->tr = tr;
6908                 iter->array_buffer = &tr->max_buffer;
6909                 iter->cpu_file = tracing_get_cpu(inode);
6910                 m->private = iter;
6911                 file->private_data = m;
6912         }
6913 out:
6914         if (ret < 0)
6915                 trace_array_put(tr);
6916
6917         return ret;
6918 }
6919
6920 static ssize_t
6921 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
6922                        loff_t *ppos)
6923 {
6924         struct seq_file *m = filp->private_data;
6925         struct trace_iterator *iter = m->private;
6926         struct trace_array *tr = iter->tr;
6927         unsigned long val;
6928         int ret;
6929
6930         ret = tracing_update_buffers();
6931         if (ret < 0)
6932                 return ret;
6933
6934         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6935         if (ret)
6936                 return ret;
6937
6938         mutex_lock(&trace_types_lock);
6939
6940         if (tr->current_trace->use_max_tr) {
6941                 ret = -EBUSY;
6942                 goto out;
6943         }
6944
6945         arch_spin_lock(&tr->max_lock);
6946         if (tr->cond_snapshot)
6947                 ret = -EBUSY;
6948         arch_spin_unlock(&tr->max_lock);
6949         if (ret)
6950                 goto out;
6951
6952         switch (val) {
6953         case 0:
6954                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6955                         ret = -EINVAL;
6956                         break;
6957                 }
6958                 if (tr->allocated_snapshot)
6959                         free_snapshot(tr);
6960                 break;
6961         case 1:
6962 /* Only allow per-cpu swap if the ring buffer supports it */
6963 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
6964                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6965                         ret = -EINVAL;
6966                         break;
6967                 }
6968 #endif
6969                 if (tr->allocated_snapshot)
6970                         ret = resize_buffer_duplicate_size(&tr->max_buffer,
6971                                         &tr->array_buffer, iter->cpu_file);
6972                 else
6973                         ret = tracing_alloc_snapshot_instance(tr);
6974                 if (ret < 0)
6975                         break;
6976                 local_irq_disable();
6977                 /* Now, we're going to swap */
6978                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6979                         update_max_tr(tr, current, smp_processor_id(), NULL);
6980                 else
6981                         update_max_tr_single(tr, current, iter->cpu_file);
6982                 local_irq_enable();
6983                 break;
6984         default:
6985                 if (tr->allocated_snapshot) {
6986                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6987                                 tracing_reset_online_cpus(&tr->max_buffer);
6988                         else
6989                                 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
6990                 }
6991                 break;
6992         }
6993
6994         if (ret >= 0) {
6995                 *ppos += cnt;
6996                 ret = cnt;
6997         }
6998 out:
6999         mutex_unlock(&trace_types_lock);
7000         return ret;
7001 }
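
/*
 * Illustrative sketch (comment only): the value written to the "snapshot"
 * file selects the action taken by the switch statement above:
 *
 *	write(fd, "0", 1);	// free the snapshot buffer
 *	write(fd, "1", 1);	// allocate if needed and take a snapshot
 *	write(fd, "2", 1);	// clear the snapshot buffer, keep it allocated
 *
 * where fd is a descriptor opened for writing on the instance's snapshot
 * file (or a per_cpu/cpuN/snapshot file when per-CPU swap is supported).
 */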
7002
7003 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7004 {
7005         struct seq_file *m = file->private_data;
7006         int ret;
7007
7008         ret = tracing_release(inode, file);
7009
7010         if (file->f_mode & FMODE_READ)
7011                 return ret;
7012
7013         /* If write only, the seq_file is just a stub */
7014         if (m)
7015                 kfree(m->private);
7016         kfree(m);
7017
7018         return 0;
7019 }
7020
7021 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7022 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7023                                     size_t count, loff_t *ppos);
7024 static int tracing_buffers_release(struct inode *inode, struct file *file);
7025 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7026                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7027
7028 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7029 {
7030         struct ftrace_buffer_info *info;
7031         int ret;
7032
7033         /* The following checks for tracefs lockdown */
7034         ret = tracing_buffers_open(inode, filp);
7035         if (ret < 0)
7036                 return ret;
7037
7038         info = filp->private_data;
7039
7040         if (info->iter.trace->use_max_tr) {
7041                 tracing_buffers_release(inode, filp);
7042                 return -EBUSY;
7043         }
7044
7045         info->iter.snapshot = true;
7046         info->iter.array_buffer = &info->iter.tr->max_buffer;
7047
7048         return ret;
7049 }
7050
7051 #endif /* CONFIG_TRACER_SNAPSHOT */
7052
7053
7054 static const struct file_operations tracing_thresh_fops = {
7055         .open           = tracing_open_generic,
7056         .read           = tracing_thresh_read,
7057         .write          = tracing_thresh_write,
7058         .llseek         = generic_file_llseek,
7059 };
7060
7061 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7062 static const struct file_operations tracing_max_lat_fops = {
7063         .open           = tracing_open_generic,
7064         .read           = tracing_max_lat_read,
7065         .write          = tracing_max_lat_write,
7066         .llseek         = generic_file_llseek,
7067 };
7068 #endif
7069
7070 static const struct file_operations set_tracer_fops = {
7071         .open           = tracing_open_generic,
7072         .read           = tracing_set_trace_read,
7073         .write          = tracing_set_trace_write,
7074         .llseek         = generic_file_llseek,
7075 };
7076
7077 static const struct file_operations tracing_pipe_fops = {
7078         .open           = tracing_open_pipe,
7079         .poll           = tracing_poll_pipe,
7080         .read           = tracing_read_pipe,
7081         .splice_read    = tracing_splice_read_pipe,
7082         .release        = tracing_release_pipe,
7083         .llseek         = no_llseek,
7084 };
7085
7086 static const struct file_operations tracing_entries_fops = {
7087         .open           = tracing_open_generic_tr,
7088         .read           = tracing_entries_read,
7089         .write          = tracing_entries_write,
7090         .llseek         = generic_file_llseek,
7091         .release        = tracing_release_generic_tr,
7092 };
7093
7094 static const struct file_operations tracing_total_entries_fops = {
7095         .open           = tracing_open_generic_tr,
7096         .read           = tracing_total_entries_read,
7097         .llseek         = generic_file_llseek,
7098         .release        = tracing_release_generic_tr,
7099 };
7100
7101 static const struct file_operations tracing_free_buffer_fops = {
7102         .open           = tracing_open_generic_tr,
7103         .write          = tracing_free_buffer_write,
7104         .release        = tracing_free_buffer_release,
7105 };
7106
7107 static const struct file_operations tracing_mark_fops = {
7108         .open           = tracing_open_generic_tr,
7109         .write          = tracing_mark_write,
7110         .llseek         = generic_file_llseek,
7111         .release        = tracing_release_generic_tr,
7112 };
7113
7114 static const struct file_operations tracing_mark_raw_fops = {
7115         .open           = tracing_open_generic_tr,
7116         .write          = tracing_mark_raw_write,
7117         .llseek         = generic_file_llseek,
7118         .release        = tracing_release_generic_tr,
7119 };
7120
7121 static const struct file_operations trace_clock_fops = {
7122         .open           = tracing_clock_open,
7123         .read           = seq_read,
7124         .llseek         = seq_lseek,
7125         .release        = tracing_single_release_tr,
7126         .write          = tracing_clock_write,
7127 };
7128
7129 static const struct file_operations trace_time_stamp_mode_fops = {
7130         .open           = tracing_time_stamp_mode_open,
7131         .read           = seq_read,
7132         .llseek         = seq_lseek,
7133         .release        = tracing_single_release_tr,
7134 };
7135
7136 #ifdef CONFIG_TRACER_SNAPSHOT
7137 static const struct file_operations snapshot_fops = {
7138         .open           = tracing_snapshot_open,
7139         .read           = seq_read,
7140         .write          = tracing_snapshot_write,
7141         .llseek         = tracing_lseek,
7142         .release        = tracing_snapshot_release,
7143 };
7144
7145 static const struct file_operations snapshot_raw_fops = {
7146         .open           = snapshot_raw_open,
7147         .read           = tracing_buffers_read,
7148         .release        = tracing_buffers_release,
7149         .splice_read    = tracing_buffers_splice_read,
7150         .llseek         = no_llseek,
7151 };
7152
7153 #endif /* CONFIG_TRACER_SNAPSHOT */
7154
7155 #define TRACING_LOG_ERRS_MAX    8
7156 #define TRACING_LOG_LOC_MAX     128
7157
7158 #define CMD_PREFIX "  Command: "
7159
7160 struct err_info {
7161         const char      **errs; /* ptr to loc-specific array of err strings */
7162         u8              type;   /* index into errs -> specific err string */
7163         u8              pos;    /* MAX_FILTER_STR_VAL = 256 */
7164         u64             ts;
7165 };
7166
7167 struct tracing_log_err {
7168         struct list_head        list;
7169         struct err_info         info;
7170         char                    loc[TRACING_LOG_LOC_MAX]; /* err location */
7171         char                    cmd[MAX_FILTER_STR_VAL]; /* what caused err */
7172 };
7173
7174 static DEFINE_MUTEX(tracing_err_log_lock);
7175
7176 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
7177 {
7178         struct tracing_log_err *err;
7179
7180         if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7181                 err = kzalloc(sizeof(*err), GFP_KERNEL);
7182                 if (!err)
7183                         err = ERR_PTR(-ENOMEM);
7184                 tr->n_err_log_entries++;
7185
7186                 return err;
7187         }
7188
7189         err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7190         list_del(&err->list);
7191
7192         return err;
7193 }
7194
7195 /**
7196  * err_pos - find the position of a string within a command for error careting
7197  * @cmd: The tracing command that caused the error
7198  * @str: The string to position the caret at within @cmd
7199  *
7200  * Finds the position of the first occurrence of @str within @cmd.  The
7201  * return value can be passed to tracing_log_err() for caret placement
7202  * within @cmd.
7203  *
7204  * Returns the index within @cmd of the first occurrence of @str or 0
7205  * if @str was not found.
7206  */
7207 unsigned int err_pos(char *cmd, const char *str)
7208 {
7209         char *found;
7210
7211         if (WARN_ON(!strlen(cmd)))
7212                 return 0;
7213
7214         found = strstr(cmd, str);
7215         if (found)
7216                 return found - cmd;
7217
7218         return 0;
7219 }
7220
7221 /**
7222  * tracing_log_err - write an error to the tracing error log
7223  * @tr: The associated trace array for the error (NULL for top level array)
7224  * @loc: A string describing where the error occurred
7225  * @cmd: The tracing command that caused the error
7226  * @errs: The array of loc-specific static error strings
7227  * @type: The index into errs[], which produces the specific static err string
7228  * @pos: The position the caret should be placed in the cmd
7229  *
7230  * Writes an error into tracing/error_log of the form:
7231  *
7232  * <loc>: error: <text>
7233  *   Command: <cmd>
7234  *              ^
7235  *
7236  * tracing/error_log is a small log file containing the last
7237  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7238  * unless there has been a tracing error, and the error log can be
7239  * cleared and have its memory freed by writing the empty string in
7240  * truncation mode to it, i.e. echo > tracing/error_log.
7241  *
7242  * NOTE: the @errs array and the @type param are used to
7243  * produce a static error string - this string is not copied and saved
7244  * when the error is logged - only a pointer to it is saved.  See
7245  * existing callers for examples of how static strings are typically
7246  * defined for use with tracing_log_err().
7247  */
7248 void tracing_log_err(struct trace_array *tr,
7249                      const char *loc, const char *cmd,
7250                      const char **errs, u8 type, u8 pos)
7251 {
7252         struct tracing_log_err *err;
7253
7254         if (!tr)
7255                 tr = &global_trace;
7256
7257         mutex_lock(&tracing_err_log_lock);
7258         err = get_tracing_log_err(tr);
7259         if (PTR_ERR(err) == -ENOMEM) {
7260                 mutex_unlock(&tracing_err_log_lock);
7261                 return;
7262         }
7263
7264         snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7265         snprintf(err->cmd, MAX_FILTER_STR_VAL, "\n" CMD_PREFIX "%s\n", cmd);
7266
7267         err->info.errs = errs;
7268         err->info.type = type;
7269         err->info.pos = pos;
7270         err->info.ts = local_clock();
7271
7272         list_add_tail(&err->list, &tr->err_log);
7273         mutex_unlock(&tracing_err_log_lock);
7274 }
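
/*
 * Illustrative sketch (comment only, hypothetical caller): the usual pattern
 * is a static array of error strings indexed by an enum, with err_pos()
 * supplying the caret position.  "foo" below is made up; see existing
 * callers for real usage:
 *
 *	static const char *foo_errs[] = { "Invalid argument", "Duplicate name" };
 *	enum { FOO_ERR_INVAL, FOO_ERR_DUP };
 *
 *	static void foo_report_dup(struct trace_array *tr, char *cmd,
 *				   const char *name)
 *	{
 *		tracing_log_err(tr, "foo: parse", cmd, foo_errs,
 *				FOO_ERR_DUP, err_pos(cmd, name));
 *	}
 */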
7275
7276 static void clear_tracing_err_log(struct trace_array *tr)
7277 {
7278         struct tracing_log_err *err, *next;
7279
7280         mutex_lock(&tracing_err_log_lock);
7281         list_for_each_entry_safe(err, next, &tr->err_log, list) {
7282                 list_del(&err->list);
7283                 kfree(err);
7284         }
7285
7286         tr->n_err_log_entries = 0;
7287         mutex_unlock(&tracing_err_log_lock);
7288 }
7289
7290 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7291 {
7292         struct trace_array *tr = m->private;
7293
7294         mutex_lock(&tracing_err_log_lock);
7295
7296         return seq_list_start(&tr->err_log, *pos);
7297 }
7298
7299 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7300 {
7301         struct trace_array *tr = m->private;
7302
7303         return seq_list_next(v, &tr->err_log, pos);
7304 }
7305
7306 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7307 {
7308         mutex_unlock(&tracing_err_log_lock);
7309 }
7310
7311 static void tracing_err_log_show_pos(struct seq_file *m, u8 pos)
7312 {
7313         u8 i;
7314
7315         for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7316                 seq_putc(m, ' ');
7317         for (i = 0; i < pos; i++)
7318                 seq_putc(m, ' ');
7319         seq_puts(m, "^\n");
7320 }
7321
7322 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7323 {
7324         struct tracing_log_err *err = v;
7325
7326         if (err) {
7327                 const char *err_text = err->info.errs[err->info.type];
7328                 u64 sec = err->info.ts;
7329                 u32 nsec;
7330
7331                 nsec = do_div(sec, NSEC_PER_SEC);
7332                 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7333                            err->loc, err_text);
7334                 seq_printf(m, "%s", err->cmd);
7335                 tracing_err_log_show_pos(m, err->info.pos);
7336         }
7337
7338         return 0;
7339 }
7340
7341 static const struct seq_operations tracing_err_log_seq_ops = {
7342         .start  = tracing_err_log_seq_start,
7343         .next   = tracing_err_log_seq_next,
7344         .stop   = tracing_err_log_seq_stop,
7345         .show   = tracing_err_log_seq_show
7346 };
7347
7348 static int tracing_err_log_open(struct inode *inode, struct file *file)
7349 {
7350         struct trace_array *tr = inode->i_private;
7351         int ret = 0;
7352
7353         ret = tracing_check_open_get_tr(tr);
7354         if (ret)
7355                 return ret;
7356
7357         /* If this file was opened for write, then erase contents */
7358         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7359                 clear_tracing_err_log(tr);
7360
7361         if (file->f_mode & FMODE_READ) {
7362                 ret = seq_open(file, &tracing_err_log_seq_ops);
7363                 if (!ret) {
7364                         struct seq_file *m = file->private_data;
7365                         m->private = tr;
7366                 } else {
7367                         trace_array_put(tr);
7368                 }
7369         }
7370         return ret;
7371 }
7372
7373 static ssize_t tracing_err_log_write(struct file *file,
7374                                      const char __user *buffer,
7375                                      size_t count, loff_t *ppos)
7376 {
7377         return count;
7378 }
7379
7380 static int tracing_err_log_release(struct inode *inode, struct file *file)
7381 {
7382         struct trace_array *tr = inode->i_private;
7383
7384         trace_array_put(tr);
7385
7386         if (file->f_mode & FMODE_READ)
7387                 seq_release(inode, file);
7388
7389         return 0;
7390 }
7391
7392 static const struct file_operations tracing_err_log_fops = {
7393         .open           = tracing_err_log_open,
7394         .write          = tracing_err_log_write,
7395         .read           = seq_read,
7396         .llseek         = seq_lseek,
7397         .release        = tracing_err_log_release,
7398 };
7399
7400 static int tracing_buffers_open(struct inode *inode, struct file *filp)
7401 {
7402         struct trace_array *tr = inode->i_private;
7403         struct ftrace_buffer_info *info;
7404         int ret;
7405
7406         ret = tracing_check_open_get_tr(tr);
7407         if (ret)
7408                 return ret;
7409
7410         info = kzalloc(sizeof(*info), GFP_KERNEL);
7411         if (!info) {
7412                 trace_array_put(tr);
7413                 return -ENOMEM;
7414         }
7415
7416         mutex_lock(&trace_types_lock);
7417
7418         info->iter.tr           = tr;
7419         info->iter.cpu_file     = tracing_get_cpu(inode);
7420         info->iter.trace        = tr->current_trace;
7421         info->iter.array_buffer = &tr->array_buffer;
7422         info->spare             = NULL;
7423         /* Force reading ring buffer for first read */
7424         info->read              = (unsigned int)-1;
7425
7426         filp->private_data = info;
7427
7428         tr->current_trace->ref++;
7429
7430         mutex_unlock(&trace_types_lock);
7431
7432         ret = nonseekable_open(inode, filp);
7433         if (ret < 0)
7434                 trace_array_put(tr);
7435
7436         return ret;
7437 }
7438
7439 static __poll_t
7440 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
7441 {
7442         struct ftrace_buffer_info *info = filp->private_data;
7443         struct trace_iterator *iter = &info->iter;
7444
7445         return trace_poll(iter, filp, poll_table);
7446 }
7447
7448 static ssize_t
7449 tracing_buffers_read(struct file *filp, char __user *ubuf,
7450                      size_t count, loff_t *ppos)
7451 {
7452         struct ftrace_buffer_info *info = filp->private_data;
7453         struct trace_iterator *iter = &info->iter;
7454         ssize_t ret = 0;
7455         ssize_t size;
7456
7457         if (!count)
7458                 return 0;
7459
7460 #ifdef CONFIG_TRACER_MAX_TRACE
7461         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7462                 return -EBUSY;
7463 #endif
7464
7465         if (!info->spare) {
7466                 info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
7467                                                           iter->cpu_file);
7468                 if (IS_ERR(info->spare)) {
7469                         ret = PTR_ERR(info->spare);
7470                         info->spare = NULL;
7471                 } else {
7472                         info->spare_cpu = iter->cpu_file;
7473                 }
7474         }
7475         if (!info->spare)
7476                 return ret;
7477
7478         /* Do we have previous read data to read? */
7479         if (info->read < PAGE_SIZE)
7480                 goto read;
7481
7482  again:
7483         trace_access_lock(iter->cpu_file);
7484         ret = ring_buffer_read_page(iter->array_buffer->buffer,
7485                                     &info->spare,
7486                                     count,
7487                                     iter->cpu_file, 0);
7488         trace_access_unlock(iter->cpu_file);
7489
7490         if (ret < 0) {
7491                 if (trace_empty(iter)) {
7492                         if ((filp->f_flags & O_NONBLOCK))
7493                                 return -EAGAIN;
7494
7495                         ret = wait_on_pipe(iter, 0);
7496                         if (ret)
7497                                 return ret;
7498
7499                         goto again;
7500                 }
7501                 return 0;
7502         }
7503
7504         info->read = 0;
7505  read:
7506         size = PAGE_SIZE - info->read;
7507         if (size > count)
7508                 size = count;
7509
7510         ret = copy_to_user(ubuf, info->spare + info->read, size);
7511         if (ret == size)
7512                 return -EFAULT;
7513
7514         size -= ret;
7515
7516         *ppos += size;
7517         info->read += size;
7518
7519         return size;
7520 }
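
/*
 * Illustrative sketch (comment only): tracing_buffers_read() backs the
 * per-CPU "trace_pipe_raw" files, which hand out whole ring-buffer pages in
 * their binary sub-buffer format rather than formatted text:
 *
 *	char page[4096];	// PAGE_SIZE on most configurations
 *	int fd = open("/sys/kernel/tracing/per_cpu/cpu0/trace_pipe_raw",
 *		      O_RDONLY);
 *	ssize_t n = read(fd, page, sizeof(page));
 *
 * The data must then be parsed according to the ring-buffer page layout
 * (see ring_buffer_read_page()), which tools such as trace-cmd already do.
 */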
7521
7522 static int tracing_buffers_release(struct inode *inode, struct file *file)
7523 {
7524         struct ftrace_buffer_info *info = file->private_data;
7525         struct trace_iterator *iter = &info->iter;
7526
7527         mutex_lock(&trace_types_lock);
7528
7529         iter->tr->current_trace->ref--;
7530
7531         __trace_array_put(iter->tr);
7532
7533         if (info->spare)
7534                 ring_buffer_free_read_page(iter->array_buffer->buffer,
7535                                            info->spare_cpu, info->spare);
7536         kfree(info);
7537
7538         mutex_unlock(&trace_types_lock);
7539
7540         return 0;
7541 }
7542
7543 struct buffer_ref {
7544         struct trace_buffer     *buffer;
7545         void                    *page;
7546         int                     cpu;
7547         refcount_t              refcount;
7548 };
7549
7550 static void buffer_ref_release(struct buffer_ref *ref)
7551 {
7552         if (!refcount_dec_and_test(&ref->refcount))
7553                 return;
7554         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
7555         kfree(ref);
7556 }
7557
7558 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
7559                                     struct pipe_buffer *buf)
7560 {
7561         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7562
7563         buffer_ref_release(ref);
7564         buf->private = 0;
7565 }
7566
7567 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
7568                                 struct pipe_buffer *buf)
7569 {
7570         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7571
7572         if (refcount_read(&ref->refcount) > INT_MAX/2)
7573                 return false;
7574
7575         refcount_inc(&ref->refcount);
7576         return true;
7577 }
7578
7579 /* Pipe buffer operations for a buffer. */
7580 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
7581         .release                = buffer_pipe_buf_release,
7582         .get                    = buffer_pipe_buf_get,
7583 };
7584
7585 /*
7586  * Callback from splice_to_pipe(), if we need to release some pages
7587  * at the end of the spd in case we errored out while filling the pipe.
7588  */
7589 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
7590 {
7591         struct buffer_ref *ref =
7592                 (struct buffer_ref *)spd->partial[i].private;
7593
7594         buffer_ref_release(ref);
7595         spd->partial[i].private = 0;
7596 }
7597
7598 static ssize_t
7599 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7600                             struct pipe_inode_info *pipe, size_t len,
7601                             unsigned int flags)
7602 {
7603         struct ftrace_buffer_info *info = file->private_data;
7604         struct trace_iterator *iter = &info->iter;
7605         struct partial_page partial_def[PIPE_DEF_BUFFERS];
7606         struct page *pages_def[PIPE_DEF_BUFFERS];
7607         struct splice_pipe_desc spd = {
7608                 .pages          = pages_def,
7609                 .partial        = partial_def,
7610                 .nr_pages_max   = PIPE_DEF_BUFFERS,
7611                 .ops            = &buffer_pipe_buf_ops,
7612                 .spd_release    = buffer_spd_release,
7613         };
7614         struct buffer_ref *ref;
7615         int entries, i;
7616         ssize_t ret = 0;
7617
7618 #ifdef CONFIG_TRACER_MAX_TRACE
7619         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7620                 return -EBUSY;
7621 #endif
7622
7623         if (*ppos & (PAGE_SIZE - 1))
7624                 return -EINVAL;
7625
7626         if (len & (PAGE_SIZE - 1)) {
7627                 if (len < PAGE_SIZE)
7628                         return -EINVAL;
7629                 len &= PAGE_MASK;
7630         }
7631
7632         if (splice_grow_spd(pipe, &spd))
7633                 return -ENOMEM;
7634
7635  again:
7636         trace_access_lock(iter->cpu_file);
7637         entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
7638
7639         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
7640                 struct page *page;
7641                 int r;
7642
7643                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
7644                 if (!ref) {
7645                         ret = -ENOMEM;
7646                         break;
7647                 }
7648
7649                 refcount_set(&ref->refcount, 1);
7650                 ref->buffer = iter->array_buffer->buffer;
7651                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
7652                 if (IS_ERR(ref->page)) {
7653                         ret = PTR_ERR(ref->page);
7654                         ref->page = NULL;
7655                         kfree(ref);
7656                         break;
7657                 }
7658                 ref->cpu = iter->cpu_file;
7659
7660                 r = ring_buffer_read_page(ref->buffer, &ref->page,
7661                                           len, iter->cpu_file, 1);
7662                 if (r < 0) {
7663                         ring_buffer_free_read_page(ref->buffer, ref->cpu,
7664                                                    ref->page);
7665                         kfree(ref);
7666                         break;
7667                 }
7668
7669                 page = virt_to_page(ref->page);
7670
7671                 spd.pages[i] = page;
7672                 spd.partial[i].len = PAGE_SIZE;
7673                 spd.partial[i].offset = 0;
7674                 spd.partial[i].private = (unsigned long)ref;
7675                 spd.nr_pages++;
7676                 *ppos += PAGE_SIZE;
7677
7678                 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
7679         }
7680
7681         trace_access_unlock(iter->cpu_file);
7682         spd.nr_pages = i;
7683
7684         /* did we read anything? */
7685         if (!spd.nr_pages) {
7686                 if (ret)
7687                         goto out;
7688
7689                 ret = -EAGAIN;
7690                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
7691                         goto out;
7692
7693                 ret = wait_on_pipe(iter, iter->tr->buffer_percent);
7694                 if (ret)
7695                         goto out;
7696
7697                 goto again;
7698         }
7699
7700         ret = splice_to_pipe(pipe, &spd);
7701 out:
7702         splice_shrink_spd(&spd);
7703
7704         return ret;
7705 }
7706
7707 static const struct file_operations tracing_buffers_fops = {
7708         .open           = tracing_buffers_open,
7709         .read           = tracing_buffers_read,
7710         .poll           = tracing_buffers_poll,
7711         .release        = tracing_buffers_release,
7712         .splice_read    = tracing_buffers_splice_read,
7713         .llseek         = no_llseek,
7714 };
7715
7716 static ssize_t
7717 tracing_stats_read(struct file *filp, char __user *ubuf,
7718                    size_t count, loff_t *ppos)
7719 {
7720         struct inode *inode = file_inode(filp);
7721         struct trace_array *tr = inode->i_private;
7722         struct array_buffer *trace_buf = &tr->array_buffer;
7723         int cpu = tracing_get_cpu(inode);
7724         struct trace_seq *s;
7725         unsigned long cnt;
7726         unsigned long long t;
7727         unsigned long usec_rem;
7728
7729         s = kmalloc(sizeof(*s), GFP_KERNEL);
7730         if (!s)
7731                 return -ENOMEM;
7732
7733         trace_seq_init(s);
7734
7735         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
7736         trace_seq_printf(s, "entries: %ld\n", cnt);
7737
7738         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
7739         trace_seq_printf(s, "overrun: %ld\n", cnt);
7740
7741         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
7742         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
7743
7744         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
7745         trace_seq_printf(s, "bytes: %ld\n", cnt);
7746
7747         if (trace_clocks[tr->clock_id].in_ns) {
7748                 /* local or global for trace_clock */
7749                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7750                 usec_rem = do_div(t, USEC_PER_SEC);
7751                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
7752                                                                 t, usec_rem);
7753
7754                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
7755                 usec_rem = do_div(t, USEC_PER_SEC);
7756                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
7757         } else {
7758                 /* counter or tsc mode for trace_clock */
7759                 trace_seq_printf(s, "oldest event ts: %llu\n",
7760                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7761
7762                 trace_seq_printf(s, "now ts: %llu\n",
7763                                 ring_buffer_time_stamp(trace_buf->buffer, cpu));
7764         }
7765
7766         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
7767         trace_seq_printf(s, "dropped events: %ld\n", cnt);
7768
7769         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
7770         trace_seq_printf(s, "read events: %ld\n", cnt);
7771
7772         count = simple_read_from_buffer(ubuf, count, ppos,
7773                                         s->buffer, trace_seq_used(s));
7774
7775         kfree(s);
7776
7777         return count;
7778 }
7779
7780 static const struct file_operations tracing_stats_fops = {
7781         .open           = tracing_open_generic_tr,
7782         .read           = tracing_stats_read,
7783         .llseek         = generic_file_llseek,
7784         .release        = tracing_release_generic_tr,
7785 };
7786
7787 #ifdef CONFIG_DYNAMIC_FTRACE
7788
7789 static ssize_t
7790 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
7791                   size_t cnt, loff_t *ppos)
7792 {
7793         ssize_t ret;
7794         char *buf;
7795         int r;
7796
7797         /* 256 should be plenty to hold the amount needed */
7798         buf = kmalloc(256, GFP_KERNEL);
7799         if (!buf)
7800                 return -ENOMEM;
7801
7802         r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
7803                       ftrace_update_tot_cnt,
7804                       ftrace_number_of_pages,
7805                       ftrace_number_of_groups);
7806
7807         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7808         kfree(buf);
7809         return ret;
7810 }
7811
7812 static const struct file_operations tracing_dyn_info_fops = {
7813         .open           = tracing_open_generic,
7814         .read           = tracing_read_dyn_info,
7815         .llseek         = generic_file_llseek,
7816 };
7817 #endif /* CONFIG_DYNAMIC_FTRACE */
7818
7819 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
7820 static void
7821 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
7822                 struct trace_array *tr, struct ftrace_probe_ops *ops,
7823                 void *data)
7824 {
7825         tracing_snapshot_instance(tr);
7826 }
7827
7828 static void
7829 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
7830                       struct trace_array *tr, struct ftrace_probe_ops *ops,
7831                       void *data)
7832 {
7833         struct ftrace_func_mapper *mapper = data;
7834         long *count = NULL;
7835
7836         if (mapper)
7837                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7838
7839         if (count) {
7840
7841                 if (*count <= 0)
7842                         return;
7843
7844                 (*count)--;
7845         }
7846
7847         tracing_snapshot_instance(tr);
7848 }
7849
7850 static int
7851 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
7852                       struct ftrace_probe_ops *ops, void *data)
7853 {
7854         struct ftrace_func_mapper *mapper = data;
7855         long *count = NULL;
7856
7857         seq_printf(m, "%ps:", (void *)ip);
7858
7859         seq_puts(m, "snapshot");
7860
7861         if (mapper)
7862                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7863
7864         if (count)
7865                 seq_printf(m, ":count=%ld\n", *count);
7866         else
7867                 seq_puts(m, ":unlimited\n");
7868
7869         return 0;
7870 }
7871
7872 static int
7873 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
7874                      unsigned long ip, void *init_data, void **data)
7875 {
7876         struct ftrace_func_mapper *mapper = *data;
7877
7878         if (!mapper) {
7879                 mapper = allocate_ftrace_func_mapper();
7880                 if (!mapper)
7881                         return -ENOMEM;
7882                 *data = mapper;
7883         }
7884
7885         return ftrace_func_mapper_add_ip(mapper, ip, init_data);
7886 }
7887
7888 static void
7889 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
7890                      unsigned long ip, void *data)
7891 {
7892         struct ftrace_func_mapper *mapper = data;
7893
7894         if (!ip) {
7895                 if (!mapper)
7896                         return;
7897                 free_ftrace_func_mapper(mapper, NULL);
7898                 return;
7899         }
7900
7901         ftrace_func_mapper_remove_ip(mapper, ip);
7902 }
7903
7904 static struct ftrace_probe_ops snapshot_probe_ops = {
7905         .func                   = ftrace_snapshot,
7906         .print                  = ftrace_snapshot_print,
7907 };
7908
7909 static struct ftrace_probe_ops snapshot_count_probe_ops = {
7910         .func                   = ftrace_count_snapshot,
7911         .print                  = ftrace_snapshot_print,
7912         .init                   = ftrace_snapshot_init,
7913         .free                   = ftrace_snapshot_free,
7914 };
7915
7916 static int
7917 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
7918                                char *glob, char *cmd, char *param, int enable)
7919 {
7920         struct ftrace_probe_ops *ops;
7921         void *count = (void *)-1;
7922         char *number;
7923         int ret;
7924
7925         if (!tr)
7926                 return -ENODEV;
7927
7928         /* hash funcs only work with set_ftrace_filter */
7929         if (!enable)
7930                 return -EINVAL;
7931
7932         ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
7933
7934         if (glob[0] == '!')
7935                 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
7936
7937         if (!param)
7938                 goto out_reg;
7939
7940         number = strsep(&param, ":");
7941
7942         if (!strlen(number))
7943                 goto out_reg;
7944
7945         /*
7946          * We use the callback data field (which is a pointer)
7947          * as our counter.
7948          */
7949         ret = kstrtoul(number, 0, (unsigned long *)&count);
7950         if (ret)
7951                 return ret;
7952
7953  out_reg:
7954         ret = tracing_alloc_snapshot_instance(tr);
7955         if (ret < 0)
7956                 goto out;
7957
7958         ret = register_ftrace_function_probe(glob, tr, ops, count);
7959
7960  out:
7961         return ret < 0 ? ret : 0;
7962 }
7963
7964 static struct ftrace_func_command ftrace_snapshot_cmd = {
7965         .name                   = "snapshot",
7966         .func                   = ftrace_trace_snapshot_callback,
7967 };
7968
7969 static __init int register_snapshot_cmd(void)
7970 {
7971         return register_ftrace_command(&ftrace_snapshot_cmd);
7972 }
7973 #else
7974 static inline __init int register_snapshot_cmd(void) { return 0; }
7975 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
7976
7977 static struct dentry *tracing_get_dentry(struct trace_array *tr)
7978 {
7979         if (WARN_ON(!tr->dir))
7980                 return ERR_PTR(-ENODEV);
7981
7982         /* Top directory uses NULL as the parent */
7983         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
7984                 return NULL;
7985
7986         /* All sub buffers have a descriptor */
7987         return tr->dir;
7988 }
7989
7990 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
7991 {
7992         struct dentry *d_tracer;
7993
7994         if (tr->percpu_dir)
7995                 return tr->percpu_dir;
7996
7997         d_tracer = tracing_get_dentry(tr);
7998         if (IS_ERR(d_tracer))
7999                 return NULL;
8000
8001         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8002
8003         MEM_FAIL(!tr->percpu_dir,
8004                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8005
8006         return tr->percpu_dir;
8007 }
8008
8009 static struct dentry *
8010 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8011                       void *data, long cpu, const struct file_operations *fops)
8012 {
8013         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8014
8015         if (ret) /* See tracing_get_cpu() */
8016                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
8017         return ret;
8018 }
8019
8020 static void
8021 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8022 {
8023         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8024         struct dentry *d_cpu;
8025         char cpu_dir[30]; /* 30 characters should be more than enough */
8026
8027         if (!d_percpu)
8028                 return;
8029
8030         snprintf(cpu_dir, 30, "cpu%ld", cpu);
8031         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8032         if (!d_cpu) {
8033                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8034                 return;
8035         }
8036
8037         /* per cpu trace_pipe */
8038         trace_create_cpu_file("trace_pipe", 0444, d_cpu,
8039                                 tr, cpu, &tracing_pipe_fops);
8040
8041         /* per cpu trace */
8042         trace_create_cpu_file("trace", 0644, d_cpu,
8043                                 tr, cpu, &tracing_fops);
8044
8045         trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
8046                                 tr, cpu, &tracing_buffers_fops);
8047
8048         trace_create_cpu_file("stats", 0444, d_cpu,
8049                                 tr, cpu, &tracing_stats_fops);
8050
8051         trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
8052                                 tr, cpu, &tracing_entries_fops);
8053
8054 #ifdef CONFIG_TRACER_SNAPSHOT
8055         trace_create_cpu_file("snapshot", 0644, d_cpu,
8056                                 tr, cpu, &snapshot_fops);
8057
8058         trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
8059                                 tr, cpu, &snapshot_raw_fops);
8060 #endif
8061 }
8062
8063 #ifdef CONFIG_FTRACE_SELFTEST
8064 /* Let selftest have access to static functions in this file */
8065 #include "trace_selftest.c"
8066 #endif
8067
8068 static ssize_t
8069 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8070                         loff_t *ppos)
8071 {
8072         struct trace_option_dentry *topt = filp->private_data;
8073         char *buf;
8074
8075         if (topt->flags->val & topt->opt->bit)
8076                 buf = "1\n";
8077         else
8078                 buf = "0\n";
8079
8080         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8081 }
8082
8083 static ssize_t
8084 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8085                          loff_t *ppos)
8086 {
8087         struct trace_option_dentry *topt = filp->private_data;
8088         unsigned long val;
8089         int ret;
8090
8091         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8092         if (ret)
8093                 return ret;
8094
8095         if (val != 0 && val != 1)
8096                 return -EINVAL;
8097
8098         if (!!(topt->flags->val & topt->opt->bit) != val) {
8099                 mutex_lock(&trace_types_lock);
8100                 ret = __set_tracer_option(topt->tr, topt->flags,
8101                                           topt->opt, !val);
8102                 mutex_unlock(&trace_types_lock);
8103                 if (ret)
8104                         return ret;
8105         }
8106
8107         *ppos += cnt;
8108
8109         return cnt;
8110 }
8111
8112
8113 static const struct file_operations trace_options_fops = {
8114         .open = tracing_open_generic,
8115         .read = trace_options_read,
8116         .write = trace_options_write,
8117         .llseek = generic_file_llseek,
8118 };
8119
8120 /*
8121  * In order to pass in both the trace_array descriptor and the index of
8122  * the flag that the trace option file represents, the trace_array has a
8123  * character array, trace_flags_index[], which holds the index of the
8124  * bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8125  * The address of this character array is passed to the flag option file
8126  * read/write callbacks.
8127  *
8128  * In order to extract both the index and the trace_array descriptor,
8129  * get_tr_index() uses the following algorithm.
8130  *
8131  *   idx = *ptr;
8132  *
8133  * The data pointer itself is the address of the index entry, and since
8134  * index[i] == i, dereferencing it yields the index value.
8135  *
8136  * Then, to get the trace_array descriptor, we subtract that index
8137  * from the pointer, which lands us at the start of the index array.
8138  *
8139  *   ptr - idx == &index[0]
8140  *
8141  * Then a simple container_of() from that pointer gets us to the
8142  * trace_array descriptor.
8143  */
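/*
 * For example (illustrative values): if data points at
 * tr->trace_flags_index[3], then *data == 3 and data - 3 is
 * &tr->trace_flags_index[0], from which container_of() recovers the
 * enclosing trace_array.
 */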
8144 static void get_tr_index(void *data, struct trace_array **ptr,
8145                          unsigned int *pindex)
8146 {
8147         *pindex = *(unsigned char *)data;
8148
8149         *ptr = container_of(data - *pindex, struct trace_array,
8150                             trace_flags_index);
8151 }
8152
8153 static ssize_t
8154 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8155                         loff_t *ppos)
8156 {
8157         void *tr_index = filp->private_data;
8158         struct trace_array *tr;
8159         unsigned int index;
8160         char *buf;
8161
8162         get_tr_index(tr_index, &tr, &index);
8163
8164         if (tr->trace_flags & (1 << index))
8165                 buf = "1\n";
8166         else
8167                 buf = "0\n";
8168
8169         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8170 }
8171
8172 static ssize_t
8173 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8174                          loff_t *ppos)
8175 {
8176         void *tr_index = filp->private_data;
8177         struct trace_array *tr;
8178         unsigned int index;
8179         unsigned long val;
8180         int ret;
8181
8182         get_tr_index(tr_index, &tr, &index);
8183
8184         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8185         if (ret)
8186                 return ret;
8187
8188         if (val != 0 && val != 1)
8189                 return -EINVAL;
8190
8191         mutex_lock(&event_mutex);
8192         mutex_lock(&trace_types_lock);
8193         ret = set_tracer_flag(tr, 1 << index, val);
8194         mutex_unlock(&trace_types_lock);
8195         mutex_unlock(&event_mutex);
8196
8197         if (ret < 0)
8198                 return ret;
8199
8200         *ppos += cnt;
8201
8202         return cnt;
8203 }
8204
8205 static const struct file_operations trace_options_core_fops = {
8206         .open = tracing_open_generic,
8207         .read = trace_options_core_read,
8208         .write = trace_options_core_write,
8209         .llseek = generic_file_llseek,
8210 };
8211
8212 struct dentry *trace_create_file(const char *name,
8213                                  umode_t mode,
8214                                  struct dentry *parent,
8215                                  void *data,
8216                                  const struct file_operations *fops)
8217 {
8218         struct dentry *ret;
8219
8220         ret = tracefs_create_file(name, mode, parent, data, fops);
8221         if (!ret)
8222                 pr_warn("Could not create tracefs '%s' entry\n", name);
8223
8224         return ret;
8225 }
8226
8227
8228 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8229 {
8230         struct dentry *d_tracer;
8231
8232         if (tr->options)
8233                 return tr->options;
8234
8235         d_tracer = tracing_get_dentry(tr);
8236         if (IS_ERR(d_tracer))
8237                 return NULL;
8238
8239         tr->options = tracefs_create_dir("options", d_tracer);
8240         if (!tr->options) {
8241                 pr_warn("Could not create tracefs directory 'options'\n");
8242                 return NULL;
8243         }
8244
8245         return tr->options;
8246 }
8247
8248 static void
8249 create_trace_option_file(struct trace_array *tr,
8250                          struct trace_option_dentry *topt,
8251                          struct tracer_flags *flags,
8252                          struct tracer_opt *opt)
8253 {
8254         struct dentry *t_options;
8255
8256         t_options = trace_options_init_dentry(tr);
8257         if (!t_options)
8258                 return;
8259
8260         topt->flags = flags;
8261         topt->opt = opt;
8262         topt->tr = tr;
8263
8264         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
8265                                     &trace_options_fops);
8266
8267 }
8268
8269 static void
8270 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8271 {
8272         struct trace_option_dentry *topts;
8273         struct trace_options *tr_topts;
8274         struct tracer_flags *flags;
8275         struct tracer_opt *opts;
8276         int cnt;
8277         int i;
8278
8279         if (!tracer)
8280                 return;
8281
8282         flags = tracer->flags;
8283
8284         if (!flags || !flags->opts)
8285                 return;
8286
8287         /*
8288          * If this is an instance, only create flags for tracers
8289          * the instance may have.
8290          */
8291         if (!trace_ok_for_array(tracer, tr))
8292                 return;
8293
8294         for (i = 0; i < tr->nr_topts; i++) {
8295                 /* Make sure there are no duplicate flags. */
8296                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8297                         return;
8298         }
8299
8300         opts = flags->opts;
8301
8302         for (cnt = 0; opts[cnt].name; cnt++)
8303                 ;
8304
8305         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8306         if (!topts)
8307                 return;
8308
8309         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8310                             GFP_KERNEL);
8311         if (!tr_topts) {
8312                 kfree(topts);
8313                 return;
8314         }
8315
8316         tr->topts = tr_topts;
8317         tr->topts[tr->nr_topts].tracer = tracer;
8318         tr->topts[tr->nr_topts].topts = topts;
8319         tr->nr_topts++;
8320
8321         for (cnt = 0; opts[cnt].name; cnt++) {
8322                 create_trace_option_file(tr, &topts[cnt], flags,
8323                                          &opts[cnt]);
8324                 MEM_FAIL(topts[cnt].entry == NULL,
8325                           "Failed to create trace option: %s",
8326                           opts[cnt].name);
8327         }
8328 }
8329
8330 static struct dentry *
8331 create_trace_option_core_file(struct trace_array *tr,
8332                               const char *option, long index)
8333 {
8334         struct dentry *t_options;
8335
8336         t_options = trace_options_init_dentry(tr);
8337         if (!t_options)
8338                 return NULL;
8339
8340         return trace_create_file(option, 0644, t_options,
8341                                  (void *)&tr->trace_flags_index[index],
8342                                  &trace_options_core_fops);
8343 }
8344
8345 static void create_trace_options_dir(struct trace_array *tr)
8346 {
8347         struct dentry *t_options;
8348         bool top_level = tr == &global_trace;
8349         int i;
8350
8351         t_options = trace_options_init_dentry(tr);
8352         if (!t_options)
8353                 return;
8354
8355         for (i = 0; trace_options[i]; i++) {
8356                 if (top_level ||
8357                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
8358                         create_trace_option_core_file(tr, trace_options[i], i);
8359         }
8360 }
8361
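/*
 * rb_simple_read()/rb_simple_write() back the per-instance "tracing_on"
 * file (created in init_tracer_tracefs() below). For example:
 *
 *   echo 0 > tracing_on    # stop writing to the ring buffer
 *   echo 1 > tracing_on    # resume writing
 */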
8362 static ssize_t
8363 rb_simple_read(struct file *filp, char __user *ubuf,
8364                size_t cnt, loff_t *ppos)
8365 {
8366         struct trace_array *tr = filp->private_data;
8367         char buf[64];
8368         int r;
8369
8370         r = tracer_tracing_is_on(tr);
8371         r = sprintf(buf, "%d\n", r);
8372
8373         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8374 }
8375
8376 static ssize_t
8377 rb_simple_write(struct file *filp, const char __user *ubuf,
8378                 size_t cnt, loff_t *ppos)
8379 {
8380         struct trace_array *tr = filp->private_data;
8381         struct trace_buffer *buffer = tr->array_buffer.buffer;
8382         unsigned long val;
8383         int ret;
8384
8385         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8386         if (ret)
8387                 return ret;
8388
8389         if (buffer) {
8390                 mutex_lock(&trace_types_lock);
8391                 if (!!val == tracer_tracing_is_on(tr)) {
8392                         val = 0; /* do nothing */
8393                 } else if (val) {
8394                         tracer_tracing_on(tr);
8395                         if (tr->current_trace->start)
8396                                 tr->current_trace->start(tr);
8397                 } else {
8398                         tracer_tracing_off(tr);
8399                         if (tr->current_trace->stop)
8400                                 tr->current_trace->stop(tr);
8401                 }
8402                 mutex_unlock(&trace_types_lock);
8403         }
8404
8405         (*ppos)++;
8406
8407         return cnt;
8408 }
8409
8410 static const struct file_operations rb_simple_fops = {
8411         .open           = tracing_open_generic_tr,
8412         .read           = rb_simple_read,
8413         .write          = rb_simple_write,
8414         .release        = tracing_release_generic_tr,
8415         .llseek         = default_llseek,
8416 };
8417
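/*
 * buffer_percent_read()/buffer_percent_write() back the per-instance
 * "buffer_percent" file. Writes above 100 are rejected, and a write of
 * 0 is stored as 1.
 */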
8418 static ssize_t
8419 buffer_percent_read(struct file *filp, char __user *ubuf,
8420                     size_t cnt, loff_t *ppos)
8421 {
8422         struct trace_array *tr = filp->private_data;
8423         char buf[64];
8424         int r;
8425
8426         r = tr->buffer_percent;
8427         r = sprintf(buf, "%d\n", r);
8428
8429         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8430 }
8431
8432 static ssize_t
8433 buffer_percent_write(struct file *filp, const char __user *ubuf,
8434                      size_t cnt, loff_t *ppos)
8435 {
8436         struct trace_array *tr = filp->private_data;
8437         unsigned long val;
8438         int ret;
8439
8440         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8441         if (ret)
8442                 return ret;
8443
8444         if (val > 100)
8445                 return -EINVAL;
8446
8447         if (!val)
8448                 val = 1;
8449
8450         tr->buffer_percent = val;
8451
8452         (*ppos)++;
8453
8454         return cnt;
8455 }
8456
8457 static const struct file_operations buffer_percent_fops = {
8458         .open           = tracing_open_generic_tr,
8459         .read           = buffer_percent_read,
8460         .write          = buffer_percent_write,
8461         .release        = tracing_release_generic_tr,
8462         .llseek         = default_llseek,
8463 };
8464
8465 static struct dentry *trace_instance_dir;
8466
8467 static void
8468 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
8469
8470 static int
8471 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
8472 {
8473         enum ring_buffer_flags rb_flags;
8474
8475         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
8476
8477         buf->tr = tr;
8478
8479         buf->buffer = ring_buffer_alloc(size, rb_flags);
8480         if (!buf->buffer)
8481                 return -ENOMEM;
8482
8483         buf->data = alloc_percpu(struct trace_array_cpu);
8484         if (!buf->data) {
8485                 ring_buffer_free(buf->buffer);
8486                 buf->buffer = NULL;
8487                 return -ENOMEM;
8488         }
8489
8490         /* Allocate the first page for all buffers */
8491         set_buffer_entries(&tr->array_buffer,
8492                            ring_buffer_size(tr->array_buffer.buffer, 0));
8493
8494         return 0;
8495 }
8496
8497 static int allocate_trace_buffers(struct trace_array *tr, int size)
8498 {
8499         int ret;
8500
8501         ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
8502         if (ret)
8503                 return ret;
8504
8505 #ifdef CONFIG_TRACER_MAX_TRACE
8506         ret = allocate_trace_buffer(tr, &tr->max_buffer,
8507                                     allocate_snapshot ? size : 1);
8508         if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
8509                 ring_buffer_free(tr->array_buffer.buffer);
8510                 tr->array_buffer.buffer = NULL;
8511                 free_percpu(tr->array_buffer.data);
8512                 tr->array_buffer.data = NULL;
8513                 return -ENOMEM;
8514         }
8515         tr->allocated_snapshot = allocate_snapshot;
8516
8517         /*
8518          * Only the top level trace array gets its snapshot allocated
8519          * from the kernel command line.
8520          */
8521         allocate_snapshot = false;
8522 #endif
8523
8524         return 0;
8525 }
8526
8527 static void free_trace_buffer(struct array_buffer *buf)
8528 {
8529         if (buf->buffer) {
8530                 ring_buffer_free(buf->buffer);
8531                 buf->buffer = NULL;
8532                 free_percpu(buf->data);
8533                 buf->data = NULL;
8534         }
8535 }
8536
8537 static void free_trace_buffers(struct trace_array *tr)
8538 {
8539         if (!tr)
8540                 return;
8541
8542         free_trace_buffer(&tr->array_buffer);
8543
8544 #ifdef CONFIG_TRACER_MAX_TRACE
8545         free_trace_buffer(&tr->max_buffer);
8546 #endif
8547 }
8548
8549 static void init_trace_flags_index(struct trace_array *tr)
8550 {
8551         int i;
8552
8553         /* Used by the trace options files */
8554         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
8555                 tr->trace_flags_index[i] = i;
8556 }
8557
8558 static void __update_tracer_options(struct trace_array *tr)
8559 {
8560         struct tracer *t;
8561
8562         for (t = trace_types; t; t = t->next)
8563                 add_tracer_options(tr, t);
8564 }
8565
8566 static void update_tracer_options(struct trace_array *tr)
8567 {
8568         mutex_lock(&trace_types_lock);
8569         __update_tracer_options(tr);
8570         mutex_unlock(&trace_types_lock);
8571 }
8572
8573 /* Must have trace_types_lock held */
8574 struct trace_array *trace_array_find(const char *instance)
8575 {
8576         struct trace_array *tr, *found = NULL;
8577
8578         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8579                 if (tr->name && strcmp(tr->name, instance) == 0) {
8580                         found = tr;
8581                         break;
8582                 }
8583         }
8584
8585         return found;
8586 }
8587
8588 struct trace_array *trace_array_find_get(const char *instance)
8589 {
8590         struct trace_array *tr;
8591
8592         mutex_lock(&trace_types_lock);
8593         tr = trace_array_find(instance);
8594         if (tr)
8595                 tr->ref++;
8596         mutex_unlock(&trace_types_lock);
8597
8598         return tr;
8599 }
8600
8601 static struct trace_array *trace_array_create(const char *name)
8602 {
8603         struct trace_array *tr;
8604         int ret;
8605
8606         ret = -ENOMEM;
8607         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
8608         if (!tr)
8609                 return ERR_PTR(ret);
8610
8611         tr->name = kstrdup(name, GFP_KERNEL);
8612         if (!tr->name)
8613                 goto out_free_tr;
8614
8615         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
8616                 goto out_free_tr;
8617
8618         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
8619
8620         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
8621
8622         raw_spin_lock_init(&tr->start_lock);
8623
8624         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8625
8626         tr->current_trace = &nop_trace;
8627
8628         INIT_LIST_HEAD(&tr->systems);
8629         INIT_LIST_HEAD(&tr->events);
8630         INIT_LIST_HEAD(&tr->hist_vars);
8631         INIT_LIST_HEAD(&tr->err_log);
8632
8633         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
8634                 goto out_free_tr;
8635
8636         tr->dir = tracefs_create_dir(name, trace_instance_dir);
8637         if (!tr->dir)
8638                 goto out_free_tr;
8639
8640         ret = event_trace_add_tracer(tr->dir, tr);
8641         if (ret) {
8642                 tracefs_remove(tr->dir);
8643                 goto out_free_tr;
8644         }
8645
8646         ftrace_init_trace_array(tr);
8647
8648         init_tracer_tracefs(tr, tr->dir);
8649         init_trace_flags_index(tr);
8650         __update_tracer_options(tr);
8651
8652         list_add(&tr->list, &ftrace_trace_arrays);
8653
8654         tr->ref++;
8655
8656
8657         return tr;
8658
8659  out_free_tr:
8660         free_trace_buffers(tr);
8661         free_cpumask_var(tr->tracing_cpumask);
8662         kfree(tr->name);
8663         kfree(tr);
8664
8665         return ERR_PTR(ret);
8666 }
8667
8668 static int instance_mkdir(const char *name)
8669 {
8670         struct trace_array *tr;
8671         int ret;
8672
8673         mutex_lock(&event_mutex);
8674         mutex_lock(&trace_types_lock);
8675
8676         ret = -EEXIST;
8677         if (trace_array_find(name))
8678                 goto out_unlock;
8679
8680         tr = trace_array_create(name);
8681
8682         ret = PTR_ERR_OR_ZERO(tr);
8683
8684 out_unlock:
8685         mutex_unlock(&trace_types_lock);
8686         mutex_unlock(&event_mutex);
8687         return ret;
8688 }
8689
8690 /**
8691  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
8692  * @name: The name of the trace array to be looked up/created.
8693  *
8694  * Returns a pointer to the trace array with the given name, or
8695  * NULL if it cannot be created.
8696  *
8697  * NOTE: This function increments the reference counter associated with the
8698  * trace array returned. This makes sure it cannot be freed while in use.
8699  * Use trace_array_put() once the trace array is no longer needed.
8700  * If the trace_array is to be freed, trace_array_destroy() needs to
8701  * be called after the trace_array_put(), or simply let user space delete
8702  * it from the tracefs instances directory. But until the
8703  * trace_array_put() is called, user space cannot delete it.
8704  *
8705  */
8706 struct trace_array *trace_array_get_by_name(const char *name)
8707 {
8708         struct trace_array *tr;
8709
8710         mutex_lock(&event_mutex);
8711         mutex_lock(&trace_types_lock);
8712
8713         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8714                 if (tr->name && strcmp(tr->name, name) == 0)
8715                         goto out_unlock;
8716         }
8717
8718         tr = trace_array_create(name);
8719
8720         if (IS_ERR(tr))
8721                 tr = NULL;
8722 out_unlock:
8723         if (tr)
8724                 tr->ref++;
8725
8726         mutex_unlock(&trace_types_lock);
8727         mutex_unlock(&event_mutex);
8728         return tr;
8729 }
8730 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
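/*
 * A minimal usage sketch for a module wanting its own trace instance
 * (error handling omitted; the instance name is only an example):
 *
 *   struct trace_array *tr = trace_array_get_by_name("my_instance");
 *   ...
 *   trace_array_put(tr);
 *   trace_array_destroy(tr);    (only if the instance should be removed)
 */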
8731
8732 static int __remove_instance(struct trace_array *tr)
8733 {
8734         int i;
8735
8736         /* Reference counter for a newly created trace array = 1. */
8737         if (tr->ref > 1 || (tr->current_trace && tr->current_trace->ref))
8738                 return -EBUSY;
8739
8740         list_del(&tr->list);
8741
8742         /* Disable all the flags that were enabled coming in */
8743         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
8744                 if ((1 << i) & ZEROED_TRACE_FLAGS)
8745                         set_tracer_flag(tr, 1 << i, 0);
8746         }
8747
8748         tracing_set_nop(tr);
8749         clear_ftrace_function_probes(tr);
8750         event_trace_del_tracer(tr);
8751         ftrace_clear_pids(tr);
8752         ftrace_destroy_function_files(tr);
8753         tracefs_remove(tr->dir);
8754         free_trace_buffers(tr);
8755
8756         for (i = 0; i < tr->nr_topts; i++) {
8757                 kfree(tr->topts[i].topts);
8758         }
8759         kfree(tr->topts);
8760
8761         free_cpumask_var(tr->tracing_cpumask);
8762         kfree(tr->name);
8763         kfree(tr);
8764         tr = NULL;
8765
8766         return 0;
8767 }
8768
8769 int trace_array_destroy(struct trace_array *this_tr)
8770 {
8771         struct trace_array *tr;
8772         int ret;
8773
8774         if (!this_tr)
8775                 return -EINVAL;
8776
8777         mutex_lock(&event_mutex);
8778         mutex_lock(&trace_types_lock);
8779
8780         ret = -ENODEV;
8781
8782         /* Make sure the trace array exists before destroying it. */
8783         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8784                 if (tr == this_tr) {
8785                         ret = __remove_instance(tr);
8786                         break;
8787                 }
8788         }
8789
8790         mutex_unlock(&trace_types_lock);
8791         mutex_unlock(&event_mutex);
8792
8793         return ret;
8794 }
8795 EXPORT_SYMBOL_GPL(trace_array_destroy);
8796
8797 static int instance_rmdir(const char *name)
8798 {
8799         struct trace_array *tr;
8800         int ret;
8801
8802         mutex_lock(&event_mutex);
8803         mutex_lock(&trace_types_lock);
8804
8805         ret = -ENODEV;
8806         tr = trace_array_find(name);
8807         if (tr)
8808                 ret = __remove_instance(tr);
8809
8810         mutex_unlock(&trace_types_lock);
8811         mutex_unlock(&event_mutex);
8812
8813         return ret;
8814 }
8815
8816 static __init void create_trace_instances(struct dentry *d_tracer)
8817 {
8818         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
8819                                                          instance_mkdir,
8820                                                          instance_rmdir);
8821         if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
8822                 return;
8823 }
8824
8825 static void
8826 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
8827 {
8828         struct trace_event_file *file;
8829         int cpu;
8830
8831         trace_create_file("available_tracers", 0444, d_tracer,
8832                         tr, &show_traces_fops);
8833
8834         trace_create_file("current_tracer", 0644, d_tracer,
8835                         tr, &set_tracer_fops);
8836
8837         trace_create_file("tracing_cpumask", 0644, d_tracer,
8838                           tr, &tracing_cpumask_fops);
8839
8840         trace_create_file("trace_options", 0644, d_tracer,
8841                           tr, &tracing_iter_fops);
8842
8843         trace_create_file("trace", 0644, d_tracer,
8844                           tr, &tracing_fops);
8845
8846         trace_create_file("trace_pipe", 0444, d_tracer,
8847                           tr, &tracing_pipe_fops);
8848
8849         trace_create_file("buffer_size_kb", 0644, d_tracer,
8850                           tr, &tracing_entries_fops);
8851
8852         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
8853                           tr, &tracing_total_entries_fops);
8854
8855         trace_create_file("free_buffer", 0200, d_tracer,
8856                           tr, &tracing_free_buffer_fops);
8857
8858         trace_create_file("trace_marker", 0220, d_tracer,
8859                           tr, &tracing_mark_fops);
8860
8861         file = __find_event_file(tr, "ftrace", "print");
8862         if (file && file->dir)
8863                 trace_create_file("trigger", 0644, file->dir, file,
8864                                   &event_trigger_fops);
8865         tr->trace_marker_file = file;
8866
8867         trace_create_file("trace_marker_raw", 0220, d_tracer,
8868                           tr, &tracing_mark_raw_fops);
8869
8870         trace_create_file("trace_clock", 0644, d_tracer, tr,
8871                           &trace_clock_fops);
8872
8873         trace_create_file("tracing_on", 0644, d_tracer,
8874                           tr, &rb_simple_fops);
8875
8876         trace_create_file("timestamp_mode", 0444, d_tracer, tr,
8877                           &trace_time_stamp_mode_fops);
8878
8879         tr->buffer_percent = 50;
8880
8881         trace_create_file("buffer_percent", 0444, d_tracer,
8882                         tr, &buffer_percent_fops);
8883
8884         create_trace_options_dir(tr);
8885
8886 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
8887         trace_create_maxlat_file(tr, d_tracer);
8888 #endif
8889
8890         if (ftrace_create_function_files(tr, d_tracer))
8891                 MEM_FAIL(1, "Could not allocate function filter files");
8892
8893 #ifdef CONFIG_TRACER_SNAPSHOT
8894         trace_create_file("snapshot", 0644, d_tracer,
8895                           tr, &snapshot_fops);
8896 #endif
8897
8898         trace_create_file("error_log", 0644, d_tracer,
8899                           tr, &tracing_err_log_fops);
8900
8901         for_each_tracing_cpu(cpu)
8902                 tracing_init_tracefs_percpu(tr, cpu);
8903
8904         ftrace_init_tracefs(tr, d_tracer);
8905 }
8906
8907 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
8908 {
8909         struct vfsmount *mnt;
8910         struct file_system_type *type;
8911
8912         /*
8913          * To maintain backward compatibility for tools that mount
8914          * debugfs to get to the tracing facility, tracefs is automatically
8915          * mounted to the debugfs/tracing directory.
8916          */
8917         type = get_fs_type("tracefs");
8918         if (!type)
8919                 return NULL;
8920         mnt = vfs_submount(mntpt, type, "tracefs", NULL);
8921         put_filesystem(type);
8922         if (IS_ERR(mnt))
8923                 return NULL;
8924         mntget(mnt);
8925
8926         return mnt;
8927 }
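/*
 * With this automount in place (registered in tracing_init_dentry() below),
 * the same files are reachable both at the tracefs mount point
 * (conventionally /sys/kernel/tracing) and under /sys/kernel/debug/tracing
 * for older tools.
 */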
8928
8929 /**
8930  * tracing_init_dentry - initialize top level trace array
8931  *
8932  * This is called when creating files or directories in the tracing
8933  * directory. It is called via fs_initcall() by any of the boot up code
8934  * and expects to return the dentry of the top level tracing directory.
8935  */
8936 struct dentry *tracing_init_dentry(void)
8937 {
8938         struct trace_array *tr = &global_trace;
8939
8940         if (security_locked_down(LOCKDOWN_TRACEFS)) {
8941                 pr_warn("Tracing disabled due to lockdown\n");
8942                 return ERR_PTR(-EPERM);
8943         }
8944
8945         /* The top level trace array uses NULL as parent */
8946         if (tr->dir)
8947                 return NULL;
8948
8949         if (WARN_ON(!tracefs_initialized()) ||
8950                 (IS_ENABLED(CONFIG_DEBUG_FS) &&
8951                  WARN_ON(!debugfs_initialized())))
8952                 return ERR_PTR(-ENODEV);
8953
8954         /*
8955          * As there may still be users that expect the tracing
8956          * files to exist in debugfs/tracing, we must automount
8957          * the tracefs file system there, so older tools still
8958          * work with the newer kernel.
8959          */
8960         tr->dir = debugfs_create_automount("tracing", NULL,
8961                                            trace_automount, NULL);
8962
8963         return NULL;
8964 }
8965
8966 extern struct trace_eval_map *__start_ftrace_eval_maps[];
8967 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
8968
8969 static void __init trace_eval_init(void)
8970 {
8971         int len;
8972
8973         len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
8974         trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
8975 }
8976
8977 #ifdef CONFIG_MODULES
8978 static void trace_module_add_evals(struct module *mod)
8979 {
8980         if (!mod->num_trace_evals)
8981                 return;
8982
8983         /*
8984          * Modules with bad taint do not have events created; do not
8985          * bother with their eval maps (enums) either.
8986          */
8987         if (trace_module_has_bad_taint(mod))
8988                 return;
8989
8990         trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
8991 }
8992
8993 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
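/*
 * Walk the trace_eval_maps list, find the block of eval maps that was
 * registered by @mod, then unlink it from the list and free it.
 */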
8994 static void trace_module_remove_evals(struct module *mod)
8995 {
8996         union trace_eval_map_item *map;
8997         union trace_eval_map_item **last = &trace_eval_maps;
8998
8999         if (!mod->num_trace_evals)
9000                 return;
9001
9002         mutex_lock(&trace_eval_mutex);
9003
9004         map = trace_eval_maps;
9005
9006         while (map) {
9007                 if (map->head.mod == mod)
9008                         break;
9009                 map = trace_eval_jmp_to_tail(map);
9010                 last = &map->tail.next;
9011                 map = map->tail.next;
9012         }
9013         if (!map)
9014                 goto out;
9015
9016         *last = trace_eval_jmp_to_tail(map)->tail.next;
9017         kfree(map);
9018  out:
9019         mutex_unlock(&trace_eval_mutex);
9020 }
9021 #else
9022 static inline void trace_module_remove_evals(struct module *mod) { }
9023 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9024
9025 static int trace_module_notify(struct notifier_block *self,
9026                                unsigned long val, void *data)
9027 {
9028         struct module *mod = data;
9029
9030         switch (val) {
9031         case MODULE_STATE_COMING:
9032                 trace_module_add_evals(mod);
9033                 break;
9034         case MODULE_STATE_GOING:
9035                 trace_module_remove_evals(mod);
9036                 break;
9037         }
9038
9039         return 0;
9040 }
9041
9042 static struct notifier_block trace_module_nb = {
9043         .notifier_call = trace_module_notify,
9044         .priority = 0,
9045 };
9046 #endif /* CONFIG_MODULES */
9047
9048 static __init int tracer_init_tracefs(void)
9049 {
9050         struct dentry *d_tracer;
9051
9052         trace_access_lock_init();
9053
9054         d_tracer = tracing_init_dentry();
9055         if (IS_ERR(d_tracer))
9056                 return 0;
9057
9058         event_trace_init();
9059
9060         init_tracer_tracefs(&global_trace, d_tracer);
9061         ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
9062
9063         trace_create_file("tracing_thresh", 0644, d_tracer,
9064                         &global_trace, &tracing_thresh_fops);
9065
9066         trace_create_file("README", 0444, d_tracer,
9067                         NULL, &tracing_readme_fops);
9068
9069         trace_create_file("saved_cmdlines", 0444, d_tracer,
9070                         NULL, &tracing_saved_cmdlines_fops);
9071
9072         trace_create_file("saved_cmdlines_size", 0644, d_tracer,
9073                           NULL, &tracing_saved_cmdlines_size_fops);
9074
9075         trace_create_file("saved_tgids", 0444, d_tracer,
9076                         NULL, &tracing_saved_tgids_fops);
9077
9078         trace_eval_init();
9079
9080         trace_create_eval_file(d_tracer);
9081
9082 #ifdef CONFIG_MODULES
9083         register_module_notifier(&trace_module_nb);
9084 #endif
9085
9086 #ifdef CONFIG_DYNAMIC_FTRACE
9087         trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
9088                         NULL, &tracing_dyn_info_fops);
9089 #endif
9090
9091         create_trace_instances(d_tracer);
9092
9093         update_tracer_options(&global_trace);
9094
9095         return 0;
9096 }
9097
9098 static int trace_panic_handler(struct notifier_block *this,
9099                                unsigned long event, void *unused)
9100 {
9101         if (ftrace_dump_on_oops)
9102                 ftrace_dump(ftrace_dump_on_oops);
9103         return NOTIFY_OK;
9104 }
9105
9106 static struct notifier_block trace_panic_notifier = {
9107         .notifier_call  = trace_panic_handler,
9108         .next           = NULL,
9109         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
9110 };
9111
9112 static int trace_die_handler(struct notifier_block *self,
9113                              unsigned long val,
9114                              void *data)
9115 {
9116         switch (val) {
9117         case DIE_OOPS:
9118                 if (ftrace_dump_on_oops)
9119                         ftrace_dump(ftrace_dump_on_oops);
9120                 break;
9121         default:
9122                 break;
9123         }
9124         return NOTIFY_OK;
9125 }
9126
9127 static struct notifier_block trace_die_notifier = {
9128         .notifier_call = trace_die_handler,
9129         .priority = 200
9130 };
9131
9132 /*
9133  * printk is limited to a maximum of 1024 bytes; we really don't need it
9134  * that big here. Nothing should be printing 1000 characters anyway.
9135  */
9136 #define TRACE_MAX_PRINT         1000
9137
9138 /*
9139  * Define here KERN_TRACE so that we have one place to modify
9140  * it if we decide to change what log level the ftrace dump
9141  * should be at.
9142  */
9143 #define KERN_TRACE              KERN_EMERG
9144
9145 void
9146 trace_printk_seq(struct trace_seq *s)
9147 {
9148         /* Probably should print a warning here. */
9149         if (s->seq.len >= TRACE_MAX_PRINT)
9150                 s->seq.len = TRACE_MAX_PRINT;
9151
9152         /*
9153          * More paranoid code. Although the buffer size is set to
9154          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
9155          * an extra layer of protection.
9156          */
9157         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
9158                 s->seq.len = s->seq.size - 1;
9159
9160         /* should already be NUL-terminated, but we are paranoid. */
9161         s->buffer[s->seq.len] = 0;
9162
9163         printk(KERN_TRACE "%s", s->buffer);
9164
9165         trace_seq_init(s);
9166 }
9167
9168 void trace_init_global_iter(struct trace_iterator *iter)
9169 {
9170         iter->tr = &global_trace;
9171         iter->trace = iter->tr->current_trace;
9172         iter->cpu_file = RING_BUFFER_ALL_CPUS;
9173         iter->array_buffer = &global_trace.array_buffer;
9174
9175         if (iter->trace && iter->trace->open)
9176                 iter->trace->open(iter);
9177
9178         /* Annotate start of buffers if we had overruns */
9179         if (ring_buffer_overruns(iter->array_buffer->buffer))
9180                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
9181
9182         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
9183         if (trace_clocks[iter->tr->clock_id].in_ns)
9184                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
9185 }
9186
9187 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
9188 {
9189         /* use static because iter can be a bit big for the stack */
9190         static struct trace_iterator iter;
9191         static atomic_t dump_running;
9192         struct trace_array *tr = &global_trace;
9193         unsigned int old_userobj;
9194         unsigned long flags;
9195         int cnt = 0, cpu;
9196
9197         /* Only allow one dump user at a time. */
9198         if (atomic_inc_return(&dump_running) != 1) {
9199                 atomic_dec(&dump_running);
9200                 return;
9201         }
9202
9203         /*
9204          * Always turn off tracing when we dump.
9205          * We don't need to show trace output of what happens
9206          * between multiple crashes.
9207          *
9208          * If the user does a sysrq-z, then they can re-enable
9209          * tracing with echo 1 > tracing_on.
9210          */
9211         tracing_off();
9212
9213         local_irq_save(flags);
9214         printk_nmi_direct_enter();
9215
9216         /* Simulate the iterator */
9217         trace_init_global_iter(&iter);
9218         /* Can not use kmalloc for iter.temp */
9219         iter.temp = static_temp_buf;
9220         iter.temp_size = STATIC_TEMP_BUF_SIZE;
9221
9222         for_each_tracing_cpu(cpu) {
9223                 atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9224         }
9225
9226         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
9227
9228         /* don't look at user memory in panic mode */
9229         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
9230
9231         switch (oops_dump_mode) {
9232         case DUMP_ALL:
9233                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9234                 break;
9235         case DUMP_ORIG:
9236                 iter.cpu_file = raw_smp_processor_id();
9237                 break;
9238         case DUMP_NONE:
9239                 goto out_enable;
9240         default:
9241                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
9242                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9243         }
9244
9245         printk(KERN_TRACE "Dumping ftrace buffer:\n");
9246
9247         /* Did function tracer already get disabled? */
9248         if (ftrace_is_dead()) {
9249                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
9250                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
9251         }
9252
9253         /*
9254          * We need to stop all tracing on all CPUs to read
9255          * the next buffer. This is a bit expensive, but is
9256          * not done often. We print out all that we can read,
9257          * and then release the locks again.
9258          */
9259
9260         while (!trace_empty(&iter)) {
9261
9262                 if (!cnt)
9263                         printk(KERN_TRACE "---------------------------------\n");
9264
9265                 cnt++;
9266
9267                 trace_iterator_reset(&iter);
9268                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
9269
9270                 if (trace_find_next_entry_inc(&iter) != NULL) {
9271                         int ret;
9272
9273                         ret = print_trace_line(&iter);
9274                         if (ret != TRACE_TYPE_NO_CONSUME)
9275                                 trace_consume(&iter);
9276                 }
9277                 touch_nmi_watchdog();
9278
9279                 trace_printk_seq(&iter.seq);
9280         }
9281
9282         if (!cnt)
9283                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
9284         else
9285                 printk(KERN_TRACE "---------------------------------\n");
9286
9287  out_enable:
9288         tr->trace_flags |= old_userobj;
9289
9290         for_each_tracing_cpu(cpu) {
9291                 atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9292         }
9293         atomic_dec(&dump_running);
9294         printk_nmi_direct_exit();
9295         local_irq_restore(flags);
9296 }
9297 EXPORT_SYMBOL_GPL(ftrace_dump);
9298
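/*
 * Split @buf into whitespace-separated words with argv_split() and hand the
 * resulting argv[] to @createfn. See trace_parse_run_command() below for
 * the line-by-line driver around this helper.
 */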
9299 int trace_run_command(const char *buf, int (*createfn)(int, char **))
9300 {
9301         char **argv;
9302         int argc, ret;
9303
9304         argc = 0;
9305         ret = 0;
9306         argv = argv_split(GFP_KERNEL, buf, &argc);
9307         if (!argv)
9308                 return -ENOMEM;
9309
9310         if (argc)
9311                 ret = createfn(argc, argv);
9312
9313         argv_free(argv);
9314
9315         return ret;
9316 }
9317
9318 #define WRITE_BUFSIZE  4096
9319
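/*
 * Copy a command string from user space in WRITE_BUFSIZE chunks, split it
 * into lines, strip '#' comments, and feed each line to trace_run_command().
 * Used by files that accept command strings (for example the dynamic
 * kprobe_events interface).
 */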
9320 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
9321                                 size_t count, loff_t *ppos,
9322                                 int (*createfn)(int, char **))
9323 {
9324         char *kbuf, *buf, *tmp;
9325         int ret = 0;
9326         size_t done = 0;
9327         size_t size;
9328
9329         kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
9330         if (!kbuf)
9331                 return -ENOMEM;
9332
9333         while (done < count) {
9334                 size = count - done;
9335
9336                 if (size >= WRITE_BUFSIZE)
9337                         size = WRITE_BUFSIZE - 1;
9338
9339                 if (copy_from_user(kbuf, buffer + done, size)) {
9340                         ret = -EFAULT;
9341                         goto out;
9342                 }
9343                 kbuf[size] = '\0';
9344                 buf = kbuf;
9345                 do {
9346                         tmp = strchr(buf, '\n');
9347                         if (tmp) {
9348                                 *tmp = '\0';
9349                                 size = tmp - buf + 1;
9350                         } else {
9351                                 size = strlen(buf);
9352                                 if (done + size < count) {
9353                                         if (buf != kbuf)
9354                                                 break;
9355                                         /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
9356                                         pr_warn("Line length is too long: Should be less than %d\n",
9357                                                 WRITE_BUFSIZE - 2);
9358                                         ret = -EINVAL;
9359                                         goto out;
9360                                 }
9361                         }
9362                         done += size;
9363
9364                         /* Remove comments */
9365                         tmp = strchr(buf, '#');
9366
9367                         if (tmp)
9368                                 *tmp = '\0';
9369
9370                         ret = trace_run_command(buf, createfn);
9371                         if (ret)
9372                                 goto out;
9373                         buf += size;
9374
9375                 } while (done < count);
9376         }
9377         ret = done;
9378
9379 out:
9380         kfree(kbuf);
9381
9382         return ret;
9383 }
9384
9385 __init static int tracer_alloc_buffers(void)
9386 {
9387         int ring_buf_size;
9388         int ret = -ENOMEM;
9389
9390
9391         if (security_locked_down(LOCKDOWN_TRACEFS)) {
9392                 pr_warn("Tracing disabled due to lockdown\n");
9393                 return -EPERM;
9394         }
9395
9396         /*
9397          * Make sure we don't accidentally add more trace options
9398          * than we have bits for.
9399          */
9400         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
9401
9402         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
9403                 goto out;
9404
9405         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
9406                 goto out_free_buffer_mask;
9407
9408         /* Only allocate trace_printk buffers if a trace_printk exists */
9409         if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
9410                 /* Must be called before global_trace.buffer is allocated */
9411                 trace_printk_init_buffers();
9412
9413         /* To save memory, keep the ring buffer size to its minimum */
9414         if (ring_buffer_expanded)
9415                 ring_buf_size = trace_buf_size;
9416         else
9417                 ring_buf_size = 1;
9418
9419         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
9420         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
9421
9422         raw_spin_lock_init(&global_trace.start_lock);
9423
9424         /*
9425          * The prepare callbacks allocate some memory for the ring buffer. We
9426          * don't free the buffer if the CPU goes down. If we were to free
9427          * the buffer, then the user would lose any trace that was in the
9428          * buffer. The memory will be removed once the "instance" is removed.
9429          */
9430         ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
9431                                       "trace/RB:preapre", trace_rb_cpu_prepare,
9432                                       NULL);
9433         if (ret < 0)
9434                 goto out_free_cpumask;
9435         /* Used for event triggers */
9436         ret = -ENOMEM;
9437         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
9438         if (!temp_buffer)
9439                 goto out_rm_hp_state;
9440
9441         if (trace_create_savedcmd() < 0)
9442                 goto out_free_temp_buffer;
9443
9444         /* TODO: make the number of buffers hot pluggable with CPUs */
9445         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
9446                 MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
9447                 goto out_free_savedcmd;
9448         }
9449
9450         if (global_trace.buffer_disabled)
9451                 tracing_off();
9452
9453         if (trace_boot_clock) {
9454                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
9455                 if (ret < 0)
9456                         pr_warn("Trace clock %s not defined, going back to default\n",
9457                                 trace_boot_clock);
9458         }
9459
9460         /*
9461          * register_tracer() might reference current_trace, so it
9462          * needs to be set before we register anything. This is
9463          * just a bootstrap of current_trace anyway.
9464          */
9465         global_trace.current_trace = &nop_trace;
9466
9467         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9468
9469         ftrace_init_global_array_ops(&global_trace);
9470
9471         init_trace_flags_index(&global_trace);
9472
9473         register_tracer(&nop_trace);
9474
9475         /* Function tracing may start here (via kernel command line) */
9476         init_function_trace();
9477
9478         /* All seems OK, enable tracing */
9479         tracing_disabled = 0;
9480
9481         atomic_notifier_chain_register(&panic_notifier_list,
9482                                        &trace_panic_notifier);
9483
9484         register_die_notifier(&trace_die_notifier);
9485
9486         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
9487
9488         INIT_LIST_HEAD(&global_trace.systems);
9489         INIT_LIST_HEAD(&global_trace.events);
9490         INIT_LIST_HEAD(&global_trace.hist_vars);
9491         INIT_LIST_HEAD(&global_trace.err_log);
9492         list_add(&global_trace.list, &ftrace_trace_arrays);
9493
9494         apply_trace_boot_options();
9495
9496         register_snapshot_cmd();
9497
9498         return 0;
9499
9500 out_free_savedcmd:
9501         free_saved_cmdlines_buffer(savedcmd);
9502 out_free_temp_buffer:
9503         ring_buffer_free(temp_buffer);
9504 out_rm_hp_state:
9505         cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
9506 out_free_cpumask:
9507         free_cpumask_var(global_trace.tracing_cpumask);
9508 out_free_buffer_mask:
9509         free_cpumask_var(tracing_buffer_mask);
9510 out:
9511         return ret;
9512 }
9513
9514 void __init early_trace_init(void)
9515 {
9516         if (tracepoint_printk) {
9517                 tracepoint_print_iter =
9518                         kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
9519                 if (MEM_FAIL(!tracepoint_print_iter,
9520                              "Failed to allocate trace iterator\n"))
9521                         tracepoint_printk = 0;
9522                 else
9523                         static_key_enable(&tracepoint_printk_key.key);
9524         }
9525         tracer_alloc_buffers();
9526 }
9527
9528 void __init trace_init(void)
9529 {
9530         trace_event_init();
9531 }
9532
9533 __init static int clear_boot_tracer(void)
9534 {
9535         /*
9536          * The buffer that holds the default bootup tracer name is in an
9537          * init section. This function is called in lateinit. If we did not
9538          * find the boot tracer, then clear it out, to prevent
9539          * later registration from accessing the buffer that is
9540          * about to be freed.
9541          */
9542         if (!default_bootup_tracer)
9543                 return 0;
9544
9545         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
9546                default_bootup_tracer);
9547         default_bootup_tracer = NULL;
9548
9549         return 0;
9550 }
9551
9552 fs_initcall(tracer_init_tracefs);
9553 late_initcall_sync(clear_boot_tracer);
9554
9555 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
9556 __init static int tracing_set_default_clock(void)
9557 {
9558         /* sched_clock_stable() is determined in late_initcall */
9559         if (!trace_boot_clock && !sched_clock_stable()) {
9560                 if (security_locked_down(LOCKDOWN_TRACEFS)) {
9561                         pr_warn("Can not set tracing clock due to lockdown\n");
9562                         return -EPERM;
9563                 }
9564
9565                 printk(KERN_WARNING
9566                        "Unstable clock detected, switching default tracing clock to \"global\"\n"
9567                        "If you want to keep using the local clock, then add:\n"
9568                        "  \"trace_clock=local\"\n"
9569                        "on the kernel command line\n");
9570                 tracing_set_clock(&global_trace, "global");
9571         }
9572
9573         return 0;
9574 }
9575 late_initcall_sync(tracing_set_default_clock);
9576 #endif