kernel/trace/trace.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/notifier.h>
23 #include <linux/irqflags.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/poll.h>
43 #include <linux/nmi.h>
44 #include <linux/fs.h>
45 #include <linux/trace.h>
46 #include <linux/sched/clock.h>
47 #include <linux/sched/rt.h>
48 #include <linux/fsnotify.h>
49 #include <linux/irq_work.h>
50 #include <linux/workqueue.h>
51
52 #include "trace.h"
53 #include "trace_output.h"
54
55 /*
56  * On boot up, the ring buffer is set to the minimum size, so that
57  * we do not waste memory on systems that are not using tracing.
58  */
59 bool ring_buffer_expanded;
60
61 /*
62  * We need to change this state when a selftest is running.
63  * A selftest will look into the ring buffer to count the
64  * entries inserted during the selftest, although concurrent
65  * insertions into the ring buffer, such as trace_printk(), could
66  * occur at the same time, giving false positive or negative results.
67  */
68 static bool __read_mostly tracing_selftest_running;
69
70 /*
71  * If a tracer is running, we do not want to run SELFTEST.
72  */
73 bool __read_mostly tracing_selftest_disabled;
74
75 /* Pipe tracepoints to printk */
76 struct trace_iterator *tracepoint_print_iter;
77 int tracepoint_printk;
78 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
79
80 /* For tracers that don't implement custom flags */
81 static struct tracer_opt dummy_tracer_opt[] = {
82         { }
83 };
84
85 static int
86 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
87 {
88         return 0;
89 }
90
91 /*
92  * To prevent the comm cache from being overwritten when no
93  * tracing is active, only save the comm when a trace event
94  * occurs.
95  */
96 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
97
98 /*
99  * Kill all tracing for good (never come back).
100  * It is initialized to 1 but will turn to zero if the initialization
101  * of the tracer is successful. But that is the only place that sets
102  * this back to zero.
103  */
104 static int tracing_disabled = 1;
105
106 cpumask_var_t __read_mostly     tracing_buffer_mask;
107
108 /*
109  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
110  *
111  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
112  * is set, then ftrace_dump is called. This will output the contents
113  * of the ftrace buffers to the console.  This is very useful for
114  * capturing traces that lead to crashes and outputting them to a
115  * serial console.
116  *
117  * It is off by default, but you can enable it either by specifying
118  * "ftrace_dump_on_oops" on the kernel command line, or by setting
119  * /proc/sys/kernel/ftrace_dump_on_oops.
120  * Set it to 1 to dump the buffers of all CPUs.
121  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
122  */
123
124 enum ftrace_dump_mode ftrace_dump_on_oops;
125
126 /* When set, tracing will stop when a WARN*() is hit */
127 int __disable_trace_on_warning;
128
129 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
130 /* Map of enums to their values, for "eval_map" file */
131 struct trace_eval_map_head {
132         struct module                   *mod;
133         unsigned long                   length;
134 };
135
136 union trace_eval_map_item;
137
138 struct trace_eval_map_tail {
139         /*
140          * "end" is first and points to NULL as it must be different
141          * than "mod" or "eval_string"
142          */
143         union trace_eval_map_item       *next;
144         const char                      *end;   /* points to NULL */
145 };
146
147 static DEFINE_MUTEX(trace_eval_mutex);
148
149 /*
150  * The trace_eval_maps are saved in an array with two extra elements,
151  * one at the beginning, and one at the end. The beginning item contains
152  * the count of the saved maps (head.length), and the module they
153  * belong to if not built in (head.mod). The ending item contains a
154  * pointer to the next array of saved eval_map items.
155  */
156 union trace_eval_map_item {
157         struct trace_eval_map           map;
158         struct trace_eval_map_head      head;
159         struct trace_eval_map_tail      tail;
160 };
161
162 static union trace_eval_map_item *trace_eval_maps;
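
/*
 * For illustration only (the module and eval names below are made up),
 * a module that registers two eval maps ends up saved as an array laid
 * out roughly like this:
 *
 *   [0] head: { .mod = <owning module>, .length = 2 }
 *   [1] map:  { .system = "foo", .eval_string = "FOO_A", .eval_value = 1 }
 *   [2] map:  { .system = "foo", .eval_string = "FOO_B", .eval_value = 2 }
 *   [3] tail: { .next = <next saved array, or NULL>, .end = NULL }
 *
 * trace_eval_maps points to the first such array; the tail's "next"
 * pointer chains the arrays of all other saved maps together.
 */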
163 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
164
165 int tracing_set_tracer(struct trace_array *tr, const char *buf);
166 static void ftrace_trace_userstack(struct trace_buffer *buffer,
167                                    unsigned long flags, int pc);
168
169 #define MAX_TRACER_SIZE         100
170 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
171 static char *default_bootup_tracer;
172
173 static bool allocate_snapshot;
174
175 static int __init set_cmdline_ftrace(char *str)
176 {
177         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
178         default_bootup_tracer = bootup_tracer_buf;
179         /* We are using ftrace early, expand it */
180         ring_buffer_expanded = true;
181         return 1;
182 }
183 __setup("ftrace=", set_cmdline_ftrace);
184
185 static int __init set_ftrace_dump_on_oops(char *str)
186 {
187         if (*str++ != '=' || !*str) {
188                 ftrace_dump_on_oops = DUMP_ALL;
189                 return 1;
190         }
191
192         if (!strcmp("orig_cpu", str)) {
193                 ftrace_dump_on_oops = DUMP_ORIG;
194                 return 1;
195         }
196
197         return 0;
198 }
199 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
200
201 static int __init stop_trace_on_warning(char *str)
202 {
203         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
204                 __disable_trace_on_warning = 1;
205         return 1;
206 }
207 __setup("traceoff_on_warning", stop_trace_on_warning);
208
209 static int __init boot_alloc_snapshot(char *str)
210 {
211         allocate_snapshot = true;
212         /* We also need the main ring buffer expanded */
213         ring_buffer_expanded = true;
214         return 1;
215 }
216 __setup("alloc_snapshot", boot_alloc_snapshot);
217
218
219 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
220
221 static int __init set_trace_boot_options(char *str)
222 {
223         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
224         return 0;
225 }
226 __setup("trace_options=", set_trace_boot_options);
227
228 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
229 static char *trace_boot_clock __initdata;
230
231 static int __init set_trace_boot_clock(char *str)
232 {
233         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
234         trace_boot_clock = trace_boot_clock_buf;
235         return 0;
236 }
237 __setup("trace_clock=", set_trace_boot_clock);
238
239 static int __init set_tracepoint_printk(char *str)
240 {
241         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
242                 tracepoint_printk = 1;
243         return 1;
244 }
245 __setup("tp_printk", set_tracepoint_printk);
246
247 unsigned long long ns2usecs(u64 nsec)
248 {
249         nsec += 500;
250         do_div(nsec, 1000);
251         return nsec;
252 }
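
/*
 * ns2usecs() rounds to the nearest microsecond, for example:
 *   ns2usecs(1499) == 1
 *   ns2usecs(1500) == 2
 *   ns2usecs(2000) == 2
 */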
253
254 /* trace_flags holds trace_options default values */
255 #define TRACE_DEFAULT_FLAGS                                             \
256         (FUNCTION_DEFAULT_FLAGS |                                       \
257          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
258          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
259          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
260          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
261
262 /* trace_options that are only supported by global_trace */
263 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
264                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
265
266 /* trace_flags that are default zero for instances */
267 #define ZEROED_TRACE_FLAGS \
268         (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
269
270 /*
271  * The global_trace is the descriptor that holds the top-level tracing
272  * buffers for the live tracing.
273  */
274 static struct trace_array global_trace = {
275         .trace_flags = TRACE_DEFAULT_FLAGS,
276 };
277
278 LIST_HEAD(ftrace_trace_arrays);
279
280 int trace_array_get(struct trace_array *this_tr)
281 {
282         struct trace_array *tr;
283         int ret = -ENODEV;
284
285         mutex_lock(&trace_types_lock);
286         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
287                 if (tr == this_tr) {
288                         tr->ref++;
289                         ret = 0;
290                         break;
291                 }
292         }
293         mutex_unlock(&trace_types_lock);
294
295         return ret;
296 }
297
298 static void __trace_array_put(struct trace_array *this_tr)
299 {
300         WARN_ON(!this_tr->ref);
301         this_tr->ref--;
302 }
303
304 /**
305  * trace_array_put - Decrement the reference counter for this trace array.
306  *
307  * NOTE: Use this when we no longer need the trace array returned by
308  * trace_array_get_by_name(). This ensures the trace array can be later
309  * destroyed.
310  *
311  */
312 void trace_array_put(struct trace_array *this_tr)
313 {
314         if (!this_tr)
315                 return;
316
317         mutex_lock(&trace_types_lock);
318         __trace_array_put(this_tr);
319         mutex_unlock(&trace_types_lock);
320 }
321 EXPORT_SYMBOL_GPL(trace_array_put);
322
323 int tracing_check_open_get_tr(struct trace_array *tr)
324 {
325         int ret;
326
327         ret = security_locked_down(LOCKDOWN_TRACEFS);
328         if (ret)
329                 return ret;
330
331         if (tracing_disabled)
332                 return -ENODEV;
333
334         if (tr && trace_array_get(tr) < 0)
335                 return -ENODEV;
336
337         return 0;
338 }
339
340 int call_filter_check_discard(struct trace_event_call *call, void *rec,
341                               struct trace_buffer *buffer,
342                               struct ring_buffer_event *event)
343 {
344         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
345             !filter_match_preds(call->filter, rec)) {
346                 __trace_event_discard_commit(buffer, event);
347                 return 1;
348         }
349
350         return 0;
351 }
352
353 void trace_free_pid_list(struct trace_pid_list *pid_list)
354 {
355         vfree(pid_list->pids);
356         kfree(pid_list);
357 }
358
359 /**
360  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
361  * @filtered_pids: The list of pids to check
362  * @search_pid: The PID to find in @filtered_pids
363  *
364  * Returns true if @search_pid is found in @filtered_pids, false otherwise.
365  */
366 bool
367 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
368 {
369         /*
370          * If pid_max changed after filtered_pids was created, we
371          * by default ignore all pids greater than the previous pid_max.
372          */
373         if (search_pid >= filtered_pids->pid_max)
374                 return false;
375
376         return test_bit(search_pid, filtered_pids->pids);
377 }
378
379 /**
380  * trace_ignore_this_task - should a task be ignored for tracing
381  * @filtered_pids: The list of pids to check
382  * @task: The task that should be ignored if not filtered
383  *
384  * Checks if @task should be traced or not from @filtered_pids.
385  * Returns true if @task should *NOT* be traced.
386  * Returns false if @task should be traced.
387  */
388 bool
389 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
390                        struct trace_pid_list *filtered_no_pids,
391                        struct task_struct *task)
392 {
393         /*
394          * If filtered_no_pids is not empty, and the task's pid is listed
395          * in filtered_no_pids, then return true.
396          * Otherwise, if filtered_pids is empty, that means we can
397          * trace all tasks. If it has content, then only trace pids
398          * within filtered_pids.
399          */
400
401         return (filtered_pids &&
402                 !trace_find_filtered_pid(filtered_pids, task->pid)) ||
403                 (filtered_no_pids &&
404                  trace_find_filtered_pid(filtered_no_pids, task->pid));
405 }
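
/*
 * A sketch of the resulting behavior (filtered_pids acts as an allow
 * list, filtered_no_pids as a deny list; the pid values are examples):
 *
 *   filtered_pids   filtered_no_pids   task->pid   traced?
 *   NULL            NULL               any         yes
 *   { 1, 2 }        NULL               1           yes
 *   { 1, 2 }        NULL               3           no
 *   NULL            { 3 }              3           no
 *   { 1, 2 }        { 2 }              2           no
 */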
406
407 /**
408  * trace_filter_add_remove_task - Add or remove a task from a pid_list
409  * @pid_list: The list to modify
410  * @self: The current task for fork or NULL for exit
411  * @task: The task to add or remove
412  *
413  * If adding a task, if @self is defined, the task is only added if @self
414  * is also included in @pid_list. This happens on fork and tasks should
415  * only be added when the parent is listed. If @self is NULL, then the
416  * @task pid will be removed from the list, which would happen on exit
417  * of a task.
418  */
419 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
420                                   struct task_struct *self,
421                                   struct task_struct *task)
422 {
423         if (!pid_list)
424                 return;
425
426         /* For forks, we only add if the forking task is listed */
427         if (self) {
428                 if (!trace_find_filtered_pid(pid_list, self->pid))
429                         return;
430         }
431
432         /* Sorry, but we don't support pid_max changing after setting */
433         if (task->pid >= pid_list->pid_max)
434                 return;
435
436         /* "self" is set for forks, and NULL for exits */
437         if (self)
438                 set_bit(task->pid, pid_list->pids);
439         else
440                 clear_bit(task->pid, pid_list->pids);
441 }
442
443 /**
444  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
445  * @pid_list: The pid list to show
446  * @v: The last pid that was shown (+1 of the actual pid, so that zero can be displayed)
447  * @pos: The position of the file
448  *
449  * This is used by the seq_file "next" operation to iterate the pids
450  * listed in a trace_pid_list structure.
451  *
452  * Returns the pid+1 as we want to display pid of zero, but NULL would
453  * stop the iteration.
454  */
455 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
456 {
457         unsigned long pid = (unsigned long)v;
458
459         (*pos)++;
460
461         /* pid is already +1 of the actual previous bit */
462         pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
463
464         /* Return pid + 1 to allow zero to be represented */
465         if (pid < pid_list->pid_max)
466                 return (void *)(pid + 1);
467
468         return NULL;
469 }
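
/*
 * For illustration, with only pids 0 and 5 set in the list, a seq_file
 * walk sees these values (each pid is returned as pid + 1 so that pid 0
 * can be told apart from the NULL that ends the iteration):
 *
 *   trace_pid_start(pid_list, &pos)            returns (void *)1  - pid 0
 *   trace_pid_next(pid_list, (void *)1, &pos)  returns (void *)6  - pid 5
 *   trace_pid_next(pid_list, (void *)6, &pos)  returns NULL       - done
 */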
470
471 /**
472  * trace_pid_start - Used for seq_file to start reading pid lists
473  * @pid_list: The pid list to show
474  * @pos: The position of the file
475  *
476  * This is used by seq_file "start" operation to start the iteration
477  * of listing pids.
478  *
479  * Returns the pid+1 as we want to display pid of zero, but NULL would
480  * stop the iteration.
481  */
482 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
483 {
484         unsigned long pid;
485         loff_t l = 0;
486
487         pid = find_first_bit(pid_list->pids, pid_list->pid_max);
488         if (pid >= pid_list->pid_max)
489                 return NULL;
490
491         /* Return pid + 1 so that zero can be the exit value */
492         for (pid++; pid && l < *pos;
493              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
494                 ;
495         return (void *)pid;
496 }
497
498 /**
499  * trace_pid_show - show the current pid in seq_file processing
500  * @m: The seq_file structure to write into
501  * @v: A void pointer of the pid (+1) value to display
502  *
503  * Can be directly used by seq_file operations to display the current
504  * pid value.
505  */
506 int trace_pid_show(struct seq_file *m, void *v)
507 {
508         unsigned long pid = (unsigned long)v - 1;
509
510         seq_printf(m, "%lu\n", pid);
511         return 0;
512 }
513
514 /* 128 should be much more than enough */
515 #define PID_BUF_SIZE            127
516
517 int trace_pid_write(struct trace_pid_list *filtered_pids,
518                     struct trace_pid_list **new_pid_list,
519                     const char __user *ubuf, size_t cnt)
520 {
521         struct trace_pid_list *pid_list;
522         struct trace_parser parser;
523         unsigned long val;
524         int nr_pids = 0;
525         ssize_t read = 0;
526         ssize_t ret = 0;
527         loff_t pos;
528         pid_t pid;
529
530         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
531                 return -ENOMEM;
532
533         /*
534          * Always create a new array. The write is an all or nothing
535          * operation: when the user adds new pids, a new array is built,
536          * and if the operation fails, the current list is left
537          * unmodified.
538          */
539         pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
540         if (!pid_list) {
541                 trace_parser_put(&parser);
542                 return -ENOMEM;
543         }
544
545         pid_list->pid_max = READ_ONCE(pid_max);
546
547         /* Only truncating will shrink pid_max */
548         if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
549                 pid_list->pid_max = filtered_pids->pid_max;
550
551         pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
552         if (!pid_list->pids) {
553                 trace_parser_put(&parser);
554                 kfree(pid_list);
555                 return -ENOMEM;
556         }
557
558         if (filtered_pids) {
559                 /* copy the current bits to the new max */
560                 for_each_set_bit(pid, filtered_pids->pids,
561                                  filtered_pids->pid_max) {
562                         set_bit(pid, pid_list->pids);
563                         nr_pids++;
564                 }
565         }
566
567         while (cnt > 0) {
568
569                 pos = 0;
570
571                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
572                 if (ret < 0 || !trace_parser_loaded(&parser))
573                         break;
574
575                 read += ret;
576                 ubuf += ret;
577                 cnt -= ret;
578
579                 ret = -EINVAL;
580                 if (kstrtoul(parser.buffer, 0, &val))
581                         break;
582                 if (val >= pid_list->pid_max)
583                         break;
584
585                 pid = (pid_t)val;
586
587                 set_bit(pid, pid_list->pids);
588                 nr_pids++;
589
590                 trace_parser_clear(&parser);
591                 ret = 0;
592         }
593         trace_parser_put(&parser);
594
595         if (ret < 0) {
596                 trace_free_pid_list(pid_list);
597                 return ret;
598         }
599
600         if (!nr_pids) {
601                 /* Cleared the list of pids */
602                 trace_free_pid_list(pid_list);
603                 read = ret;
604                 pid_list = NULL;
605         }
606
607         *new_pid_list = pid_list;
608
609         return read;
610 }
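
/*
 * The pid mask allocated above is a plain bitmap with one bit per
 * possible pid, hence the (pid_list->pid_max + 7) >> 3 byte size.
 * With the common default of pid_max == 32768 that is 4 KiB; raising
 * pid_max (via /proc/sys/kernel/pid_max) grows it accordingly, which
 * is why vzalloc() is used instead of kzalloc().
 */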
611
612 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
613 {
614         u64 ts;
615
616         /* Early boot up does not have a buffer yet */
617         if (!buf->buffer)
618                 return trace_clock_local();
619
620         ts = ring_buffer_time_stamp(buf->buffer, cpu);
621         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
622
623         return ts;
624 }
625
626 u64 ftrace_now(int cpu)
627 {
628         return buffer_ftrace_now(&global_trace.array_buffer, cpu);
629 }
630
631 /**
632  * tracing_is_enabled - Show if global_trace has been disabled
633  *
634  * Shows if the global trace has been enabled or not. It uses the
635  * mirror flag "buffer_disabled" to be used in fast paths such as for
636  * the irqsoff tracer. But it may be inaccurate due to races. If you
637  * need to know the accurate state, use tracing_is_on() which is a little
638  * slower, but accurate.
639  */
640 int tracing_is_enabled(void)
641 {
642         /*
643          * For quick access (irqsoff uses this in fast path), just
644          * return the mirror variable of the state of the ring buffer.
645          * It's a little racy, but we don't really care.
646          */
647         smp_rmb();
648         return !global_trace.buffer_disabled;
649 }
650
651 /*
652  * trace_buf_size is the size in bytes that is allocated
653  * for a buffer. Note, the number of bytes is always rounded
654  * to page size.
655  *
656  * This number is purposely set to a low value of 16384.
657  * If a dump on oops happens, it is much appreciated not to have
658  * to wait for all that output. In any case, this is configurable
659  * at both boot time and run time.
660  */
661 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
662
663 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
664
665 /* trace_types holds a link list of available tracers. */
666 static struct tracer            *trace_types __read_mostly;
667
668 /*
669  * trace_types_lock is used to protect the trace_types list.
670  */
671 DEFINE_MUTEX(trace_types_lock);
672
673 /*
674  * Serialize access to the ring buffer.
675  *
676  * The ring buffer serializes readers, but that is only low level
677  * protection. The validity of the events (returned by ring_buffer_peek()
678  * etc.) is not protected by the ring buffer.
679  *
680  * The content of events may become garbage if we allow other processes
681  * to consume these events concurrently:
682  *   A) the page of the consumed events may become a normal page
683  *      (not a reader page) in the ring buffer, and this page will be
684  *      rewritten by the event producer.
685  *   B) the page of the consumed events may become a page for splice_read,
686  *      and this page will be returned to the system.
687  *
688  * These primitives allow multiple processes to access different cpu
689  * ring buffers concurrently.
690  *
691  * These primitives don't distinguish read-only and read-consume access.
692  * Multiple read-only accesses are also serialized.
693  */
694
695 #ifdef CONFIG_SMP
696 static DECLARE_RWSEM(all_cpu_access_lock);
697 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
698
699 static inline void trace_access_lock(int cpu)
700 {
701         if (cpu == RING_BUFFER_ALL_CPUS) {
702                 /* gain it for accessing the whole ring buffer. */
703                 down_write(&all_cpu_access_lock);
704         } else {
705                 /* gain it for accessing a cpu ring buffer. */
706
707                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
708                 down_read(&all_cpu_access_lock);
709
710                 /* Secondly block other access to this @cpu ring buffer. */
711                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
712         }
713 }
714
715 static inline void trace_access_unlock(int cpu)
716 {
717         if (cpu == RING_BUFFER_ALL_CPUS) {
718                 up_write(&all_cpu_access_lock);
719         } else {
720                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
721                 up_read(&all_cpu_access_lock);
722         }
723 }
724
725 static inline void trace_access_lock_init(void)
726 {
727         int cpu;
728
729         for_each_possible_cpu(cpu)
730                 mutex_init(&per_cpu(cpu_access_lock, cpu));
731 }
732
733 #else
734
735 static DEFINE_MUTEX(access_lock);
736
737 static inline void trace_access_lock(int cpu)
738 {
739         (void)cpu;
740         mutex_lock(&access_lock);
741 }
742
743 static inline void trace_access_unlock(int cpu)
744 {
745         (void)cpu;
746         mutex_unlock(&access_lock);
747 }
748
749 static inline void trace_access_lock_init(void)
750 {
751 }
752
753 #endif
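
/*
 * A sketch of how a reader is expected to use the primitives above
 * (error handling and the actual ring buffer calls are omitted):
 *
 *   trace_access_lock(cpu);
 *   ... peek at or consume events of the @cpu buffer, or of all
 *       buffers when cpu == RING_BUFFER_ALL_CPUS ...
 *   trace_access_unlock(cpu);
 *
 * On SMP, an all-CPU reader takes all_cpu_access_lock for write, while
 * a per-cpu reader takes it for read plus the per-cpu mutex. Thus
 * readers of different cpu buffers may run concurrently, but never
 * concurrently with an all-CPU reader.
 */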
754
755 #ifdef CONFIG_STACKTRACE
756 static void __ftrace_trace_stack(struct trace_buffer *buffer,
757                                  unsigned long flags,
758                                  int skip, int pc, struct pt_regs *regs);
759 static inline void ftrace_trace_stack(struct trace_array *tr,
760                                       struct trace_buffer *buffer,
761                                       unsigned long flags,
762                                       int skip, int pc, struct pt_regs *regs);
763
764 #else
765 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
766                                         unsigned long flags,
767                                         int skip, int pc, struct pt_regs *regs)
768 {
769 }
770 static inline void ftrace_trace_stack(struct trace_array *tr,
771                                       struct trace_buffer *buffer,
772                                       unsigned long flags,
773                                       int skip, int pc, struct pt_regs *regs)
774 {
775 }
776
777 #endif
778
779 static __always_inline void
780 trace_event_setup(struct ring_buffer_event *event,
781                   int type, unsigned long flags, int pc)
782 {
783         struct trace_entry *ent = ring_buffer_event_data(event);
784
785         tracing_generic_entry_update(ent, type, flags, pc);
786 }
787
788 static __always_inline struct ring_buffer_event *
789 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
790                           int type,
791                           unsigned long len,
792                           unsigned long flags, int pc)
793 {
794         struct ring_buffer_event *event;
795
796         event = ring_buffer_lock_reserve(buffer, len);
797         if (event != NULL)
798                 trace_event_setup(event, type, flags, pc);
799
800         return event;
801 }
802
803 void tracer_tracing_on(struct trace_array *tr)
804 {
805         if (tr->array_buffer.buffer)
806                 ring_buffer_record_on(tr->array_buffer.buffer);
807         /*
808          * This flag is looked at when buffers haven't been allocated
809          * yet, or by some tracers (like irqsoff), that just want to
810          * know if the ring buffer has been disabled, but it can handle
811          * races of where it gets disabled but we still do a record.
812          * As the check is in the fast path of the tracers, it is more
813          * important to be fast than accurate.
814          */
815         tr->buffer_disabled = 0;
816         /* Make the flag seen by readers */
817         smp_wmb();
818 }
819
820 /**
821  * tracing_on - enable tracing buffers
822  *
823  * This function enables tracing buffers that may have been
824  * disabled with tracing_off.
825  */
826 void tracing_on(void)
827 {
828         tracer_tracing_on(&global_trace);
829 }
830 EXPORT_SYMBOL_GPL(tracing_on);
831
832
833 static __always_inline void
834 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
835 {
836         __this_cpu_write(trace_taskinfo_save, true);
837
838         /* If this is the temp buffer, we need to commit fully */
839         if (this_cpu_read(trace_buffered_event) == event) {
840                 /* Length is in event->array[0] */
841                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
842                 /* Release the temp buffer */
843                 this_cpu_dec(trace_buffered_event_cnt);
844         } else
845                 ring_buffer_unlock_commit(buffer, event);
846 }
847
848 /**
849  * __trace_puts - write a constant string into the trace buffer.
850  * @ip:    The address of the caller
851  * @str:   The constant string to write
852  * @size:  The size of the string.
853  */
854 int __trace_puts(unsigned long ip, const char *str, int size)
855 {
856         struct ring_buffer_event *event;
857         struct trace_buffer *buffer;
858         struct print_entry *entry;
859         unsigned long irq_flags;
860         int alloc;
861         int pc;
862
863         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
864                 return 0;
865
866         pc = preempt_count();
867
868         if (unlikely(tracing_selftest_running || tracing_disabled))
869                 return 0;
870
871         alloc = sizeof(*entry) + size + 2; /* possible \n added */
872
873         local_save_flags(irq_flags);
874         buffer = global_trace.array_buffer.buffer;
875         ring_buffer_nest_start(buffer);
876         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, 
877                                             irq_flags, pc);
878         if (!event) {
879                 size = 0;
880                 goto out;
881         }
882
883         entry = ring_buffer_event_data(event);
884         entry->ip = ip;
885
886         memcpy(&entry->buf, str, size);
887
888         /* Add a newline if necessary */
889         if (entry->buf[size - 1] != '\n') {
890                 entry->buf[size] = '\n';
891                 entry->buf[size + 1] = '\0';
892         } else
893                 entry->buf[size] = '\0';
894
895         __buffer_unlock_commit(buffer, event);
896         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
897  out:
898         ring_buffer_nest_end(buffer);
899         return size;
900 }
901 EXPORT_SYMBOL_GPL(__trace_puts);
902
903 /**
904  * __trace_bputs - write the pointer to a constant string into trace buffer
905  * @ip:    The address of the caller
906  * @str:   The constant string to write to the buffer to
907  */
908 int __trace_bputs(unsigned long ip, const char *str)
909 {
910         struct ring_buffer_event *event;
911         struct trace_buffer *buffer;
912         struct bputs_entry *entry;
913         unsigned long irq_flags;
914         int size = sizeof(struct bputs_entry);
915         int ret = 0;
916         int pc;
917
918         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
919                 return 0;
920
921         pc = preempt_count();
922
923         if (unlikely(tracing_selftest_running || tracing_disabled))
924                 return 0;
925
926         local_save_flags(irq_flags);
927         buffer = global_trace.array_buffer.buffer;
928
929         ring_buffer_nest_start(buffer);
930         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
931                                             irq_flags, pc);
932         if (!event)
933                 goto out;
934
935         entry = ring_buffer_event_data(event);
936         entry->ip                       = ip;
937         entry->str                      = str;
938
939         __buffer_unlock_commit(buffer, event);
940         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
941
942         ret = 1;
943  out:
944         ring_buffer_nest_end(buffer);
945         return ret;
946 }
947 EXPORT_SYMBOL_GPL(__trace_bputs);
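
/*
 * Note the difference between the two helpers above: __trace_puts()
 * copies the string bytes into the ring buffer, while __trace_bputs()
 * records only the pointer, so the latter must only be used with
 * strings that will never be freed (e.g. literals built into the
 * kernel image).
 */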
948
949 #ifdef CONFIG_TRACER_SNAPSHOT
950 static void tracing_snapshot_instance_cond(struct trace_array *tr,
951                                            void *cond_data)
952 {
953         struct tracer *tracer = tr->current_trace;
954         unsigned long flags;
955
956         if (in_nmi()) {
957                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
958                 internal_trace_puts("*** snapshot is being ignored        ***\n");
959                 return;
960         }
961
962         if (!tr->allocated_snapshot) {
963                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
964                 internal_trace_puts("*** stopping trace here!   ***\n");
965                 tracing_off();
966                 return;
967         }
968
969         /* Note, snapshot can not be used when the tracer uses it */
970         if (tracer->use_max_tr) {
971                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
972                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
973                 return;
974         }
975
976         local_irq_save(flags);
977         update_max_tr(tr, current, smp_processor_id(), cond_data);
978         local_irq_restore(flags);
979 }
980
981 void tracing_snapshot_instance(struct trace_array *tr)
982 {
983         tracing_snapshot_instance_cond(tr, NULL);
984 }
985
986 /**
987  * tracing_snapshot - take a snapshot of the current buffer.
988  *
989  * This causes a swap between the snapshot buffer and the current live
990  * tracing buffer. You can use this to take snapshots of the live
991  * trace when some condition is triggered, but continue to trace.
992  *
993  * Note, make sure to allocate the snapshot with either
994  * a tracing_snapshot_alloc(), or by doing it manually
995  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
996  *
997  * If the snapshot buffer is not allocated, it will stop tracing.
998  * Basically making a permanent snapshot.
999  */
1000 void tracing_snapshot(void)
1001 {
1002         struct trace_array *tr = &global_trace;
1003
1004         tracing_snapshot_instance(tr);
1005 }
1006 EXPORT_SYMBOL_GPL(tracing_snapshot);
1007
1008 /**
1009  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1010  * @tr:         The tracing instance to snapshot
1011  * @cond_data:  The data to be tested conditionally, and possibly saved
1012  *
1013  * This is the same as tracing_snapshot() except that the snapshot is
1014  * conditional - the snapshot will only happen if the
1015  * cond_snapshot.update() implementation receiving the cond_data
1016  * returns true, which means that the trace array's cond_snapshot
1017  * update() operation used the cond_data to determine whether the
1018  * snapshot should be taken, and if it was, presumably saved it along
1019  * with the snapshot.
1020  */
1021 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1022 {
1023         tracing_snapshot_instance_cond(tr, cond_data);
1024 }
1025 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1026
1027 /**
1028  * tracing_snapshot_cond_data - get the user data associated with a snapshot
1029  * @tr:         The tracing instance
1030  *
1031  * When the user enables a conditional snapshot using
1032  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1033  * with the snapshot.  This accessor is used to retrieve it.
1034  *
1035  * Should not be called from cond_snapshot.update(), since it takes
1036  * the tr->max_lock lock, which the code calling
1037  * cond_snapshot.update() has already done.
1038  *
1039  * Returns the cond_data associated with the trace array's snapshot.
1040  */
1041 void *tracing_cond_snapshot_data(struct trace_array *tr)
1042 {
1043         void *cond_data = NULL;
1044
1045         arch_spin_lock(&tr->max_lock);
1046
1047         if (tr->cond_snapshot)
1048                 cond_data = tr->cond_snapshot->cond_data;
1049
1050         arch_spin_unlock(&tr->max_lock);
1051
1052         return cond_data;
1053 }
1054 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1055
1056 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1057                                         struct array_buffer *size_buf, int cpu_id);
1058 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1059
1060 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1061 {
1062         int ret;
1063
1064         if (!tr->allocated_snapshot) {
1065
1066                 /* allocate spare buffer */
1067                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1068                                    &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1069                 if (ret < 0)
1070                         return ret;
1071
1072                 tr->allocated_snapshot = true;
1073         }
1074
1075         return 0;
1076 }
1077
1078 static void free_snapshot(struct trace_array *tr)
1079 {
1080         /*
1081          * We don't free the ring buffer; instead, we resize it because
1082          * the max_tr ring buffer has some state (e.g. ring->clock) that
1083          * we want to preserve.
1084          */
1085         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1086         set_buffer_entries(&tr->max_buffer, 1);
1087         tracing_reset_online_cpus(&tr->max_buffer);
1088         tr->allocated_snapshot = false;
1089 }
1090
1091 /**
1092  * tracing_alloc_snapshot - allocate snapshot buffer.
1093  *
1094  * This only allocates the snapshot buffer if it isn't already
1095  * allocated - it doesn't also take a snapshot.
1096  *
1097  * This is meant to be used in cases where the snapshot buffer needs
1098  * to be set up for events that can't sleep but need to be able to
1099  * trigger a snapshot.
1100  */
1101 int tracing_alloc_snapshot(void)
1102 {
1103         struct trace_array *tr = &global_trace;
1104         int ret;
1105
1106         ret = tracing_alloc_snapshot_instance(tr);
1107         WARN_ON(ret < 0);
1108
1109         return ret;
1110 }
1111 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1112
1113 /**
1114  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1115  *
1116  * This is similar to tracing_snapshot(), but it will allocate the
1117  * snapshot buffer if it isn't already allocated. Use this only
1118  * where it is safe to sleep, as the allocation may sleep.
1119  *
1120  * This causes a swap between the snapshot buffer and the current live
1121  * tracing buffer. You can use this to take snapshots of the live
1122  * trace when some condition is triggered, but continue to trace.
1123  */
1124 void tracing_snapshot_alloc(void)
1125 {
1126         int ret;
1127
1128         ret = tracing_alloc_snapshot();
1129         if (ret < 0)
1130                 return;
1131
1132         tracing_snapshot();
1133 }
1134 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1135
1136 /**
1137  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1138  * @tr:         The tracing instance
1139  * @cond_data:  User data to associate with the snapshot
1140  * @update:     Implementation of the cond_snapshot update function
1141  *
1142  * Check whether the conditional snapshot for the given instance has
1143  * already been enabled, or if the current tracer is already using a
1144  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1145  * save the cond_data and update function inside.
1146  *
1147  * Returns 0 if successful, error otherwise.
1148  */
1149 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1150                                  cond_update_fn_t update)
1151 {
1152         struct cond_snapshot *cond_snapshot;
1153         int ret = 0;
1154
1155         cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1156         if (!cond_snapshot)
1157                 return -ENOMEM;
1158
1159         cond_snapshot->cond_data = cond_data;
1160         cond_snapshot->update = update;
1161
1162         mutex_lock(&trace_types_lock);
1163
1164         ret = tracing_alloc_snapshot_instance(tr);
1165         if (ret)
1166                 goto fail_unlock;
1167
1168         if (tr->current_trace->use_max_tr) {
1169                 ret = -EBUSY;
1170                 goto fail_unlock;
1171         }
1172
1173         /*
1174          * The cond_snapshot can only change to NULL without the
1175          * trace_types_lock. We don't care if we race with it going
1176          * to NULL, but we want to make sure that it's not set to
1177          * something other than NULL when we get here, which we can
1178          * do safely with only holding the trace_types_lock and not
1179          * having to take the max_lock.
1180          */
1181         if (tr->cond_snapshot) {
1182                 ret = -EBUSY;
1183                 goto fail_unlock;
1184         }
1185
1186         arch_spin_lock(&tr->max_lock);
1187         tr->cond_snapshot = cond_snapshot;
1188         arch_spin_unlock(&tr->max_lock);
1189
1190         mutex_unlock(&trace_types_lock);
1191
1192         return ret;
1193
1194  fail_unlock:
1195         mutex_unlock(&trace_types_lock);
1196         kfree(cond_snapshot);
1197         return ret;
1198 }
1199 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1200
1201 /**
1202  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1203  * @tr:         The tracing instance
1204  *
1205  * Check whether the conditional snapshot for the given instance is
1206  * enabled; if so, free the cond_snapshot associated with it,
1207  * otherwise return -EINVAL.
1208  *
1209  * Returns 0 if successful, error otherwise.
1210  */
1211 int tracing_snapshot_cond_disable(struct trace_array *tr)
1212 {
1213         int ret = 0;
1214
1215         arch_spin_lock(&tr->max_lock);
1216
1217         if (!tr->cond_snapshot)
1218                 ret = -EINVAL;
1219         else {
1220                 kfree(tr->cond_snapshot);
1221                 tr->cond_snapshot = NULL;
1222         }
1223
1224         arch_spin_unlock(&tr->max_lock);
1225
1226         return ret;
1227 }
1228 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1229 #else
1230 void tracing_snapshot(void)
1231 {
1232         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1233 }
1234 EXPORT_SYMBOL_GPL(tracing_snapshot);
1235 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1236 {
1237         WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1238 }
1239 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1240 int tracing_alloc_snapshot(void)
1241 {
1242         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1243         return -ENODEV;
1244 }
1245 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1246 void tracing_snapshot_alloc(void)
1247 {
1248         /* Give warning */
1249         tracing_snapshot();
1250 }
1251 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1252 void *tracing_cond_snapshot_data(struct trace_array *tr)
1253 {
1254         return NULL;
1255 }
1256 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1257 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1258 {
1259         return -ENODEV;
1260 }
1261 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1262 int tracing_snapshot_cond_disable(struct trace_array *tr)
1263 {
1264         return false;
1265 }
1266 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1267 #endif /* CONFIG_TRACER_SNAPSHOT */
1268
1269 void tracer_tracing_off(struct trace_array *tr)
1270 {
1271         if (tr->array_buffer.buffer)
1272                 ring_buffer_record_off(tr->array_buffer.buffer);
1273         /*
1274          * This flag is looked at when buffers haven't been allocated
1275          * yet, or by some tracers (like irqsoff), that just want to
1276          * know if the ring buffer has been disabled, but it can handle
1277          * races of where it gets disabled but we still do a record.
1278          * As the check is in the fast path of the tracers, it is more
1279          * important to be fast than accurate.
1280          */
1281         tr->buffer_disabled = 1;
1282         /* Make the flag seen by readers */
1283         smp_wmb();
1284 }
1285
1286 /**
1287  * tracing_off - turn off tracing buffers
1288  *
1289  * This function stops the tracing buffers from recording data.
1290  * It does not disable any overhead the tracers themselves may
1291  * be causing. This function simply causes all recording to
1292  * the ring buffers to fail.
1293  */
1294 void tracing_off(void)
1295 {
1296         tracer_tracing_off(&global_trace);
1297 }
1298 EXPORT_SYMBOL_GPL(tracing_off);
1299
1300 void disable_trace_on_warning(void)
1301 {
1302         if (__disable_trace_on_warning) {
1303                 trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1304                         "Disabling tracing due to warning\n");
1305                 tracing_off();
1306         }
1307 }
1308
1309 /**
1310  * tracer_tracing_is_on - show real state of ring buffer enabled
1311  * @tr : the trace array to know if ring buffer is enabled
1312  *
1313  * Shows real state of the ring buffer if it is enabled or not.
1314  */
1315 bool tracer_tracing_is_on(struct trace_array *tr)
1316 {
1317         if (tr->array_buffer.buffer)
1318                 return ring_buffer_record_is_on(tr->array_buffer.buffer);
1319         return !tr->buffer_disabled;
1320 }
1321
1322 /**
1323  * tracing_is_on - show state of ring buffers enabled
1324  */
1325 int tracing_is_on(void)
1326 {
1327         return tracer_tracing_is_on(&global_trace);
1328 }
1329 EXPORT_SYMBOL_GPL(tracing_is_on);
1330
1331 static int __init set_buf_size(char *str)
1332 {
1333         unsigned long buf_size;
1334
1335         if (!str)
1336                 return 0;
1337         buf_size = memparse(str, &str);
1338         /* nr_entries can not be zero */
1339         if (buf_size == 0)
1340                 return 0;
1341         trace_buf_size = buf_size;
1342         return 1;
1343 }
1344 __setup("trace_buf_size=", set_buf_size);
1345
1346 static int __init set_tracing_thresh(char *str)
1347 {
1348         unsigned long threshold;
1349         int ret;
1350
1351         if (!str)
1352                 return 0;
1353         ret = kstrtoul(str, 0, &threshold);
1354         if (ret < 0)
1355                 return 0;
1356         tracing_thresh = threshold * 1000;
1357         return 1;
1358 }
1359 __setup("tracing_thresh=", set_tracing_thresh);
1360
1361 unsigned long nsecs_to_usecs(unsigned long nsecs)
1362 {
1363         return nsecs / 1000;
1364 }
1365
1366 /*
1367  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1368  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1369  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1370  * of strings in the order that the evals (enum) were defined.
1371  */
1372 #undef C
1373 #define C(a, b) b
1374
1375 /* These must match the bit positions in trace_iterator_flags */
1376 static const char *trace_options[] = {
1377         TRACE_FLAGS
1378         NULL
1379 };
1380
1381 static struct {
1382         u64 (*func)(void);
1383         const char *name;
1384         int in_ns;              /* is this clock in nanoseconds? */
1385 } trace_clocks[] = {
1386         { trace_clock_local,            "local",        1 },
1387         { trace_clock_global,           "global",       1 },
1388         { trace_clock_counter,          "counter",      0 },
1389         { trace_clock_jiffies,          "uptime",       0 },
1390         { trace_clock,                  "perf",         1 },
1391         { ktime_get_mono_fast_ns,       "mono",         1 },
1392         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1393         { ktime_get_boot_fast_ns,       "boot",         1 },
1394         ARCH_TRACE_CLOCKS
1395 };
1396
1397 bool trace_clock_in_ns(struct trace_array *tr)
1398 {
1399         if (trace_clocks[tr->clock_id].in_ns)
1400                 return true;
1401
1402         return false;
1403 }
1404
1405 /*
1406  * trace_parser_get_init - gets the buffer for trace parser
1407  */
1408 int trace_parser_get_init(struct trace_parser *parser, int size)
1409 {
1410         memset(parser, 0, sizeof(*parser));
1411
1412         parser->buffer = kmalloc(size, GFP_KERNEL);
1413         if (!parser->buffer)
1414                 return 1;
1415
1416         parser->size = size;
1417         return 0;
1418 }
1419
1420 /*
1421  * trace_parser_put - frees the buffer for trace parser
1422  */
1423 void trace_parser_put(struct trace_parser *parser)
1424 {
1425         kfree(parser->buffer);
1426         parser->buffer = NULL;
1427 }
1428
1429 /*
1430  * trace_get_user - reads the user input string separated by space
1431  * (matched by isspace(ch))
1432  *
1433  * For each string found the 'struct trace_parser' is updated,
1434  * and the function returns.
1435  *
1436  * Returns number of bytes read.
1437  *
1438  * See kernel/trace/trace.h for 'struct trace_parser' details.
1439  */
1440 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1441         size_t cnt, loff_t *ppos)
1442 {
1443         char ch;
1444         size_t read = 0;
1445         ssize_t ret;
1446
1447         if (!*ppos)
1448                 trace_parser_clear(parser);
1449
1450         ret = get_user(ch, ubuf++);
1451         if (ret)
1452                 goto out;
1453
1454         read++;
1455         cnt--;
1456
1457         /*
1458          * If the parser has not finished with the last write,
1459          * continue reading the user input without skipping spaces.
1460          */
1461         if (!parser->cont) {
1462                 /* skip white space */
1463                 while (cnt && isspace(ch)) {
1464                         ret = get_user(ch, ubuf++);
1465                         if (ret)
1466                                 goto out;
1467                         read++;
1468                         cnt--;
1469                 }
1470
1471                 parser->idx = 0;
1472
1473                 /* only spaces were written */
1474                 if (isspace(ch) || !ch) {
1475                         *ppos += read;
1476                         ret = read;
1477                         goto out;
1478                 }
1479         }
1480
1481         /* read the non-space input */
1482         while (cnt && !isspace(ch) && ch) {
1483                 if (parser->idx < parser->size - 1)
1484                         parser->buffer[parser->idx++] = ch;
1485                 else {
1486                         ret = -EINVAL;
1487                         goto out;
1488                 }
1489                 ret = get_user(ch, ubuf++);
1490                 if (ret)
1491                         goto out;
1492                 read++;
1493                 cnt--;
1494         }
1495
1496         /* We either got finished input or we have to wait for another call. */
1497         if (isspace(ch) || !ch) {
1498                 parser->buffer[parser->idx] = 0;
1499                 parser->cont = false;
1500         } else if (parser->idx < parser->size - 1) {
1501                 parser->cont = true;
1502                 parser->buffer[parser->idx++] = ch;
1503                 /* Make sure the parsed string always terminates with '\0'. */
1504                 parser->buffer[parser->idx] = 0;
1505         } else {
1506                 ret = -EINVAL;
1507                 goto out;
1508         }
1509
1510         *ppos += read;
1511         ret = read;
1512
1513 out:
1514         return ret;
1515 }
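
/*
 * For illustration, a write of "foo bar\n" fed to trace_get_user() by
 * a caller that loops one token per call behaves roughly like this:
 *
 *   1st call: copies "foo" into parser->buffer, consumes the trailing
 *             space, and returns 4 (the number of bytes read).
 *   2nd call: copies "bar", consumes the trailing newline, and again
 *             returns 4.
 *
 * If the user buffer ends in the middle of a token (and there is still
 * room in parser->buffer), parser->cont is set and the next call
 * appends to that token instead of starting a new one.
 */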
1516
1517 /* TODO add a seq_buf_to_buffer() */
1518 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1519 {
1520         int len;
1521
1522         if (trace_seq_used(s) <= s->seq.readpos)
1523                 return -EBUSY;
1524
1525         len = trace_seq_used(s) - s->seq.readpos;
1526         if (cnt > len)
1527                 cnt = len;
1528         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1529
1530         s->seq.readpos += cnt;
1531         return cnt;
1532 }
1533
1534 unsigned long __read_mostly     tracing_thresh;
1535 static const struct file_operations tracing_max_lat_fops;
1536
1537 #if (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1538         defined(CONFIG_FSNOTIFY)
1539
1540 static struct workqueue_struct *fsnotify_wq;
1541
1542 static void latency_fsnotify_workfn(struct work_struct *work)
1543 {
1544         struct trace_array *tr = container_of(work, struct trace_array,
1545                                               fsnotify_work);
1546         fsnotify(tr->d_max_latency->d_inode, FS_MODIFY,
1547                  tr->d_max_latency->d_inode, FSNOTIFY_EVENT_INODE, NULL, 0);
1548 }
1549
1550 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1551 {
1552         struct trace_array *tr = container_of(iwork, struct trace_array,
1553                                               fsnotify_irqwork);
1554         queue_work(fsnotify_wq, &tr->fsnotify_work);
1555 }
1556
1557 static void trace_create_maxlat_file(struct trace_array *tr,
1558                                      struct dentry *d_tracer)
1559 {
1560         INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1561         init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1562         tr->d_max_latency = trace_create_file("tracing_max_latency", 0644,
1563                                               d_tracer, &tr->max_latency,
1564                                               &tracing_max_lat_fops);
1565 }
1566
1567 __init static int latency_fsnotify_init(void)
1568 {
1569         fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1570                                       WQ_UNBOUND | WQ_HIGHPRI, 0);
1571         if (!fsnotify_wq) {
1572                 pr_err("Unable to allocate tr_max_lat_wq\n");
1573                 return -ENOMEM;
1574         }
1575         return 0;
1576 }
1577
1578 late_initcall_sync(latency_fsnotify_init);
1579
1580 void latency_fsnotify(struct trace_array *tr)
1581 {
1582         if (!fsnotify_wq)
1583                 return;
1584         /*
1585          * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1586          * possible that we are called from __schedule() or do_idle(), which
1587          * could cause a deadlock.
1588          */
1589         irq_work_queue(&tr->fsnotify_irqwork);
1590 }
1591
1592 /*
1593  * (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1594  *  defined(CONFIG_FSNOTIFY)
1595  */
1596 #else
1597
1598 #define trace_create_maxlat_file(tr, d_tracer)                          \
1599         trace_create_file("tracing_max_latency", 0644, d_tracer,        \
1600                           &tr->max_latency, &tracing_max_lat_fops)
1601
1602 #endif
1603
1604 #ifdef CONFIG_TRACER_MAX_TRACE
1605 /*
1606  * Copy the new maximum trace into the separate maximum-trace
1607  * structure. (this way the maximum trace is permanently saved,
1608  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1609  */
1610 static void
1611 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1612 {
1613         struct array_buffer *trace_buf = &tr->array_buffer;
1614         struct array_buffer *max_buf = &tr->max_buffer;
1615         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1616         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1617
1618         max_buf->cpu = cpu;
1619         max_buf->time_start = data->preempt_timestamp;
1620
1621         max_data->saved_latency = tr->max_latency;
1622         max_data->critical_start = data->critical_start;
1623         max_data->critical_end = data->critical_end;
1624
1625         strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1626         max_data->pid = tsk->pid;
1627         /*
1628          * If tsk == current, then use current_uid(), as that does not use
1629          * RCU. The irq tracer can be called out of RCU scope.
1630          */
1631         if (tsk == current)
1632                 max_data->uid = current_uid();
1633         else
1634                 max_data->uid = task_uid(tsk);
1635
1636         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1637         max_data->policy = tsk->policy;
1638         max_data->rt_priority = tsk->rt_priority;
1639
1640         /* Record this task's comm */
1641         tracing_record_cmdline(tsk);
1642         latency_fsnotify(tr);
1643 }
1644
1645 /**
1646  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1647  * @tr: tracer
1648  * @tsk: the task with the latency
1649  * @cpu: The cpu that initiated the trace.
1650  * @cond_data: User data associated with a conditional snapshot
1651  *
1652  * Flip the buffers between the @tr and the max_tr and record information
1653  * about which task was the cause of this latency.
1654  */
1655 void
1656 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1657               void *cond_data)
1658 {
1659         if (tr->stop_count)
1660                 return;
1661
1662         WARN_ON_ONCE(!irqs_disabled());
1663
1664         if (!tr->allocated_snapshot) {
1665                 /* Only the nop tracer should hit this when disabling */
1666                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1667                 return;
1668         }
1669
1670         arch_spin_lock(&tr->max_lock);
1671
1672         /* Inherit the recordable setting from array_buffer */
1673         if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1674                 ring_buffer_record_on(tr->max_buffer.buffer);
1675         else
1676                 ring_buffer_record_off(tr->max_buffer.buffer);
1677
1678 #ifdef CONFIG_TRACER_SNAPSHOT
1679         if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1680                 goto out_unlock;
1681 #endif
1682         swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1683
1684         __update_max_tr(tr, tsk, cpu);
1685
1686  out_unlock:
1687         arch_spin_unlock(&tr->max_lock);
1688 }
1689
1690 /**
1691  * update_max_tr_single - only copy one trace over, and reset the rest
1692  * @tr: tracer
1693  * @tsk: task with the latency
1694  * @cpu: the cpu of the buffer to copy.
1695  *
1696  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1697  */
1698 void
1699 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1700 {
1701         int ret;
1702
1703         if (tr->stop_count)
1704                 return;
1705
1706         WARN_ON_ONCE(!irqs_disabled());
1707         if (!tr->allocated_snapshot) {
1708                 /* Only the nop tracer should hit this when disabling */
1709                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1710                 return;
1711         }
1712
1713         arch_spin_lock(&tr->max_lock);
1714
1715         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1716
1717         if (ret == -EBUSY) {
1718                 /*
1719                  * We failed to swap the buffer due to a commit taking
1720                  * place on this CPU. We fail to record, but we reset
1721                  * the max trace buffer (no one writes directly to it)
1722                  * and flag that it failed.
1723                  */
1724                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1725                         "Failed to swap buffers due to commit in progress\n");
1726         }
1727
1728         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1729
1730         __update_max_tr(tr, tsk, cpu);
1731         arch_spin_unlock(&tr->max_lock);
1732 }
1733 #endif /* CONFIG_TRACER_MAX_TRACE */
1734
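/*
 * Wait until there is data to read in the ring buffer for
 * iter->cpu_file, passing @full through to ring_buffer_wait().
 * Iterators backed by a static buffer snapshot never need to wait.
 */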
1735 static int wait_on_pipe(struct trace_iterator *iter, int full)
1736 {
1737         /* Iterators are static, they should be filled or empty */
1738         if (trace_buffer_iter(iter, iter->cpu_file))
1739                 return 0;
1740
1741         return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1742                                 full);
1743 }
1744
1745 #ifdef CONFIG_FTRACE_STARTUP_TEST
1746 static bool selftests_can_run;
1747
1748 struct trace_selftests {
1749         struct list_head                list;
1750         struct tracer                   *type;
1751 };
1752
1753 static LIST_HEAD(postponed_selftests);
1754
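/*
 * Remember a tracer whose selftest cannot run yet because it
 * registered before selftests_can_run was set. The postponed
 * selftests are run later from init_trace_selftests().
 */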
1755 static int save_selftest(struct tracer *type)
1756 {
1757         struct trace_selftests *selftest;
1758
1759         selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1760         if (!selftest)
1761                 return -ENOMEM;
1762
1763         selftest->type = type;
1764         list_add(&selftest->list, &postponed_selftests);
1765         return 0;
1766 }
1767
1768 static int run_tracer_selftest(struct tracer *type)
1769 {
1770         struct trace_array *tr = &global_trace;
1771         struct tracer *saved_tracer = tr->current_trace;
1772         int ret;
1773
1774         if (!type->selftest || tracing_selftest_disabled)
1775                 return 0;
1776
1777         /*
1778          * If a tracer registers early in boot up (before scheduling is
1779          * initialized and such), then do not run its selftests yet.
1780          * Instead, run them a little later in the boot process.
1781          */
1782         if (!selftests_can_run)
1783                 return save_selftest(type);
1784
1785         /*
1786          * Run a selftest on this tracer.
1787          * Here we reset the trace buffer, and set the current
1788          * tracer to be this tracer. The tracer can then run some
1789          * internal tracing to verify that everything is in order.
1790          * If we fail, we do not register this tracer.
1791          */
1792         tracing_reset_online_cpus(&tr->array_buffer);
1793
1794         tr->current_trace = type;
1795
1796 #ifdef CONFIG_TRACER_MAX_TRACE
1797         if (type->use_max_tr) {
1798                 /* If we expanded the buffers, make sure the max is expanded too */
1799                 if (ring_buffer_expanded)
1800                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1801                                            RING_BUFFER_ALL_CPUS);
1802                 tr->allocated_snapshot = true;
1803         }
1804 #endif
1805
1806         /* the test is responsible for initializing and enabling */
1807         pr_info("Testing tracer %s: ", type->name);
1808         ret = type->selftest(type, tr);
1809         /* the test is responsible for resetting too */
1810         tr->current_trace = saved_tracer;
1811         if (ret) {
1812                 printk(KERN_CONT "FAILED!\n");
1813                 /* Add the warning after printing 'FAILED' */
1814                 WARN_ON(1);
1815                 return -1;
1816         }
1817         /* Only reset on passing, to avoid touching corrupted buffers */
1818         tracing_reset_online_cpus(&tr->array_buffer);
1819
1820 #ifdef CONFIG_TRACER_MAX_TRACE
1821         if (type->use_max_tr) {
1822                 tr->allocated_snapshot = false;
1823
1824                 /* Shrink the max buffer again */
1825                 if (ring_buffer_expanded)
1826                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1827                                            RING_BUFFER_ALL_CPUS);
1828         }
1829 #endif
1830
1831         printk(KERN_CONT "PASSED\n");
1832         return 0;
1833 }
1834
1835 static __init int init_trace_selftests(void)
1836 {
1837         struct trace_selftests *p, *n;
1838         struct tracer *t, **last;
1839         int ret;
1840
1841         selftests_can_run = true;
1842
1843         mutex_lock(&trace_types_lock);
1844
1845         if (list_empty(&postponed_selftests))
1846                 goto out;
1847
1848         pr_info("Running postponed tracer tests:\n");
1849
1850         tracing_selftest_running = true;
1851         list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1852                 /* This loop can take minutes when sanitizers are enabled, so
1853                  * let's make sure we allow RCU processing.
1854                  */
1855                 cond_resched();
1856                 ret = run_tracer_selftest(p->type);
1857                 /* If the test fails, then warn and remove from available_tracers */
1858                 if (ret < 0) {
1859                         WARN(1, "tracer: %s failed selftest, disabling\n",
1860                              p->type->name);
1861                         last = &trace_types;
1862                         for (t = trace_types; t; t = t->next) {
1863                                 if (t == p->type) {
1864                                         *last = t->next;
1865                                         break;
1866                                 }
1867                                 last = &t->next;
1868                         }
1869                 }
1870                 list_del(&p->list);
1871                 kfree(p);
1872         }
1873         tracing_selftest_running = false;
1874
1875  out:
1876         mutex_unlock(&trace_types_lock);
1877
1878         return 0;
1879 }
1880 core_initcall(init_trace_selftests);
1881 #else
1882 static inline int run_tracer_selftest(struct tracer *type)
1883 {
1884         return 0;
1885 }
1886 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1887
1888 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1889
1890 static void __init apply_trace_boot_options(void);
1891
1892 /**
1893  * register_tracer - register a tracer with the ftrace system.
1894  * @type: the plugin for the tracer
1895  *
1896  * Register a new plugin tracer.
1897  */
1898 int __init register_tracer(struct tracer *type)
1899 {
1900         struct tracer *t;
1901         int ret = 0;
1902
1903         if (!type->name) {
1904                 pr_info("Tracer must have a name\n");
1905                 return -1;
1906         }
1907
1908         if (strlen(type->name) >= MAX_TRACER_SIZE) {
1909                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1910                 return -1;
1911         }
1912
1913         if (security_locked_down(LOCKDOWN_TRACEFS)) {
1914                 pr_warn("Can not register tracer %s due to lockdown\n",
1915                            type->name);
1916                 return -EPERM;
1917         }
1918
1919         mutex_lock(&trace_types_lock);
1920
1921         tracing_selftest_running = true;
1922
1923         for (t = trace_types; t; t = t->next) {
1924                 if (strcmp(type->name, t->name) == 0) {
1925                         /* already found */
1926                         pr_info("Tracer %s already registered\n",
1927                                 type->name);
1928                         ret = -1;
1929                         goto out;
1930                 }
1931         }
1932
1933         if (!type->set_flag)
1934                 type->set_flag = &dummy_set_flag;
1935         if (!type->flags) {
1936                 /* Allocate a dummy tracer_flags */
1937                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1938                 if (!type->flags) {
1939                         ret = -ENOMEM;
1940                         goto out;
1941                 }
1942                 type->flags->val = 0;
1943                 type->flags->opts = dummy_tracer_opt;
1944         } else
1945                 if (!type->flags->opts)
1946                         type->flags->opts = dummy_tracer_opt;
1947
1948         /* store the tracer for __set_tracer_option */
1949         type->flags->trace = type;
1950
1951         ret = run_tracer_selftest(type);
1952         if (ret < 0)
1953                 goto out;
1954
1955         type->next = trace_types;
1956         trace_types = type;
1957         add_tracer_options(&global_trace, type);
1958
1959  out:
1960         tracing_selftest_running = false;
1961         mutex_unlock(&trace_types_lock);
1962
1963         if (ret || !default_bootup_tracer)
1964                 goto out_unlock;
1965
1966         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1967                 goto out_unlock;
1968
1969         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1970         /* Do we want this tracer to start on bootup? */
1971         tracing_set_tracer(&global_trace, type->name);
1972         default_bootup_tracer = NULL;
1973
1974         apply_trace_boot_options();
1975
1976         /* Disable other selftests, since they would interfere with the running tracer. */
1977         tracing_selftest_disabled = true;
1978 #ifdef CONFIG_FTRACE_STARTUP_TEST
1979         printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1980                type->name);
1981 #endif
1982
1983  out_unlock:
1984         return ret;
1985 }
1986
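/*
 * Empty out a single CPU's ring buffer. Recording is disabled and all
 * in-flight commits are allowed to finish before the reset.
 */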
1987 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
1988 {
1989         struct trace_buffer *buffer = buf->buffer;
1990
1991         if (!buffer)
1992                 return;
1993
1994         ring_buffer_record_disable(buffer);
1995
1996         /* Make sure all commits have finished */
1997         synchronize_rcu();
1998         ring_buffer_reset_cpu(buffer, cpu);
1999
2000         ring_buffer_record_enable(buffer);
2001 }
2002
2003 void tracing_reset_online_cpus(struct array_buffer *buf)
2004 {
2005         struct trace_buffer *buffer = buf->buffer;
2006         int cpu;
2007
2008         if (!buffer)
2009                 return;
2010
2011         ring_buffer_record_disable(buffer);
2012
2013         /* Make sure all commits have finished */
2014         synchronize_rcu();
2015
2016         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2017
2018         for_each_online_cpu(cpu)
2019                 ring_buffer_reset_cpu(buffer, cpu);
2020
2021         ring_buffer_record_enable(buffer);
2022 }
2023
2024 /* Must have trace_types_lock held */
2025 void tracing_reset_all_online_cpus(void)
2026 {
2027         struct trace_array *tr;
2028
2029         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2030                 if (!tr->clear_trace)
2031                         continue;
2032                 tr->clear_trace = false;
2033                 tracing_reset_online_cpus(&tr->array_buffer);
2034 #ifdef CONFIG_TRACER_MAX_TRACE
2035                 tracing_reset_online_cpus(&tr->max_buffer);
2036 #endif
2037         }
2038 }
2039
2040 static int *tgid_map;
2041
2042 #define SAVED_CMDLINES_DEFAULT 128
2043 #define NO_CMDLINE_MAP UINT_MAX
2044 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2045 struct saved_cmdlines_buffer {
2046         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2047         unsigned *map_cmdline_to_pid;
2048         unsigned cmdline_num;
2049         int cmdline_idx;
2050         char *saved_cmdlines;
2051 };
2052 static struct saved_cmdlines_buffer *savedcmd;
2053
2054 /* Temporarily disable recording */
2055 static atomic_t trace_record_taskinfo_disabled __read_mostly;
2056
2057 static inline char *get_saved_cmdlines(int idx)
2058 {
2059         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2060 }
2061
2062 static inline void set_cmdline(int idx, const char *cmdline)
2063 {
2064         strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2065 }
2066
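/*
 * Allocate the saved_cmdlines arrays for @val entries and initialize
 * both pid<->cmdline maps to NO_CMDLINE_MAP.
 */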
2067 static int allocate_cmdlines_buffer(unsigned int val,
2068                                     struct saved_cmdlines_buffer *s)
2069 {
2070         s->map_cmdline_to_pid = kmalloc_array(val,
2071                                               sizeof(*s->map_cmdline_to_pid),
2072                                               GFP_KERNEL);
2073         if (!s->map_cmdline_to_pid)
2074                 return -ENOMEM;
2075
2076         s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2077         if (!s->saved_cmdlines) {
2078                 kfree(s->map_cmdline_to_pid);
2079                 return -ENOMEM;
2080         }
2081
2082         s->cmdline_idx = 0;
2083         s->cmdline_num = val;
2084         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2085                sizeof(s->map_pid_to_cmdline));
2086         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2087                val * sizeof(*s->map_cmdline_to_pid));
2088
2089         return 0;
2090 }
2091
2092 static int trace_create_savedcmd(void)
2093 {
2094         int ret;
2095
2096         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2097         if (!savedcmd)
2098                 return -ENOMEM;
2099
2100         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2101         if (ret < 0) {
2102                 kfree(savedcmd);
2103                 savedcmd = NULL;
2104                 return -ENOMEM;
2105         }
2106
2107         return 0;
2108 }
2109
2110 int is_tracing_stopped(void)
2111 {
2112         return global_trace.stop_count;
2113 }
2114
2115 /**
2116  * tracing_start - quick start of the tracer
2117  *
2118  * If tracing is enabled but was stopped by tracing_stop,
2119  * this will start the tracer back up.
2120  */
2121 void tracing_start(void)
2122 {
2123         struct trace_buffer *buffer;
2124         unsigned long flags;
2125
2126         if (tracing_disabled)
2127                 return;
2128
2129         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2130         if (--global_trace.stop_count) {
2131                 if (global_trace.stop_count < 0) {
2132                         /* Someone screwed up their debugging */
2133                         WARN_ON_ONCE(1);
2134                         global_trace.stop_count = 0;
2135                 }
2136                 goto out;
2137         }
2138
2139         /* Prevent the buffers from switching */
2140         arch_spin_lock(&global_trace.max_lock);
2141
2142         buffer = global_trace.array_buffer.buffer;
2143         if (buffer)
2144                 ring_buffer_record_enable(buffer);
2145
2146 #ifdef CONFIG_TRACER_MAX_TRACE
2147         buffer = global_trace.max_buffer.buffer;
2148         if (buffer)
2149                 ring_buffer_record_enable(buffer);
2150 #endif
2151
2152         arch_spin_unlock(&global_trace.max_lock);
2153
2154  out:
2155         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2156 }
2157
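/*
 * Per-instance counterpart of tracing_start(): decrement @tr's stop
 * count and re-enable recording on its buffer once the count reaches
 * zero. The global instance is simply forwarded to tracing_start().
 */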
2158 static void tracing_start_tr(struct trace_array *tr)
2159 {
2160         struct trace_buffer *buffer;
2161         unsigned long flags;
2162
2163         if (tracing_disabled)
2164                 return;
2165
2166         /* If global, we need to also start the max tracer */
2167         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2168                 return tracing_start();
2169
2170         raw_spin_lock_irqsave(&tr->start_lock, flags);
2171
2172         if (--tr->stop_count) {
2173                 if (tr->stop_count < 0) {
2174                         /* Someone screwed up their debugging */
2175                         WARN_ON_ONCE(1);
2176                         tr->stop_count = 0;
2177                 }
2178                 goto out;
2179         }
2180
2181         buffer = tr->array_buffer.buffer;
2182         if (buffer)
2183                 ring_buffer_record_enable(buffer);
2184
2185  out:
2186         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2187 }
2188
2189 /**
2190  * tracing_stop - quick stop of the tracer
2191  *
2192  * Light weight way to stop tracing. Use in conjunction with
2193  * tracing_start.
2194  */
2195 void tracing_stop(void)
2196 {
2197         struct trace_buffer *buffer;
2198         unsigned long flags;
2199
2200         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2201         if (global_trace.stop_count++)
2202                 goto out;
2203
2204         /* Prevent the buffers from switching */
2205         arch_spin_lock(&global_trace.max_lock);
2206
2207         buffer = global_trace.array_buffer.buffer;
2208         if (buffer)
2209                 ring_buffer_record_disable(buffer);
2210
2211 #ifdef CONFIG_TRACER_MAX_TRACE
2212         buffer = global_trace.max_buffer.buffer;
2213         if (buffer)
2214                 ring_buffer_record_disable(buffer);
2215 #endif
2216
2217         arch_spin_unlock(&global_trace.max_lock);
2218
2219  out:
2220         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2221 }
2222
2223 static void tracing_stop_tr(struct trace_array *tr)
2224 {
2225         struct trace_buffer *buffer;
2226         unsigned long flags;
2227
2228         /* If global, we need to also stop the max tracer */
2229         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2230                 return tracing_stop();
2231
2232         raw_spin_lock_irqsave(&tr->start_lock, flags);
2233         if (tr->stop_count++)
2234                 goto out;
2235
2236         buffer = tr->array_buffer.buffer;
2237         if (buffer)
2238                 ring_buffer_record_disable(buffer);
2239
2240  out:
2241         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2242 }
2243
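/*
 * Remember the comm of @tsk in the saved_cmdlines cache so the output
 * code can map a recorded pid back to a task name. Returns 1 on
 * success (or for the idle task) and 0 when the pid is out of range
 * or the cmdline lock is contended; in that case the caller retries
 * on a later event.
 */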
2244 static int trace_save_cmdline(struct task_struct *tsk)
2245 {
2246         unsigned pid, idx;
2247
2248         /* treat recording of idle task as a success */
2249         if (!tsk->pid)
2250                 return 1;
2251
2252         if (unlikely(tsk->pid > PID_MAX_DEFAULT))
2253                 return 0;
2254
2255         /*
2256          * It's not the end of the world if we don't get
2257          * the lock, but we also don't want to spin
2258          * nor do we want to disable interrupts,
2259          * so if we miss here, then better luck next time.
2260          */
2261         if (!arch_spin_trylock(&trace_cmdline_lock))
2262                 return 0;
2263
2264         idx = savedcmd->map_pid_to_cmdline[tsk->pid];
2265         if (idx == NO_CMDLINE_MAP) {
2266                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2267
2268                 /*
2269                  * Check whether the cmdline buffer at idx has a pid
2270                  * mapped. We are going to overwrite that entry so we
2271                  * need to clear the map_pid_to_cmdline. Otherwise we
2272                  * would read the new comm for the old pid.
2273                  */
2274                 pid = savedcmd->map_cmdline_to_pid[idx];
2275                 if (pid != NO_CMDLINE_MAP)
2276                         savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
2277
2278                 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2279                 savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
2280
2281                 savedcmd->cmdline_idx = idx;
2282         }
2283
2284         set_cmdline(idx, tsk->comm);
2285
2286         arch_spin_unlock(&trace_cmdline_lock);
2287
2288         return 1;
2289 }
2290
2291 static void __trace_find_cmdline(int pid, char comm[])
2292 {
2293         unsigned map;
2294
2295         if (!pid) {
2296                 strcpy(comm, "<idle>");
2297                 return;
2298         }
2299
2300         if (WARN_ON_ONCE(pid < 0)) {
2301                 strcpy(comm, "<XXX>");
2302                 return;
2303         }
2304
2305         if (pid > PID_MAX_DEFAULT) {
2306                 strcpy(comm, "<...>");
2307                 return;
2308         }
2309
2310         map = savedcmd->map_pid_to_cmdline[pid];
2311         if (map != NO_CMDLINE_MAP)
2312                 strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2313         else
2314                 strcpy(comm, "<...>");
2315 }
2316
2317 void trace_find_cmdline(int pid, char comm[])
2318 {
2319         preempt_disable();
2320         arch_spin_lock(&trace_cmdline_lock);
2321
2322         __trace_find_cmdline(pid, comm);
2323
2324         arch_spin_unlock(&trace_cmdline_lock);
2325         preempt_enable();
2326 }
2327
2328 int trace_find_tgid(int pid)
2329 {
2330         if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
2331                 return 0;
2332
2333         return tgid_map[pid];
2334 }
2335
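/*
 * Record the pid->tgid mapping of @tsk. Returns 1 on success (or for
 * the idle task) and 0 when tgid_map is not allocated or the pid is
 * out of range.
 */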
2336 static int trace_save_tgid(struct task_struct *tsk)
2337 {
2338         /* treat recording of idle task as a success */
2339         if (!tsk->pid)
2340                 return 1;
2341
2342         if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT))
2343                 return 0;
2344
2345         tgid_map[tsk->pid] = tsk->tgid;
2346         return 1;
2347 }
2348
2349 static bool tracing_record_taskinfo_skip(int flags)
2350 {
2351         if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2352                 return true;
2353         if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
2354                 return true;
2355         if (!__this_cpu_read(trace_taskinfo_save))
2356                 return true;
2357         return false;
2358 }
2359
2360 /**
2361  * tracing_record_taskinfo - record the task info of a task
2362  *
2363  * @task:  task to record
2364  * @flags: TRACE_RECORD_CMDLINE for recording comm
2365  *         TRACE_RECORD_TGID for recording tgid
2366  */
2367 void tracing_record_taskinfo(struct task_struct *task, int flags)
2368 {
2369         bool done;
2370
2371         if (tracing_record_taskinfo_skip(flags))
2372                 return;
2373
2374         /*
2375          * Record as much task information as possible. If some fail, continue
2376          * to try to record the others.
2377          */
2378         done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2379         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2380
2381         /* If recording any information failed, try again soon. */
2382         if (!done)
2383                 return;
2384
2385         __this_cpu_write(trace_taskinfo_save, false);
2386 }
2387
2388 /**
2389  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2390  *
2391  * @prev: previous task during sched_switch
2392  * @next: next task during sched_switch
2393  * @flags: TRACE_RECORD_CMDLINE for recording comm
2394  *         TRACE_RECORD_TGID for recording tgid
2395  */
2396 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2397                                           struct task_struct *next, int flags)
2398 {
2399         bool done;
2400
2401         if (tracing_record_taskinfo_skip(flags))
2402                 return;
2403
2404         /*
2405          * Record as much task information as possible. If some fail, continue
2406          * to try to record the others.
2407          */
2408         done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2409         done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2410         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2411         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2412
2413         /* If recording any information failed, try again soon. */
2414         if (!done)
2415                 return;
2416
2417         __this_cpu_write(trace_taskinfo_save, false);
2418 }
2419
2420 /* Helpers to record a specific task information */
2421 void tracing_record_cmdline(struct task_struct *task)
2422 {
2423         tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2424 }
2425
2426 void tracing_record_tgid(struct task_struct *task)
2427 {
2428         tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2429 }
2430
2431 /*
2432  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2433  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2434  * simplifies those functions and keeps them in sync.
2435  */
2436 enum print_line_t trace_handle_return(struct trace_seq *s)
2437 {
2438         return trace_seq_has_overflowed(s) ?
2439                 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2440 }
2441 EXPORT_SYMBOL_GPL(trace_handle_return);
2442
2443 void
2444 tracing_generic_entry_update(struct trace_entry *entry, unsigned short type,
2445                              unsigned long flags, int pc)
2446 {
2447         struct task_struct *tsk = current;
2448
2449         entry->preempt_count            = pc & 0xff;
2450         entry->pid                      = (tsk) ? tsk->pid : 0;
2451         entry->type                     = type;
2452         entry->flags =
2453 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
2454                 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
2455 #else
2456                 TRACE_FLAG_IRQS_NOSUPPORT |
2457 #endif
2458                 ((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
2459                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
2460                 ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
2461                 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
2462                 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
2463 }
2464 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
2465
2466 struct ring_buffer_event *
2467 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2468                           int type,
2469                           unsigned long len,
2470                           unsigned long flags, int pc)
2471 {
2472         return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
2473 }
2474
2475 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2476 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2477 static int trace_buffered_event_ref;
2478
2479 /**
2480  * trace_buffered_event_enable - enable buffering events
2481  *
2482  * When events are being filtered, it is quicker to write the event
2483  * data into a temporary buffer if there is a good chance that it
2484  * will not be committed. Discarding an event from the ring buffer
2485  * is not as fast as committing it, and is much slower than copying
2486  * the data first and only committing it when it passes the filter.
2487  *
2488  * So when events are filtered, allocate a per-CPU buffer to write
2489  * the event data into. If the event is filtered it is simply
2490  * dropped; otherwise the entire data is committed to the ring
2491  * buffer in one shot.
2492  */
2493 void trace_buffered_event_enable(void)
2494 {
2495         struct ring_buffer_event *event;
2496         struct page *page;
2497         int cpu;
2498
2499         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2500
2501         if (trace_buffered_event_ref++)
2502                 return;
2503
2504         for_each_tracing_cpu(cpu) {
2505                 page = alloc_pages_node(cpu_to_node(cpu),
2506                                         GFP_KERNEL | __GFP_NORETRY, 0);
2507                 if (!page)
2508                         goto failed;
2509
2510                 event = page_address(page);
2511                 memset(event, 0, sizeof(*event));
2512
2513                 per_cpu(trace_buffered_event, cpu) = event;
2514
2515                 preempt_disable();
2516                 if (cpu == smp_processor_id() &&
2517                     this_cpu_read(trace_buffered_event) !=
2518                     per_cpu(trace_buffered_event, cpu))
2519                         WARN_ON_ONCE(1);
2520                 preempt_enable();
2521         }
2522
2523         return;
2524  failed:
2525         trace_buffered_event_disable();
2526 }
2527
2528 static void enable_trace_buffered_event(void *data)
2529 {
2530         /* Probably not needed, but do it anyway */
2531         smp_rmb();
2532         this_cpu_dec(trace_buffered_event_cnt);
2533 }
2534
2535 static void disable_trace_buffered_event(void *data)
2536 {
2537         this_cpu_inc(trace_buffered_event_cnt);
2538 }
2539
2540 /**
2541  * trace_buffered_event_disable - disable buffering events
2542  *
2543  * When a filter is removed, it is faster to not use the buffered
2544  * events, and to commit directly into the ring buffer. Free up
2545  * the temp buffers when there are no more users. This requires
2546  * special synchronization with current events.
2547  */
2548 void trace_buffered_event_disable(void)
2549 {
2550         int cpu;
2551
2552         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2553
2554         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2555                 return;
2556
2557         if (--trace_buffered_event_ref)
2558                 return;
2559
2560         preempt_disable();
2561         /* For each CPU, set the buffer as used. */
2562         smp_call_function_many(tracing_buffer_mask,
2563                                disable_trace_buffered_event, NULL, 1);
2564         preempt_enable();
2565
2566         /* Wait for all current users to finish */
2567         synchronize_rcu();
2568
2569         for_each_tracing_cpu(cpu) {
2570                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2571                 per_cpu(trace_buffered_event, cpu) = NULL;
2572         }
2573         /*
2574          * Make sure trace_buffered_event is NULL before clearing
2575          * trace_buffered_event_cnt.
2576          */
2577         smp_wmb();
2578
2579         preempt_disable();
2580         /* Do the work on each cpu */
2581         smp_call_function_many(tracing_buffer_mask,
2582                                enable_trace_buffered_event, NULL, 1);
2583         preempt_enable();
2584 }
2585
2586 static struct trace_buffer *temp_buffer;
2587
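/*
 * Reserve space for a trace event. When the event file is soft
 * disabled or filtered (and the buffer does not use absolute
 * timestamps), the per-CPU trace_buffered_event page is tried first,
 * as the event is likely to be discarded. If the ring buffer refuses
 * the reservation but triggers still need to see the event, fall back
 * to the temp_buffer.
 */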
2588 struct ring_buffer_event *
2589 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2590                           struct trace_event_file *trace_file,
2591                           int type, unsigned long len,
2592                           unsigned long flags, int pc)
2593 {
2594         struct ring_buffer_event *entry;
2595         int val;
2596
2597         *current_rb = trace_file->tr->array_buffer.buffer;
2598
2599         if (!ring_buffer_time_stamp_abs(*current_rb) && (trace_file->flags &
2600              (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2601             (entry = this_cpu_read(trace_buffered_event))) {
2602                 /* Try to use the per cpu buffer first */
2603                 val = this_cpu_inc_return(trace_buffered_event_cnt);
2604                 if (val == 1) {
2605                         trace_event_setup(entry, type, flags, pc);
2606                         entry->array[0] = len;
2607                         return entry;
2608                 }
2609                 this_cpu_dec(trace_buffered_event_cnt);
2610         }
2611
2612         entry = __trace_buffer_lock_reserve(*current_rb,
2613                                             type, len, flags, pc);
2614         /*
2615          * If tracing is off, but we have triggers enabled,
2616          * we still need to look at the event data. Use the temp_buffer
2617          * to store the trace event for the trigger to use. It's recursion
2618          * safe and will not be recorded anywhere.
2619          */
2620         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2621                 *current_rb = temp_buffer;
2622                 entry = __trace_buffer_lock_reserve(*current_rb,
2623                                                     type, len, flags, pc);
2624         }
2625         return entry;
2626 }
2627 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2628
2629 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2630 static DEFINE_MUTEX(tracepoint_printk_mutex);
2631
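/*
 * Print a single trace event via printk(). This is the slow path used
 * when tracepoint_printk is enabled: the event is rendered by its
 * trace() callback into the shared tracepoint_print_iter under
 * tracepoint_iter_lock and the resulting line is passed to printk().
 */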
2632 static void output_printk(struct trace_event_buffer *fbuffer)
2633 {
2634         struct trace_event_call *event_call;
2635         struct trace_event_file *file;
2636         struct trace_event *event;
2637         unsigned long flags;
2638         struct trace_iterator *iter = tracepoint_print_iter;
2639
2640         /* We should never get here if iter is NULL */
2641         if (WARN_ON_ONCE(!iter))
2642                 return;
2643
2644         event_call = fbuffer->trace_file->event_call;
2645         if (!event_call || !event_call->event.funcs ||
2646             !event_call->event.funcs->trace)
2647                 return;
2648
2649         file = fbuffer->trace_file;
2650         if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2651             (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2652              !filter_match_preds(file->filter, fbuffer->entry)))
2653                 return;
2654
2655         event = &fbuffer->trace_file->event_call->event;
2656
2657         spin_lock_irqsave(&tracepoint_iter_lock, flags);
2658         trace_seq_init(&iter->seq);
2659         iter->ent = fbuffer->entry;
2660         event_call->event.funcs->trace(iter, 0, event);
2661         trace_seq_putc(&iter->seq, 0);
2662         printk("%s", iter->seq.buffer);
2663
2664         spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2665 }
2666
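/*
 * Handle updates of the tracepoint_printk sysctl. When the value
 * actually changes, flip the tracepoint_printk_key static key so the
 * printk path in trace_event_buffer_commit() stays disabled while the
 * feature is off.
 */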
2667 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2668                              void *buffer, size_t *lenp,
2669                              loff_t *ppos)
2670 {
2671         int save_tracepoint_printk;
2672         int ret;
2673
2674         mutex_lock(&tracepoint_printk_mutex);
2675         save_tracepoint_printk = tracepoint_printk;
2676
2677         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2678
2679         /*
2680          * This will force exiting early, as tracepoint_printk
2681          * is always zero when tracepoint_print_iter is not allocated.
2682          */
2683         if (!tracepoint_print_iter)
2684                 tracepoint_printk = 0;
2685
2686         if (save_tracepoint_printk == tracepoint_printk)
2687                 goto out;
2688
2689         if (tracepoint_printk)
2690                 static_key_enable(&tracepoint_printk_key.key);
2691         else
2692                 static_key_disable(&tracepoint_printk_key.key);
2693
2694  out:
2695         mutex_unlock(&tracepoint_printk_mutex);
2696
2697         return ret;
2698 }
2699
2700 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2701 {
2702         if (static_key_false(&tracepoint_printk_key.key))
2703                 output_printk(fbuffer);
2704
2705         event_trigger_unlock_commit_regs(fbuffer->trace_file, fbuffer->buffer,
2706                                     fbuffer->event, fbuffer->entry,
2707                                     fbuffer->flags, fbuffer->pc, fbuffer->regs);
2708 }
2709 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2710
2711 /*
2712  * Skip 3:
2713  *
2714  *   trace_buffer_unlock_commit_regs()
2715  *   trace_event_buffer_commit()
2716  *   trace_event_raw_event_xxx()
2717  */
2718 # define STACK_SKIP 3
2719
2720 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2721                                      struct trace_buffer *buffer,
2722                                      struct ring_buffer_event *event,
2723                                      unsigned long flags, int pc,
2724                                      struct pt_regs *regs)
2725 {
2726         __buffer_unlock_commit(buffer, event);
2727
2728         /*
2729          * If regs is not set, then skip the necessary functions.
2730          * Note, we can still get here via blktrace, wakeup tracer
2731          * and mmiotrace, but that's ok if they lose a function or
2732          * two. They are not that meaningful.
2733          */
2734         ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs);
2735         ftrace_trace_userstack(buffer, flags, pc);
2736 }
2737
2738 /*
2739  * Similar to trace_buffer_unlock_commit_regs() but does not dump the stack.
2740  */
2741 void
2742 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2743                                    struct ring_buffer_event *event)
2744 {
2745         __buffer_unlock_commit(buffer, event);
2746 }
2747
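/*
 * Hand a single ring buffer event (its raw entry and length) to a
 * registered trace_export's write() callback.
 */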
2748 static void
2749 trace_process_export(struct trace_export *export,
2750                struct ring_buffer_event *event)
2751 {
2752         struct trace_entry *entry;
2753         unsigned int size = 0;
2754
2755         entry = ring_buffer_event_data(event);
2756         size = ring_buffer_event_length(event);
2757         export->write(export, entry, size);
2758 }
2759
2760 static DEFINE_MUTEX(ftrace_export_lock);
2761
2762 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
2763
2764 static DEFINE_STATIC_KEY_FALSE(ftrace_exports_enabled);
2765
2766 static inline void ftrace_exports_enable(void)
2767 {
2768         static_branch_enable(&ftrace_exports_enabled);
2769 }
2770
2771 static inline void ftrace_exports_disable(void)
2772 {
2773         static_branch_disable(&ftrace_exports_enabled);
2774 }
2775
2776 static void ftrace_exports(struct ring_buffer_event *event)
2777 {
2778         struct trace_export *export;
2779
2780         preempt_disable_notrace();
2781
2782         export = rcu_dereference_raw_check(ftrace_exports_list);
2783         while (export) {
2784                 trace_process_export(export, event);
2785                 export = rcu_dereference_raw_check(export->next);
2786         }
2787
2788         preempt_enable_notrace();
2789 }
2790
2791 static inline void
2792 add_trace_export(struct trace_export **list, struct trace_export *export)
2793 {
2794         rcu_assign_pointer(export->next, *list);
2795         /*
2796          * We are inserting export into the list, but another
2797          * CPU might be walking that list. We need to make sure
2798          * the export->next pointer is valid before another CPU sees
2799          * the export pointer included in the list.
2800          */
2801         rcu_assign_pointer(*list, export);
2802 }
2803
2804 static inline int
2805 rm_trace_export(struct trace_export **list, struct trace_export *export)
2806 {
2807         struct trace_export **p;
2808
2809         for (p = list; *p != NULL; p = &(*p)->next)
2810                 if (*p == export)
2811                         break;
2812
2813         if (*p != export)
2814                 return -1;
2815
2816         rcu_assign_pointer(*p, (*p)->next);
2817
2818         return 0;
2819 }
2820
2821 static inline void
2822 add_ftrace_export(struct trace_export **list, struct trace_export *export)
2823 {
2824         if (*list == NULL)
2825                 ftrace_exports_enable();
2826
2827         add_trace_export(list, export);
2828 }
2829
2830 static inline int
2831 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
2832 {
2833         int ret;
2834
2835         ret = rm_trace_export(list, export);
2836         if (*list == NULL)
2837                 ftrace_exports_disable();
2838
2839         return ret;
2840 }
2841
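/*
 * register_ftrace_export - have function trace events passed to @export
 *
 * Once registered, every function trace event is also handed to
 * export->write() via ftrace_exports(). A minimal usage sketch
 * (my_export and my_write are hypothetical example names):
 *
 *	static void my_write(struct trace_export *export,
 *			     const void *entry, unsigned int size)
 *	{
 *		pr_info("exported %u byte trace entry\n", size);
 *	}
 *
 *	static struct trace_export my_export = {
 *		.write	= my_write,
 *	};
 *
 *	register_ftrace_export(&my_export);
 *	...
 *	unregister_ftrace_export(&my_export);
 */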
2842 int register_ftrace_export(struct trace_export *export)
2843 {
2844         if (WARN_ON_ONCE(!export->write))
2845                 return -1;
2846
2847         mutex_lock(&ftrace_export_lock);
2848
2849         add_ftrace_export(&ftrace_exports_list, export);
2850
2851         mutex_unlock(&ftrace_export_lock);
2852
2853         return 0;
2854 }
2855 EXPORT_SYMBOL_GPL(register_ftrace_export);
2856
2857 int unregister_ftrace_export(struct trace_export *export)
2858 {
2859         int ret;
2860
2861         mutex_lock(&ftrace_export_lock);
2862
2863         ret = rm_ftrace_export(&ftrace_exports_list, export);
2864
2865         mutex_unlock(&ftrace_export_lock);
2866
2867         return ret;
2868 }
2869 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
2870
2871 void
2872 trace_function(struct trace_array *tr,
2873                unsigned long ip, unsigned long parent_ip, unsigned long flags,
2874                int pc)
2875 {
2876         struct trace_event_call *call = &event_function;
2877         struct trace_buffer *buffer = tr->array_buffer.buffer;
2878         struct ring_buffer_event *event;
2879         struct ftrace_entry *entry;
2880
2881         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2882                                             flags, pc);
2883         if (!event)
2884                 return;
2885         entry   = ring_buffer_event_data(event);
2886         entry->ip                       = ip;
2887         entry->parent_ip                = parent_ip;
2888
2889         if (!call_filter_check_discard(call, entry, buffer, event)) {
2890                 if (static_branch_unlikely(&ftrace_exports_enabled))
2891                         ftrace_exports(event);
2892                 __buffer_unlock_commit(buffer, event);
2893         }
2894 }
2895
2896 #ifdef CONFIG_STACKTRACE
2897
2898 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2899 #define FTRACE_KSTACK_NESTING   4
2900
2901 #define FTRACE_KSTACK_ENTRIES   (PAGE_SIZE / FTRACE_KSTACK_NESTING)
2902
2903 struct ftrace_stack {
2904         unsigned long           calls[FTRACE_KSTACK_ENTRIES];
2905 };
2906
2907
2908 struct ftrace_stacks {
2909         struct ftrace_stack     stacks[FTRACE_KSTACK_NESTING];
2910 };
2911
2912 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
2913 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2914
2915 static void __ftrace_trace_stack(struct trace_buffer *buffer,
2916                                  unsigned long flags,
2917                                  int skip, int pc, struct pt_regs *regs)
2918 {
2919         struct trace_event_call *call = &event_kernel_stack;
2920         struct ring_buffer_event *event;
2921         unsigned int size, nr_entries;
2922         struct ftrace_stack *fstack;
2923         struct stack_entry *entry;
2924         int stackidx;
2925
2926         /*
2927          * Add one, for this function and the call to stack_trace_save().
2928          * If regs is set, then these functions will not be in the way.
2929          */
2930 #ifndef CONFIG_UNWINDER_ORC
2931         if (!regs)
2932                 skip++;
2933 #endif
2934
2935         /*
2936          * Since events can happen in NMIs, a single per-CPU stack buffer
2937          * cannot be used safely. Instead, each context (normal, softirq,
2938          * irq, NMI) reserves its own slot in the per-CPU ftrace_stacks
2939          * array via the nesting counter below.
2940          */
2941         preempt_disable_notrace();
2942
2943         stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
2944
2945         /* This should never happen. If it does, yell once and skip */
2946         if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
2947                 goto out;
2948
2949         /*
2950          * The above __this_cpu_inc_return() is 'atomic' cpu local. An
2951          * interrupt will either see the value pre increment or post
2952          * increment. If the interrupt happens pre increment it will have
2953          * restored the counter when it returns.  We just need a barrier to
2954          * keep gcc from moving things around.
2955          */
2956         barrier();
2957
2958         fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
2959         size = ARRAY_SIZE(fstack->calls);
2960
2961         if (regs) {
2962                 nr_entries = stack_trace_save_regs(regs, fstack->calls,
2963                                                    size, skip);
2964         } else {
2965                 nr_entries = stack_trace_save(fstack->calls, size, skip);
2966         }
2967
2968         size = nr_entries * sizeof(unsigned long);
2969         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2970                                             sizeof(*entry) + size, flags, pc);
2971         if (!event)
2972                 goto out;
2973         entry = ring_buffer_event_data(event);
2974
2975         memcpy(&entry->caller, fstack->calls, size);
2976         entry->size = nr_entries;
2977
2978         if (!call_filter_check_discard(call, entry, buffer, event))
2979                 __buffer_unlock_commit(buffer, event);
2980
2981  out:
2982         /* Again, don't let gcc optimize things here */
2983         barrier();
2984         __this_cpu_dec(ftrace_stack_reserve);
2985         preempt_enable_notrace();
2986
2987 }
2988
2989 static inline void ftrace_trace_stack(struct trace_array *tr,
2990                                       struct trace_buffer *buffer,
2991                                       unsigned long flags,
2992                                       int skip, int pc, struct pt_regs *regs)
2993 {
2994         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2995                 return;
2996
2997         __ftrace_trace_stack(buffer, flags, skip, pc, regs);
2998 }
2999
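/*
 * Record a kernel stack trace into @tr's buffer. If RCU is not
 * watching (e.g. on the way into or out of idle), tell RCU about the
 * IRQ context first; from an NMI, where that is not allowed, the
 * stack trace is silently dropped.
 */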
3000 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
3001                    int pc)
3002 {
3003         struct trace_buffer *buffer = tr->array_buffer.buffer;
3004
3005         if (rcu_is_watching()) {
3006                 __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
3007                 return;
3008         }
3009
3010         /*
3011          * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
3012          * but if the above rcu_is_watching() failed, then the NMI
3013          * triggered someplace critical, and rcu_irq_enter() should
3014          * not be called from NMI.
3015          */
3016         if (unlikely(in_nmi()))
3017                 return;
3018
3019         rcu_irq_enter_irqson();
3020         __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
3021         rcu_irq_exit_irqson();
3022 }
3023
3024 /**
3025  * trace_dump_stack - record a stack back trace in the trace buffer
3026  * @skip: Number of functions to skip (helper handlers)
3027  */
3028 void trace_dump_stack(int skip)
3029 {
3030         unsigned long flags;
3031
3032         if (tracing_disabled || tracing_selftest_running)
3033                 return;
3034
3035         local_save_flags(flags);
3036
3037 #ifndef CONFIG_UNWINDER_ORC
3038         /* Skip 1 to skip this function. */
3039         skip++;
3040 #endif
3041         __ftrace_trace_stack(global_trace.array_buffer.buffer,
3042                              flags, skip, preempt_count(), NULL);
3043 }
3044 EXPORT_SYMBOL_GPL(trace_dump_stack);
3045
3046 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3047 static DEFINE_PER_CPU(int, user_stack_count);
3048
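/*
 * Record the current task's user-space stack trace. Skipped in NMI
 * context, since saving the user stack may fault, and guarded against
 * recursion with a per-CPU count.
 */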
3049 static void
3050 ftrace_trace_userstack(struct trace_buffer *buffer, unsigned long flags, int pc)
3051 {
3052         struct trace_event_call *call = &event_user_stack;
3053         struct ring_buffer_event *event;
3054         struct userstack_entry *entry;
3055
3056         if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
3057                 return;
3058
3059         /*
3060          * NMIs can not handle page faults, even with fix ups.
3061          * The save user stack can (and often does) fault.
3062          */
3063         if (unlikely(in_nmi()))
3064                 return;
3065
3066         /*
3067          * prevent recursion, since the user stack tracing may
3068          * trigger other kernel events.
3069          */
3070         preempt_disable();
3071         if (__this_cpu_read(user_stack_count))
3072                 goto out;
3073
3074         __this_cpu_inc(user_stack_count);
3075
3076         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3077                                             sizeof(*entry), flags, pc);
3078         if (!event)
3079                 goto out_drop_count;
3080         entry   = ring_buffer_event_data(event);
3081
3082         entry->tgid             = current->tgid;
3083         memset(&entry->caller, 0, sizeof(entry->caller));
3084
3085         stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3086         if (!call_filter_check_discard(call, entry, buffer, event))
3087                 __buffer_unlock_commit(buffer, event);
3088
3089  out_drop_count:
3090         __this_cpu_dec(user_stack_count);
3091  out:
3092         preempt_enable();
3093 }
3094 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3095 static void ftrace_trace_userstack(struct trace_buffer *buffer,
3096                                    unsigned long flags, int pc)
3097 {
3098 }
3099 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3100
3101 #endif /* CONFIG_STACKTRACE */
3102
3103 /* created for use with alloc_percpu */
3104 struct trace_buffer_struct {
3105         int nesting;
3106         char buffer[4][TRACE_BUF_SIZE];
3107 };
3108
3109 static struct trace_buffer_struct *trace_percpu_buffer;
3110
3111 /*
3112  * This allows for lockless recording.  If we're nested too deeply, then
3113  * this returns NULL.
3114  */
3115 static char *get_trace_buf(void)
3116 {
3117         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3118
3119         if (!buffer || buffer->nesting >= 4)
3120                 return NULL;
3121
3122         buffer->nesting++;
3123
3124         /* Interrupts must see nesting incremented before we use the buffer */
3125         barrier();
3126         return &buffer->buffer[buffer->nesting][0];
3127 }
3128
3129 static void put_trace_buf(void)
3130 {
3131         /* Don't let the decrement of nesting leak before this */
3132         barrier();
3133         this_cpu_dec(trace_percpu_buffer->nesting);
3134 }
3135
3136 static int alloc_percpu_trace_buffer(void)
3137 {
3138         struct trace_buffer_struct *buffers;
3139
3140         buffers = alloc_percpu(struct trace_buffer_struct);
3141         if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3142                 return -ENOMEM;
3143
3144         trace_percpu_buffer = buffers;
3145         return 0;
3146 }
3147
3148 static int buffers_allocated;
3149
3150 void trace_printk_init_buffers(void)
3151 {
3152         if (buffers_allocated)
3153                 return;
3154
3155         if (alloc_percpu_trace_buffer())
3156                 return;
3157
3158         /* trace_printk() is for debug use only. Don't use it in production. */
3159
3160         pr_warn("\n");
3161         pr_warn("**********************************************************\n");
3162         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3163         pr_warn("**                                                      **\n");
3164         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3165         pr_warn("**                                                      **\n");
3166         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3167         pr_warn("** unsafe for production use.                           **\n");
3168         pr_warn("**                                                      **\n");
3169         pr_warn("** If you see this message and you are not debugging    **\n");
3170         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3171         pr_warn("**                                                      **\n");
3172         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3173         pr_warn("**********************************************************\n");
3174
3175         /* Expand the buffers to set size */
3176         tracing_update_buffers();
3177
3178         buffers_allocated = 1;
3179
3180         /*
3181          * trace_printk_init_buffers() can be called by modules.
3182          * If that happens, then we need to start cmdline recording
3183          * directly here. If the global_trace.array_buffer.buffer is already
3184          * allocated here, then this was called by module code.
3185          */
3186         if (global_trace.array_buffer.buffer)
3187                 tracing_start_cmdline_record();
3188 }
3189 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3190
3191 void trace_printk_start_comm(void)
3192 {
3193         /* Start tracing comms if trace printk is set */
3194         if (!buffers_allocated)
3195                 return;
3196         tracing_start_cmdline_record();
3197 }
3198
3199 static void trace_printk_start_stop_comm(int enabled)
3200 {
3201         if (!buffers_allocated)
3202                 return;
3203
3204         if (enabled)
3205                 tracing_start_cmdline_record();
3206         else
3207                 tracing_stop_cmdline_record();
3208 }
3209
3210 /**
3211  * trace_vbprintk - write binary msg to tracing buffer
3212  * @ip:    The address of the caller
3213  * @fmt:   The string format to write to the buffer
3214  * @args:  Arguments for @fmt
3215  */
3216 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3217 {
3218         struct trace_event_call *call = &event_bprint;
3219         struct ring_buffer_event *event;
3220         struct trace_buffer *buffer;
3221         struct trace_array *tr = &global_trace;
3222         struct bprint_entry *entry;
3223         unsigned long flags;
3224         char *tbuffer;
3225         int len = 0, size, pc;
3226
3227         if (unlikely(tracing_selftest_running || tracing_disabled))
3228                 return 0;
3229
3230         /* Don't pollute graph traces with trace_vprintk internals */
3231         pause_graph_tracing();
3232
3233         pc = preempt_count();
3234         preempt_disable_notrace();
3235
3236         tbuffer = get_trace_buf();
3237         if (!tbuffer) {
3238                 len = 0;
3239                 goto out_nobuffer;
3240         }
3241
3242         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3243
3244         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3245                 goto out_put;
3246
3247         local_save_flags(flags);
3248         size = sizeof(*entry) + sizeof(u32) * len;
3249         buffer = tr->array_buffer.buffer;
3250         ring_buffer_nest_start(buffer);
3251         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3252                                             flags, pc);
3253         if (!event)
3254                 goto out;
3255         entry = ring_buffer_event_data(event);
3256         entry->ip                       = ip;
3257         entry->fmt                      = fmt;
3258
3259         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3260         if (!call_filter_check_discard(call, entry, buffer, event)) {
3261                 __buffer_unlock_commit(buffer, event);
3262                 ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
3263         }
3264
3265 out:
3266         ring_buffer_nest_end(buffer);
3267 out_put:
3268         put_trace_buf();
3269
3270 out_nobuffer:
3271         preempt_enable_notrace();
3272         unpause_graph_tracing();
3273
3274         return len;
3275 }
3276 EXPORT_SYMBOL_GPL(trace_vbprintk);
3277
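/*
 * Helper shared by the trace_array printk variants below: format the
 * message into a per-CPU temporary buffer, then copy it into a
 * TRACE_PRINT event reserved on @buffer. Unlike trace_vbprintk(), the
 * format string is fully expanded here rather than stored as a format
 * pointer plus binary arguments.
 */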
3278 __printf(3, 0)
3279 static int
3280 __trace_array_vprintk(struct trace_buffer *buffer,
3281                       unsigned long ip, const char *fmt, va_list args)
3282 {
3283         struct trace_event_call *call = &event_print;
3284         struct ring_buffer_event *event;
3285         int len = 0, size, pc;
3286         struct print_entry *entry;
3287         unsigned long flags;
3288         char *tbuffer;
3289
3290         if (tracing_disabled || tracing_selftest_running)
3291                 return 0;
3292
3293         /* Don't pollute graph traces with trace_vprintk internals */
3294         pause_graph_tracing();
3295
3296         pc = preempt_count();
3297         preempt_disable_notrace();
3298
3299
3300         tbuffer = get_trace_buf();
3301         if (!tbuffer) {
3302                 len = 0;
3303                 goto out_nobuffer;
3304         }
3305
3306         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3307
3308         local_save_flags(flags);
3309         size = sizeof(*entry) + len + 1;
3310         ring_buffer_nest_start(buffer);
3311         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3312                                             flags, pc);
3313         if (!event)
3314                 goto out;
3315         entry = ring_buffer_event_data(event);
3316         entry->ip = ip;
3317
3318         memcpy(&entry->buf, tbuffer, len + 1);
3319         if (!call_filter_check_discard(call, entry, buffer, event)) {
3320                 __buffer_unlock_commit(buffer, event);
3321                 ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
3322         }
3323
3324 out:
3325         ring_buffer_nest_end(buffer);
3326         put_trace_buf();
3327
3328 out_nobuffer:
3329         preempt_enable_notrace();
3330         unpause_graph_tracing();
3331
3332         return len;
3333 }
3334
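/**
 * trace_array_vprintk - write a formatted message into a trace instance
 * @tr:   The trace array to write to
 * @ip:   The address of the caller
 * @fmt:  The string format to write to the buffer
 * @args: Arguments for @fmt
 */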
3335 __printf(3, 0)
3336 int trace_array_vprintk(struct trace_array *tr,
3337                         unsigned long ip, const char *fmt, va_list args)
3338 {
3339         return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3340 }
3341
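/**
 * trace_array_printk - printk()-style write into a specific trace instance
 * @tr:  The trace array to write to
 * @ip:  The address of the caller
 * @fmt: The string format to write to the buffer
 *
 * Returns the number of bytes written, 0 if the TRACE_ITER_PRINTK
 * option is off, or -ENOENT if @tr is NULL.
 *
 * Illustrative use (hypothetical caller; @tr and @val are placeholders):
 *
 *	trace_array_printk(tr, _THIS_IP_, "val=%d\n", val);
 */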
3342 __printf(3, 0)
3343 int trace_array_printk(struct trace_array *tr,
3344                        unsigned long ip, const char *fmt, ...)
3345 {
3346         int ret;
3347         va_list ap;
3348
3349         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3350                 return 0;
3351
3352         if (!tr)
3353                 return -ENOENT;
3354
3355         va_start(ap, fmt);
3356         ret = trace_array_vprintk(tr, ip, fmt, ap);
3357         va_end(ap);
3358         return ret;
3359 }
3360 EXPORT_SYMBOL_GPL(trace_array_printk);
3361
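/*
 * Like trace_array_printk(), but writes directly into the given ring
 * buffer instead of going through a trace_array. Still honours the
 * global TRACE_ITER_PRINTK option.
 */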
3362 __printf(3, 4)
3363 int trace_array_printk_buf(struct trace_buffer *buffer,
3364                            unsigned long ip, const char *fmt, ...)
3365 {
3366         int ret;
3367         va_list ap;
3368
3369         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3370                 return 0;
3371
3372         va_start(ap, fmt);
3373         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3374         va_end(ap);
3375         return ret;
3376 }
3377
3378 __printf(2, 0)
3379 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3380 {
3381         return trace_array_vprintk(&global_trace, ip, fmt, args);
3382 }
3383 EXPORT_SYMBOL_GPL(trace_vprintk);
3384
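/* Step the iterator to the next entry on its current CPU. */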
3385 static void trace_iterator_increment(struct trace_iterator *iter)
3386 {
3387         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3388
3389         iter->idx++;
3390         if (buf_iter)
3391                 ring_buffer_iter_advance(buf_iter);
3392 }
3393
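/*
 * Peek at the next entry on @cpu without consuming it. When a
 * non-consuming buffer iterator exists it is used, otherwise the live
 * ring buffer is peeked; @ts and @lost_events are updated accordingly.
 */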
3394 static struct trace_entry *
3395 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3396                 unsigned long *lost_events)
3397 {
3398         struct ring_buffer_event *event;
3399         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3400
3401         if (buf_iter) {
3402                 event = ring_buffer_iter_peek(buf_iter, ts);
3403                 if (lost_events)
3404                         *lost_events = ring_buffer_iter_dropped(buf_iter) ?
3405                                 (unsigned long)-1 : 0;
3406         } else {
3407                 event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3408                                          lost_events);
3409         }
3410
3411         if (event) {
3412                 iter->ent_size = ring_buffer_event_length(event);
3413                 return ring_buffer_event_data(event);
3414         }
3415         iter->ent_size = 0;
3416         return NULL;
3417 }
3418
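/*
 * Find the entry with the oldest timestamp across the CPUs covered by
 * the iterator (or only the selected CPU for a per_cpu trace file),
 * and report its CPU, timestamp and lost-event count through the
 * optional output parameters.
 */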
3419 static struct trace_entry *
3420 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3421                   unsigned long *missing_events, u64 *ent_ts)
3422 {
3423         struct trace_buffer *buffer = iter->array_buffer->buffer;
3424         struct trace_entry *ent, *next = NULL;
3425         unsigned long lost_events = 0, next_lost = 0;
3426         int cpu_file = iter->cpu_file;
3427         u64 next_ts = 0, ts;
3428         int next_cpu = -1;
3429         int next_size = 0;
3430         int cpu;
3431
3432         /*
3433          * If we are in a per_cpu trace file, don't bother iterating over
3434          * all CPUs; just peek at that one CPU directly.
3435          */
3436         if (cpu_file > RING_BUFFER_ALL_CPUS) {
3437                 if (ring_buffer_empty_cpu(buffer, cpu_file))
3438                         return NULL;
3439                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3440                 if (ent_cpu)
3441                         *ent_cpu = cpu_file;
3442
3443                 return ent;
3444         }
3445
3446         for_each_tracing_cpu(cpu) {
3447
3448                 if (ring_buffer_empty_cpu(buffer, cpu))
3449                         continue;
3450
3451                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3452
3453                 /*
3454                  * Pick the entry with the smallest timestamp:
3455                  */
3456                 if (ent && (!next || ts < next_ts)) {
3457                         next = ent;
3458                         next_cpu = cpu;
3459                         next_ts = ts;
3460                         next_lost = lost_events;
3461                         next_size = iter->ent_size;
3462                 }
3463         }
3464
3465         iter->ent_size = next_size;
3466
3467         if (ent_cpu)
3468                 *ent_cpu = next_cpu;
3469
3470         if (ent_ts)
3471                 *ent_ts = next_ts;
3472
3473         if (missing_events)
3474                 *missing_events = next_lost;
3475
3476         return next;
3477 }
3478
3479 #define STATIC_TEMP_BUF_SIZE    128
3480 static char static_temp_buf[STATIC_TEMP_BUF_SIZE];
3481
3482 /* Find the next real entry, without updating the iterator itself */
3483 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3484                                           int *ent_cpu, u64 *ent_ts)
3485 {
3486         /* __find_next_entry will reset ent_size */
3487         int ent_size = iter->ent_size;
3488         struct trace_entry *entry;
3489
3490         /*
3491          * If called from ftrace_dump(), then the iter->temp buffer
3492          * will be the static_temp_buf and not created from kmalloc.
3493          * If the entry size is greater than the buffer, we cannot
3494          * save it. Just return NULL in that case. This is only
3495          * used to add markers when two consecutive events'
3496          * timestamps have a large delta. See trace_print_lat_context().
3497          */
3498         if (iter->temp == static_temp_buf &&
3499             STATIC_TEMP_BUF_SIZE < ent_size)
3500                 return NULL;
3501
3502         /*
3503          * __find_next_entry() may call peek_next_entry(), which may
3504          * call ring_buffer_peek(), which can make the contents of
3505          * iter->ent undefined. Copy iter->ent now.
3506          */
3507         if (iter->ent && iter->ent != iter->temp) {
3508                 if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3509                     !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3510                         kfree(iter->temp);
3511                         iter->temp = kmalloc(iter->ent_size, GFP_KERNEL);
3512                         if (!iter->temp)
3513                                 return NULL;
3514                 }
3515                 memcpy(iter->temp, iter->ent, iter->ent_size);
3516                 iter->temp_size = iter->ent_size;
3517                 iter->ent = iter->temp;
3518         }
3519         entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3520         /* Put back the original ent_size */
3521         iter->ent_size = ent_size;
3522
3523         return entry;
3524 }
3525
3526 /* Find the next real entry, and increment the iterator to the next entry */
3527 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3528 {
3529         iter->ent = __find_next_entry(iter, &iter->cpu,
3530                                       &iter->lost_events, &iter->ts);
3531
3532         if (iter->ent)
3533                 trace_iterator_increment(iter);
3534
3535         return iter->ent ? iter : NULL;
3536 }
3537
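/* Consume (remove) the next entry on the iterator's current CPU. */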
3538 static void trace_consume(struct trace_iterator *iter)
3539 {
3540         ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3541                             &iter->lost_events);
3542 }
3543
3544 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3545 {
3546         struct trace_iterator *iter = m->private;
3547         int i = (int)*pos;
3548         void *ent;
3549
3550         WARN_ON_ONCE(iter->leftover);
3551
3552         (*pos)++;
3553
3554         /* can't go backwards */
3555         if (iter->idx > i)
3556                 return NULL;
3557
3558         if (iter->idx < 0)
3559                 ent = trace_find_next_entry_inc(iter);
3560         else
3561                 ent = iter;
3562
3563         while (ent && iter->idx < i)
3564                 ent = trace_find_next_entry_inc(iter);
3565
3566         iter->pos = *pos;
3567
3568         return ent;
3569 }
3570
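/*
 * Reset the ring buffer iterator for @cpu and skip any entries that
 * were recorded before the buffer's start time, accounting for them
 * in skipped_entries.
 */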
3571 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3572 {
3573         struct ring_buffer_iter *buf_iter;
3574         unsigned long entries = 0;
3575         u64 ts;
3576
3577         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
3578
3579         buf_iter = trace_buffer_iter(iter, cpu);
3580         if (!buf_iter)
3581                 return;
3582
3583         ring_buffer_iter_reset(buf_iter);
3584
3585         /*
3586          * With the max latency tracers, it is possible that a reset
3587          * never took place on a CPU. This shows up as timestamps
3588          * that are before the start of the buffer.
3589          */
3590         while (ring_buffer_iter_peek(buf_iter, &ts)) {
3591                 if (ts >= iter->array_buffer->time_start)
3592                         break;
3593                 entries++;
3594                 ring_buffer_iter_advance(buf_iter);
3595         }
3596
3597         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
3598 }
3599
3600 /*
3601  * The current tracer is copied to avoid holding a global lock
3602  * all around.
3603  */
3604 static void *s_start(struct seq_file *m, loff_t *pos)
3605 {
3606         struct trace_iterator *iter = m->private;
3607         struct trace_array *tr = iter->tr;
3608         int cpu_file = iter->cpu_file;
3609         void *p = NULL;
3610         loff_t l = 0;
3611         int cpu;
3612
3613         /*
3614          * Copy the tracer to avoid using a global lock all around.
3615          * iter->trace is a copy of current_trace, so the name pointer
3616          * may be compared instead of using strcmp(), as iter->trace->name
3617          * will point to the same string as current_trace->name.
3618          */
3619         mutex_lock(&trace_types_lock);
3620         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3621                 *iter->trace = *tr->current_trace;
3622         mutex_unlock(&trace_types_lock);
3623
3624 #ifdef CONFIG_TRACER_MAX_TRACE
3625         if (iter->snapshot && iter->trace->use_max_tr)
3626                 return ERR_PTR(-EBUSY);
3627 #endif
3628
3629         if (!iter->snapshot)
3630                 atomic_inc(&trace_record_taskinfo_disabled);
3631
3632         if (*pos != iter->pos) {
3633                 iter->ent = NULL;
3634                 iter->cpu = 0;
3635                 iter->idx = -1;
3636
3637                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
3638                         for_each_tracing_cpu(cpu)
3639                                 tracing_iter_reset(iter, cpu);
3640                 } else
3641                         tracing_iter_reset(iter, cpu_file);
3642
3643                 iter->leftover = 0;
3644                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3645                         ;
3646
3647         } else {
3648                 /*
3649                  * If we overflowed the seq_file before, then we want
3650                  * to just reuse the trace_seq buffer.
3651                  */
3652                 if (iter->leftover)
3653                         p = iter;
3654                 else {
3655                         l = *pos - 1;
3656                         p = s_next(m, p, &l);
3657                 }
3658         }
3659
3660         trace_event_read_lock();
3661         trace_access_lock(cpu_file);
3662         return p;
3663 }
3664
3665 static void s_stop(struct seq_file *m, void *p)
3666 {
3667         struct trace_iterator *iter = m->private;
3668
3669 #ifdef CONFIG_TRACER_MAX_TRACE
3670         if (iter->snapshot && iter->trace->use_max_tr)
3671                 return;
3672 #endif
3673
3674         if (!iter->snapshot)
3675                 atomic_dec(&trace_record_taskinfo_disabled);
3676
3677         trace_access_unlock(iter->cpu_file);
3678         trace_event_read_unlock();
3679 }
3680
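/*
 * Per-CPU entry accounting: @entries is what is still in the buffer
 * (minus any entries skipped by the latency tracers); @total also
 * includes entries lost to overruns, unless entries were skipped, in
 * which case the two are the same.
 */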
3681 static void
3682 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
3683                       unsigned long *entries, int cpu)
3684 {
3685         unsigned long count;
3686
3687         count = ring_buffer_entries_cpu(buf->buffer, cpu);
3688         /*
3689          * If this buffer has skipped entries, then we hold all
3690          * entries for the trace and need to ignore the
3691          * ones before the buffer's start time stamp.
3692          */
3693         if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3694                 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3695                 /* total is the same as the entries */
3696                 *total = count;
3697         } else
3698                 *total = count +
3699                         ring_buffer_overrun_cpu(buf->buffer, cpu);
3700         *entries = count;
3701 }
3702
3703 static void
3704 get_total_entries(struct array_buffer *buf,
3705                   unsigned long *total, unsigned long *entries)
3706 {
3707         unsigned long t, e;
3708         int cpu;
3709
3710         *total = 0;
3711         *entries = 0;
3712
3713         for_each_tracing_cpu(cpu) {
3714                 get_total_entries_cpu(buf, &t, &e, cpu);
3715                 *total += t;
3716                 *entries += e;
3717         }
3718 }
3719
3720 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
3721 {
3722         unsigned long total, entries;
3723
3724         if (!tr)
3725                 tr = &global_trace;
3726
3727         get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
3728
3729         return entries;
3730 }
3731
3732 unsigned long trace_total_entries(struct trace_array *tr)
3733 {
3734         unsigned long total, entries;
3735
3736         if (!tr)
3737                 tr = &global_trace;
3738
3739         get_total_entries(&tr->array_buffer, &total, &entries);
3740
3741         return entries;
3742 }
3743
3744 static void print_lat_help_header(struct seq_file *m)
3745 {
3746         seq_puts(m, "#                  _------=> CPU#            \n"
3747                     "#                 / _-----=> irqs-off        \n"
3748                     "#                | / _----=> need-resched    \n"
3749                     "#                || / _---=> hardirq/softirq \n"
3750                     "#                ||| / _--=> preempt-depth   \n"
3751                     "#                |||| /     delay            \n"
3752                     "#  cmd     pid   ||||| time  |   caller      \n"
3753                     "#     \\   /      |||||  \\    |   /         \n");
3754 }
3755
3756 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
3757 {
3758         unsigned long total;
3759         unsigned long entries;
3760
3761         get_total_entries(buf, &total, &entries);
3762         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
3763                    entries, total, num_online_cpus());
3764         seq_puts(m, "#\n");
3765 }
3766
3767 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
3768                                    unsigned int flags)
3769 {
3770         bool tgid = flags & TRACE_ITER_RECORD_TGID;
3771
3772         print_event_info(buf, m);
3773
3774         seq_printf(m, "#           TASK-PID   %s  CPU#   TIMESTAMP  FUNCTION\n", tgid ? "TGID     " : "");
3775         seq_printf(m, "#              | |     %s    |       |         |\n",      tgid ? "  |      " : "");
3776 }
3777
3778 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
3779                                        unsigned int flags)
3780 {
3781         bool tgid = flags & TRACE_ITER_RECORD_TGID;
3782         const char *space = "          ";
3783         int prec = tgid ? 10 : 2;
3784
3785         print_event_info(buf, m);
3786
3787         seq_printf(m, "#                          %.*s  _-----=> irqs-off\n", prec, space);
3788         seq_printf(m, "#                          %.*s / _----=> need-resched\n", prec, space);
3789         seq_printf(m, "#                          %.*s| / _---=> hardirq/softirq\n", prec, space);
3790         seq_printf(m, "#                          %.*s|| / _--=> preempt-depth\n", prec, space);
3791         seq_printf(m, "#                          %.*s||| /     delay\n", prec, space);
3792         seq_printf(m, "#           TASK-PID %.*sCPU#  ||||    TIMESTAMP  FUNCTION\n", prec, "   TGID   ");
3793         seq_printf(m, "#              | |   %.*s  |   ||||       |         |\n", prec, "     |    ");
3794 }
3795
3796 void
3797 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3798 {
3799         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3800         struct array_buffer *buf = iter->array_buffer;
3801         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3802         struct tracer *type = iter->trace;
3803         unsigned long entries;
3804         unsigned long total;
3805         const char *name = "preemption";
3806
3807         name = type->name;
3808
3809         get_total_entries(buf, &total, &entries);
3810
3811         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3812                    name, UTS_RELEASE);
3813         seq_puts(m, "# -----------------------------------"
3814                  "---------------------------------\n");
3815         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3816                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3817                    nsecs_to_usecs(data->saved_latency),
3818                    entries,
3819                    total,
3820                    buf->cpu,
3821 #if defined(CONFIG_PREEMPT_NONE)
3822                    "server",
3823 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3824                    "desktop",
3825 #elif defined(CONFIG_PREEMPT)
3826                    "preempt",
3827 #elif defined(CONFIG_PREEMPT_RT)
3828                    "preempt_rt",
3829 #else
3830                    "unknown",
3831 #endif
3832                    /* These are reserved for later use */
3833                    0, 0, 0, 0);
3834 #ifdef CONFIG_SMP
3835         seq_printf(m, " #P:%d)\n", num_online_cpus());
3836 #else
3837         seq_puts(m, ")\n");
3838 #endif
3839         seq_puts(m, "#    -----------------\n");
3840         seq_printf(m, "#    | task: %.16s-%d "
3841                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3842                    data->comm, data->pid,
3843                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3844                    data->policy, data->rt_priority);
3845         seq_puts(m, "#    -----------------\n");
3846
3847         if (data->critical_start) {
3848                 seq_puts(m, "#  => started at: ");
3849                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3850                 trace_print_seq(m, &iter->seq);
3851                 seq_puts(m, "\n#  => ended at:   ");
3852                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3853                 trace_print_seq(m, &iter->seq);
3854                 seq_puts(m, "\n#\n");
3855         }
3856
3857         seq_puts(m, "#\n");
3858 }
3859
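/*
 * With the "annotate" option set, emit a one-time marker the first
 * time output from a given CPU's buffer appears in the trace.
 */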
3860 static void test_cpu_buff_start(struct trace_iterator *iter)
3861 {
3862         struct trace_seq *s = &iter->seq;
3863         struct trace_array *tr = iter->tr;
3864
3865         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3866                 return;
3867
3868         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3869                 return;
3870
3871         if (cpumask_available(iter->started) &&
3872             cpumask_test_cpu(iter->cpu, iter->started))
3873                 return;
3874
3875         if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
3876                 return;
3877
3878         if (cpumask_available(iter->started))
3879                 cpumask_set_cpu(iter->cpu, iter->started);
3880
3881         /* Don't print the "buffer started" banner for the first entry of the trace */
3882         if (iter->idx > 1)
3883                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3884                                 iter->cpu);
3885 }
3886
3887 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3888 {
3889         struct trace_array *tr = iter->tr;
3890         struct trace_seq *s = &iter->seq;
3891         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3892         struct trace_entry *entry;
3893         struct trace_event *event;
3894
3895         entry = iter->ent;
3896
3897         test_cpu_buff_start(iter);
3898
3899         event = ftrace_find_event(entry->type);
3900
3901         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3902                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3903                         trace_print_lat_context(iter);
3904                 else
3905                         trace_print_context(iter);
3906         }
3907
3908         if (trace_seq_has_overflowed(s))
3909                 return TRACE_TYPE_PARTIAL_LINE;
3910
3911         if (event)
3912                 return event->funcs->trace(iter, sym_flags, event);
3913
3914         trace_seq_printf(s, "Unknown type %d\n", entry->type);
3915
3916         return trace_handle_return(s);
3917 }
3918
3919 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3920 {
3921         struct trace_array *tr = iter->tr;
3922         struct trace_seq *s = &iter->seq;
3923         struct trace_entry *entry;
3924         struct trace_event *event;
3925
3926         entry = iter->ent;
3927
3928         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3929                 trace_seq_printf(s, "%d %d %llu ",
3930                                  entry->pid, iter->cpu, iter->ts);
3931
3932         if (trace_seq_has_overflowed(s))
3933                 return TRACE_TYPE_PARTIAL_LINE;
3934
3935         event = ftrace_find_event(entry->type);
3936         if (event)
3937                 return event->funcs->raw(iter, 0, event);
3938
3939         trace_seq_printf(s, "%d ?\n", entry->type);
3940
3941         return trace_handle_return(s);
3942 }
3943
3944 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3945 {
3946         struct trace_array *tr = iter->tr;
3947         struct trace_seq *s = &iter->seq;
3948         unsigned char newline = '\n';
3949         struct trace_entry *entry;
3950         struct trace_event *event;
3951
3952         entry = iter->ent;
3953
3954         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3955                 SEQ_PUT_HEX_FIELD(s, entry->pid);
3956                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
3957                 SEQ_PUT_HEX_FIELD(s, iter->ts);
3958                 if (trace_seq_has_overflowed(s))
3959                         return TRACE_TYPE_PARTIAL_LINE;
3960         }
3961
3962         event = ftrace_find_event(entry->type);
3963         if (event) {
3964                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
3965                 if (ret != TRACE_TYPE_HANDLED)
3966                         return ret;
3967         }
3968
3969         SEQ_PUT_FIELD(s, newline);
3970
3971         return trace_handle_return(s);
3972 }
3973
3974 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3975 {
3976         struct trace_array *tr = iter->tr;
3977         struct trace_seq *s = &iter->seq;
3978         struct trace_entry *entry;
3979         struct trace_event *event;
3980
3981         entry = iter->ent;
3982
3983         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3984                 SEQ_PUT_FIELD(s, entry->pid);
3985                 SEQ_PUT_FIELD(s, iter->cpu);
3986                 SEQ_PUT_FIELD(s, iter->ts);
3987                 if (trace_seq_has_overflowed(s))
3988                         return TRACE_TYPE_PARTIAL_LINE;
3989         }
3990
3991         event = ftrace_find_event(entry->type);
3992         return event ? event->funcs->binary(iter, 0, event) :
3993                 TRACE_TYPE_HANDLED;
3994 }
3995
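/* Return 1 if the iterator's buffer(s) have nothing left to read. */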
3996 int trace_empty(struct trace_iterator *iter)
3997 {
3998         struct ring_buffer_iter *buf_iter;
3999         int cpu;
4000
4001         /* If we are looking at one CPU buffer, only check that one */
4002         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4003                 cpu = iter->cpu_file;
4004                 buf_iter = trace_buffer_iter(iter, cpu);
4005                 if (buf_iter) {
4006                         if (!ring_buffer_iter_empty(buf_iter))
4007                                 return 0;
4008                 } else {
4009                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4010                                 return 0;
4011                 }
4012                 return 1;
4013         }
4014
4015         for_each_tracing_cpu(cpu) {
4016                 buf_iter = trace_buffer_iter(iter, cpu);
4017                 if (buf_iter) {
4018                         if (!ring_buffer_iter_empty(buf_iter))
4019                                 return 0;
4020                 } else {
4021                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4022                                 return 0;
4023                 }
4024         }
4025
4026         return 1;
4027 }
4028
4029 /*  Called with trace_event_read_lock() held. */
4030 enum print_line_t print_trace_line(struct trace_iterator *iter)
4031 {
4032         struct trace_array *tr = iter->tr;
4033         unsigned long trace_flags = tr->trace_flags;
4034         enum print_line_t ret;
4035
4036         if (iter->lost_events) {
4037                 if (iter->lost_events == (unsigned long)-1)
4038                         trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4039                                          iter->cpu);
4040                 else
4041                         trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4042                                          iter->cpu, iter->lost_events);
4043                 if (trace_seq_has_overflowed(&iter->seq))
4044                         return TRACE_TYPE_PARTIAL_LINE;
4045         }
4046
4047         if (iter->trace && iter->trace->print_line) {
4048                 ret = iter->trace->print_line(iter);
4049                 if (ret != TRACE_TYPE_UNHANDLED)
4050                         return ret;
4051         }
4052
4053         if (iter->ent->type == TRACE_BPUTS &&
4054                         trace_flags & TRACE_ITER_PRINTK &&
4055                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4056                 return trace_print_bputs_msg_only(iter);
4057
4058         if (iter->ent->type == TRACE_BPRINT &&
4059                         trace_flags & TRACE_ITER_PRINTK &&
4060                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4061                 return trace_print_bprintk_msg_only(iter);
4062
4063         if (iter->ent->type == TRACE_PRINT &&
4064                         trace_flags & TRACE_ITER_PRINTK &&
4065                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4066                 return trace_print_printk_msg_only(iter);
4067
4068         if (trace_flags & TRACE_ITER_BIN)
4069                 return print_bin_fmt(iter);
4070
4071         if (trace_flags & TRACE_ITER_HEX)
4072                 return print_hex_fmt(iter);
4073
4074         if (trace_flags & TRACE_ITER_RAW)
4075                 return print_raw_fmt(iter);
4076
4077         return print_trace_fmt(iter);
4078 }
4079
4080 void trace_latency_header(struct seq_file *m)
4081 {
4082         struct trace_iterator *iter = m->private;
4083         struct trace_array *tr = iter->tr;
4084
4085         /* print nothing if the buffers are empty */
4086         if (trace_empty(iter))
4087                 return;
4088
4089         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4090                 print_trace_header(m, iter);
4091
4092         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4093                 print_lat_help_header(m);
4094 }
4095
4096 void trace_default_header(struct seq_file *m)
4097 {
4098         struct trace_iterator *iter = m->private;
4099         struct trace_array *tr = iter->tr;
4100         unsigned long trace_flags = tr->trace_flags;
4101
4102         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4103                 return;
4104
4105         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4106                 /* print nothing if the buffers are empty */
4107                 if (trace_empty(iter))
4108                         return;
4109                 print_trace_header(m, iter);
4110                 if (!(trace_flags & TRACE_ITER_VERBOSE))
4111                         print_lat_help_header(m);
4112         } else {
4113                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4114                         if (trace_flags & TRACE_ITER_IRQ_INFO)
4115                                 print_func_help_header_irq(iter->array_buffer,
4116                                                            m, trace_flags);
4117                         else
4118                                 print_func_help_header(iter->array_buffer, m,
4119                                                        trace_flags);
4120                 }
4121         }
4122 }
4123
4124 static void test_ftrace_alive(struct seq_file *m)
4125 {
4126         if (!ftrace_is_dead())
4127                 return;
4128         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4129                     "#          MAY BE MISSING FUNCTION EVENTS\n");
4130 }
4131
4132 #ifdef CONFIG_TRACER_MAX_TRACE
4133 static void show_snapshot_main_help(struct seq_file *m)
4134 {
4135         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4136                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4137                     "#                      Takes a snapshot of the main buffer.\n"
4138                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4139                     "#                      (Doesn't have to be '2'; works with any number that\n"
4140                     "#                       is not a '0' or '1')\n");
4141 }
4142
4143 static void show_snapshot_percpu_help(struct seq_file *m)
4144 {
4145         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4146 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4147         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4148                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
4149 #else
4150         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4151                     "#                     Must use main snapshot file to allocate.\n");
4152 #endif
4153         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4154                     "#                      (Doesn't have to be '2'; works with any number that\n"
4155                     "#                       is not a '0' or '1')\n");
4156 }
4157
4158 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4159 {
4160         if (iter->tr->allocated_snapshot)
4161                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4162         else
4163                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4164
4165         seq_puts(m, "# Snapshot commands:\n");
4166         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4167                 show_snapshot_main_help(m);
4168         else
4169                 show_snapshot_percpu_help(m);
4170 }
4171 #else
4172 /* Should never be called */
4173 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4174 #endif
4175
4176 static int s_show(struct seq_file *m, void *v)
4177 {
4178         struct trace_iterator *iter = v;
4179         int ret;
4180
4181         if (iter->ent == NULL) {
4182                 if (iter->tr) {
4183                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
4184                         seq_puts(m, "#\n");
4185                         test_ftrace_alive(m);
4186                 }
4187                 if (iter->snapshot && trace_empty(iter))
4188                         print_snapshot_help(m, iter);
4189                 else if (iter->trace && iter->trace->print_header)
4190                         iter->trace->print_header(m);
4191                 else
4192                         trace_default_header(m);
4193
4194         } else if (iter->leftover) {
4195                 /*
4196                  * If we filled the seq_file buffer earlier, we
4197                  * want to just show it now.
4198                  */
4199                 ret = trace_print_seq(m, &iter->seq);
4200
4201                 /* ret should this time be zero, but you never know */
4202                 iter->leftover = ret;
4203
4204         } else {
4205                 print_trace_line(iter);
4206                 ret = trace_print_seq(m, &iter->seq);
4207                 /*
4208                  * If we overflow the seq_file buffer, then it will
4209                  * ask us for this data again at the next start up.
4210                  * Save it in leftover so it can be reused then.
4211                  *  ret is 0 if the seq_file write succeeded,
4212                  *        -1 otherwise.
4213                  */
4214                 iter->leftover = ret;
4215         }
4216
4217         return 0;
4218 }
4219
4220 /*
4221  * Should be used after trace_array_get(); trace_types_lock
4222  * ensures that i_cdev was already initialized.
4223  */
4224 static inline int tracing_get_cpu(struct inode *inode)
4225 {
4226         if (inode->i_cdev) /* See trace_create_cpu_file() */
4227                 return (long)inode->i_cdev - 1;
4228         return RING_BUFFER_ALL_CPUS;
4229 }
4230
4231 static const struct seq_operations tracer_seq_ops = {
4232         .start          = s_start,
4233         .next           = s_next,
4234         .stop           = s_stop,
4235         .show           = s_show,
4236 };
4237
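/*
 * Set up a trace_iterator for reading the "trace" file (or the
 * "snapshot" file): allocate per-CPU buffer iterators, copy the
 * current tracer, and optionally pause tracing while the file is open.
 */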
4238 static struct trace_iterator *
4239 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4240 {
4241         struct trace_array *tr = inode->i_private;
4242         struct trace_iterator *iter;
4243         int cpu;
4244
4245         if (tracing_disabled)
4246                 return ERR_PTR(-ENODEV);
4247
4248         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4249         if (!iter)
4250                 return ERR_PTR(-ENOMEM);
4251
4252         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4253                                     GFP_KERNEL);
4254         if (!iter->buffer_iter)
4255                 goto release;
4256
4257         /*
4258          * trace_find_next_entry() may need to save off iter->ent.
4259          * It will place it into the iter->temp buffer. As most
4260          * events are less than 128 bytes, allocate a buffer of that size.
4261          * If one is greater, then trace_find_next_entry() will
4262          * allocate a new buffer to adjust for the bigger iter->ent.
4263          * It's not critical if it fails to get allocated here.
4264          */
4265         iter->temp = kmalloc(128, GFP_KERNEL);
4266         if (iter->temp)
4267                 iter->temp_size = 128;
4268
4269         /*
4270          * We make a copy of the current tracer to avoid concurrent
4271          * changes to it while we are reading.
4272          */
4273         mutex_lock(&trace_types_lock);
4274         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4275         if (!iter->trace)
4276                 goto fail;
4277
4278         *iter->trace = *tr->current_trace;
4279
4280         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4281                 goto fail;
4282
4283         iter->tr = tr;
4284
4285 #ifdef CONFIG_TRACER_MAX_TRACE
4286         /* Currently only the top directory has a snapshot */
4287         if (tr->current_trace->print_max || snapshot)
4288                 iter->array_buffer = &tr->max_buffer;
4289         else
4290 #endif
4291                 iter->array_buffer = &tr->array_buffer;
4292         iter->snapshot = snapshot;
4293         iter->pos = -1;
4294         iter->cpu_file = tracing_get_cpu(inode);
4295         mutex_init(&iter->mutex);
4296
4297         /* Notify the tracer early, before we stop tracing. */
4298         if (iter->trace->open)
4299                 iter->trace->open(iter);
4300
4301         /* Annotate start of buffers if we had overruns */
4302         if (ring_buffer_overruns(iter->array_buffer->buffer))
4303                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4304
4305         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4306         if (trace_clocks[tr->clock_id].in_ns)
4307                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4308
4309         /*
4310          * If pause-on-trace is enabled, then stop the trace while
4311          * dumping, unless this is the "snapshot" file.
4312          */
4313         if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4314                 tracing_stop_tr(tr);
4315
4316         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4317                 for_each_tracing_cpu(cpu) {
4318                         iter->buffer_iter[cpu] =
4319                                 ring_buffer_read_prepare(iter->array_buffer->buffer,
4320                                                          cpu, GFP_KERNEL);
4321                 }
4322                 ring_buffer_read_prepare_sync();
4323                 for_each_tracing_cpu(cpu) {
4324                         ring_buffer_read_start(iter->buffer_iter[cpu]);
4325                         tracing_iter_reset(iter, cpu);
4326                 }
4327         } else {
4328                 cpu = iter->cpu_file;
4329                 iter->buffer_iter[cpu] =
4330                         ring_buffer_read_prepare(iter->array_buffer->buffer,
4331                                                  cpu, GFP_KERNEL);
4332                 ring_buffer_read_prepare_sync();
4333                 ring_buffer_read_start(iter->buffer_iter[cpu]);
4334                 tracing_iter_reset(iter, cpu);
4335         }
4336
4337         mutex_unlock(&trace_types_lock);
4338
4339         return iter;
4340
4341  fail:
4342         mutex_unlock(&trace_types_lock);
4343         kfree(iter->trace);
4344         kfree(iter->temp);
4345         kfree(iter->buffer_iter);
4346 release:
4347         seq_release_private(inode, file);
4348         return ERR_PTR(-ENOMEM);
4349 }
4350
4351 int tracing_open_generic(struct inode *inode, struct file *filp)
4352 {
4353         int ret;
4354
4355         ret = tracing_check_open_get_tr(NULL);
4356         if (ret)
4357                 return ret;
4358
4359         filp->private_data = inode->i_private;
4360         return 0;
4361 }
4362
4363 bool tracing_is_disabled(void)
4364 {
4365         return tracing_disabled ? true : false;
4366 }
4367
4368 /*
4369  * Open and update trace_array ref count.
4370  * Must have the current trace_array passed to it.
4371  */
4372 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4373 {
4374         struct trace_array *tr = inode->i_private;
4375         int ret;
4376
4377         ret = tracing_check_open_get_tr(tr);
4378         if (ret)
4379                 return ret;
4380
4381         filp->private_data = inode->i_private;
4382
4383         return 0;
4384 }
4385
4386 static int tracing_release(struct inode *inode, struct file *file)
4387 {
4388         struct trace_array *tr = inode->i_private;
4389         struct seq_file *m = file->private_data;
4390         struct trace_iterator *iter;
4391         int cpu;
4392
4393         if (!(file->f_mode & FMODE_READ)) {
4394                 trace_array_put(tr);
4395                 return 0;
4396         }
4397
4398         /* Writes do not use seq_file */
4399         iter = m->private;
4400         mutex_lock(&trace_types_lock);
4401
4402         for_each_tracing_cpu(cpu) {
4403                 if (iter->buffer_iter[cpu])
4404                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
4405         }
4406
4407         if (iter->trace && iter->trace->close)
4408                 iter->trace->close(iter);
4409
4410         if (!iter->snapshot && tr->stop_count)
4411                 /* reenable tracing if it was previously enabled */
4412                 tracing_start_tr(tr);
4413
4414         __trace_array_put(tr);
4415
4416         mutex_unlock(&trace_types_lock);
4417
4418         mutex_destroy(&iter->mutex);
4419         free_cpumask_var(iter->started);
4420         kfree(iter->temp);
4421         kfree(iter->trace);
4422         kfree(iter->buffer_iter);
4423         seq_release_private(inode, file);
4424
4425         return 0;
4426 }
4427
4428 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4429 {
4430         struct trace_array *tr = inode->i_private;
4431
4432         trace_array_put(tr);
4433         return 0;
4434 }
4435
4436 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4437 {
4438         struct trace_array *tr = inode->i_private;
4439
4440         trace_array_put(tr);
4441
4442         return single_release(inode, file);
4443 }
4444
4445 static int tracing_open(struct inode *inode, struct file *file)
4446 {
4447         struct trace_array *tr = inode->i_private;
4448         struct trace_iterator *iter;
4449         int ret;
4450
4451         ret = tracing_check_open_get_tr(tr);
4452         if (ret)
4453                 return ret;
4454
4455         /* If this file was open for write, then erase contents */
4456         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4457                 int cpu = tracing_get_cpu(inode);
4458                 struct array_buffer *trace_buf = &tr->array_buffer;
4459
4460 #ifdef CONFIG_TRACER_MAX_TRACE
4461                 if (tr->current_trace->print_max)
4462                         trace_buf = &tr->max_buffer;
4463 #endif
4464
4465                 if (cpu == RING_BUFFER_ALL_CPUS)
4466                         tracing_reset_online_cpus(trace_buf);
4467                 else
4468                         tracing_reset_cpu(trace_buf, cpu);
4469         }
4470
4471         if (file->f_mode & FMODE_READ) {
4472                 iter = __tracing_open(inode, file, false);
4473                 if (IS_ERR(iter))
4474                         ret = PTR_ERR(iter);
4475                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4476                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
4477         }
4478
4479         if (ret < 0)
4480                 trace_array_put(tr);
4481
4482         return ret;
4483 }
4484
4485 /*
4486  * Some tracers are not suitable for instance buffers.
4487  * A tracer is always available for the global (top-level) array,
4488  * or if it explicitly states that it allows instances.
4489  */
4490 static bool
4491 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4492 {
4493         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4494 }
4495
4496 /* Find the next tracer that this trace array may use */
4497 static struct tracer *
4498 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4499 {
4500         while (t && !trace_ok_for_array(t, tr))
4501                 t = t->next;
4502
4503         return t;
4504 }
4505
4506 static void *
4507 t_next(struct seq_file *m, void *v, loff_t *pos)
4508 {
4509         struct trace_array *tr = m->private;
4510         struct tracer *t = v;
4511
4512         (*pos)++;
4513
4514         if (t)
4515                 t = get_tracer_for_array(tr, t->next);
4516
4517         return t;
4518 }
4519
4520 static void *t_start(struct seq_file *m, loff_t *pos)
4521 {
4522         struct trace_array *tr = m->private;
4523         struct tracer *t;
4524         loff_t l = 0;
4525
4526         mutex_lock(&trace_types_lock);
4527
4528         t = get_tracer_for_array(tr, trace_types);
4529         for (; t && l < *pos; t = t_next(m, t, &l))
4530                         ;
4531
4532         return t;
4533 }
4534
4535 static void t_stop(struct seq_file *m, void *p)
4536 {
4537         mutex_unlock(&trace_types_lock);
4538 }
4539
4540 static int t_show(struct seq_file *m, void *v)
4541 {
4542         struct tracer *t = v;
4543
4544         if (!t)
4545                 return 0;
4546
4547         seq_puts(m, t->name);
4548         if (t->next)
4549                 seq_putc(m, ' ');
4550         else
4551                 seq_putc(m, '\n');
4552
4553         return 0;
4554 }
4555
4556 static const struct seq_operations show_traces_seq_ops = {
4557         .start          = t_start,
4558         .next           = t_next,
4559         .stop           = t_stop,
4560         .show           = t_show,
4561 };
4562
4563 static int show_traces_open(struct inode *inode, struct file *file)
4564 {
4565         struct trace_array *tr = inode->i_private;
4566         struct seq_file *m;
4567         int ret;
4568
4569         ret = tracing_check_open_get_tr(tr);
4570         if (ret)
4571                 return ret;
4572
4573         ret = seq_open(file, &show_traces_seq_ops);
4574         if (ret) {
4575                 trace_array_put(tr);
4576                 return ret;
4577         }
4578
4579         m = file->private_data;
4580         m->private = tr;
4581
4582         return 0;
4583 }
4584
4585 static int show_traces_release(struct inode *inode, struct file *file)
4586 {
4587         struct trace_array *tr = inode->i_private;
4588
4589         trace_array_put(tr);
4590         return seq_release(inode, file);
4591 }
4592
4593 static ssize_t
4594 tracing_write_stub(struct file *filp, const char __user *ubuf,
4595                    size_t count, loff_t *ppos)
4596 {
4597         return count;
4598 }
4599
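/*
 * llseek for trace files: opens with read go through seq_file's
 * lseek, while write-only opens simply reset the position to zero.
 */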
4600 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4601 {
4602         int ret;
4603
4604         if (file->f_mode & FMODE_READ)
4605                 ret = seq_lseek(file, offset, whence);
4606         else
4607                 file->f_pos = ret = 0;
4608
4609         return ret;
4610 }
4611
4612 static const struct file_operations tracing_fops = {
4613         .open           = tracing_open,
4614         .read           = seq_read,
4615         .write          = tracing_write_stub,
4616         .llseek         = tracing_lseek,
4617         .release        = tracing_release,
4618 };
4619
4620 static const struct file_operations show_traces_fops = {
4621         .open           = show_traces_open,
4622         .read           = seq_read,
4623         .llseek         = seq_lseek,
4624         .release        = show_traces_release,
4625 };
4626
4627 static ssize_t
4628 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4629                      size_t count, loff_t *ppos)
4630 {
4631         struct trace_array *tr = file_inode(filp)->i_private;
4632         char *mask_str;
4633         int len;
4634
4635         len = snprintf(NULL, 0, "%*pb\n",
4636                        cpumask_pr_args(tr->tracing_cpumask)) + 1;
4637         mask_str = kmalloc(len, GFP_KERNEL);
4638         if (!mask_str)
4639                 return -ENOMEM;
4640
4641         len = snprintf(mask_str, len, "%*pb\n",
4642                        cpumask_pr_args(tr->tracing_cpumask));
4643         if (len >= count) {
4644                 count = -EINVAL;
4645                 goto out_err;
4646         }
4647         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
4648
4649 out_err:
4650         kfree(mask_str);
4651
4652         return count;
4653 }
4654
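/**
 * tracing_set_cpumask - set which CPUs @tr traces on
 * @tr: The trace array to update
 * @tracing_cpumask_new: The mask of CPUs that should be traced
 *
 * CPUs cleared from the mask get recording disabled and their
 * "disabled" counter bumped; CPUs added to the mask are re-enabled.
 */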
4655 int tracing_set_cpumask(struct trace_array *tr,
4656                         cpumask_var_t tracing_cpumask_new)
4657 {
4658         int cpu;
4659
4660         if (!tr)
4661                 return -EINVAL;
4662
4663         local_irq_disable();
4664         arch_spin_lock(&tr->max_lock);
4665         for_each_tracing_cpu(cpu) {
4666                 /*
4667                  * Increase/decrease the disabled counter if we are
4668                  * about to flip a bit in the cpumask:
4669                  */
4670                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4671                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4672                         atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
4673                         ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
4674                 }
4675                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4676                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4677                         atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
4678                         ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
4679                 }
4680         }
4681         arch_spin_unlock(&tr->max_lock);
4682         local_irq_enable();
4683
4684         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4685
4686         return 0;
4687 }
4688
4689 static ssize_t
4690 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4691                       size_t count, loff_t *ppos)
4692 {
4693         struct trace_array *tr = file_inode(filp)->i_private;
4694         cpumask_var_t tracing_cpumask_new;
4695         int err;
4696
4697         if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4698                 return -ENOMEM;
4699
4700         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4701         if (err)
4702                 goto err_free;
4703
4704         err = tracing_set_cpumask(tr, tracing_cpumask_new);
4705         if (err)
4706                 goto err_free;
4707
4708         free_cpumask_var(tracing_cpumask_new);
4709
4710         return count;
4711
4712 err_free:
4713         free_cpumask_var(tracing_cpumask_new);
4714
4715         return err;
4716 }
4717
4718 static const struct file_operations tracing_cpumask_fops = {
4719         .open           = tracing_open_generic_tr,
4720         .read           = tracing_cpumask_read,
4721         .write          = tracing_cpumask_write,
4722         .release        = tracing_release_generic_tr,
4723         .llseek         = generic_file_llseek,
4724 };
4725
4726 static int tracing_trace_options_show(struct seq_file *m, void *v)
4727 {
4728         struct tracer_opt *trace_opts;
4729         struct trace_array *tr = m->private;
4730         u32 tracer_flags;
4731         int i;
4732
4733         mutex_lock(&trace_types_lock);
4734         tracer_flags = tr->current_trace->flags->val;
4735         trace_opts = tr->current_trace->flags->opts;
4736
4737         for (i = 0; trace_options[i]; i++) {
4738                 if (tr->trace_flags & (1 << i))
4739                         seq_printf(m, "%s\n", trace_options[i]);
4740                 else
4741                         seq_printf(m, "no%s\n", trace_options[i]);
4742         }
4743
4744         for (i = 0; trace_opts[i].name; i++) {
4745                 if (tracer_flags & trace_opts[i].bit)
4746                         seq_printf(m, "%s\n", trace_opts[i].name);
4747                 else
4748                         seq_printf(m, "no%s\n", trace_opts[i].name);
4749         }
4750         mutex_unlock(&trace_types_lock);
4751
4752         return 0;
4753 }
4754
4755 static int __set_tracer_option(struct trace_array *tr,
4756                                struct tracer_flags *tracer_flags,
4757                                struct tracer_opt *opts, int neg)
4758 {
4759         struct tracer *trace = tracer_flags->trace;
4760         int ret;
4761
4762         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4763         if (ret)
4764                 return ret;
4765
4766         if (neg)
4767                 tracer_flags->val &= ~opts->bit;
4768         else
4769                 tracer_flags->val |= opts->bit;
4770         return 0;
4771 }
4772
4773 /* Try to assign a tracer specific option */
4774 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4775 {
4776         struct tracer *trace = tr->current_trace;
4777         struct tracer_flags *tracer_flags = trace->flags;
4778         struct tracer_opt *opts = NULL;
4779         int i;
4780
4781         for (i = 0; tracer_flags->opts[i].name; i++) {
4782                 opts = &tracer_flags->opts[i];
4783
4784                 if (strcmp(cmp, opts->name) == 0)
4785                         return __set_tracer_option(tr, trace->flags, opts, neg);
4786         }
4787
4788         return -EINVAL;
4789 }
4790
4791 /* Some tracers require overwrite to stay enabled */
4792 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4793 {
4794         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4795                 return -1;
4796
4797         return 0;
4798 }
4799
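/*
 * Set or clear a single TRACE_ITER_* flag on @tr. The current tracer
 * may veto the change, and side effects (cmdline/TGID recording, fork
 * following, buffer overwrite mode, trace_printk) are propagated here.
 */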
4800 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4801 {
4802         if ((mask == TRACE_ITER_RECORD_TGID) ||
4803             (mask == TRACE_ITER_RECORD_CMD))
4804                 lockdep_assert_held(&event_mutex);
4805
4806         /* do nothing if flag is already set */
4807         if (!!(tr->trace_flags & mask) == !!enabled)
4808                 return 0;
4809
4810         /* Give the tracer a chance to approve the change */
4811         if (tr->current_trace->flag_changed)
4812                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4813                         return -EINVAL;
4814
4815         if (enabled)
4816                 tr->trace_flags |= mask;
4817         else
4818                 tr->trace_flags &= ~mask;
4819
4820         if (mask == TRACE_ITER_RECORD_CMD)
4821                 trace_event_enable_cmd_record(enabled);
4822
4823         if (mask == TRACE_ITER_RECORD_TGID) {
4824                 if (!tgid_map)
4825                         tgid_map = kvcalloc(PID_MAX_DEFAULT + 1,
4826                                            sizeof(*tgid_map),
4827                                            GFP_KERNEL);
4828                 if (!tgid_map) {
4829                         tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
4830                         return -ENOMEM;
4831                 }
4832
4833                 trace_event_enable_tgid_record(enabled);
4834         }
4835
4836         if (mask == TRACE_ITER_EVENT_FORK)
4837                 trace_event_follow_fork(tr, enabled);
4838
4839         if (mask == TRACE_ITER_FUNC_FORK)
4840                 ftrace_pid_follow_fork(tr, enabled);
4841
4842         if (mask == TRACE_ITER_OVERWRITE) {
4843                 ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
4844 #ifdef CONFIG_TRACER_MAX_TRACE
4845                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4846 #endif
4847         }
4848
4849         if (mask == TRACE_ITER_PRINTK) {
4850                 trace_printk_start_stop_comm(enabled);
4851                 trace_printk_control(enabled);
4852         }
4853
4854         return 0;
4855 }
4856
4857 int trace_set_options(struct trace_array *tr, char *option)
4858 {
4859         char *cmp;
4860         int neg = 0;
4861         int ret;
4862         size_t orig_len = strlen(option);
4863         int len;
4864
4865         cmp = strstrip(option);
4866
4867         len = str_has_prefix(cmp, "no");
4868         if (len)
4869                 neg = 1;
4870
4871         cmp += len;
4872
4873         mutex_lock(&event_mutex);
4874         mutex_lock(&trace_types_lock);
4875
4876         ret = match_string(trace_options, -1, cmp);
4877         /* If not a core trace option, try the tracer-specific options */
4878         if (ret < 0)
4879                 ret = set_tracer_option(tr, cmp, neg);
4880         else
4881                 ret = set_tracer_flag(tr, 1 << ret, !neg);
4882
4883         mutex_unlock(&trace_types_lock);
4884         mutex_unlock(&event_mutex);
4885
4886         /*
4887          * If the first trailing whitespace is replaced with '\0' by strstrip,
4888          * turn it back into a space.
4889          */
4890         if (orig_len > strlen(option))
4891                 option[strlen(option)] = ' ';
4892
4893         return ret;
4894 }
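/*
 * Illustrative only: a minimal user-space sketch, assuming tracefs is
 * mounted at /sys/kernel/tracing.  Writing "noprint-parent" to the
 * trace_options file ends up in trace_set_options() above (via
 * tracing_trace_options_write() below), which strips the "no" prefix,
 * matches the core option and clears its flag via set_tracer_flag().
 *
 *        #include <fcntl.h>
 *        #include <unistd.h>
 *
 *        int main(void)
 *        {
 *                int fd = open("/sys/kernel/tracing/trace_options", O_WRONLY);
 *
 *                if (fd < 0)
 *                        return 1;
 *                write(fd, "noprint-parent", 14);
 *                close(fd);
 *                return 0;
 *        }
 */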
4895
4896 static void __init apply_trace_boot_options(void)
4897 {
4898         char *buf = trace_boot_options_buf;
4899         char *option;
4900
4901         while (true) {
4902                 option = strsep(&buf, ",");
4903
4904                 if (!option)
4905                         break;
4906
4907                 if (*option)
4908                         trace_set_options(&global_trace, option);
4909
4910                 /* Put back the comma to allow this to be called again */
4911                 if (buf)
4912                         *(buf - 1) = ',';
4913         }
4914 }
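/*
 * Illustrative only: booting with "trace_options=sym-addr,noirq-info"
 * on the kernel command line fills trace_boot_options_buf, and the loop
 * above then calls trace_set_options() once for "sym-addr" and once for
 * "noirq-info", restoring the ',' after each pass so the buffer can be
 * parsed again if needed.
 */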
4915
4916 static ssize_t
4917 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4918                         size_t cnt, loff_t *ppos)
4919 {
4920         struct seq_file *m = filp->private_data;
4921         struct trace_array *tr = m->private;
4922         char buf[64];
4923         int ret;
4924
4925         if (cnt >= sizeof(buf))
4926                 return -EINVAL;
4927
4928         if (copy_from_user(buf, ubuf, cnt))
4929                 return -EFAULT;
4930
4931         buf[cnt] = 0;
4932
4933         ret = trace_set_options(tr, buf);
4934         if (ret < 0)
4935                 return ret;
4936
4937         *ppos += cnt;
4938
4939         return cnt;
4940 }
4941
4942 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4943 {
4944         struct trace_array *tr = inode->i_private;
4945         int ret;
4946
4947         ret = tracing_check_open_get_tr(tr);
4948         if (ret)
4949                 return ret;
4950
4951         ret = single_open(file, tracing_trace_options_show, inode->i_private);
4952         if (ret < 0)
4953                 trace_array_put(tr);
4954
4955         return ret;
4956 }
4957
4958 static const struct file_operations tracing_iter_fops = {
4959         .open           = tracing_trace_options_open,
4960         .read           = seq_read,
4961         .llseek         = seq_lseek,
4962         .release        = tracing_single_release_tr,
4963         .write          = tracing_trace_options_write,
4964 };
4965
4966 static const char readme_msg[] =
4967         "tracing mini-HOWTO:\n\n"
4968         "# echo 0 > tracing_on : quick way to disable tracing\n"
4969         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4970         " Important files:\n"
4971         "  trace\t\t\t- The static contents of the buffer\n"
4972         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
4973         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4974         "  current_tracer\t- function and latency tracers\n"
4975         "  available_tracers\t- list of configured tracers for current_tracer\n"
4976         "  error_log\t- error log for failed commands (that support it)\n"
4977         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
4978         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
4979         "  trace_clock\t\t- change the clock used to order events\n"
4980         "       local:   Per cpu clock but may not be synced across CPUs\n"
4981         "      global:   Synced across CPUs but slows tracing down.\n"
4982         "     counter:   Not a clock, but just an increment\n"
4983         "      uptime:   Jiffy counter from time of boot\n"
4984         "        perf:   Same clock that perf events use\n"
4985 #ifdef CONFIG_X86_64
4986         "     x86-tsc:   TSC cycle counter\n"
4987 #endif
4988         "\n  timestamp_mode\t- view the mode used to timestamp events\n"
4989         "       delta:   Delta difference against a buffer-wide timestamp\n"
4990         "    absolute:   Absolute (standalone) timestamp\n"
4991         "\n  trace_marker\t\t- Writes into this file are written into the kernel buffer\n"
4992         "\n  trace_marker_raw\t\t- Writes into this file are written as binary data into the kernel buffer\n"
4993         "  tracing_cpumask\t- Limit which CPUs to trace\n"
4994         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4995         "\t\t\t  Remove sub-buffer with rmdir\n"
4996         "  trace_options\t\t- Set format or modify how tracing happens\n"
4997         "\t\t\t  Disable an option by prefixing 'no' to the\n"
4998         "\t\t\t  option name\n"
4999         "  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5000 #ifdef CONFIG_DYNAMIC_FTRACE
5001         "\n  available_filter_functions - list of functions that can be filtered on\n"
5002         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
5003         "\t\t\t  functions\n"
5004         "\t     accepts: func_full_name or glob-matching-pattern\n"
5005         "\t     modules: Can select a group via module\n"
5006         "\t      Format: :mod:<module-name>\n"
5007         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5008         "\t    triggers: a command to perform when function is hit\n"
5009         "\t      Format: <function>:<trigger>[:count]\n"
5010         "\t     trigger: traceon, traceoff\n"
5011         "\t\t      enable_event:<system>:<event>\n"
5012         "\t\t      disable_event:<system>:<event>\n"
5013 #ifdef CONFIG_STACKTRACE
5014         "\t\t      stacktrace\n"
5015 #endif
5016 #ifdef CONFIG_TRACER_SNAPSHOT
5017         "\t\t      snapshot\n"
5018 #endif
5019         "\t\t      dump\n"
5020         "\t\t      cpudump\n"
5021         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5022         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5023         "\t     The first one will disable tracing every time do_fault is hit\n"
5024         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5025         "\t       The first time do_trap is hit and it disables tracing, the\n"
5026         "\t       counter will decrement to 2. If tracing is already disabled,\n"
5027         "\t       the counter will not decrement. It only decrements when the\n"
5028         "\t       trigger did work\n"
5029         "\t     To remove a trigger without a count:\n"
5030         "\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
5031         "\t     To remove a trigger with a count:\n"
5032         "\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
5033         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5034         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5035         "\t    modules: Can select a group via module command :mod:\n"
5036         "\t    Does not accept triggers\n"
5037 #endif /* CONFIG_DYNAMIC_FTRACE */
5038 #ifdef CONFIG_FUNCTION_TRACER
5039         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5040         "\t\t    (function)\n"
5041         "  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5042         "\t\t    (function)\n"
5043 #endif
5044 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5045         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5046         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5047         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5048 #endif
5049 #ifdef CONFIG_TRACER_SNAPSHOT
5050         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5051         "\t\t\t  snapshot buffer. Read the contents for more\n"
5052         "\t\t\t  information\n"
5053 #endif
5054 #ifdef CONFIG_STACK_TRACER
5055         "  stack_trace\t\t- Shows the max stack trace when active\n"
5056         "  stack_max_size\t- Shows current max stack size that was traced\n"
5057         "\t\t\t  Write into this file to reset the max size (trigger a\n"
5058         "\t\t\t  new trace)\n"
5059 #ifdef CONFIG_DYNAMIC_FTRACE
5060         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5061         "\t\t\t  traces\n"
5062 #endif
5063 #endif /* CONFIG_STACK_TRACER */
5064 #ifdef CONFIG_DYNAMIC_EVENTS
5065         "  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5066         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5067 #endif
5068 #ifdef CONFIG_KPROBE_EVENTS
5069         "  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5070         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5071 #endif
5072 #ifdef CONFIG_UPROBE_EVENTS
5073         "  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5074         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5075 #endif
5076 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5077         "\t  accepts: event-definitions (one definition per line)\n"
5078         "\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
5079         "\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
5080 #ifdef CONFIG_HIST_TRIGGERS
5081         "\t           s:[synthetic/]<event> <field> [<field>]\n"
5082 #endif
5083         "\t           -:[<group>/]<event>\n"
5084 #ifdef CONFIG_KPROBE_EVENTS
5085         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5086   "place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5087 #endif
5088 #ifdef CONFIG_UPROBE_EVENTS
5089   "   place (uprobe): <path>:<offset>[(ref_ctr_offset)]\n"
5090 #endif
5091         "\t     args: <name>=fetcharg[:type]\n"
5092         "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
5093 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5094         "\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5095 #else
5096         "\t           $stack<index>, $stack, $retval, $comm,\n"
5097 #endif
5098         "\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5099         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
5100         "\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5101         "\t           <type>\\[<array-size>\\]\n"
5102 #ifdef CONFIG_HIST_TRIGGERS
5103         "\t    field: <stype> <name>;\n"
5104         "\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5105         "\t           [unsigned] char/int/long\n"
5106 #endif
5107 #endif
5108         "  events/\t\t- Directory containing all trace event subsystems:\n"
5109         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5110         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
5111         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5112         "\t\t\t  events\n"
5113         "      filter\t\t- If set, only events passing filter are traced\n"
5114         "  events/<system>/<event>/\t- Directory containing control files for\n"
5115         "\t\t\t  <event>:\n"
5116         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5117         "      filter\t\t- If set, only events passing filter are traced\n"
5118         "      trigger\t\t- If set, a command to perform when event is hit\n"
5119         "\t    Format: <trigger>[:count][if <filter>]\n"
5120         "\t   trigger: traceon, traceoff\n"
5121         "\t            enable_event:<system>:<event>\n"
5122         "\t            disable_event:<system>:<event>\n"
5123 #ifdef CONFIG_HIST_TRIGGERS
5124         "\t            enable_hist:<system>:<event>\n"
5125         "\t            disable_hist:<system>:<event>\n"
5126 #endif
5127 #ifdef CONFIG_STACKTRACE
5128         "\t\t    stacktrace\n"
5129 #endif
5130 #ifdef CONFIG_TRACER_SNAPSHOT
5131         "\t\t    snapshot\n"
5132 #endif
5133 #ifdef CONFIG_HIST_TRIGGERS
5134         "\t\t    hist (see below)\n"
5135 #endif
5136         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5137         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5138         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5139         "\t                  events/block/block_unplug/trigger\n"
5140         "\t   The first disables tracing every time block_unplug is hit.\n"
5141         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5142         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5143         "\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5144         "\t   Like function triggers, the counter is only decremented if it\n"
5145         "\t    enabled or disabled tracing.\n"
5146         "\t   To remove a trigger without a count:\n"
5147         "\t     echo '!<trigger>' > <system>/<event>/trigger\n"
5148         "\t   To remove a trigger with a count:\n"
5149         "\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
5150         "\t   Filters can be ignored when removing a trigger.\n"
5151 #ifdef CONFIG_HIST_TRIGGERS
5152         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5153         "\t    Format: hist:keys=<field1[,field2,...]>\n"
5154         "\t            [:values=<field1[,field2,...]>]\n"
5155         "\t            [:sort=<field1[,field2,...]>]\n"
5156         "\t            [:size=#entries]\n"
5157         "\t            [:pause][:continue][:clear]\n"
5158         "\t            [:name=histname1]\n"
5159         "\t            [:<handler>.<action>]\n"
5160         "\t            [if <filter>]\n\n"
5161         "\t    When a matching event is hit, an entry is added to a hash\n"
5162         "\t    table using the key(s) and value(s) named, and the value of a\n"
5163         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
5164         "\t    correspond to fields in the event's format description.  Keys\n"
5165         "\t    can be any field, or the special string 'stacktrace'.\n"
5166         "\t    Compound keys consisting of up to two fields can be specified\n"
5167         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5168         "\t    fields.  Sort keys consisting of up to two fields can be\n"
5169         "\t    specified using the 'sort' keyword.  The sort direction can\n"
5170         "\t    be modified by appending '.descending' or '.ascending' to a\n"
5171         "\t    sort field.  The 'size' parameter can be used to specify more\n"
5172         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
5173         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
5174         "\t    its histogram data will be shared with other triggers of the\n"
5175         "\t    same name, and trigger hits will update this common data.\n\n"
5176         "\t    Reading the 'hist' file for the event will dump the hash\n"
5177         "\t    table in its entirety to stdout.  If there are multiple hist\n"
5178         "\t    triggers attached to an event, there will be a table for each\n"
5179         "\t    trigger in the output.  The table displayed for a named\n"
5180         "\t    trigger will be the same as any other instance having the\n"
5181         "\t    same name.  The default format used to display a given field\n"
5182         "\t    can be modified by appending any of the following modifiers\n"
5183         "\t    to the field name, as applicable:\n\n"
5184         "\t            .hex        display a number as a hex value\n"
5185         "\t            .sym        display an address as a symbol\n"
5186         "\t            .sym-offset display an address as a symbol and offset\n"
5187         "\t            .execname   display a common_pid as a program name\n"
5188         "\t            .syscall    display a syscall id as a syscall name\n"
5189         "\t            .log2       display log2 value rather than raw number\n"
5190         "\t            .usecs      display a common_timestamp in microseconds\n\n"
5191         "\t    The 'pause' parameter can be used to pause an existing hist\n"
5192         "\t    trigger or to start a hist trigger but not log any events\n"
5193         "\t    until told to do so.  'continue' can be used to start or\n"
5194         "\t    restart a paused hist trigger.\n\n"
5195         "\t    The 'clear' parameter will clear the contents of a running\n"
5196         "\t    hist trigger and leave its current paused/active state\n"
5197         "\t    unchanged.\n\n"
5198         "\t    The enable_hist and disable_hist triggers can be used to\n"
5199         "\t    have one event conditionally start and stop another event's\n"
5200         "\t    already-attached hist trigger.  The syntax is analogous to\n"
5201         "\t    the enable_event and disable_event triggers.\n\n"
5202         "\t    Hist trigger handlers and actions are executed whenever a\n"
5203         "\t    histogram entry is added or updated.  They take the form:\n\n"
5204         "\t        <handler>.<action>\n\n"
5205         "\t    The available handlers are:\n\n"
5206         "\t        onmatch(matching.event)  - invoke on addition or update\n"
5207         "\t        onmax(var)               - invoke if var exceeds current max\n"
5208         "\t        onchange(var)            - invoke action if var changes\n\n"
5209         "\t    The available actions are:\n\n"
5210         "\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5211         "\t        save(field,...)                      - save current event fields\n"
5212 #ifdef CONFIG_TRACER_SNAPSHOT
5213         "\t        snapshot()                           - snapshot the trace buffer\n"
5214 #endif
5215 #endif
5216 ;
5217
5218 static ssize_t
5219 tracing_readme_read(struct file *filp, char __user *ubuf,
5220                        size_t cnt, loff_t *ppos)
5221 {
5222         return simple_read_from_buffer(ubuf, cnt, ppos,
5223                                         readme_msg, strlen(readme_msg));
5224 }
5225
5226 static const struct file_operations tracing_readme_fops = {
5227         .open           = tracing_open_generic,
5228         .read           = tracing_readme_read,
5229         .llseek         = generic_file_llseek,
5230 };
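/*
 * Illustrative only: the text above is what user space sees when reading
 * the README file.  A minimal sketch, assuming tracefs is mounted at
 * /sys/kernel/tracing:
 *
 *        #include <fcntl.h>
 *        #include <stdio.h>
 *        #include <unistd.h>
 *
 *        int main(void)
 *        {
 *                char buf[4096];
 *                ssize_t n;
 *                int fd = open("/sys/kernel/tracing/README", O_RDONLY);
 *
 *                if (fd < 0)
 *                        return 1;
 *                // simple_read_from_buffer() serves readme_msg chunk by chunk
 *                while ((n = read(fd, buf, sizeof(buf))) > 0)
 *                        fwrite(buf, 1, n, stdout);
 *                close(fd);
 *                return 0;
 *        }
 */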
5231
5232 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5233 {
5234         int *ptr = v;
5235
5236         if (*pos || m->count)
5237                 ptr++;
5238
5239         (*pos)++;
5240
5241         for (; ptr <= &tgid_map[PID_MAX_DEFAULT]; ptr++) {
5242                 if (trace_find_tgid(*ptr))
5243                         return ptr;
5244         }
5245
5246         return NULL;
5247 }
5248
5249 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5250 {
5251         void *v;
5252         loff_t l = 0;
5253
5254         if (!tgid_map)
5255                 return NULL;
5256
5257         v = &tgid_map[0];
5258         while (l <= *pos) {
5259                 v = saved_tgids_next(m, v, &l);
5260                 if (!v)
5261                         return NULL;
5262         }
5263
5264         return v;
5265 }
5266
5267 static void saved_tgids_stop(struct seq_file *m, void *v)
5268 {
5269 }
5270
5271 static int saved_tgids_show(struct seq_file *m, void *v)
5272 {
5273         int pid = (int *)v - tgid_map;
5274
5275         seq_printf(m, "%d %d\n", pid, trace_find_tgid(pid));
5276         return 0;
5277 }
5278
5279 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5280         .start          = saved_tgids_start,
5281         .stop           = saved_tgids_stop,
5282         .next           = saved_tgids_next,
5283         .show           = saved_tgids_show,
5284 };
5285
5286 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5287 {
5288         int ret;
5289
5290         ret = tracing_check_open_get_tr(NULL);
5291         if (ret)
5292                 return ret;
5293
5294         return seq_open(filp, &tracing_saved_tgids_seq_ops);
5295 }
5296
5297
5298 static const struct file_operations tracing_saved_tgids_fops = {
5299         .open           = tracing_saved_tgids_open,
5300         .read           = seq_read,
5301         .llseek         = seq_lseek,
5302         .release        = seq_release,
5303 };
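/*
 * Illustrative only: each line of the saved_tgids file is "<pid> <tgid>"
 * as emitted by saved_tgids_show() above.  The values below are made up:
 *
 *        1271 1271
 *        1293 1271
 */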
5304
5305 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5306 {
5307         unsigned int *ptr = v;
5308
5309         if (*pos || m->count)
5310                 ptr++;
5311
5312         (*pos)++;
5313
5314         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5315              ptr++) {
5316                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5317                         continue;
5318
5319                 return ptr;
5320         }
5321
5322         return NULL;
5323 }
5324
5325 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5326 {
5327         void *v;
5328         loff_t l = 0;
5329
5330         preempt_disable();
5331         arch_spin_lock(&trace_cmdline_lock);
5332
5333         v = &savedcmd->map_cmdline_to_pid[0];
5334         while (l <= *pos) {
5335                 v = saved_cmdlines_next(m, v, &l);
5336                 if (!v)
5337                         return NULL;
5338         }
5339
5340         return v;
5341 }
5342
5343 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5344 {
5345         arch_spin_unlock(&trace_cmdline_lock);
5346         preempt_enable();
5347 }
5348
5349 static int saved_cmdlines_show(struct seq_file *m, void *v)
5350 {
5351         char buf[TASK_COMM_LEN];
5352         unsigned int *pid = v;
5353
5354         __trace_find_cmdline(*pid, buf);
5355         seq_printf(m, "%d %s\n", *pid, buf);
5356         return 0;
5357 }
5358
5359 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5360         .start          = saved_cmdlines_start,
5361         .next           = saved_cmdlines_next,
5362         .stop           = saved_cmdlines_stop,
5363         .show           = saved_cmdlines_show,
5364 };
5365
5366 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5367 {
5368         int ret;
5369
5370         ret = tracing_check_open_get_tr(NULL);
5371         if (ret)
5372                 return ret;
5373
5374         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5375 }
5376
5377 static const struct file_operations tracing_saved_cmdlines_fops = {
5378         .open           = tracing_saved_cmdlines_open,
5379         .read           = seq_read,
5380         .llseek         = seq_lseek,
5381         .release        = seq_release,
5382 };
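/*
 * Illustrative only: each line of the saved_cmdlines file is "<pid> <comm>"
 * as emitted by saved_cmdlines_show() above.  The values below are made up:
 *
 *        1271 bash
 *        1293 sshd
 */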
5383
5384 static ssize_t
5385 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5386                                  size_t cnt, loff_t *ppos)
5387 {
5388         char buf[64];
5389         int r;
5390
5391         arch_spin_lock(&trace_cmdline_lock);
5392         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5393         arch_spin_unlock(&trace_cmdline_lock);
5394
5395         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5396 }
5397
5398 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5399 {
5400         kfree(s->saved_cmdlines);
5401         kfree(s->map_cmdline_to_pid);
5402         kfree(s);
5403 }
5404
5405 static int tracing_resize_saved_cmdlines(unsigned int val)
5406 {
5407         struct saved_cmdlines_buffer *s, *savedcmd_temp;
5408
5409         s = kmalloc(sizeof(*s), GFP_KERNEL);
5410         if (!s)
5411                 return -ENOMEM;
5412
5413         if (allocate_cmdlines_buffer(val, s) < 0) {
5414                 kfree(s);
5415                 return -ENOMEM;
5416         }
5417
5418         arch_spin_lock(&trace_cmdline_lock);
5419         savedcmd_temp = savedcmd;
5420         savedcmd = s;
5421         arch_spin_unlock(&trace_cmdline_lock);
5422         free_saved_cmdlines_buffer(savedcmd_temp);
5423
5424         return 0;
5425 }
5426
5427 static ssize_t
5428 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5429                                   size_t cnt, loff_t *ppos)
5430 {
5431         unsigned long val;
5432         int ret;
5433
5434         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5435         if (ret)
5436                 return ret;
5437
5438         /* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
5439         if (!val || val > PID_MAX_DEFAULT)
5440                 return -EINVAL;
5441
5442         ret = tracing_resize_saved_cmdlines((unsigned int)val);
5443         if (ret < 0)
5444                 return ret;
5445
5446         *ppos += cnt;
5447
5448         return cnt;
5449 }
5450
5451 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5452         .open           = tracing_open_generic,
5453         .read           = tracing_saved_cmdlines_size_read,
5454         .write          = tracing_saved_cmdlines_size_write,
5455 };
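/*
 * Illustrative only: a minimal user-space sketch, assuming tracefs is
 * mounted at /sys/kernel/tracing.  Writing a number here goes through
 * tracing_saved_cmdlines_size_write() and reallocates the cmdline cache
 * via tracing_resize_saved_cmdlines().
 *
 *        #include <fcntl.h>
 *        #include <unistd.h>
 *
 *        int main(void)
 *        {
 *                int fd = open("/sys/kernel/tracing/saved_cmdlines_size", O_WRONLY);
 *
 *                if (fd < 0)
 *                        return 1;
 *                write(fd, "1024", 4);   // remember up to 1024 task comms
 *                close(fd);
 *                return 0;
 *        }
 */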
5456
5457 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5458 static union trace_eval_map_item *
5459 update_eval_map(union trace_eval_map_item *ptr)
5460 {
5461         if (!ptr->map.eval_string) {
5462                 if (ptr->tail.next) {
5463                         ptr = ptr->tail.next;
5464                         /* Set ptr to the next real item (skip head) */
5465                         ptr++;
5466                 } else
5467                         return NULL;
5468         }
5469         return ptr;
5470 }
5471
5472 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5473 {
5474         union trace_eval_map_item *ptr = v;
5475
5476         /*
5477          * Paranoid! If ptr points to end, we don't want to increment past it.
5478          * This really should never happen.
5479          */
5480         (*pos)++;
5481         ptr = update_eval_map(ptr);
5482         if (WARN_ON_ONCE(!ptr))
5483                 return NULL;
5484
5485         ptr++;
5486         ptr = update_eval_map(ptr);
5487
5488         return ptr;
5489 }
5490
5491 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5492 {
5493         union trace_eval_map_item *v;
5494         loff_t l = 0;
5495
5496         mutex_lock(&trace_eval_mutex);
5497
5498         v = trace_eval_maps;
5499         if (v)
5500                 v++;
5501
5502         while (v && l < *pos) {
5503                 v = eval_map_next(m, v, &l);
5504         }
5505
5506         return v;
5507 }
5508
5509 static void eval_map_stop(struct seq_file *m, void *v)
5510 {
5511         mutex_unlock(&trace_eval_mutex);
5512 }
5513
5514 static int eval_map_show(struct seq_file *m, void *v)
5515 {
5516         union trace_eval_map_item *ptr = v;
5517
5518         seq_printf(m, "%s %ld (%s)\n",
5519                    ptr->map.eval_string, ptr->map.eval_value,
5520                    ptr->map.system);
5521
5522         return 0;
5523 }
5524
5525 static const struct seq_operations tracing_eval_map_seq_ops = {
5526         .start          = eval_map_start,
5527         .next           = eval_map_next,
5528         .stop           = eval_map_stop,
5529         .show           = eval_map_show,
5530 };
5531
5532 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5533 {
5534         int ret;
5535
5536         ret = tracing_check_open_get_tr(NULL);
5537         if (ret)
5538                 return ret;
5539
5540         return seq_open(filp, &tracing_eval_map_seq_ops);
5541 }
5542
5543 static const struct file_operations tracing_eval_map_fops = {
5544         .open           = tracing_eval_map_open,
5545         .read           = seq_read,
5546         .llseek         = seq_lseek,
5547         .release        = seq_release,
5548 };
5549
5550 static inline union trace_eval_map_item *
5551 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5552 {
5553         /* Return tail of array given the head */
5554         return ptr + ptr->head.length + 1;
5555 }
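/*
 * Layout of one allocation built by trace_insert_eval_map_file() below
 * (len + 2 items in total), which is what the "+ head.length + 1" above
 * steps across:
 *
 *        [0]          head  (head.mod, head.length = len)
 *        [1 .. len]   map   (one copied trace_eval_map each)
 *        [len + 1]    tail  (zeroed; tail.next is set when another
 *                            module's array is chained on later)
 */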
5556
5557 static void
5558 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5559                            int len)
5560 {
5561         struct trace_eval_map **stop;
5562         struct trace_eval_map **map;
5563         union trace_eval_map_item *map_array;
5564         union trace_eval_map_item *ptr;
5565
5566         stop = start + len;
5567
5568         /*
5569          * Each entry on the trace_eval_maps list contains the maps plus a
5570          * head and a tail item: the head holds the module and the length of
5571          * the array, and the tail holds a pointer to the next list.
5572          */
5573         map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5574         if (!map_array) {
5575                 pr_warn("Unable to allocate trace eval mapping\n");
5576                 return;
5577         }
5578
5579         mutex_lock(&trace_eval_mutex);
5580
5581         if (!trace_eval_maps)
5582                 trace_eval_maps = map_array;
5583         else {
5584                 ptr = trace_eval_maps;
5585                 for (;;) {
5586                         ptr = trace_eval_jmp_to_tail(ptr);
5587                         if (!ptr->tail.next)
5588                                 break;
5589                         ptr = ptr->tail.next;
5590
5591                 }
5592                 ptr->tail.next = map_array;
5593         }
5594         map_array->head.mod = mod;
5595         map_array->head.length = len;
5596         map_array++;
5597
5598         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5599                 map_array->map = **map;
5600                 map_array++;
5601         }
5602         memset(map_array, 0, sizeof(*map_array));
5603
5604         mutex_unlock(&trace_eval_mutex);
5605 }
5606
5607 static void trace_create_eval_file(struct dentry *d_tracer)
5608 {
5609         trace_create_file("eval_map", 0444, d_tracer,
5610                           NULL, &tracing_eval_map_fops);
5611 }
5612
5613 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5614 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5615 static inline void trace_insert_eval_map_file(struct module *mod,
5616                               struct trace_eval_map **start, int len) { }
5617 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5618
5619 static void trace_insert_eval_map(struct module *mod,
5620                                   struct trace_eval_map **start, int len)
5621 {
5622         struct trace_eval_map **map;
5623
5624         if (len <= 0)
5625                 return;
5626
5627         map = start;
5628
5629         trace_event_eval_update(map, len);
5630
5631         trace_insert_eval_map_file(mod, start, len);
5632 }
5633
5634 static ssize_t
5635 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5636                        size_t cnt, loff_t *ppos)
5637 {
5638         struct trace_array *tr = filp->private_data;
5639         char buf[MAX_TRACER_SIZE+2];
5640         int r;
5641
5642         mutex_lock(&trace_types_lock);
5643         r = sprintf(buf, "%s\n", tr->current_trace->name);
5644         mutex_unlock(&trace_types_lock);
5645
5646         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5647 }
5648
5649 int tracer_init(struct tracer *t, struct trace_array *tr)
5650 {
5651         tracing_reset_online_cpus(&tr->array_buffer);
5652         return t->init(tr);
5653 }
5654
5655 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
5656 {
5657         int cpu;
5658
5659         for_each_tracing_cpu(cpu)
5660                 per_cpu_ptr(buf->data, cpu)->entries = val;
5661 }
5662
5663 #ifdef CONFIG_TRACER_MAX_TRACE
5664 /* resize @trace_buf's buffer to the size of @size_buf's entries */
5665 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
5666                                         struct array_buffer *size_buf, int cpu_id)
5667 {
5668         int cpu, ret = 0;
5669
5670         if (cpu_id == RING_BUFFER_ALL_CPUS) {
5671                 for_each_tracing_cpu(cpu) {
5672                         ret = ring_buffer_resize(trace_buf->buffer,
5673                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5674                         if (ret < 0)
5675                                 break;
5676                         per_cpu_ptr(trace_buf->data, cpu)->entries =
5677                                 per_cpu_ptr(size_buf->data, cpu)->entries;
5678                 }
5679         } else {
5680                 ret = ring_buffer_resize(trace_buf->buffer,
5681                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5682                 if (ret == 0)
5683                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5684                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
5685         }
5686
5687         return ret;
5688 }
5689 #endif /* CONFIG_TRACER_MAX_TRACE */
5690
5691 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5692                                         unsigned long size, int cpu)
5693 {
5694         int ret;
5695
5696         /*
5697          * If the kernel or the user changes the size of the ring buffer,
5698          * we use the size that was given, and we can forget about
5699          * expanding it later.
5700          */
5701         ring_buffer_expanded = true;
5702
5703         /* May be called before buffers are initialized */
5704         if (!tr->array_buffer.buffer)
5705                 return 0;
5706
5707         ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
5708         if (ret < 0)
5709                 return ret;
5710
5711 #ifdef CONFIG_TRACER_MAX_TRACE
5712         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
5713             !tr->current_trace->use_max_tr)
5714                 goto out;
5715
5716         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5717         if (ret < 0) {
5718                 int r = resize_buffer_duplicate_size(&tr->array_buffer,
5719                                                      &tr->array_buffer, cpu);
5720                 if (r < 0) {
5721                         /*
5722                          * AARGH! We are left with different
5723                          * size max buffer!!!!
5724                          * The max buffer is our "snapshot" buffer.
5725                          * When a tracer needs a snapshot (one of the
5726                          * latency tracers), it swaps the max buffer
5727                          * with the saved snapshot. We succeeded in updating
5728                          * the size of the main buffer, but failed to
5729                          * update the size of the max buffer. But when we tried
5730                          * to reset the main buffer to the original size, we
5731                          * failed there too. This is very unlikely to
5732                          * happen, but if it does, warn and kill all
5733                          * tracing.
5734                          */
5735                         WARN_ON(1);
5736                         tracing_disabled = 1;
5737                 }
5738                 return ret;
5739         }
5740
5741         if (cpu == RING_BUFFER_ALL_CPUS)
5742                 set_buffer_entries(&tr->max_buffer, size);
5743         else
5744                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
5745
5746  out:
5747 #endif /* CONFIG_TRACER_MAX_TRACE */
5748
5749         if (cpu == RING_BUFFER_ALL_CPUS)
5750                 set_buffer_entries(&tr->array_buffer, size);
5751         else
5752                 per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size;
5753
5754         return ret;
5755 }
5756
5757 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5758                                   unsigned long size, int cpu_id)
5759 {
5760         int ret = size;
5761
5762         mutex_lock(&trace_types_lock);
5763
5764         if (cpu_id != RING_BUFFER_ALL_CPUS) {
5765                 /* make sure this CPU is enabled in the mask */
5766                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
5767                         ret = -EINVAL;
5768                         goto out;
5769                 }
5770         }
5771
5772         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5773         if (ret < 0)
5774                 ret = -ENOMEM;
5775
5776 out:
5777         mutex_unlock(&trace_types_lock);
5778
5779         return ret;
5780 }
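/*
 * Illustrative only: a minimal user-space sketch, assuming tracefs is
 * mounted at /sys/kernel/tracing.  Writing to buffer_size_kb (see the
 * README text above) is serviced by the resize helpers here and sets
 * the per-CPU ring buffer size in kilobytes.
 *
 *        #include <fcntl.h>
 *        #include <unistd.h>
 *
 *        int main(void)
 *        {
 *                int fd = open("/sys/kernel/tracing/buffer_size_kb", O_WRONLY);
 *
 *                if (fd < 0)
 *                        return 1;
 *                write(fd, "4096", 4);   // 4 MB per CPU
 *                close(fd);
 *                return 0;
 *        }
 */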
5781
5782
5783 /**
5784  * tracing_update_buffers - used by tracing facility to expand ring buffers
5785  *
5786  * To save memory when tracing is never used on a system that has it
5787  * configured in, the ring buffers are set to a minimum size.  But once
5788  * a user starts to use the tracing facility, they need to grow
5789  * to their default size.
5790  *
5791  * This function is to be called when a tracer is about to be used.
5792  */
5793 int tracing_update_buffers(void)
5794 {
5795         int ret = 0;
5796
5797         mutex_lock(&trace_types_lock);
5798         if (!ring_buffer_expanded)
5799                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
5800                                                 RING_BUFFER_ALL_CPUS);
5801         mutex_unlock(&trace_types_lock);
5802
5803         return ret;
5804 }
5805
5806 struct trace_option_dentry;
5807
5808 static void
5809 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
5810
5811 /*
5812  * Used to clear out the tracer before deletion of an instance.
5813  * Must have trace_types_lock held.
5814  */
5815 static void tracing_set_nop(struct trace_array *tr)
5816 {
5817         if (tr->current_trace == &nop_trace)
5818                 return;
5819
5820         tr->current_trace->enabled--;
5821
5822         if (tr->current_trace->reset)
5823                 tr->current_trace->reset(tr);
5824
5825         tr->current_trace = &nop_trace;
5826 }
5827
5828 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5829 {
5830         /* Only enable if the directory has been created already. */
5831         if (!tr->dir)
5832                 return;
5833
5834         create_trace_option_files(tr, t);
5835 }
5836
5837 int tracing_set_tracer(struct trace_array *tr, const char *buf)
5838 {
5839         struct tracer *t;
5840 #ifdef CONFIG_TRACER_MAX_TRACE
5841         bool had_max_tr;
5842 #endif
5843         int ret = 0;
5844
5845         mutex_lock(&trace_types_lock);
5846
5847         if (!ring_buffer_expanded) {
5848                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5849                                                 RING_BUFFER_ALL_CPUS);
5850                 if (ret < 0)
5851                         goto out;
5852                 ret = 0;
5853         }
5854
5855         for (t = trace_types; t; t = t->next) {
5856                 if (strcmp(t->name, buf) == 0)
5857                         break;
5858         }
5859         if (!t) {
5860                 ret = -EINVAL;
5861                 goto out;
5862         }
5863         if (t == tr->current_trace)
5864                 goto out;
5865
5866 #ifdef CONFIG_TRACER_SNAPSHOT
5867         if (t->use_max_tr) {
5868                 arch_spin_lock(&tr->max_lock);
5869                 if (tr->cond_snapshot)
5870                         ret = -EBUSY;
5871                 arch_spin_unlock(&tr->max_lock);
5872                 if (ret)
5873                         goto out;
5874         }
5875 #endif
5876         /* Some tracers won't work when started from the kernel command line */
5877         if (system_state < SYSTEM_RUNNING && t->noboot) {
5878                 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
5879                         t->name);
5880                 goto out;
5881         }
5882
5883         /* Some tracers are only allowed for the top level buffer */
5884         if (!trace_ok_for_array(t, tr)) {
5885                 ret = -EINVAL;
5886                 goto out;
5887         }
5888
5889         /* If trace pipe files are being read, we can't change the tracer */
5890         if (tr->current_trace->ref) {
5891                 ret = -EBUSY;
5892                 goto out;
5893         }
5894
5895         trace_branch_disable();
5896
5897         tr->current_trace->enabled--;
5898
5899         if (tr->current_trace->reset)
5900                 tr->current_trace->reset(tr);
5901
5902         /* Current trace needs to be nop_trace before synchronize_rcu */
5903         tr->current_trace = &nop_trace;
5904
5905 #ifdef CONFIG_TRACER_MAX_TRACE
5906         had_max_tr = tr->allocated_snapshot;
5907
5908         if (had_max_tr && !t->use_max_tr) {
5909                 /*
5910                  * We need to make sure that update_max_tr() sees that
5911                  * current_trace changed to nop_trace to keep it from
5912                  * swapping the buffers after we resize it.
5913                  * update_max_tr() is called with interrupts disabled,
5914                  * so a synchronize_rcu() is sufficient.
5915                  */
5916                 synchronize_rcu();
5917                 free_snapshot(tr);
5918         }
5919 #endif
5920
5921 #ifdef CONFIG_TRACER_MAX_TRACE
5922         if (t->use_max_tr && !had_max_tr) {
5923                 ret = tracing_alloc_snapshot_instance(tr);
5924                 if (ret < 0)
5925                         goto out;
5926         }
5927 #endif
5928
5929         if (t->init) {
5930                 ret = tracer_init(t, tr);
5931                 if (ret)
5932                         goto out;
5933         }
5934
5935         tr->current_trace = t;
5936         tr->current_trace->enabled++;
5937         trace_branch_enable(tr);
5938  out:
5939         mutex_unlock(&trace_types_lock);
5940
5941         return ret;
5942 }
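/*
 * Illustrative only: a minimal user-space sketch, assuming tracefs is
 * mounted at /sys/kernel/tracing.  Writing a tracer name to
 * current_tracer ends up in tracing_set_tracer() above (via
 * tracing_set_trace_write() below); "nop" always exists and tears the
 * current tracer back down to the no-op tracer.
 *
 *        #include <fcntl.h>
 *        #include <unistd.h>
 *
 *        int main(void)
 *        {
 *                int fd = open("/sys/kernel/tracing/current_tracer", O_WRONLY);
 *
 *                if (fd < 0)
 *                        return 1;
 *                write(fd, "nop", 3);
 *                close(fd);
 *                return 0;
 *        }
 */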
5943
5944 static ssize_t
5945 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
5946                         size_t cnt, loff_t *ppos)
5947 {
5948         struct trace_array *tr = filp->private_data;
5949         char buf[MAX_TRACER_SIZE+1];
5950         int i;
5951         size_t ret;
5952         int err;
5953
5954         ret = cnt;
5955
5956         if (cnt > MAX_TRACER_SIZE)
5957                 cnt = MAX_TRACER_SIZE;
5958
5959         if (copy_from_user(buf, ubuf, cnt))
5960                 return -EFAULT;
5961
5962         buf[cnt] = 0;
5963
5964         /* strip trailing whitespace. */
5965         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
5966                 buf[i] = 0;
5967
5968         err = tracing_set_tracer(tr, buf);
5969         if (err)
5970                 return err;
5971
5972         *ppos += ret;
5973
5974         return ret;
5975 }
5976
5977 static ssize_t
5978 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
5979                    size_t cnt, loff_t *ppos)
5980 {
5981         char buf[64];
5982         int r;
5983
5984         r = snprintf(buf, sizeof(buf), "%ld\n",
5985                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
5986         if (r > sizeof(buf))
5987                 r = sizeof(buf);
5988         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5989 }
5990
5991 static ssize_t
5992 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
5993                     size_t cnt, loff_t *ppos)
5994 {
5995         unsigned long val;
5996         int ret;
5997
5998         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5999         if (ret)
6000                 return ret;
6001
6002         *ptr = val * 1000;
6003
6004         return cnt;
6005 }
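/*
 * Illustrative only: the two helpers above expose nanosecond variables
 * in microseconds.  Writing "500" through tracing_nsecs_write() stores
 * 500000 (ns), and tracing_nsecs_read() prints that back as "500".
 */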
6006
6007 static ssize_t
6008 tracing_thresh_read(struct file *filp, char __user *ubuf,
6009                     size_t cnt, loff_t *ppos)
6010 {
6011         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6012 }
6013
6014 static ssize_t
6015 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6016                      size_t cnt, loff_t *ppos)
6017 {
6018         struct trace_array *tr = filp->private_data;
6019         int ret;
6020
6021         mutex_lock(&trace_types_lock);
6022         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6023         if (ret < 0)
6024                 goto out;
6025
6026         if (tr->current_trace->update_thresh) {
6027                 ret = tr->current_trace->update_thresh(tr);
6028                 if (ret < 0)
6029                         goto out;
6030         }
6031
6032         ret = cnt;
6033 out:
6034         mutex_unlock(&trace_types_lock);
6035
6036         return ret;
6037 }
6038
6039 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6040
6041 static ssize_t
6042 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6043                      size_t cnt, loff_t *ppos)
6044 {
6045         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
6046 }
6047
6048 static ssize_t
6049 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6050                       size_t cnt, loff_t *ppos)
6051 {
6052         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
6053 }
6054
6055 #endif
6056
6057 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6058 {
6059         struct trace_array *tr = inode->i_private;
6060         struct trace_iterator *iter;
6061         int ret;
6062
6063         ret = tracing_check_open_get_tr(tr);
6064         if (ret)
6065                 return ret;
6066
6067         mutex_lock(&trace_types_lock);
6068
6069         /* create a buffer to store the information to pass to userspace */
6070         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6071         if (!iter) {
6072                 ret = -ENOMEM;
6073                 __trace_array_put(tr);
6074                 goto out;
6075         }
6076
6077         trace_seq_init(&iter->seq);
6078         iter->trace = tr->current_trace;
6079
6080         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6081                 ret = -ENOMEM;
6082                 goto fail;
6083         }
6084
6085         /* trace pipe does not show start of buffer */
6086         cpumask_setall(iter->started);
6087
6088         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6089                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
6090
6091         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6092         if (trace_clocks[tr->clock_id].in_ns)
6093                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6094
6095         iter->tr = tr;
6096         iter->array_buffer = &tr->array_buffer;
6097         iter->cpu_file = tracing_get_cpu(inode);
6098         mutex_init(&iter->mutex);
6099         filp->private_data = iter;
6100
6101         if (iter->trace->pipe_open)
6102                 iter->trace->pipe_open(iter);
6103
6104         nonseekable_open(inode, filp);
6105
6106         tr->current_trace->ref++;
6107 out:
6108         mutex_unlock(&trace_types_lock);
6109         return ret;
6110
6111 fail:
6112         kfree(iter);
6113         __trace_array_put(tr);
6114         mutex_unlock(&trace_types_lock);
6115         return ret;
6116 }
6117
6118 static int tracing_release_pipe(struct inode *inode, struct file *file)
6119 {
6120         struct trace_iterator *iter = file->private_data;
6121         struct trace_array *tr = inode->i_private;
6122
6123         mutex_lock(&trace_types_lock);
6124
6125         tr->current_trace->ref--;
6126
6127         if (iter->trace->pipe_close)
6128                 iter->trace->pipe_close(iter);
6129
6130         mutex_unlock(&trace_types_lock);
6131
6132         free_cpumask_var(iter->started);
6133         mutex_destroy(&iter->mutex);
6134         kfree(iter);
6135
6136         trace_array_put(tr);
6137
6138         return 0;
6139 }
6140
6141 static __poll_t
6142 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6143 {
6144         struct trace_array *tr = iter->tr;
6145
6146         /* Iterators are static, they should be filled or empty */
6147         if (trace_buffer_iter(iter, iter->cpu_file))
6148                 return EPOLLIN | EPOLLRDNORM;
6149
6150         if (tr->trace_flags & TRACE_ITER_BLOCK)
6151                 /*
6152                  * Always select as readable when in blocking mode
6153                  */
6154                 return EPOLLIN | EPOLLRDNORM;
6155         else
6156                 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6157                                              filp, poll_table);
6158 }
6159
6160 static __poll_t
6161 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6162 {
6163         struct trace_iterator *iter = filp->private_data;
6164
6165         return trace_poll(iter, filp, poll_table);
6166 }
6167
6168 /* Must be called with iter->mutex held. */
6169 static int tracing_wait_pipe(struct file *filp)
6170 {
6171         struct trace_iterator *iter = filp->private_data;
6172         int ret;
6173
6174         while (trace_empty(iter)) {
6175
6176                 if (filp->f_flags & O_NONBLOCK)
6177                         return -EAGAIN;
6179
6180                 /*
6181                  * We break out to give an EOF only when tracing is disabled
6182                  * and we have already read something. We still block if
6183                  * tracing is disabled but we have never read anything: this
6184                  * allows a user to cat this file, and then enable tracing.
6185                  * Once something has been read, disabling tracing gives an EOF.
6186                  *
6187                  * iter->pos will be 0 if we haven't read anything.
6188                  */
6189                 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6190                         break;
6191
6192                 mutex_unlock(&iter->mutex);
6193
6194                 ret = wait_on_pipe(iter, 0);
6195
6196                 mutex_lock(&iter->mutex);
6197
6198                 if (ret)
6199                         return ret;
6200         }
6201
6202         return 1;
6203 }
6204
6205 /*
6206  * Consumer reader.
6207  */
6208 static ssize_t
6209 tracing_read_pipe(struct file *filp, char __user *ubuf,
6210                   size_t cnt, loff_t *ppos)
6211 {
6212         struct trace_iterator *iter = filp->private_data;
6213         ssize_t sret;
6214
6215         /*
6216          * Avoid more than one consumer on a single file descriptor.
6217          * This is just a matter of trace coherency; the ring buffer itself
6218          * is protected.
6219          */
6220         mutex_lock(&iter->mutex);
6221
6222         /* return any leftover data */
6223         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6224         if (sret != -EBUSY)
6225                 goto out;
6226
6227         trace_seq_init(&iter->seq);
6228
6229         if (iter->trace->read) {
6230                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6231                 if (sret)
6232                         goto out;
6233         }
6234
6235 waitagain:
6236         sret = tracing_wait_pipe(filp);
6237         if (sret <= 0)
6238                 goto out;
6239
6240         /* stop when tracing is finished */
6241         if (trace_empty(iter)) {
6242                 sret = 0;
6243                 goto out;
6244         }
6245
6246         if (cnt >= PAGE_SIZE)
6247                 cnt = PAGE_SIZE - 1;
6248
6249         /* reset all but tr, trace, and overruns */
6250         memset(&iter->seq, 0,
6251                sizeof(struct trace_iterator) -
6252                offsetof(struct trace_iterator, seq));
6253         cpumask_clear(iter->started);
6254         trace_seq_init(&iter->seq);
6255         iter->pos = -1;
6256
6257         trace_event_read_lock();
6258         trace_access_lock(iter->cpu_file);
6259         while (trace_find_next_entry_inc(iter) != NULL) {
6260                 enum print_line_t ret;
6261                 int save_len = iter->seq.seq.len;
6262
6263                 ret = print_trace_line(iter);
6264                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6265                         /* don't print partial lines */
6266                         iter->seq.seq.len = save_len;
6267                         break;
6268                 }
6269                 if (ret != TRACE_TYPE_NO_CONSUME)
6270                         trace_consume(iter);
6271
6272                 if (trace_seq_used(&iter->seq) >= cnt)
6273                         break;
6274
6275                 /*
6276                  * Setting the full flag means we reached the trace_seq buffer
6277                  * size and we should have left via the partial-line condition above.
6278                  * One of the trace_seq_* functions is not being used properly.
6279                  */
6280                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6281                           iter->ent->type);
6282         }
6283         trace_access_unlock(iter->cpu_file);
6284         trace_event_read_unlock();
6285
6286         /* Now copy what we have to the user */
6287         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6288         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6289                 trace_seq_init(&iter->seq);
6290
6291         /*
6292          * If there was nothing to send to user, in spite of consuming trace
6293          * entries, go back to wait for more entries.
6294          */
6295         if (sret == -EBUSY)
6296                 goto waitagain;
6297
6298 out:
6299         mutex_unlock(&iter->mutex);
6300
6301         return sret;
6302 }
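/*
 * Illustrative only: a minimal user-space consumer of trace_pipe,
 * assuming tracefs is mounted at /sys/kernel/tracing.  Each read
 * consumes events from the ring buffer and blocks (per
 * tracing_wait_pipe() above) until data arrives, unless the file is
 * opened O_NONBLOCK.
 *
 *        #include <fcntl.h>
 *        #include <stdio.h>
 *        #include <unistd.h>
 *
 *        int main(void)
 *        {
 *                char buf[4096];
 *                ssize_t n;
 *                int fd = open("/sys/kernel/tracing/trace_pipe", O_RDONLY);
 *
 *                if (fd < 0)
 *                        return 1;
 *                while ((n = read(fd, buf, sizeof(buf))) > 0)
 *                        fwrite(buf, 1, n, stdout);   // each read consumes entries
 *                close(fd);
 *                return 0;
 *        }
 */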
6303
6304 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6305                                      unsigned int idx)
6306 {
6307         __free_page(spd->pages[idx]);
6308 }
6309
6310 static size_t
6311 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6312 {
6313         size_t count;
6314         int save_len;
6315         int ret;
6316
6317         /* Seq buffer is page-sized, exactly what we need. */
6318         for (;;) {
6319                 save_len = iter->seq.seq.len;
6320                 ret = print_trace_line(iter);
6321
6322                 if (trace_seq_has_overflowed(&iter->seq)) {
6323                         iter->seq.seq.len = save_len;
6324                         break;
6325                 }
6326
6327                 /*
6328                  * This should not be hit, because TRACE_TYPE_PARTIAL_LINE
6329                  * should only be returned if iter->seq overflowed. But check
6330                  * it anyway to be safe.
6331                  */
6332                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6333                         iter->seq.seq.len = save_len;
6334                         break;
6335                 }
6336
6337                 count = trace_seq_used(&iter->seq) - save_len;
6338                 if (rem < count) {
6339                         rem = 0;
6340                         iter->seq.seq.len = save_len;
6341                         break;
6342                 }
6343
6344                 if (ret != TRACE_TYPE_NO_CONSUME)
6345                         trace_consume(iter);
6346                 rem -= count;
6347                 if (!trace_find_next_entry_inc(iter))   {
6348                         rem = 0;
6349                         iter->ent = NULL;
6350                         break;
6351                 }
6352         }
6353
6354         return rem;
6355 }
6356
6357 static ssize_t tracing_splice_read_pipe(struct file *filp,
6358                                         loff_t *ppos,
6359                                         struct pipe_inode_info *pipe,
6360                                         size_t len,
6361                                         unsigned int flags)
6362 {
6363         struct page *pages_def[PIPE_DEF_BUFFERS];
6364         struct partial_page partial_def[PIPE_DEF_BUFFERS];
6365         struct trace_iterator *iter = filp->private_data;
6366         struct splice_pipe_desc spd = {
6367                 .pages          = pages_def,
6368                 .partial        = partial_def,
6369                 .nr_pages       = 0, /* This gets updated below. */
6370                 .nr_pages_max   = PIPE_DEF_BUFFERS,
6371                 .ops            = &default_pipe_buf_ops,
6372                 .spd_release    = tracing_spd_release_pipe,
6373         };
6374         ssize_t ret;
6375         size_t rem;
6376         unsigned int i;
6377
6378         if (splice_grow_spd(pipe, &spd))
6379                 return -ENOMEM;
6380
6381         mutex_lock(&iter->mutex);
6382
6383         if (iter->trace->splice_read) {
6384                 ret = iter->trace->splice_read(iter, filp,
6385                                                ppos, pipe, len, flags);
6386                 if (ret)
6387                         goto out_err;
6388         }
6389
6390         ret = tracing_wait_pipe(filp);
6391         if (ret <= 0)
6392                 goto out_err;
6393
6394         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6395                 ret = -EFAULT;
6396                 goto out_err;
6397         }
6398
6399         trace_event_read_lock();
6400         trace_access_lock(iter->cpu_file);
6401
6402         /* Fill as many pages as possible. */
6403         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6404                 spd.pages[i] = alloc_page(GFP_KERNEL);
6405                 if (!spd.pages[i])
6406                         break;
6407
6408                 rem = tracing_fill_pipe_page(rem, iter);
6409
6410                 /* Copy the data into the page, so we can start over. */
6411                 ret = trace_seq_to_buffer(&iter->seq,
6412                                           page_address(spd.pages[i]),
6413                                           trace_seq_used(&iter->seq));
6414                 if (ret < 0) {
6415                         __free_page(spd.pages[i]);
6416                         break;
6417                 }
6418                 spd.partial[i].offset = 0;
6419                 spd.partial[i].len = trace_seq_used(&iter->seq);
6420
6421                 trace_seq_init(&iter->seq);
6422         }
6423
6424         trace_access_unlock(iter->cpu_file);
6425         trace_event_read_unlock();
6426         mutex_unlock(&iter->mutex);
6427
6428         spd.nr_pages = i;
6429
6430         if (i)
6431                 ret = splice_to_pipe(pipe, &spd);
6432         else
6433                 ret = 0;
6434 out:
6435         splice_shrink_spd(&spd);
6436         return ret;
6437
6438 out_err:
6439         mutex_unlock(&iter->mutex);
6440         goto out;
6441 }
6442
6443 static ssize_t
6444 tracing_entries_read(struct file *filp, char __user *ubuf,
6445                      size_t cnt, loff_t *ppos)
6446 {
6447         struct inode *inode = file_inode(filp);
6448         struct trace_array *tr = inode->i_private;
6449         int cpu = tracing_get_cpu(inode);
6450         char buf[64];
6451         int r = 0;
6452         ssize_t ret;
6453
6454         mutex_lock(&trace_types_lock);
6455
6456         if (cpu == RING_BUFFER_ALL_CPUS) {
6457                 int cpu, buf_size_same;
6458                 unsigned long size;
6459
6460                 size = 0;
6461                 buf_size_same = 1;
6462                 /* check if all per-CPU buffer sizes are the same */
6463                 for_each_tracing_cpu(cpu) {
6464                         /* fill in the size from the first enabled CPU */
6465                         if (size == 0)
6466                                 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6467                         if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6468                                 buf_size_same = 0;
6469                                 break;
6470                         }
6471                 }
6472
6473                 if (buf_size_same) {
6474                         if (!ring_buffer_expanded)
6475                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
6476                                             size >> 10,
6477                                             trace_buf_size >> 10);
6478                         else
6479                                 r = sprintf(buf, "%lu\n", size >> 10);
6480                 } else
6481                         r = sprintf(buf, "X\n");
6482         } else
6483                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6484
6485         mutex_unlock(&trace_types_lock);
6486
6487         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6488         return ret;
6489 }
6490
6491 static ssize_t
6492 tracing_entries_write(struct file *filp, const char __user *ubuf,
6493                       size_t cnt, loff_t *ppos)
6494 {
6495         struct inode *inode = file_inode(filp);
6496         struct trace_array *tr = inode->i_private;
6497         unsigned long val;
6498         int ret;
6499
6500         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6501         if (ret)
6502                 return ret;
6503
6504         /* must have at least 1 entry */
6505         if (!val)
6506                 return -EINVAL;
6507
6508         /* value is in KB */
6509         val <<= 10;
6510         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6511         if (ret < 0)
6512                 return ret;
6513
6514         *ppos += cnt;
6515
6516         return cnt;
6517 }
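/*
 * The read/write pair above backs the "buffer_size_kb" file (and its
 * per-CPU variants).  The written value is interpreted as KiB, per the
 * "val <<= 10" conversion above.  A minimal user-space sketch of
 * resizing the buffer, assuming tracefs is mounted at
 * /sys/kernel/tracing (the path, the helper name and the value passed
 * in are illustrative only):
 *
 *	#include <fcntl.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *
 *	int resize_trace_buffer(const char *kb)	// e.g. "4096"
 *	{
 *		int fd = open("/sys/kernel/tracing/buffer_size_kb", O_WRONLY);
 *		ssize_t n;
 *
 *		if (fd < 0)
 *			return -1;
 *		n = write(fd, kb, strlen(kb));
 *		close(fd);
 *		return n < 0 ? -1 : 0;
 *	}
 *
 * Reading the file back reports the current size in KiB, with
 * "(expanded: N)" while the ring buffer has not been expanded yet and
 * "X" when the per-CPU sizes differ (see tracing_entries_read() above).
 */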
6518
6519 static ssize_t
6520 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6521                                 size_t cnt, loff_t *ppos)
6522 {
6523         struct trace_array *tr = filp->private_data;
6524         char buf[64];
6525         int r, cpu;
6526         unsigned long size = 0, expanded_size = 0;
6527
6528         mutex_lock(&trace_types_lock);
6529         for_each_tracing_cpu(cpu) {
6530                 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
6531                 if (!ring_buffer_expanded)
6532                         expanded_size += trace_buf_size >> 10;
6533         }
6534         if (ring_buffer_expanded)
6535                 r = sprintf(buf, "%lu\n", size);
6536         else
6537                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6538         mutex_unlock(&trace_types_lock);
6539
6540         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6541 }
6542
6543 static ssize_t
6544 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6545                           size_t cnt, loff_t *ppos)
6546 {
6547         /*
6548          * There is no need to read what the user has written; this function
6549          * only exists so that writing to the file (e.g. with "echo") does not fail.
6550          */
6551
6552         *ppos += cnt;
6553
6554         return cnt;
6555 }
6556
6557 static int
6558 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6559 {
6560         struct trace_array *tr = inode->i_private;
6561
6562         /* disable tracing? */
6563         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6564                 tracer_tracing_off(tr);
6565         /* resize the ring buffer to 0 */
6566         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6567
6568         trace_array_put(tr);
6569
6570         return 0;
6571 }
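/*
 * The handlers above back the "free_buffer" file.  The write itself is
 * deliberately a no-op; the ring buffer is only shrunk to zero when the
 * file is released (closed), and tracing is turned off first if the
 * TRACE_ITER_STOP_ON_FREE option is set.  So in a sketch like
 *
 *	echo 1 > /sys/kernel/tracing/free_buffer
 *
 * (the path assumes the usual tracefs mount point) the actual freeing
 * happens when the shell closes the file descriptor after the echo,
 * not when the bytes are written.
 */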
6572
6573 static ssize_t
6574 tracing_mark_write(struct file *filp, const char __user *ubuf,
6575                                         size_t cnt, loff_t *fpos)
6576 {
6577         struct trace_array *tr = filp->private_data;
6578         struct ring_buffer_event *event;
6579         enum event_trigger_type tt = ETT_NONE;
6580         struct trace_buffer *buffer;
6581         struct print_entry *entry;
6582         unsigned long irq_flags;
6583         ssize_t written;
6584         int size;
6585         int len;
6586
6587 /* Used in tracing_mark_raw_write() as well */
6588 #define FAULTED_STR "<faulted>"
6589 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
6590
6591         if (tracing_disabled)
6592                 return -EINVAL;
6593
6594         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6595                 return -EINVAL;
6596
6597         if (cnt > TRACE_BUF_SIZE)
6598                 cnt = TRACE_BUF_SIZE;
6599
6600         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6601
6602         local_save_flags(irq_flags);
6603         size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
6604
6605         /* If shorter than "<faulted>", make sure we can still store that string */
6606         if (cnt < FAULTED_SIZE)
6607                 size += FAULTED_SIZE - cnt;
6608
6609         buffer = tr->array_buffer.buffer;
6610         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6611                                             irq_flags, preempt_count());
6612         if (unlikely(!event))
6613                 /* Ring buffer disabled, return as if not open for write */
6614                 return -EBADF;
6615
6616         entry = ring_buffer_event_data(event);
6617         entry->ip = _THIS_IP_;
6618
6619         len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6620         if (len) {
6621                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6622                 cnt = FAULTED_SIZE;
6623                 written = -EFAULT;
6624         } else
6625                 written = cnt;
6626         len = cnt;
6627
6628         if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
6629                 /* do not add \n before testing triggers, but add \0 */
6630                 entry->buf[cnt] = '\0';
6631                 tt = event_triggers_call(tr->trace_marker_file, entry, event);
6632         }
6633
6634         if (entry->buf[cnt - 1] != '\n') {
6635                 entry->buf[cnt] = '\n';
6636                 entry->buf[cnt + 1] = '\0';
6637         } else
6638                 entry->buf[cnt] = '\0';
6639
6640         __buffer_unlock_commit(buffer, event);
6641
6642         if (tt)
6643                 event_triggers_post_call(tr->trace_marker_file, tt);
6644
6645         if (written > 0)
6646                 *fpos += written;
6647
6648         return written;
6649 }
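/*
 * tracing_mark_write() backs the "trace_marker" file: whatever a task
 * writes is recorded as a TRACE_PRINT event, with a trailing newline
 * added if missing and "<faulted>" substituted if the copy from user
 * space faults.  A minimal user-space sketch, assuming tracefs is
 * mounted at /sys/kernel/tracing (mark_trace() is an illustrative
 * helper, not an existing API):
 *
 *	#include <fcntl.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *
 *	void mark_trace(const char *msg)
 *	{
 *		int fd = open("/sys/kernel/tracing/trace_marker", O_WRONLY);
 *
 *		if (fd < 0)
 *			return;
 *		write(fd, msg, strlen(msg));	// appears in the trace output
 *		close(fd);
 *	}
 *
 * Keeping the fd open and writing repeatedly is the usual way to
 * annotate application events against the kernel trace.
 */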
6650
6651 /* Limit it for now to 3K (including tag) */
6652 #define RAW_DATA_MAX_SIZE (1024*3)
6653
6654 static ssize_t
6655 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6656                                         size_t cnt, loff_t *fpos)
6657 {
6658         struct trace_array *tr = filp->private_data;
6659         struct ring_buffer_event *event;
6660         struct trace_buffer *buffer;
6661         struct raw_data_entry *entry;
6662         unsigned long irq_flags;
6663         ssize_t written;
6664         int size;
6665         int len;
6666
6667 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6668
6669         if (tracing_disabled)
6670                 return -EINVAL;
6671
6672         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6673                 return -EINVAL;
6674
6675         /* The marker must at least have a tag id */
6676         if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
6677                 return -EINVAL;
6678
6679         if (cnt > TRACE_BUF_SIZE)
6680                 cnt = TRACE_BUF_SIZE;
6681
6682         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6683
6684         local_save_flags(irq_flags);
6685         size = sizeof(*entry) + cnt;
6686         if (cnt < FAULT_SIZE_ID)
6687                 size += FAULT_SIZE_ID - cnt;
6688
6689         buffer = tr->array_buffer.buffer;
6690         event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
6691                                             irq_flags, preempt_count());
6692         if (!event)
6693                 /* Ring buffer disabled, return as if not open for write */
6694                 return -EBADF;
6695
6696         entry = ring_buffer_event_data(event);
6697
6698         len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
6699         if (len) {
6700                 entry->id = -1;
6701                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6702                 written = -EFAULT;
6703         } else
6704                 written = cnt;
6705
6706         __buffer_unlock_commit(buffer, event);
6707
6708         if (written > 0)
6709                 *fpos += written;
6710
6711         return written;
6712 }
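/*
 * tracing_mark_raw_write() backs the "trace_marker_raw" file.  The
 * payload is binary: it must start with an unsigned int tag id and may
 * be at most RAW_DATA_MAX_SIZE bytes in total.  A minimal user-space
 * sketch, assuming tracefs at /sys/kernel/tracing (the tag value 42 and
 * the helper name are illustrative):
 *
 *	#include <fcntl.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *
 *	struct raw_marker {
 *		unsigned int	id;
 *		char		data[32];
 *	};
 *
 *	void mark_raw(const char *payload)
 *	{
 *		struct raw_marker m = { .id = 42 };
 *		int fd = open("/sys/kernel/tracing/trace_marker_raw", O_WRONLY);
 *
 *		if (fd < 0)
 *			return;
 *		strncpy(m.data, payload, sizeof(m.data) - 1);
 *		write(fd, &m, sizeof(m.id) + strlen(m.data));
 *		close(fd);
 *	}
 *
 * The raw event is meant to be decoded by a tool that knows what the
 * tag id means; the plain-text trace only shows it as raw data.
 */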
6713
6714 static int tracing_clock_show(struct seq_file *m, void *v)
6715 {
6716         struct trace_array *tr = m->private;
6717         int i;
6718
6719         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
6720                 seq_printf(m,
6721                         "%s%s%s%s", i ? " " : "",
6722                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
6723                         i == tr->clock_id ? "]" : "");
6724         seq_putc(m, '\n');
6725
6726         return 0;
6727 }
6728
6729 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
6730 {
6731         int i;
6732
6733         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
6734                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
6735                         break;
6736         }
6737         if (i == ARRAY_SIZE(trace_clocks))
6738                 return -EINVAL;
6739
6740         mutex_lock(&trace_types_lock);
6741
6742         tr->clock_id = i;
6743
6744         ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
6745
6746         /*
6747          * The new clock may not be consistent with the previous clock.
6748          * Reset the buffer so that it doesn't contain incomparable timestamps.
6749          */
6750         tracing_reset_online_cpus(&tr->array_buffer);
6751
6752 #ifdef CONFIG_TRACER_MAX_TRACE
6753         if (tr->max_buffer.buffer)
6754                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
6755         tracing_reset_online_cpus(&tr->max_buffer);
6756 #endif
6757
6758         mutex_unlock(&trace_types_lock);
6759
6760         return 0;
6761 }
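/*
 * tracing_set_clock() is what a write to the "trace_clock" file ends up
 * calling (via tracing_clock_write() below).  Reading the file lists
 * the available clocks with the active one in brackets, and writing a
 * clock name switches to it, resetting the buffers as noted above.  An
 * illustrative session, assuming tracefs at /sys/kernel/tracing:
 *
 *	# cat /sys/kernel/tracing/trace_clock
 *	[local] global counter uptime perf mono mono_raw boot
 *	# echo global > /sys/kernel/tracing/trace_clock
 *
 * The exact list depends on the trace_clocks[] table of the running
 * kernel (architectures may append their own entries).
 */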
6762
6763 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6764                                    size_t cnt, loff_t *fpos)
6765 {
6766         struct seq_file *m = filp->private_data;
6767         struct trace_array *tr = m->private;
6768         char buf[64];
6769         const char *clockstr;
6770         int ret;
6771
6772         if (cnt >= sizeof(buf))
6773                 return -EINVAL;
6774
6775         if (copy_from_user(buf, ubuf, cnt))
6776                 return -EFAULT;
6777
6778         buf[cnt] = 0;
6779
6780         clockstr = strstrip(buf);
6781
6782         ret = tracing_set_clock(tr, clockstr);
6783         if (ret)
6784                 return ret;
6785
6786         *fpos += cnt;
6787
6788         return cnt;
6789 }
6790
6791 static int tracing_clock_open(struct inode *inode, struct file *file)
6792 {
6793         struct trace_array *tr = inode->i_private;
6794         int ret;
6795
6796         ret = tracing_check_open_get_tr(tr);
6797         if (ret)
6798                 return ret;
6799
6800         ret = single_open(file, tracing_clock_show, inode->i_private);
6801         if (ret < 0)
6802                 trace_array_put(tr);
6803
6804         return ret;
6805 }
6806
6807 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
6808 {
6809         struct trace_array *tr = m->private;
6810
6811         mutex_lock(&trace_types_lock);
6812
6813         if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
6814                 seq_puts(m, "delta [absolute]\n");
6815         else
6816                 seq_puts(m, "[delta] absolute\n");
6817
6818         mutex_unlock(&trace_types_lock);
6819
6820         return 0;
6821 }
6822
6823 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
6824 {
6825         struct trace_array *tr = inode->i_private;
6826         int ret;
6827
6828         ret = tracing_check_open_get_tr(tr);
6829         if (ret)
6830                 return ret;
6831
6832         ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
6833         if (ret < 0)
6834                 trace_array_put(tr);
6835
6836         return ret;
6837 }
6838
6839 int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs)
6840 {
6841         int ret = 0;
6842
6843         mutex_lock(&trace_types_lock);
6844
6845         if (abs && tr->time_stamp_abs_ref++)
6846                 goto out;
6847
6848         if (!abs) {
6849                 if (WARN_ON_ONCE(!tr->time_stamp_abs_ref)) {
6850                         ret = -EINVAL;
6851                         goto out;
6852                 }
6853
6854                 if (--tr->time_stamp_abs_ref)
6855                         goto out;
6856         }
6857
6858         ring_buffer_set_time_stamp_abs(tr->array_buffer.buffer, abs);
6859
6860 #ifdef CONFIG_TRACER_MAX_TRACE
6861         if (tr->max_buffer.buffer)
6862                 ring_buffer_set_time_stamp_abs(tr->max_buffer.buffer, abs);
6863 #endif
6864  out:
6865         mutex_unlock(&trace_types_lock);
6866
6867         return ret;
6868 }
6869
6870 struct ftrace_buffer_info {
6871         struct trace_iterator   iter;
6872         void                    *spare;
6873         unsigned int            spare_cpu;
6874         unsigned int            read;
6875 };
6876
6877 #ifdef CONFIG_TRACER_SNAPSHOT
6878 static int tracing_snapshot_open(struct inode *inode, struct file *file)
6879 {
6880         struct trace_array *tr = inode->i_private;
6881         struct trace_iterator *iter;
6882         struct seq_file *m;
6883         int ret;
6884
6885         ret = tracing_check_open_get_tr(tr);
6886         if (ret)
6887                 return ret;
6888
6889         if (file->f_mode & FMODE_READ) {
6890                 iter = __tracing_open(inode, file, true);
6891                 if (IS_ERR(iter))
6892                         ret = PTR_ERR(iter);
6893         } else {
6894                 /* Writes still need the seq_file to hold the private data */
6895                 ret = -ENOMEM;
6896                 m = kzalloc(sizeof(*m), GFP_KERNEL);
6897                 if (!m)
6898                         goto out;
6899                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6900                 if (!iter) {
6901                         kfree(m);
6902                         goto out;
6903                 }
6904                 ret = 0;
6905
6906                 iter->tr = tr;
6907                 iter->array_buffer = &tr->max_buffer;
6908                 iter->cpu_file = tracing_get_cpu(inode);
6909                 m->private = iter;
6910                 file->private_data = m;
6911         }
6912 out:
6913         if (ret < 0)
6914                 trace_array_put(tr);
6915
6916         return ret;
6917 }
6918
6919 static ssize_t
6920 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
6921                        loff_t *ppos)
6922 {
6923         struct seq_file *m = filp->private_data;
6924         struct trace_iterator *iter = m->private;
6925         struct trace_array *tr = iter->tr;
6926         unsigned long val;
6927         int ret;
6928
6929         ret = tracing_update_buffers();
6930         if (ret < 0)
6931                 return ret;
6932
6933         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6934         if (ret)
6935                 return ret;
6936
6937         mutex_lock(&trace_types_lock);
6938
6939         if (tr->current_trace->use_max_tr) {
6940                 ret = -EBUSY;
6941                 goto out;
6942         }
6943
6944         arch_spin_lock(&tr->max_lock);
6945         if (tr->cond_snapshot)
6946                 ret = -EBUSY;
6947         arch_spin_unlock(&tr->max_lock);
6948         if (ret)
6949                 goto out;
6950
6951         switch (val) {
6952         case 0:
6953                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6954                         ret = -EINVAL;
6955                         break;
6956                 }
6957                 if (tr->allocated_snapshot)
6958                         free_snapshot(tr);
6959                 break;
6960         case 1:
6961 /* Only allow per-cpu swap if the ring buffer supports it */
6962 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
6963                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6964                         ret = -EINVAL;
6965                         break;
6966                 }
6967 #endif
6968                 if (tr->allocated_snapshot)
6969                         ret = resize_buffer_duplicate_size(&tr->max_buffer,
6970                                         &tr->array_buffer, iter->cpu_file);
6971                 else
6972                         ret = tracing_alloc_snapshot_instance(tr);
6973                 if (ret < 0)
6974                         break;
6975                 local_irq_disable();
6976                 /* Now, we're going to swap */
6977                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6978                         update_max_tr(tr, current, smp_processor_id(), NULL);
6979                 else
6980                         update_max_tr_single(tr, current, iter->cpu_file);
6981                 local_irq_enable();
6982                 break;
6983         default:
6984                 if (tr->allocated_snapshot) {
6985                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6986                                 tracing_reset_online_cpus(&tr->max_buffer);
6987                         else
6988                                 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
6989                 }
6990                 break;
6991         }
6992
6993         if (ret >= 0) {
6994                 *ppos += cnt;
6995                 ret = cnt;
6996         }
6997 out:
6998         mutex_unlock(&trace_types_lock);
6999         return ret;
7000 }
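/*
 * The switch statement above defines what the "snapshot" file accepts:
 * writing 0 frees the snapshot buffer (only valid on the all-CPUs
 * file), writing 1 allocates it if needed and swaps it with the live
 * buffer, and any other value clears the snapshot contents without
 * freeing it.  A minimal sketch, assuming tracefs at
 * /sys/kernel/tracing and CONFIG_TRACER_SNAPSHOT=y:
 *
 *	echo 1 > /sys/kernel/tracing/snapshot	# take a snapshot
 *	cat /sys/kernel/tracing/snapshot	# read the frozen copy
 *	echo 0 > /sys/kernel/tracing/snapshot	# free the snapshot buffer
 */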
7001
7002 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7003 {
7004         struct seq_file *m = file->private_data;
7005         int ret;
7006
7007         ret = tracing_release(inode, file);
7008
7009         if (file->f_mode & FMODE_READ)
7010                 return ret;
7011
7012         /* If write only, the seq_file is just a stub */
7013         if (m)
7014                 kfree(m->private);
7015         kfree(m);
7016
7017         return 0;
7018 }
7019
7020 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7021 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7022                                     size_t count, loff_t *ppos);
7023 static int tracing_buffers_release(struct inode *inode, struct file *file);
7024 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7025                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7026
7027 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7028 {
7029         struct ftrace_buffer_info *info;
7030         int ret;
7031
7032         /* The following checks for tracefs lockdown */
7033         ret = tracing_buffers_open(inode, filp);
7034         if (ret < 0)
7035                 return ret;
7036
7037         info = filp->private_data;
7038
7039         if (info->iter.trace->use_max_tr) {
7040                 tracing_buffers_release(inode, filp);
7041                 return -EBUSY;
7042         }
7043
7044         info->iter.snapshot = true;
7045         info->iter.array_buffer = &info->iter.tr->max_buffer;
7046
7047         return ret;
7048 }
7049
7050 #endif /* CONFIG_TRACER_SNAPSHOT */
7051
7052
7053 static const struct file_operations tracing_thresh_fops = {
7054         .open           = tracing_open_generic,
7055         .read           = tracing_thresh_read,
7056         .write          = tracing_thresh_write,
7057         .llseek         = generic_file_llseek,
7058 };
7059
7060 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7061 static const struct file_operations tracing_max_lat_fops = {
7062         .open           = tracing_open_generic,
7063         .read           = tracing_max_lat_read,
7064         .write          = tracing_max_lat_write,
7065         .llseek         = generic_file_llseek,
7066 };
7067 #endif
7068
7069 static const struct file_operations set_tracer_fops = {
7070         .open           = tracing_open_generic,
7071         .read           = tracing_set_trace_read,
7072         .write          = tracing_set_trace_write,
7073         .llseek         = generic_file_llseek,
7074 };
7075
7076 static const struct file_operations tracing_pipe_fops = {
7077         .open           = tracing_open_pipe,
7078         .poll           = tracing_poll_pipe,
7079         .read           = tracing_read_pipe,
7080         .splice_read    = tracing_splice_read_pipe,
7081         .release        = tracing_release_pipe,
7082         .llseek         = no_llseek,
7083 };
7084
7085 static const struct file_operations tracing_entries_fops = {
7086         .open           = tracing_open_generic_tr,
7087         .read           = tracing_entries_read,
7088         .write          = tracing_entries_write,
7089         .llseek         = generic_file_llseek,
7090         .release        = tracing_release_generic_tr,
7091 };
7092
7093 static const struct file_operations tracing_total_entries_fops = {
7094         .open           = tracing_open_generic_tr,
7095         .read           = tracing_total_entries_read,
7096         .llseek         = generic_file_llseek,
7097         .release        = tracing_release_generic_tr,
7098 };
7099
7100 static const struct file_operations tracing_free_buffer_fops = {
7101         .open           = tracing_open_generic_tr,
7102         .write          = tracing_free_buffer_write,
7103         .release        = tracing_free_buffer_release,
7104 };
7105
7106 static const struct file_operations tracing_mark_fops = {
7107         .open           = tracing_open_generic_tr,
7108         .write          = tracing_mark_write,
7109         .llseek         = generic_file_llseek,
7110         .release        = tracing_release_generic_tr,
7111 };
7112
7113 static const struct file_operations tracing_mark_raw_fops = {
7114         .open           = tracing_open_generic_tr,
7115         .write          = tracing_mark_raw_write,
7116         .llseek         = generic_file_llseek,
7117         .release        = tracing_release_generic_tr,
7118 };
7119
7120 static const struct file_operations trace_clock_fops = {
7121         .open           = tracing_clock_open,
7122         .read           = seq_read,
7123         .llseek         = seq_lseek,
7124         .release        = tracing_single_release_tr,
7125         .write          = tracing_clock_write,
7126 };
7127
7128 static const struct file_operations trace_time_stamp_mode_fops = {
7129         .open           = tracing_time_stamp_mode_open,
7130         .read           = seq_read,
7131         .llseek         = seq_lseek,
7132         .release        = tracing_single_release_tr,
7133 };
7134
7135 #ifdef CONFIG_TRACER_SNAPSHOT
7136 static const struct file_operations snapshot_fops = {
7137         .open           = tracing_snapshot_open,
7138         .read           = seq_read,
7139         .write          = tracing_snapshot_write,
7140         .llseek         = tracing_lseek,
7141         .release        = tracing_snapshot_release,
7142 };
7143
7144 static const struct file_operations snapshot_raw_fops = {
7145         .open           = snapshot_raw_open,
7146         .read           = tracing_buffers_read,
7147         .release        = tracing_buffers_release,
7148         .splice_read    = tracing_buffers_splice_read,
7149         .llseek         = no_llseek,
7150 };
7151
7152 #endif /* CONFIG_TRACER_SNAPSHOT */
7153
7154 #define TRACING_LOG_ERRS_MAX    8
7155 #define TRACING_LOG_LOC_MAX     128
7156
7157 #define CMD_PREFIX "  Command: "
7158
7159 struct err_info {
7160         const char      **errs; /* ptr to loc-specific array of err strings */
7161         u8              type;   /* index into errs -> specific err string */
7162         u8              pos;    /* MAX_FILTER_STR_VAL = 256 */
7163         u64             ts;
7164 };
7165
7166 struct tracing_log_err {
7167         struct list_head        list;
7168         struct err_info         info;
7169         char                    loc[TRACING_LOG_LOC_MAX]; /* err location */
7170         char                    cmd[MAX_FILTER_STR_VAL]; /* what caused err */
7171 };
7172
7173 static DEFINE_MUTEX(tracing_err_log_lock);
7174
7175 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
7176 {
7177         struct tracing_log_err *err;
7178
7179         if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7180                 err = kzalloc(sizeof(*err), GFP_KERNEL);
7181                 if (!err)
7182                         err = ERR_PTR(-ENOMEM);
7183                 tr->n_err_log_entries++;
7184
7185                 return err;
7186         }
7187
7188         err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7189         list_del(&err->list);
7190
7191         return err;
7192 }
7193
7194 /**
7195  * err_pos - find the position of a string within a command for error careting
7196  * @cmd: The tracing command that caused the error
7197  * @str: The string to position the caret at within @cmd
7198  *
7199  * Finds the position of the first occurrence of @str within @cmd.  The
7200  * return value can be passed to tracing_log_err() for caret placement
7201  * within @cmd.
7202  *
7203  * Returns the index within @cmd of the first occurrence of @str or 0
7204  * if @str was not found.
7205  */
7206 unsigned int err_pos(char *cmd, const char *str)
7207 {
7208         char *found;
7209
7210         if (WARN_ON(!strlen(cmd)))
7211                 return 0;
7212
7213         found = strstr(cmd, str);
7214         if (found)
7215                 return found - cmd;
7216
7217         return 0;
7218 }
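/*
 * For example, with a hypothetical command string,
 * err_pos("hist:keys=foo", "foo") returns 10, which tracing_log_err()
 * below then uses to place the caret under "foo" in the logged command.
 */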
7219
7220 /**
7221  * tracing_log_err - write an error to the tracing error log
7222  * @tr: The associated trace array for the error (NULL for top level array)
7223  * @loc: A string describing where the error occurred
7224  * @cmd: The tracing command that caused the error
7225  * @errs: The array of loc-specific static error strings
7226  * @type: The index into errs[], which produces the specific static err string
7227  * @pos: The position the caret should be placed in the cmd
7228  *
7229  * Writes an error into tracing/error_log of the form:
7230  *
7231  * <loc>: error: <text>
7232  *   Command: <cmd>
7233  *              ^
7234  *
7235  * tracing/error_log is a small log file containing the last
7236  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7237  * unless there has been a tracing error, and the error log can be
7238  * cleared and have its memory freed by writing the empty string in
7239  * truncation mode to it, i.e. "echo > tracing/error_log".
7240  *
7241  * NOTE: the @errs array along with the @type param are used to
7242  * produce a static error string - this string is not copied and saved
7243  * when the error is logged - only a pointer to it is saved.  See
7244  * existing callers for examples of how static strings are typically
7245  * defined for use with tracing_log_err().
7246  */
7247 void tracing_log_err(struct trace_array *tr,
7248                      const char *loc, const char *cmd,
7249                      const char **errs, u8 type, u8 pos)
7250 {
7251         struct tracing_log_err *err;
7252
7253         if (!tr)
7254                 tr = &global_trace;
7255
7256         mutex_lock(&tracing_err_log_lock);
7257         err = get_tracing_log_err(tr);
7258         if (PTR_ERR(err) == -ENOMEM) {
7259                 mutex_unlock(&tracing_err_log_lock);
7260                 return;
7261         }
7262
7263         snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7264         snprintf(err->cmd, MAX_FILTER_STR_VAL, "\n" CMD_PREFIX "%s\n", cmd);
7265
7266         err->info.errs = errs;
7267         err->info.type = type;
7268         err->info.pos = pos;
7269         err->info.ts = local_clock();
7270
7271         list_add_tail(&err->list, &tr->err_log);
7272         mutex_unlock(&tracing_err_log_lock);
7273 }
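/*
 * A minimal sketch of a hypothetical caller, showing how the static
 * error-string array and the @type index fit together (the "foo" names
 * below are illustrative, not an existing user):
 *
 *	static const char *foo_errs[] = {
 *		"Bad key",		// type 0
 *		"Missing value",	// type 1
 *	};
 *
 *	static void foo_report_bad_key(struct trace_array *tr,
 *				       char *cmd, const char *key)
 *	{
 *		tracing_log_err(tr, "foo: parse", cmd, foo_errs,
 *				0, err_pos(cmd, key));	// 0 == "Bad key"
 *	}
 *
 * Only the pointer to foo_errs[] is stored, which is why the strings
 * must be static, as the NOTE above explains.
 */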
7274
7275 static void clear_tracing_err_log(struct trace_array *tr)
7276 {
7277         struct tracing_log_err *err, *next;
7278
7279         mutex_lock(&tracing_err_log_lock);
7280         list_for_each_entry_safe(err, next, &tr->err_log, list) {
7281                 list_del(&err->list);
7282                 kfree(err);
7283         }
7284
7285         tr->n_err_log_entries = 0;
7286         mutex_unlock(&tracing_err_log_lock);
7287 }
7288
7289 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7290 {
7291         struct trace_array *tr = m->private;
7292
7293         mutex_lock(&tracing_err_log_lock);
7294
7295         return seq_list_start(&tr->err_log, *pos);
7296 }
7297
7298 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7299 {
7300         struct trace_array *tr = m->private;
7301
7302         return seq_list_next(v, &tr->err_log, pos);
7303 }
7304
7305 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7306 {
7307         mutex_unlock(&tracing_err_log_lock);
7308 }
7309
7310 static void tracing_err_log_show_pos(struct seq_file *m, u8 pos)
7311 {
7312         u8 i;
7313
7314         for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7315                 seq_putc(m, ' ');
7316         for (i = 0; i < pos; i++)
7317                 seq_putc(m, ' ');
7318         seq_puts(m, "^\n");
7319 }
7320
7321 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7322 {
7323         struct tracing_log_err *err = v;
7324
7325         if (err) {
7326                 const char *err_text = err->info.errs[err->info.type];
7327                 u64 sec = err->info.ts;
7328                 u32 nsec;
7329
7330                 nsec = do_div(sec, NSEC_PER_SEC);
7331                 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7332                            err->loc, err_text);
7333                 seq_printf(m, "%s", err->cmd);
7334                 tracing_err_log_show_pos(m, err->info.pos);
7335         }
7336
7337         return 0;
7338 }
7339
7340 static const struct seq_operations tracing_err_log_seq_ops = {
7341         .start  = tracing_err_log_seq_start,
7342         .next   = tracing_err_log_seq_next,
7343         .stop   = tracing_err_log_seq_stop,
7344         .show   = tracing_err_log_seq_show
7345 };
7346
7347 static int tracing_err_log_open(struct inode *inode, struct file *file)
7348 {
7349         struct trace_array *tr = inode->i_private;
7350         int ret = 0;
7351
7352         ret = tracing_check_open_get_tr(tr);
7353         if (ret)
7354                 return ret;
7355
7356         /* If this file was opened for write, then erase contents */
7357         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7358                 clear_tracing_err_log(tr);
7359
7360         if (file->f_mode & FMODE_READ) {
7361                 ret = seq_open(file, &tracing_err_log_seq_ops);
7362                 if (!ret) {
7363                         struct seq_file *m = file->private_data;
7364                         m->private = tr;
7365                 } else {
7366                         trace_array_put(tr);
7367                 }
7368         }
7369         return ret;
7370 }
7371
7372 static ssize_t tracing_err_log_write(struct file *file,
7373                                      const char __user *buffer,
7374                                      size_t count, loff_t *ppos)
7375 {
7376         return count;
7377 }
7378
7379 static int tracing_err_log_release(struct inode *inode, struct file *file)
7380 {
7381         struct trace_array *tr = inode->i_private;
7382
7383         trace_array_put(tr);
7384
7385         if (file->f_mode & FMODE_READ)
7386                 seq_release(inode, file);
7387
7388         return 0;
7389 }
7390
7391 static const struct file_operations tracing_err_log_fops = {
7392         .open           = tracing_err_log_open,
7393         .write          = tracing_err_log_write,
7394         .read           = seq_read,
7395         .llseek         = seq_lseek,
7396         .release        = tracing_err_log_release,
7397 };
7398
7399 static int tracing_buffers_open(struct inode *inode, struct file *filp)
7400 {
7401         struct trace_array *tr = inode->i_private;
7402         struct ftrace_buffer_info *info;
7403         int ret;
7404
7405         ret = tracing_check_open_get_tr(tr);
7406         if (ret)
7407                 return ret;
7408
7409         info = kzalloc(sizeof(*info), GFP_KERNEL);
7410         if (!info) {
7411                 trace_array_put(tr);
7412                 return -ENOMEM;
7413         }
7414
7415         mutex_lock(&trace_types_lock);
7416
7417         info->iter.tr           = tr;
7418         info->iter.cpu_file     = tracing_get_cpu(inode);
7419         info->iter.trace        = tr->current_trace;
7420         info->iter.array_buffer = &tr->array_buffer;
7421         info->spare             = NULL;
7422         /* Force reading ring buffer for first read */
7423         info->read              = (unsigned int)-1;
7424
7425         filp->private_data = info;
7426
7427         tr->current_trace->ref++;
7428
7429         mutex_unlock(&trace_types_lock);
7430
7431         ret = nonseekable_open(inode, filp);
7432         if (ret < 0)
7433                 trace_array_put(tr);
7434
7435         return ret;
7436 }
7437
7438 static __poll_t
7439 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
7440 {
7441         struct ftrace_buffer_info *info = filp->private_data;
7442         struct trace_iterator *iter = &info->iter;
7443
7444         return trace_poll(iter, filp, poll_table);
7445 }
7446
7447 static ssize_t
7448 tracing_buffers_read(struct file *filp, char __user *ubuf,
7449                      size_t count, loff_t *ppos)
7450 {
7451         struct ftrace_buffer_info *info = filp->private_data;
7452         struct trace_iterator *iter = &info->iter;
7453         ssize_t ret = 0;
7454         ssize_t size;
7455
7456         if (!count)
7457                 return 0;
7458
7459 #ifdef CONFIG_TRACER_MAX_TRACE
7460         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7461                 return -EBUSY;
7462 #endif
7463
7464         if (!info->spare) {
7465                 info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
7466                                                           iter->cpu_file);
7467                 if (IS_ERR(info->spare)) {
7468                         ret = PTR_ERR(info->spare);
7469                         info->spare = NULL;
7470                 } else {
7471                         info->spare_cpu = iter->cpu_file;
7472                 }
7473         }
7474         if (!info->spare)
7475                 return ret;
7476
7477         /* Is there data left over from a previous read? */
7478         if (info->read < PAGE_SIZE)
7479                 goto read;
7480
7481  again:
7482         trace_access_lock(iter->cpu_file);
7483         ret = ring_buffer_read_page(iter->array_buffer->buffer,
7484                                     &info->spare,
7485                                     count,
7486                                     iter->cpu_file, 0);
7487         trace_access_unlock(iter->cpu_file);
7488
7489         if (ret < 0) {
7490                 if (trace_empty(iter)) {
7491                         if ((filp->f_flags & O_NONBLOCK))
7492                                 return -EAGAIN;
7493
7494                         ret = wait_on_pipe(iter, 0);
7495                         if (ret)
7496                                 return ret;
7497
7498                         goto again;
7499                 }
7500                 return 0;
7501         }
7502
7503         info->read = 0;
7504  read:
7505         size = PAGE_SIZE - info->read;
7506         if (size > count)
7507                 size = count;
7508
7509         ret = copy_to_user(ubuf, info->spare + info->read, size);
7510         if (ret == size)
7511                 return -EFAULT;
7512
7513         size -= ret;
7514
7515         *ppos += size;
7516         info->read += size;
7517
7518         return size;
7519 }
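/*
 * tracing_buffers_read() backs the per-CPU "trace_pipe_raw" files and
 * hands out raw ring-buffer pages, so consumers normally read in
 * page-sized chunks and feed them to a binary decoder (this is how
 * trace-cmd records data).  A minimal user-space sketch, assuming
 * tracefs at /sys/kernel/tracing and a 4096-byte page size:
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	void drain_cpu0(int out_fd)
 *	{
 *		char page[4096];
 *		ssize_t n;
 *		int fd = open("/sys/kernel/tracing/per_cpu/cpu0/trace_pipe_raw",
 *			      O_RDONLY | O_NONBLOCK);
 *
 *		if (fd < 0)
 *			return;
 *		while ((n = read(fd, page, sizeof(page))) > 0)
 *			write(out_fd, page, n);
 *		close(fd);
 *	}
 */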
7520
7521 static int tracing_buffers_release(struct inode *inode, struct file *file)
7522 {
7523         struct ftrace_buffer_info *info = file->private_data;
7524         struct trace_iterator *iter = &info->iter;
7525
7526         mutex_lock(&trace_types_lock);
7527
7528         iter->tr->current_trace->ref--;
7529
7530         __trace_array_put(iter->tr);
7531
7532         if (info->spare)
7533                 ring_buffer_free_read_page(iter->array_buffer->buffer,
7534                                            info->spare_cpu, info->spare);
7535         kfree(info);
7536
7537         mutex_unlock(&trace_types_lock);
7538
7539         return 0;
7540 }
7541
7542 struct buffer_ref {
7543         struct trace_buffer     *buffer;
7544         void                    *page;
7545         int                     cpu;
7546         refcount_t              refcount;
7547 };
7548
7549 static void buffer_ref_release(struct buffer_ref *ref)
7550 {
7551         if (!refcount_dec_and_test(&ref->refcount))
7552                 return;
7553         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
7554         kfree(ref);
7555 }
7556
7557 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
7558                                     struct pipe_buffer *buf)
7559 {
7560         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7561
7562         buffer_ref_release(ref);
7563         buf->private = 0;
7564 }
7565
7566 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
7567                                 struct pipe_buffer *buf)
7568 {
7569         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7570
7571         if (refcount_read(&ref->refcount) > INT_MAX/2)
7572                 return false;
7573
7574         refcount_inc(&ref->refcount);
7575         return true;
7576 }
7577
7578 /* Pipe buffer operations for a buffer. */
7579 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
7580         .release                = buffer_pipe_buf_release,
7581         .get                    = buffer_pipe_buf_get,
7582 };
7583
7584 /*
7585  * Callback from splice_to_pipe(); releases any pages still held in the
7586  * spd in case we errored out while filling the pipe.
7587  */
7588 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
7589 {
7590         struct buffer_ref *ref =
7591                 (struct buffer_ref *)spd->partial[i].private;
7592
7593         buffer_ref_release(ref);
7594         spd->partial[i].private = 0;
7595 }
7596
7597 static ssize_t
7598 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7599                             struct pipe_inode_info *pipe, size_t len,
7600                             unsigned int flags)
7601 {
7602         struct ftrace_buffer_info *info = file->private_data;
7603         struct trace_iterator *iter = &info->iter;
7604         struct partial_page partial_def[PIPE_DEF_BUFFERS];
7605         struct page *pages_def[PIPE_DEF_BUFFERS];
7606         struct splice_pipe_desc spd = {
7607                 .pages          = pages_def,
7608                 .partial        = partial_def,
7609                 .nr_pages_max   = PIPE_DEF_BUFFERS,
7610                 .ops            = &buffer_pipe_buf_ops,
7611                 .spd_release    = buffer_spd_release,
7612         };
7613         struct buffer_ref *ref;
7614         int entries, i;
7615         ssize_t ret = 0;
7616
7617 #ifdef CONFIG_TRACER_MAX_TRACE
7618         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7619                 return -EBUSY;
7620 #endif
7621
7622         if (*ppos & (PAGE_SIZE - 1))
7623                 return -EINVAL;
7624
7625         if (len & (PAGE_SIZE - 1)) {
7626                 if (len < PAGE_SIZE)
7627                         return -EINVAL;
7628                 len &= PAGE_MASK;
7629         }
7630
7631         if (splice_grow_spd(pipe, &spd))
7632                 return -ENOMEM;
7633
7634  again:
7635         trace_access_lock(iter->cpu_file);
7636         entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
7637
7638         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
7639                 struct page *page;
7640                 int r;
7641
7642                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
7643                 if (!ref) {
7644                         ret = -ENOMEM;
7645                         break;
7646                 }
7647
7648                 refcount_set(&ref->refcount, 1);
7649                 ref->buffer = iter->array_buffer->buffer;
7650                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
7651                 if (IS_ERR(ref->page)) {
7652                         ret = PTR_ERR(ref->page);
7653                         ref->page = NULL;
7654                         kfree(ref);
7655                         break;
7656                 }
7657                 ref->cpu = iter->cpu_file;
7658
7659                 r = ring_buffer_read_page(ref->buffer, &ref->page,
7660                                           len, iter->cpu_file, 1);
7661                 if (r < 0) {
7662                         ring_buffer_free_read_page(ref->buffer, ref->cpu,
7663                                                    ref->page);
7664                         kfree(ref);
7665                         break;
7666                 }
7667
7668                 page = virt_to_page(ref->page);
7669
7670                 spd.pages[i] = page;
7671                 spd.partial[i].len = PAGE_SIZE;
7672                 spd.partial[i].offset = 0;
7673                 spd.partial[i].private = (unsigned long)ref;
7674                 spd.nr_pages++;
7675                 *ppos += PAGE_SIZE;
7676
7677                 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
7678         }
7679
7680         trace_access_unlock(iter->cpu_file);
7681         spd.nr_pages = i;
7682
7683         /* did we read anything? */
7684         if (!spd.nr_pages) {
7685                 if (ret)
7686                         goto out;
7687
7688                 ret = -EAGAIN;
7689                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
7690                         goto out;
7691
7692                 ret = wait_on_pipe(iter, iter->tr->buffer_percent);
7693                 if (ret)
7694                         goto out;
7695
7696                 goto again;
7697         }
7698
7699         ret = splice_to_pipe(pipe, &spd);
7700 out:
7701         splice_shrink_spd(&spd);
7702
7703         return ret;
7704 }
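/*
 * For the splice path above, *ppos must be page aligned and len is
 * rounded down to whole pages (anything under PAGE_SIZE is rejected),
 * since complete ring-buffer pages are handed to the pipe.  A
 * hypothetical user-space call, with raw_fd open on a trace_pipe_raw
 * file and pipefd[1] the write end of a pipe, would look like:
 *
 *	splice(raw_fd, NULL, pipefd[1], NULL, 16 * 4096, SPLICE_F_NONBLOCK);
 */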
7705
7706 static const struct file_operations tracing_buffers_fops = {
7707         .open           = tracing_buffers_open,
7708         .read           = tracing_buffers_read,
7709         .poll           = tracing_buffers_poll,
7710         .release        = tracing_buffers_release,
7711         .splice_read    = tracing_buffers_splice_read,
7712         .llseek         = no_llseek,
7713 };
7714
7715 static ssize_t
7716 tracing_stats_read(struct file *filp, char __user *ubuf,
7717                    size_t count, loff_t *ppos)
7718 {
7719         struct inode *inode = file_inode(filp);
7720         struct trace_array *tr = inode->i_private;
7721         struct array_buffer *trace_buf = &tr->array_buffer;
7722         int cpu = tracing_get_cpu(inode);
7723         struct trace_seq *s;
7724         unsigned long cnt;
7725         unsigned long long t;
7726         unsigned long usec_rem;
7727
7728         s = kmalloc(sizeof(*s), GFP_KERNEL);
7729         if (!s)
7730                 return -ENOMEM;
7731
7732         trace_seq_init(s);
7733
7734         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
7735         trace_seq_printf(s, "entries: %ld\n", cnt);
7736
7737         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
7738         trace_seq_printf(s, "overrun: %ld\n", cnt);
7739
7740         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
7741         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
7742
7743         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
7744         trace_seq_printf(s, "bytes: %ld\n", cnt);
7745
7746         if (trace_clocks[tr->clock_id].in_ns) {
7747                 /* local or global for trace_clock */
7748                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7749                 usec_rem = do_div(t, USEC_PER_SEC);
7750                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
7751                                                                 t, usec_rem);
7752
7753                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
7754                 usec_rem = do_div(t, USEC_PER_SEC);
7755                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
7756         } else {
7757                 /* counter or tsc mode for trace_clock */
7758                 trace_seq_printf(s, "oldest event ts: %llu\n",
7759                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7760
7761                 trace_seq_printf(s, "now ts: %llu\n",
7762                                 ring_buffer_time_stamp(trace_buf->buffer, cpu));
7763         }
7764
7765         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
7766         trace_seq_printf(s, "dropped events: %ld\n", cnt);
7767
7768         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
7769         trace_seq_printf(s, "read events: %ld\n", cnt);
7770
7771         count = simple_read_from_buffer(ubuf, count, ppos,
7772                                         s->buffer, trace_seq_used(s));
7773
7774         kfree(s);
7775
7776         return count;
7777 }
7778
7779 static const struct file_operations tracing_stats_fops = {
7780         .open           = tracing_open_generic_tr,
7781         .read           = tracing_stats_read,
7782         .llseek         = generic_file_llseek,
7783         .release        = tracing_release_generic_tr,
7784 };
7785
7786 #ifdef CONFIG_DYNAMIC_FTRACE
7787
7788 static ssize_t
7789 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
7790                   size_t cnt, loff_t *ppos)
7791 {
7792         ssize_t ret;
7793         char *buf;
7794         int r;
7795
7796         /* 256 should be plenty to hold the amount needed */
7797         buf = kmalloc(256, GFP_KERNEL);
7798         if (!buf)
7799                 return -ENOMEM;
7800
7801         r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
7802                       ftrace_update_tot_cnt,
7803                       ftrace_number_of_pages,
7804                       ftrace_number_of_groups);
7805
7806         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7807         kfree(buf);
7808         return ret;
7809 }
7810
7811 static const struct file_operations tracing_dyn_info_fops = {
7812         .open           = tracing_open_generic,
7813         .read           = tracing_read_dyn_info,
7814         .llseek         = generic_file_llseek,
7815 };
7816 #endif /* CONFIG_DYNAMIC_FTRACE */
7817
7818 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
7819 static void
7820 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
7821                 struct trace_array *tr, struct ftrace_probe_ops *ops,
7822                 void *data)
7823 {
7824         tracing_snapshot_instance(tr);
7825 }
7826
7827 static void
7828 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
7829                       struct trace_array *tr, struct ftrace_probe_ops *ops,
7830                       void *data)
7831 {
7832         struct ftrace_func_mapper *mapper = data;
7833         long *count = NULL;
7834
7835         if (mapper)
7836                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7837
7838         if (count) {
7839
7840                 if (*count <= 0)
7841                         return;
7842
7843                 (*count)--;
7844         }
7845
7846         tracing_snapshot_instance(tr);
7847 }
7848
7849 static int
7850 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
7851                       struct ftrace_probe_ops *ops, void *data)
7852 {
7853         struct ftrace_func_mapper *mapper = data;
7854         long *count = NULL;
7855
7856         seq_printf(m, "%ps:", (void *)ip);
7857
7858         seq_puts(m, "snapshot");
7859
7860         if (mapper)
7861                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7862
7863         if (count)
7864                 seq_printf(m, ":count=%ld\n", *count);
7865         else
7866                 seq_puts(m, ":unlimited\n");
7867
7868         return 0;
7869 }
7870
7871 static int
7872 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
7873                      unsigned long ip, void *init_data, void **data)
7874 {
7875         struct ftrace_func_mapper *mapper = *data;
7876
7877         if (!mapper) {
7878                 mapper = allocate_ftrace_func_mapper();
7879                 if (!mapper)
7880                         return -ENOMEM;
7881                 *data = mapper;
7882         }
7883
7884         return ftrace_func_mapper_add_ip(mapper, ip, init_data);
7885 }
7886
7887 static void
7888 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
7889                      unsigned long ip, void *data)
7890 {
7891         struct ftrace_func_mapper *mapper = data;
7892
7893         if (!ip) {
7894                 if (!mapper)
7895                         return;
7896                 free_ftrace_func_mapper(mapper, NULL);
7897                 return;
7898         }
7899
7900         ftrace_func_mapper_remove_ip(mapper, ip);
7901 }
7902
7903 static struct ftrace_probe_ops snapshot_probe_ops = {
7904         .func                   = ftrace_snapshot,
7905         .print                  = ftrace_snapshot_print,
7906 };
7907
7908 static struct ftrace_probe_ops snapshot_count_probe_ops = {
7909         .func                   = ftrace_count_snapshot,
7910         .print                  = ftrace_snapshot_print,
7911         .init                   = ftrace_snapshot_init,
7912         .free                   = ftrace_snapshot_free,
7913 };
7914
7915 static int
7916 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
7917                                char *glob, char *cmd, char *param, int enable)
7918 {
7919         struct ftrace_probe_ops *ops;
7920         void *count = (void *)-1;
7921         char *number;
7922         int ret;
7923
7924         if (!tr)
7925                 return -ENODEV;
7926
7927         /* hash funcs only work with set_ftrace_filter */
7928         if (!enable)
7929                 return -EINVAL;
7930
7931         ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
7932
7933         if (glob[0] == '!')
7934                 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
7935
7936         if (!param)
7937                 goto out_reg;
7938
7939         number = strsep(&param, ":");
7940
7941         if (!strlen(number))
7942                 goto out_reg;
7943
7944         /*
7945          * We use the callback data field (which is a pointer)
7946          * as our counter.
7947          */
7948         ret = kstrtoul(number, 0, (unsigned long *)&count);
7949         if (ret)
7950                 return ret;
7951
7952  out_reg:
7953         ret = tracing_alloc_snapshot_instance(tr);
7954         if (ret < 0)
7955                 goto out;
7956
7957         ret = register_ftrace_function_probe(glob, tr, ops, count);
7958
7959  out:
7960         return ret < 0 ? ret : 0;
7961 }
7962
7963 static struct ftrace_func_command ftrace_snapshot_cmd = {
7964         .name                   = "snapshot",
7965         .func                   = ftrace_trace_snapshot_callback,
7966 };
7967
7968 static __init int register_snapshot_cmd(void)
7969 {
7970         return register_ftrace_command(&ftrace_snapshot_cmd);
7971 }
7972 #else
7973 static inline __init int register_snapshot_cmd(void) { return 0; }
7974 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
7975
7976 static struct dentry *tracing_get_dentry(struct trace_array *tr)
7977 {
7978         if (WARN_ON(!tr->dir))
7979                 return ERR_PTR(-ENODEV);
7980
7981         /* Top directory uses NULL as the parent */
7982         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
7983                 return NULL;
7984
7985         /* All sub buffers have a descriptor */
7986         return tr->dir;
7987 }
7988
7989 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
7990 {
7991         struct dentry *d_tracer;
7992
7993         if (tr->percpu_dir)
7994                 return tr->percpu_dir;
7995
7996         d_tracer = tracing_get_dentry(tr);
7997         if (IS_ERR(d_tracer))
7998                 return NULL;
7999
8000         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8001
8002         MEM_FAIL(!tr->percpu_dir,
8003                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8004
8005         return tr->percpu_dir;
8006 }
8007
8008 static struct dentry *
8009 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8010                       void *data, long cpu, const struct file_operations *fops)
8011 {
8012         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8013
8014         if (ret) /* See tracing_get_cpu() */
8015                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
8016         return ret;
8017 }
8018
8019 static void
8020 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8021 {
8022         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8023         struct dentry *d_cpu;
8024         char cpu_dir[30]; /* 30 characters should be more than enough */
8025
8026         if (!d_percpu)
8027                 return;
8028
8029         snprintf(cpu_dir, 30, "cpu%ld", cpu);
8030         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8031         if (!d_cpu) {
8032                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8033                 return;
8034         }
8035
8036         /* per cpu trace_pipe */
8037         trace_create_cpu_file("trace_pipe", 0444, d_cpu,
8038                                 tr, cpu, &tracing_pipe_fops);
8039
8040         /* per cpu trace */
8041         trace_create_cpu_file("trace", 0644, d_cpu,
8042                                 tr, cpu, &tracing_fops);
8043
8044         trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
8045                                 tr, cpu, &tracing_buffers_fops);
8046
8047         trace_create_cpu_file("stats", 0444, d_cpu,
8048                                 tr, cpu, &tracing_stats_fops);
8049
8050         trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
8051                                 tr, cpu, &tracing_entries_fops);
8052
8053 #ifdef CONFIG_TRACER_SNAPSHOT
8054         trace_create_cpu_file("snapshot", 0644, d_cpu,
8055                                 tr, cpu, &snapshot_fops);
8056
8057         trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
8058                                 tr, cpu, &snapshot_raw_fops);
8059 #endif
8060 }
8061
8062 #ifdef CONFIG_FTRACE_SELFTEST
8063 /* Let selftest have access to static functions in this file */
8064 #include "trace_selftest.c"
8065 #endif
8066
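/*
 * Read/write handlers for the per-tracer files under options/.  Reading
 * returns "1\n" or "0\n" depending on whether the tracer flag is set;
 * writing 0 or 1 updates the flag through __set_tracer_option().
 */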
8067 static ssize_t
8068 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8069                         loff_t *ppos)
8070 {
8071         struct trace_option_dentry *topt = filp->private_data;
8072         char *buf;
8073
8074         if (topt->flags->val & topt->opt->bit)
8075                 buf = "1\n";
8076         else
8077                 buf = "0\n";
8078
8079         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8080 }
8081
8082 static ssize_t
8083 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8084                          loff_t *ppos)
8085 {
8086         struct trace_option_dentry *topt = filp->private_data;
8087         unsigned long val;
8088         int ret;
8089
8090         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8091         if (ret)
8092                 return ret;
8093
8094         if (val != 0 && val != 1)
8095                 return -EINVAL;
8096
8097         if (!!(topt->flags->val & topt->opt->bit) != val) {
8098                 mutex_lock(&trace_types_lock);
8099                 ret = __set_tracer_option(topt->tr, topt->flags,
8100                                           topt->opt, !val);
8101                 mutex_unlock(&trace_types_lock);
8102                 if (ret)
8103                         return ret;
8104         }
8105
8106         *ppos += cnt;
8107
8108         return cnt;
8109 }
8110
8111
8112 static const struct file_operations trace_options_fops = {
8113         .open = tracing_open_generic,
8114         .read = trace_options_read,
8115         .write = trace_options_write,
8116         .llseek = generic_file_llseek,
8117 };
8118
8119 /*
8120  * In order to pass in both the trace_array descriptor as well as the index
8121  * to the flag that the trace option file represents, the trace_array
8122  * has a character array of trace_flags_index[], which holds the index
8123  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8124  * The address of this character array is passed to the flag option file
8125  * read/write callbacks.
8126  *
8127  * In order to extract both the index and the trace_array descriptor,
8128  * get_tr_index() uses the following algorithm.
8129  *
8130  *   idx = *ptr;
8131  *
8132  * The pointer passed in is the address of one element of that index
8133  * array, and since index[i] == i, dereferencing it yields the index.
8134  *
8135  * Then, to get the trace_array descriptor, subtracting that index
8136  * from the pointer gets us to the start of the array:
8137  *
8138  *   ptr - idx == &index[0]
8139  *
8140  * Then a simple container_of() from that pointer gets us to the
8141  * trace_array descriptor.
8142  */
8143 static void get_tr_index(void *data, struct trace_array **ptr,
8144                          unsigned int *pindex)
8145 {
8146         *pindex = *(unsigned char *)data;
8147
8148         *ptr = container_of(data - *pindex, struct trace_array,
8149                             trace_flags_index);
8150 }
8151
8152 static ssize_t
8153 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8154                         loff_t *ppos)
8155 {
8156         void *tr_index = filp->private_data;
8157         struct trace_array *tr;
8158         unsigned int index;
8159         char *buf;
8160
8161         get_tr_index(tr_index, &tr, &index);
8162
8163         if (tr->trace_flags & (1 << index))
8164                 buf = "1\n";
8165         else
8166                 buf = "0\n";
8167
8168         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8169 }
8170
8171 static ssize_t
8172 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8173                          loff_t *ppos)
8174 {
8175         void *tr_index = filp->private_data;
8176         struct trace_array *tr;
8177         unsigned int index;
8178         unsigned long val;
8179         int ret;
8180
8181         get_tr_index(tr_index, &tr, &index);
8182
8183         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8184         if (ret)
8185                 return ret;
8186
8187         if (val != 0 && val != 1)
8188                 return -EINVAL;
8189
8190         mutex_lock(&event_mutex);
8191         mutex_lock(&trace_types_lock);
8192         ret = set_tracer_flag(tr, 1 << index, val);
8193         mutex_unlock(&trace_types_lock);
8194         mutex_unlock(&event_mutex);
8195
8196         if (ret < 0)
8197                 return ret;
8198
8199         *ppos += cnt;
8200
8201         return cnt;
8202 }
8203
8204 static const struct file_operations trace_options_core_fops = {
8205         .open = tracing_open_generic,
8206         .read = trace_options_core_read,
8207         .write = trace_options_core_write,
8208         .llseek = generic_file_llseek,
8209 };
8210
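/*
 * Wrapper around tracefs_create_file() that warns when the file could
 * not be created.  Returns the new dentry, or NULL on failure.
 */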
8211 struct dentry *trace_create_file(const char *name,
8212                                  umode_t mode,
8213                                  struct dentry *parent,
8214                                  void *data,
8215                                  const struct file_operations *fops)
8216 {
8217         struct dentry *ret;
8218
8219         ret = tracefs_create_file(name, mode, parent, data, fops);
8220         if (!ret)
8221                 pr_warn("Could not create tracefs '%s' entry\n", name);
8222
8223         return ret;
8224 }
8225
8226
8227 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8228 {
8229         struct dentry *d_tracer;
8230
8231         if (tr->options)
8232                 return tr->options;
8233
8234         d_tracer = tracing_get_dentry(tr);
8235         if (IS_ERR(d_tracer))
8236                 return NULL;
8237
8238         tr->options = tracefs_create_dir("options", d_tracer);
8239         if (!tr->options) {
8240                 pr_warn("Could not create tracefs directory 'options'\n");
8241                 return NULL;
8242         }
8243
8244         return tr->options;
8245 }
8246
8247 static void
8248 create_trace_option_file(struct trace_array *tr,
8249                          struct trace_option_dentry *topt,
8250                          struct tracer_flags *flags,
8251                          struct tracer_opt *opt)
8252 {
8253         struct dentry *t_options;
8254
8255         t_options = trace_options_init_dentry(tr);
8256         if (!t_options)
8257                 return;
8258
8259         topt->flags = flags;
8260         topt->opt = opt;
8261         topt->tr = tr;
8262
8263         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
8264                                     &trace_options_fops);
8265
8266 }
8267
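/*
 * Create one options/ file for each private flag of @tracer and track
 * them in tr->topts so they can be freed when the instance goes away.
 */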
8268 static void
8269 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8270 {
8271         struct trace_option_dentry *topts;
8272         struct trace_options *tr_topts;
8273         struct tracer_flags *flags;
8274         struct tracer_opt *opts;
8275         int cnt;
8276         int i;
8277
8278         if (!tracer)
8279                 return;
8280
8281         flags = tracer->flags;
8282
8283         if (!flags || !flags->opts)
8284                 return;
8285
8286         /*
8287          * If this is an instance, only create flags for tracers
8288          * the instance may have.
8289          */
8290         if (!trace_ok_for_array(tracer, tr))
8291                 return;
8292
8293         for (i = 0; i < tr->nr_topts; i++) {
8294                 /* Make sure there are no duplicate flags. */
8295                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8296                         return;
8297         }
8298
8299         opts = flags->opts;
8300
8301         for (cnt = 0; opts[cnt].name; cnt++)
8302                 ;
8303
8304         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8305         if (!topts)
8306                 return;
8307
8308         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8309                             GFP_KERNEL);
8310         if (!tr_topts) {
8311                 kfree(topts);
8312                 return;
8313         }
8314
8315         tr->topts = tr_topts;
8316         tr->topts[tr->nr_topts].tracer = tracer;
8317         tr->topts[tr->nr_topts].topts = topts;
8318         tr->nr_topts++;
8319
8320         for (cnt = 0; opts[cnt].name; cnt++) {
8321                 create_trace_option_file(tr, &topts[cnt], flags,
8322                                          &opts[cnt]);
8323                 MEM_FAIL(topts[cnt].entry == NULL,
8324                           "Failed to create trace option: %s",
8325                           opts[cnt].name);
8326         }
8327 }
8328
8329 static struct dentry *
8330 create_trace_option_core_file(struct trace_array *tr,
8331                               const char *option, long index)
8332 {
8333         struct dentry *t_options;
8334
8335         t_options = trace_options_init_dentry(tr);
8336         if (!t_options)
8337                 return NULL;
8338
8339         return trace_create_file(option, 0644, t_options,
8340                                  (void *)&tr->trace_flags_index[index],
8341                                  &trace_options_core_fops);
8342 }
8343
8344 static void create_trace_options_dir(struct trace_array *tr)
8345 {
8346         struct dentry *t_options;
8347         bool top_level = tr == &global_trace;
8348         int i;
8349
8350         t_options = trace_options_init_dentry(tr);
8351         if (!t_options)
8352                 return;
8353
8354         for (i = 0; trace_options[i]; i++) {
8355                 if (top_level ||
8356                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
8357                         create_trace_option_core_file(tr, trace_options[i], i);
8358         }
8359 }
8360
8361 static ssize_t
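/*
 * "tracing_on" file: reading reports whether the ring buffer is
 * currently recording (1) or not (0).  Writing 0 turns recording off
 * and calls the tracer's ->stop() callback; writing 1 turns it back on
 * and calls ->start().  Writing the current state is a no-op.
 */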
8362 rb_simple_read(struct file *filp, char __user *ubuf,
8363                size_t cnt, loff_t *ppos)
8364 {
8365         struct trace_array *tr = filp->private_data;
8366         char buf[64];
8367         int r;
8368
8369         r = tracer_tracing_is_on(tr);
8370         r = sprintf(buf, "%d\n", r);
8371
8372         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8373 }
8374
8375 static ssize_t
8376 rb_simple_write(struct file *filp, const char __user *ubuf,
8377                 size_t cnt, loff_t *ppos)
8378 {
8379         struct trace_array *tr = filp->private_data;
8380         struct trace_buffer *buffer = tr->array_buffer.buffer;
8381         unsigned long val;
8382         int ret;
8383
8384         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8385         if (ret)
8386                 return ret;
8387
8388         if (buffer) {
8389                 mutex_lock(&trace_types_lock);
8390                 if (!!val == tracer_tracing_is_on(tr)) {
8391                         val = 0; /* do nothing */
8392                 } else if (val) {
8393                         tracer_tracing_on(tr);
8394                         if (tr->current_trace->start)
8395                                 tr->current_trace->start(tr);
8396                 } else {
8397                         tracer_tracing_off(tr);
8398                         if (tr->current_trace->stop)
8399                                 tr->current_trace->stop(tr);
8400                 }
8401                 mutex_unlock(&trace_types_lock);
8402         }
8403
8404         (*ppos)++;
8405
8406         return cnt;
8407 }
8408
8409 static const struct file_operations rb_simple_fops = {
8410         .open           = tracing_open_generic_tr,
8411         .read           = rb_simple_read,
8412         .write          = rb_simple_write,
8413         .release        = tracing_release_generic_tr,
8414         .llseek         = default_llseek,
8415 };
8416
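/*
 * "buffer_percent" file: how full the ring buffer should be before a
 * task blocked reading trace_pipe is woken up.  Accepts 0-100; a value
 * of 0 is stored as 1.
 */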
8417 static ssize_t
8418 buffer_percent_read(struct file *filp, char __user *ubuf,
8419                     size_t cnt, loff_t *ppos)
8420 {
8421         struct trace_array *tr = filp->private_data;
8422         char buf[64];
8423         int r;
8424
8425         r = tr->buffer_percent;
8426         r = sprintf(buf, "%d\n", r);
8427
8428         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8429 }
8430
8431 static ssize_t
8432 buffer_percent_write(struct file *filp, const char __user *ubuf,
8433                      size_t cnt, loff_t *ppos)
8434 {
8435         struct trace_array *tr = filp->private_data;
8436         unsigned long val;
8437         int ret;
8438
8439         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8440         if (ret)
8441                 return ret;
8442
8443         if (val > 100)
8444                 return -EINVAL;
8445
8446         if (!val)
8447                 val = 1;
8448
8449         tr->buffer_percent = val;
8450
8451         (*ppos)++;
8452
8453         return cnt;
8454 }
8455
8456 static const struct file_operations buffer_percent_fops = {
8457         .open           = tracing_open_generic_tr,
8458         .read           = buffer_percent_read,
8459         .write          = buffer_percent_write,
8460         .release        = tracing_release_generic_tr,
8461         .llseek         = default_llseek,
8462 };
8463
8464 static struct dentry *trace_instance_dir;
8465
8466 static void
8467 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
8468
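/* Allocate the ring buffer and per-CPU data for one array_buffer. */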
8469 static int
8470 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
8471 {
8472         enum ring_buffer_flags rb_flags;
8473
8474         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
8475
8476         buf->tr = tr;
8477
8478         buf->buffer = ring_buffer_alloc(size, rb_flags);
8479         if (!buf->buffer)
8480                 return -ENOMEM;
8481
8482         buf->data = alloc_percpu(struct trace_array_cpu);
8483         if (!buf->data) {
8484                 ring_buffer_free(buf->buffer);
8485                 buf->buffer = NULL;
8486                 return -ENOMEM;
8487         }
8488
8489         /* Allocate the first page for all buffers */
8490         set_buffer_entries(buf,
8491                            ring_buffer_size(buf->buffer, 0));
8492
8493         return 0;
8494 }
8495
8496 static int allocate_trace_buffers(struct trace_array *tr, int size)
8497 {
8498         int ret;
8499
8500         ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
8501         if (ret)
8502                 return ret;
8503
8504 #ifdef CONFIG_TRACER_MAX_TRACE
8505         ret = allocate_trace_buffer(tr, &tr->max_buffer,
8506                                     allocate_snapshot ? size : 1);
8507         if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
8508                 ring_buffer_free(tr->array_buffer.buffer);
8509                 tr->array_buffer.buffer = NULL;
8510                 free_percpu(tr->array_buffer.data);
8511                 tr->array_buffer.data = NULL;
8512                 return -ENOMEM;
8513         }
8514         tr->allocated_snapshot = allocate_snapshot;
8515
8516         /*
8517          * Only the top level trace array gets its snapshot allocated
8518          * from the kernel command line.
8519          */
8520         allocate_snapshot = false;
8521 #endif
8522
8523         return 0;
8524 }
8525
8526 static void free_trace_buffer(struct array_buffer *buf)
8527 {
8528         if (buf->buffer) {
8529                 ring_buffer_free(buf->buffer);
8530                 buf->buffer = NULL;
8531                 free_percpu(buf->data);
8532                 buf->data = NULL;
8533         }
8534 }
8535
8536 static void free_trace_buffers(struct trace_array *tr)
8537 {
8538         if (!tr)
8539                 return;
8540
8541         free_trace_buffer(&tr->array_buffer);
8542
8543 #ifdef CONFIG_TRACER_MAX_TRACE
8544         free_trace_buffer(&tr->max_buffer);
8545 #endif
8546 }
8547
8548 static void init_trace_flags_index(struct trace_array *tr)
8549 {
8550         int i;
8551
8552         /* Used by the trace options files */
8553         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
8554                 tr->trace_flags_index[i] = i;
8555 }
8556
8557 static void __update_tracer_options(struct trace_array *tr)
8558 {
8559         struct tracer *t;
8560
8561         for (t = trace_types; t; t = t->next)
8562                 add_tracer_options(tr, t);
8563 }
8564
8565 static void update_tracer_options(struct trace_array *tr)
8566 {
8567         mutex_lock(&trace_types_lock);
8568         __update_tracer_options(tr);
8569         mutex_unlock(&trace_types_lock);
8570 }
8571
8572 /* Must have trace_types_lock held */
8573 struct trace_array *trace_array_find(const char *instance)
8574 {
8575         struct trace_array *tr, *found = NULL;
8576
8577         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8578                 if (tr->name && strcmp(tr->name, instance) == 0) {
8579                         found = tr;
8580                         break;
8581                 }
8582         }
8583
8584         return found;
8585 }
8586
8587 struct trace_array *trace_array_find_get(const char *instance)
8588 {
8589         struct trace_array *tr;
8590
8591         mutex_lock(&trace_types_lock);
8592         tr = trace_array_find(instance);
8593         if (tr)
8594                 tr->ref++;
8595         mutex_unlock(&trace_types_lock);
8596
8597         return tr;
8598 }
8599
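/*
 * Allocate and register a new trace instance ("trace_array").  Called
 * with event_mutex and trace_types_lock held.  Returns the new
 * trace_array or an ERR_PTR() on failure.
 */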
8600 static struct trace_array *trace_array_create(const char *name)
8601 {
8602         struct trace_array *tr;
8603         int ret;
8604
8605         ret = -ENOMEM;
8606         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
8607         if (!tr)
8608                 return ERR_PTR(ret);
8609
8610         tr->name = kstrdup(name, GFP_KERNEL);
8611         if (!tr->name)
8612                 goto out_free_tr;
8613
8614         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
8615                 goto out_free_tr;
8616
8617         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
8618
8619         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
8620
8621         raw_spin_lock_init(&tr->start_lock);
8622
8623         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8624
8625         tr->current_trace = &nop_trace;
8626
8627         INIT_LIST_HEAD(&tr->systems);
8628         INIT_LIST_HEAD(&tr->events);
8629         INIT_LIST_HEAD(&tr->hist_vars);
8630         INIT_LIST_HEAD(&tr->err_log);
8631
8632         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
8633                 goto out_free_tr;
8634
8635         tr->dir = tracefs_create_dir(name, trace_instance_dir);
8636         if (!tr->dir)
8637                 goto out_free_tr;
8638
8639         ret = event_trace_add_tracer(tr->dir, tr);
8640         if (ret) {
8641                 tracefs_remove(tr->dir);
8642                 goto out_free_tr;
8643         }
8644
8645         ftrace_init_trace_array(tr);
8646
8647         init_tracer_tracefs(tr, tr->dir);
8648         init_trace_flags_index(tr);
8649         __update_tracer_options(tr);
8650
8651         list_add(&tr->list, &ftrace_trace_arrays);
8652
8653         tr->ref++;
8654
8655
8656         return tr;
8657
8658  out_free_tr:
8659         free_trace_buffers(tr);
8660         free_cpumask_var(tr->tracing_cpumask);
8661         kfree(tr->name);
8662         kfree(tr);
8663
8664         return ERR_PTR(ret);
8665 }
8666
8667 static int instance_mkdir(const char *name)
8668 {
8669         struct trace_array *tr;
8670         int ret;
8671
8672         mutex_lock(&event_mutex);
8673         mutex_lock(&trace_types_lock);
8674
8675         ret = -EEXIST;
8676         if (trace_array_find(name))
8677                 goto out_unlock;
8678
8679         tr = trace_array_create(name);
8680
8681         ret = PTR_ERR_OR_ZERO(tr);
8682
8683 out_unlock:
8684         mutex_unlock(&trace_types_lock);
8685         mutex_unlock(&event_mutex);
8686         return ret;
8687 }
8688
8689 /**
8690  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
8691  * @name: The name of the trace array to be looked up/created.
8692  *
8693  * Returns a pointer to the trace array with the given name, or
8694  * NULL if it cannot be created.
8695  *
8696  * NOTE: This function increments the reference counter associated with the
8697  * trace array returned. This makes sure it cannot be freed while in use.
8698  * Use trace_array_put() once the trace array is no longer needed.
8699  * If the trace_array is to be freed, trace_array_destroy() needs to
8700  * be called after the trace_array_put(), or simply let user space delete
8701  * it from the tracefs instances directory. But until the
8702  * trace_array_put() is called, user space cannot delete it.
8703  *
8704  */
8705 struct trace_array *trace_array_get_by_name(const char *name)
8706 {
8707         struct trace_array *tr;
8708
8709         mutex_lock(&event_mutex);
8710         mutex_lock(&trace_types_lock);
8711
8712         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8713                 if (tr->name && strcmp(tr->name, name) == 0)
8714                         goto out_unlock;
8715         }
8716
8717         tr = trace_array_create(name);
8718
8719         if (IS_ERR(tr))
8720                 tr = NULL;
8721 out_unlock:
8722         if (tr)
8723                 tr->ref++;
8724
8725         mutex_unlock(&trace_types_lock);
8726         mutex_unlock(&event_mutex);
8727         return tr;
8728 }
8729 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
8730
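/*
 * Tear down a trace instance.  Returns -EBUSY if the instance or its
 * current tracer still has references.  Called with event_mutex and
 * trace_types_lock held.
 */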
8731 static int __remove_instance(struct trace_array *tr)
8732 {
8733         int i;
8734
8735         /* Reference counter for a newly created trace array = 1. */
8736         if (tr->ref > 1 || (tr->current_trace && tr->current_trace->ref))
8737                 return -EBUSY;
8738
8739         list_del(&tr->list);
8740
8741         /* Disable all the flags that were enabled coming in */
8742         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
8743                 if ((1 << i) & ZEROED_TRACE_FLAGS)
8744                         set_tracer_flag(tr, 1 << i, 0);
8745         }
8746
8747         tracing_set_nop(tr);
8748         clear_ftrace_function_probes(tr);
8749         event_trace_del_tracer(tr);
8750         ftrace_clear_pids(tr);
8751         ftrace_destroy_function_files(tr);
8752         tracefs_remove(tr->dir);
8753         free_trace_buffers(tr);
8754
8755         for (i = 0; i < tr->nr_topts; i++) {
8756                 kfree(tr->topts[i].topts);
8757         }
8758         kfree(tr->topts);
8759
8760         free_cpumask_var(tr->tracing_cpumask);
8761         kfree(tr->name);
8762         kfree(tr);
8763         tr = NULL;
8764
8765         return 0;
8766 }
8767
8768 int trace_array_destroy(struct trace_array *this_tr)
8769 {
8770         struct trace_array *tr;
8771         int ret;
8772
8773         if (!this_tr)
8774                 return -EINVAL;
8775
8776         mutex_lock(&event_mutex);
8777         mutex_lock(&trace_types_lock);
8778
8779         ret = -ENODEV;
8780
8781         /* Making sure trace array exists before destroying it. */
8782         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8783                 if (tr == this_tr) {
8784                         ret = __remove_instance(tr);
8785                         break;
8786                 }
8787         }
8788
8789         mutex_unlock(&trace_types_lock);
8790         mutex_unlock(&event_mutex);
8791
8792         return ret;
8793 }
8794 EXPORT_SYMBOL_GPL(trace_array_destroy);
8795
8796 static int instance_rmdir(const char *name)
8797 {
8798         struct trace_array *tr;
8799         int ret;
8800
8801         mutex_lock(&event_mutex);
8802         mutex_lock(&trace_types_lock);
8803
8804         ret = -ENODEV;
8805         tr = trace_array_find(name);
8806         if (tr)
8807                 ret = __remove_instance(tr);
8808
8809         mutex_unlock(&trace_types_lock);
8810         mutex_unlock(&event_mutex);
8811
8812         return ret;
8813 }
8814
8815 static __init void create_trace_instances(struct dentry *d_tracer)
8816 {
8817         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
8818                                                          instance_mkdir,
8819                                                          instance_rmdir);
8820         if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
8821                 return;
8822 }
8823
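/* Create the control and output files for a trace array under @d_tracer. */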
8824 static void
8825 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
8826 {
8827         struct trace_event_file *file;
8828         int cpu;
8829
8830         trace_create_file("available_tracers", 0444, d_tracer,
8831                         tr, &show_traces_fops);
8832
8833         trace_create_file("current_tracer", 0644, d_tracer,
8834                         tr, &set_tracer_fops);
8835
8836         trace_create_file("tracing_cpumask", 0644, d_tracer,
8837                           tr, &tracing_cpumask_fops);
8838
8839         trace_create_file("trace_options", 0644, d_tracer,
8840                           tr, &tracing_iter_fops);
8841
8842         trace_create_file("trace", 0644, d_tracer,
8843                           tr, &tracing_fops);
8844
8845         trace_create_file("trace_pipe", 0444, d_tracer,
8846                           tr, &tracing_pipe_fops);
8847
8848         trace_create_file("buffer_size_kb", 0644, d_tracer,
8849                           tr, &tracing_entries_fops);
8850
8851         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
8852                           tr, &tracing_total_entries_fops);
8853
8854         trace_create_file("free_buffer", 0200, d_tracer,
8855                           tr, &tracing_free_buffer_fops);
8856
8857         trace_create_file("trace_marker", 0220, d_tracer,
8858                           tr, &tracing_mark_fops);
8859
8860         file = __find_event_file(tr, "ftrace", "print");
8861         if (file && file->dir)
8862                 trace_create_file("trigger", 0644, file->dir, file,
8863                                   &event_trigger_fops);
8864         tr->trace_marker_file = file;
8865
8866         trace_create_file("trace_marker_raw", 0220, d_tracer,
8867                           tr, &tracing_mark_raw_fops);
8868
8869         trace_create_file("trace_clock", 0644, d_tracer, tr,
8870                           &trace_clock_fops);
8871
8872         trace_create_file("tracing_on", 0644, d_tracer,
8873                           tr, &rb_simple_fops);
8874
8875         trace_create_file("timestamp_mode", 0444, d_tracer, tr,
8876                           &trace_time_stamp_mode_fops);
8877
8878         tr->buffer_percent = 50;
8879
8880         trace_create_file("buffer_percent", 0444, d_tracer,
8881                         tr, &buffer_percent_fops);
8882
8883         create_trace_options_dir(tr);
8884
8885 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
8886         trace_create_maxlat_file(tr, d_tracer);
8887 #endif
8888
8889         if (ftrace_create_function_files(tr, d_tracer))
8890                 MEM_FAIL(1, "Could not allocate function filter files");
8891
8892 #ifdef CONFIG_TRACER_SNAPSHOT
8893         trace_create_file("snapshot", 0644, d_tracer,
8894                           tr, &snapshot_fops);
8895 #endif
8896
8897         trace_create_file("error_log", 0644, d_tracer,
8898                           tr, &tracing_err_log_fops);
8899
8900         for_each_tracing_cpu(cpu)
8901                 tracing_init_tracefs_percpu(tr, cpu);
8902
8903         ftrace_init_tracefs(tr, d_tracer);
8904 }
8905
8906 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
8907 {
8908         struct vfsmount *mnt;
8909         struct file_system_type *type;
8910
8911         /*
8912          * To maintain backward compatibility for tools that mount
8913          * debugfs to get to the tracing facility, tracefs is automatically
8914          * mounted to the debugfs/tracing directory.
8915          */
8916         type = get_fs_type("tracefs");
8917         if (!type)
8918                 return NULL;
8919         mnt = vfs_submount(mntpt, type, "tracefs", NULL);
8920         put_filesystem(type);
8921         if (IS_ERR(mnt))
8922                 return NULL;
8923         mntget(mnt);
8924
8925         return mnt;
8926 }
8927
8928 /**
8929  * tracing_init_dentry - initialize top level trace array
8930  *
8931  * This is called when creating files or directories in the tracing
8932  * directory. It is called via fs_initcall() from the boot-up code and
8933  * is expected to return the dentry of the top level tracing directory.
8934  */
8935 struct dentry *tracing_init_dentry(void)
8936 {
8937         struct trace_array *tr = &global_trace;
8938
8939         if (security_locked_down(LOCKDOWN_TRACEFS)) {
8940                 pr_warn("Tracing disabled due to lockdown\n");
8941                 return ERR_PTR(-EPERM);
8942         }
8943
8944         /* The top level trace array uses NULL as parent */
8945         if (tr->dir)
8946                 return NULL;
8947
8948         if (WARN_ON(!tracefs_initialized()) ||
8949                 (IS_ENABLED(CONFIG_DEBUG_FS) &&
8950                  WARN_ON(!debugfs_initialized())))
8951                 return ERR_PTR(-ENODEV);
8952
8953         /*
8954          * As there may still be users that expect the tracing
8955          * files to exist in debugfs/tracing, we must automount
8956          * the tracefs file system there, so older tools still
8957          * work with the newer kernel.
8958          */
8959         tr->dir = debugfs_create_automount("tracing", NULL,
8960                                            trace_automount, NULL);
8961
8962         return NULL;
8963 }
8964
8965 extern struct trace_eval_map *__start_ftrace_eval_maps[];
8966 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
8967
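/* Register the eval maps built into the kernel image. */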
8968 static void __init trace_eval_init(void)
8969 {
8970         int len;
8971
8972         len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
8973         trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
8974 }
8975
8976 #ifdef CONFIG_MODULES
8977 static void trace_module_add_evals(struct module *mod)
8978 {
8979         if (!mod->num_trace_evals)
8980                 return;
8981
8982         /*
8983          * Modules with bad taint do not have events created; do not
8984          * bother with their enums either.
8985          */
8986         if (trace_module_has_bad_taint(mod))
8987                 return;
8988
8989         trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
8990 }
8991
8992 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
8993 static void trace_module_remove_evals(struct module *mod)
8994 {
8995         union trace_eval_map_item *map;
8996         union trace_eval_map_item **last = &trace_eval_maps;
8997
8998         if (!mod->num_trace_evals)
8999                 return;
9000
9001         mutex_lock(&trace_eval_mutex);
9002
9003         map = trace_eval_maps;
9004
9005         while (map) {
9006                 if (map->head.mod == mod)
9007                         break;
9008                 map = trace_eval_jmp_to_tail(map);
9009                 last = &map->tail.next;
9010                 map = map->tail.next;
9011         }
9012         if (!map)
9013                 goto out;
9014
9015         *last = trace_eval_jmp_to_tail(map)->tail.next;
9016         kfree(map);
9017  out:
9018         mutex_unlock(&trace_eval_mutex);
9019 }
9020 #else
9021 static inline void trace_module_remove_evals(struct module *mod) { }
9022 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9023
9024 static int trace_module_notify(struct notifier_block *self,
9025                                unsigned long val, void *data)
9026 {
9027         struct module *mod = data;
9028
9029         switch (val) {
9030         case MODULE_STATE_COMING:
9031                 trace_module_add_evals(mod);
9032                 break;
9033         case MODULE_STATE_GOING:
9034                 trace_module_remove_evals(mod);
9035                 break;
9036         }
9037
9038         return 0;
9039 }
9040
9041 static struct notifier_block trace_module_nb = {
9042         .notifier_call = trace_module_notify,
9043         .priority = 0,
9044 };
9045 #endif /* CONFIG_MODULES */
9046
9047 static __init int tracer_init_tracefs(void)
9048 {
9049         struct dentry *d_tracer;
9050
9051         trace_access_lock_init();
9052
9053         d_tracer = tracing_init_dentry();
9054         if (IS_ERR(d_tracer))
9055                 return 0;
9056
9057         event_trace_init();
9058
9059         init_tracer_tracefs(&global_trace, d_tracer);
9060         ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
9061
9062         trace_create_file("tracing_thresh", 0644, d_tracer,
9063                         &global_trace, &tracing_thresh_fops);
9064
9065         trace_create_file("README", 0444, d_tracer,
9066                         NULL, &tracing_readme_fops);
9067
9068         trace_create_file("saved_cmdlines", 0444, d_tracer,
9069                         NULL, &tracing_saved_cmdlines_fops);
9070
9071         trace_create_file("saved_cmdlines_size", 0644, d_tracer,
9072                           NULL, &tracing_saved_cmdlines_size_fops);
9073
9074         trace_create_file("saved_tgids", 0444, d_tracer,
9075                         NULL, &tracing_saved_tgids_fops);
9076
9077         trace_eval_init();
9078
9079         trace_create_eval_file(d_tracer);
9080
9081 #ifdef CONFIG_MODULES
9082         register_module_notifier(&trace_module_nb);
9083 #endif
9084
9085 #ifdef CONFIG_DYNAMIC_FTRACE
9086         trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
9087                         NULL, &tracing_dyn_info_fops);
9088 #endif
9089
9090         create_trace_instances(d_tracer);
9091
9092         update_tracer_options(&global_trace);
9093
9094         return 0;
9095 }
9096
9097 static int trace_panic_handler(struct notifier_block *this,
9098                                unsigned long event, void *unused)
9099 {
9100         if (ftrace_dump_on_oops)
9101                 ftrace_dump(ftrace_dump_on_oops);
9102         return NOTIFY_OK;
9103 }
9104
9105 static struct notifier_block trace_panic_notifier = {
9106         .notifier_call  = trace_panic_handler,
9107         .next           = NULL,
9108         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
9109 };
9110
9111 static int trace_die_handler(struct notifier_block *self,
9112                              unsigned long val,
9113                              void *data)
9114 {
9115         switch (val) {
9116         case DIE_OOPS:
9117                 if (ftrace_dump_on_oops)
9118                         ftrace_dump(ftrace_dump_on_oops);
9119                 break;
9120         default:
9121                 break;
9122         }
9123         return NOTIFY_OK;
9124 }
9125
9126 static struct notifier_block trace_die_notifier = {
9127         .notifier_call = trace_die_handler,
9128         .priority = 200
9129 };
9130
9131 /*
9132  * printk is set to max of 1024, we really don't need it that big.
9133  * Nothing should be printing 1000 characters anyway.
9134  */
9135 #define TRACE_MAX_PRINT         1000
9136
9137 /*
9138  * Define here KERN_TRACE so that we have one place to modify
9139  * it if we decide to change what log level the ftrace dump
9140  * should be at.
9141  */
9142 #define KERN_TRACE              KERN_EMERG
9143
9144 void
9145 trace_printk_seq(struct trace_seq *s)
9146 {
9147         /* Probably should print a warning here. */
9148         if (s->seq.len >= TRACE_MAX_PRINT)
9149                 s->seq.len = TRACE_MAX_PRINT;
9150
9151         /*
9152          * More paranoid code. Although the buffer size is set to
9153          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
9154          * an extra layer of protection.
9155          */
9156         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
9157                 s->seq.len = s->seq.size - 1;
9158
9159         /* Should be NUL terminated, but we are paranoid. */
9160         s->buffer[s->seq.len] = 0;
9161
9162         printk(KERN_TRACE "%s", s->buffer);
9163
9164         trace_seq_init(s);
9165 }
9166
9167 void trace_init_global_iter(struct trace_iterator *iter)
9168 {
9169         iter->tr = &global_trace;
9170         iter->trace = iter->tr->current_trace;
9171         iter->cpu_file = RING_BUFFER_ALL_CPUS;
9172         iter->array_buffer = &global_trace.array_buffer;
9173
9174         if (iter->trace && iter->trace->open)
9175                 iter->trace->open(iter);
9176
9177         /* Annotate start of buffers if we had overruns */
9178         if (ring_buffer_overruns(iter->array_buffer->buffer))
9179                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
9180
9181         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
9182         if (trace_clocks[iter->tr->clock_id].in_ns)
9183                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
9184 }
9185
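/*
 * ftrace_dump - dump the trace buffers to the console
 * @oops_dump_mode: DUMP_ALL to dump every CPU, DUMP_ORIG to dump only
 *                  the CPU that triggered the dump, DUMP_NONE to skip.
 *
 * Used by the panic and die notifiers and by sysrq-z.  Tracing is
 * turned off before the dump and stays off until re-enabled via the
 * tracing_on file.
 */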
9186 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
9187 {
9188         /* use static because iter can be a bit big for the stack */
9189         static struct trace_iterator iter;
9190         static atomic_t dump_running;
9191         struct trace_array *tr = &global_trace;
9192         unsigned int old_userobj;
9193         unsigned long flags;
9194         int cnt = 0, cpu;
9195
9196         /* Only allow one dump user at a time. */
9197         if (atomic_inc_return(&dump_running) != 1) {
9198                 atomic_dec(&dump_running);
9199                 return;
9200         }
9201
9202         /*
9203          * Always turn off tracing when we dump.
9204          * We don't need to show trace output of what happens
9205          * between multiple crashes.
9206          *
9207          * If the user does a sysrq-z, then they can re-enable
9208          * tracing with echo 1 > tracing_on.
9209          */
9210         tracing_off();
9211
9212         local_irq_save(flags);
9213         printk_nmi_direct_enter();
9214
9215         /* Simulate the iterator */
9216         trace_init_global_iter(&iter);
9217         /* Cannot use kmalloc for iter.temp */
9218         iter.temp = static_temp_buf;
9219         iter.temp_size = STATIC_TEMP_BUF_SIZE;
9220
9221         for_each_tracing_cpu(cpu) {
9222                 atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9223         }
9224
9225         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
9226
9227         /* don't look at user memory in panic mode */
9228         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
9229
9230         switch (oops_dump_mode) {
9231         case DUMP_ALL:
9232                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9233                 break;
9234         case DUMP_ORIG:
9235                 iter.cpu_file = raw_smp_processor_id();
9236                 break;
9237         case DUMP_NONE:
9238                 goto out_enable;
9239         default:
9240                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
9241                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9242         }
9243
9244         printk(KERN_TRACE "Dumping ftrace buffer:\n");
9245
9246         /* Did function tracer already get disabled? */
9247         if (ftrace_is_dead()) {
9248                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
9249                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
9250         }
9251
9252         /*
9253          * We need to stop all tracing on all CPUs to read
9254          * the next buffer. This is a bit expensive, but is
9255          * not done often. We fill in all that we can read,
9256          * and then release the locks again.
9257          */
9258
9259         while (!trace_empty(&iter)) {
9260
9261                 if (!cnt)
9262                         printk(KERN_TRACE "---------------------------------\n");
9263
9264                 cnt++;
9265
9266                 trace_iterator_reset(&iter);
9267                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
9268
9269                 if (trace_find_next_entry_inc(&iter) != NULL) {
9270                         int ret;
9271
9272                         ret = print_trace_line(&iter);
9273                         if (ret != TRACE_TYPE_NO_CONSUME)
9274                                 trace_consume(&iter);
9275                 }
9276                 touch_nmi_watchdog();
9277
9278                 trace_printk_seq(&iter.seq);
9279         }
9280
9281         if (!cnt)
9282                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
9283         else
9284                 printk(KERN_TRACE "---------------------------------\n");
9285
9286  out_enable:
9287         tr->trace_flags |= old_userobj;
9288
9289         for_each_tracing_cpu(cpu) {
9290                 atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9291         }
9292         atomic_dec(&dump_running);
9293         printk_nmi_direct_exit();
9294         local_irq_restore(flags);
9295 }
9296 EXPORT_SYMBOL_GPL(ftrace_dump);
9297
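/*
 * Split @buf into whitespace-separated arguments and pass them to
 * @createfn.  Returns the result of @createfn, 0 for an empty line,
 * or -ENOMEM if the split fails.
 */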
9298 int trace_run_command(const char *buf, int (*createfn)(int, char **))
9299 {
9300         char **argv;
9301         int argc, ret;
9302
9303         argc = 0;
9304         ret = 0;
9305         argv = argv_split(GFP_KERNEL, buf, &argc);
9306         if (!argv)
9307                 return -ENOMEM;
9308
9309         if (argc)
9310                 ret = createfn(argc, argv);
9311
9312         argv_free(argv);
9313
9314         return ret;
9315 }
9316
9317 #define WRITE_BUFSIZE  4096
9318
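/*
 * Copy a command string from user space in WRITE_BUFSIZE chunks, strip
 * '#' comments, and feed each newline-terminated command to @createfn
 * via trace_run_command().  Used by the probe event interfaces.
 */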
9319 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
9320                                 size_t count, loff_t *ppos,
9321                                 int (*createfn)(int, char **))
9322 {
9323         char *kbuf, *buf, *tmp;
9324         int ret = 0;
9325         size_t done = 0;
9326         size_t size;
9327
9328         kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
9329         if (!kbuf)
9330                 return -ENOMEM;
9331
9332         while (done < count) {
9333                 size = count - done;
9334
9335                 if (size >= WRITE_BUFSIZE)
9336                         size = WRITE_BUFSIZE - 1;
9337
9338                 if (copy_from_user(kbuf, buffer + done, size)) {
9339                         ret = -EFAULT;
9340                         goto out;
9341                 }
9342                 kbuf[size] = '\0';
9343                 buf = kbuf;
9344                 do {
9345                         tmp = strchr(buf, '\n');
9346                         if (tmp) {
9347                                 *tmp = '\0';
9348                                 size = tmp - buf + 1;
9349                         } else {
9350                                 size = strlen(buf);
9351                                 if (done + size < count) {
9352                                         if (buf != kbuf)
9353                                                 break;
9354                                         /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
9355                                         pr_warn("Line length is too long: Should be less than %d\n",
9356                                                 WRITE_BUFSIZE - 2);
9357                                         ret = -EINVAL;
9358                                         goto out;
9359                                 }
9360                         }
9361                         done += size;
9362
9363                         /* Remove comments */
9364                         tmp = strchr(buf, '#');
9365
9366                         if (tmp)
9367                                 *tmp = '\0';
9368
9369                         ret = trace_run_command(buf, createfn);
9370                         if (ret)
9371                                 goto out;
9372                         buf += size;
9373
9374                 } while (done < count);
9375         }
9376         ret = done;
9377
9378 out:
9379         kfree(kbuf);
9380
9381         return ret;
9382 }
9383
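/*
 * Boot-time setup of the tracing core: allocate the global ring
 * buffers, register the nop tracer, and hook up the panic and die
 * notifiers.  Called from early_trace_init().
 */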
9384 __init static int tracer_alloc_buffers(void)
9385 {
9386         int ring_buf_size;
9387         int ret = -ENOMEM;
9388
9389
9390         if (security_locked_down(LOCKDOWN_TRACEFS)) {
9391                 pr_warn("Tracing disabled due to lockdown\n");
9392                 return -EPERM;
9393         }
9394
9395         /*
9396          * Make sure we don't accidentally add more trace options
9397          * than we have bits for.
9398          */
9399         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
9400
9401         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
9402                 goto out;
9403
9404         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
9405                 goto out_free_buffer_mask;
9406
9407         /* Only allocate trace_printk buffers if a trace_printk exists */
9408         if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
9409                 /* Must be called before global_trace.buffer is allocated */
9410                 trace_printk_init_buffers();
9411
9412         /* To save memory, keep the ring buffer size to its minimum */
9413         if (ring_buffer_expanded)
9414                 ring_buf_size = trace_buf_size;
9415         else
9416                 ring_buf_size = 1;
9417
9418         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
9419         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
9420
9421         raw_spin_lock_init(&global_trace.start_lock);
9422
9423         /*
9424          * The prepare callback allocates some memory for the ring buffer. We
9425          * don't free the buffer if the CPU goes down. If we were to free
9426          * the buffer, then the user would lose any trace that was in the
9427          * buffer. The memory will be removed once the "instance" is removed.
9428          */
9429         ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
9430                                       "trace/RB:prepare", trace_rb_cpu_prepare,
9431                                       NULL);
9432         if (ret < 0)
9433                 goto out_free_cpumask;
9434         /* Used for event triggers */
9435         ret = -ENOMEM;
9436         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
9437         if (!temp_buffer)
9438                 goto out_rm_hp_state;
9439
9440         if (trace_create_savedcmd() < 0)
9441                 goto out_free_temp_buffer;
9442
9443         /* TODO: make the number of buffers hot pluggable with CPUS */
9444         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
9445                 MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
9446                 goto out_free_savedcmd;
9447         }
9448
9449         if (global_trace.buffer_disabled)
9450                 tracing_off();
9451
9452         if (trace_boot_clock) {
9453                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
9454                 if (ret < 0)
9455                         pr_warn("Trace clock %s not defined, going back to default\n",
9456                                 trace_boot_clock);
9457         }
9458
9459         /*
9460          * register_tracer() might reference current_trace, so it
9461          * needs to be set before we register anything. This is
9462          * just a bootstrap of current_trace anyway.
9463          */
9464         global_trace.current_trace = &nop_trace;
9465
9466         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9467
9468         ftrace_init_global_array_ops(&global_trace);
9469
9470         init_trace_flags_index(&global_trace);
9471
9472         register_tracer(&nop_trace);
9473
9474         /* Function tracing may start here (via kernel command line) */
9475         init_function_trace();
9476
9477         /* All seems OK, enable tracing */
9478         tracing_disabled = 0;
9479
9480         atomic_notifier_chain_register(&panic_notifier_list,
9481                                        &trace_panic_notifier);
9482
9483         register_die_notifier(&trace_die_notifier);
9484
9485         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
9486
9487         INIT_LIST_HEAD(&global_trace.systems);
9488         INIT_LIST_HEAD(&global_trace.events);
9489         INIT_LIST_HEAD(&global_trace.hist_vars);
9490         INIT_LIST_HEAD(&global_trace.err_log);
9491         list_add(&global_trace.list, &ftrace_trace_arrays);
9492
9493         apply_trace_boot_options();
9494
9495         register_snapshot_cmd();
9496
9497         return 0;
9498
9499 out_free_savedcmd:
9500         free_saved_cmdlines_buffer(savedcmd);
9501 out_free_temp_buffer:
9502         ring_buffer_free(temp_buffer);
9503 out_rm_hp_state:
9504         cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
9505 out_free_cpumask:
9506         free_cpumask_var(global_trace.tracing_cpumask);
9507 out_free_buffer_mask:
9508         free_cpumask_var(tracing_buffer_mask);
9509 out:
9510         return ret;
9511 }
9512
9513 void __init early_trace_init(void)
9514 {
9515         if (tracepoint_printk) {
9516                 tracepoint_print_iter =
9517                         kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
9518                 if (MEM_FAIL(!tracepoint_print_iter,
9519                              "Failed to allocate trace iterator\n"))
9520                         tracepoint_printk = 0;
9521                 else
9522                         static_key_enable(&tracepoint_printk_key.key);
9523         }
9524         tracer_alloc_buffers();
9525 }
9526
9527 void __init trace_init(void)
9528 {
9529         trace_event_init();
9530 }
9531
9532 __init static int clear_boot_tracer(void)
9533 {
9534         /*
9535          * The default boot-up tracer name is stored in an init section
9536          * buffer. This function is called at late_initcall. If we did not
9537          * find the boot tracer, then clear it out, to prevent
9538          * later registration from accessing the buffer that is
9539          * about to be freed.
9540          */
9541         if (!default_bootup_tracer)
9542                 return 0;
9543
9544         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
9545                default_bootup_tracer);
9546         default_bootup_tracer = NULL;
9547
9548         return 0;
9549 }
9550
9551 fs_initcall(tracer_init_tracefs);
9552 late_initcall_sync(clear_boot_tracer);
9553
9554 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
9555 __init static int tracing_set_default_clock(void)
9556 {
9557         /* sched_clock_stable() is determined in late_initcall */
9558         if (!trace_boot_clock && !sched_clock_stable()) {
9559                 if (security_locked_down(LOCKDOWN_TRACEFS)) {
9560                         pr_warn("Can not set tracing clock due to lockdown\n");
9561                         return -EPERM;
9562                 }
9563
9564                 printk(KERN_WARNING
9565                        "Unstable clock detected, switching default tracing clock to \"global\"\n"
9566                        "If you want to keep using the local clock, then add:\n"
9567                        "  \"trace_clock=local\"\n"
9568                        "on the kernel command line\n");
9569                 tracing_set_clock(&global_trace, "global");
9570         }
9571
9572         return 0;
9573 }
9574 late_initcall_sync(tracing_set_default_clock);
9575 #endif