ring-buffer: speed up buffer resets by avoiding synchronize_rcu for each CPU
[linux-2.6-microblaze.git] / kernel / trace / trace.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/notifier.h>
23 #include <linux/irqflags.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/poll.h>
43 #include <linux/nmi.h>
44 #include <linux/fs.h>
45 #include <linux/trace.h>
46 #include <linux/sched/clock.h>
47 #include <linux/sched/rt.h>
48 #include <linux/fsnotify.h>
49 #include <linux/irq_work.h>
50 #include <linux/workqueue.h>
51
52 #include "trace.h"
53 #include "trace_output.h"
54
55 /*
56  * On boot up, the ring buffer is set to the minimum size, so that
57  * we do not waste memory on systems that are not using tracing.
58  */
59 bool ring_buffer_expanded;
60
61 /*
62  * We need to change this state when a selftest is running.
 63  * A selftest will look into the ring-buffer to count the
 64  * entries inserted during the selftest, although some concurrent
 65  * insertions into the ring-buffer, such as trace_printk, could occur
 66  * at the same time, giving false positive or negative results.
67  */
68 static bool __read_mostly tracing_selftest_running;
69
70 /*
71  * If a tracer is running, we do not want to run SELFTEST.
72  */
73 bool __read_mostly tracing_selftest_disabled;
74
75 /* Pipe tracepoints to printk */
76 struct trace_iterator *tracepoint_print_iter;
77 int tracepoint_printk;
78 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
79
80 /* For tracers that don't implement custom flags */
81 static struct tracer_opt dummy_tracer_opt[] = {
82         { }
83 };
84
85 static int
86 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
87 {
88         return 0;
89 }
90
91 /*
92  * To prevent the comm cache from being overwritten when no
93  * tracing is active, only save the comm when a trace event
94  * occurred.
95  */
96 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
97
98 /*
99  * Kill all tracing for good (never come back).
100  * It is initialized to 1 but will turn to zero if the initialization
101  * of the tracer is successful. But that is the only place that sets
102  * this back to zero.
103  */
104 static int tracing_disabled = 1;
105
106 cpumask_var_t __read_mostly     tracing_buffer_mask;
107
108 /*
109  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
110  *
111  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
112  * is set, then ftrace_dump is called. This will output the contents
113  * of the ftrace buffers to the console.  This is very useful for
 114  * capturing traces that lead to crashes and outputting them to a
 115  * serial console.
 116  *
 117  * It is off by default, but you can enable it either by specifying
 118  * "ftrace_dump_on_oops" on the kernel command line, or by setting
 119  * /proc/sys/kernel/ftrace_dump_on_oops.
 120  * Set it to 1 to dump the buffers of all CPUs.
 121  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
122  */
123
124 enum ftrace_dump_mode ftrace_dump_on_oops;
125
126 /* When set, tracing will stop when a WARN*() is hit */
127 int __disable_trace_on_warning;
128
129 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
130 /* Map of enums to their values, for "eval_map" file */
131 struct trace_eval_map_head {
132         struct module                   *mod;
133         unsigned long                   length;
134 };
135
136 union trace_eval_map_item;
137
138 struct trace_eval_map_tail {
139         /*
140          * "end" is first and points to NULL as it must be different
141          * than "mod" or "eval_string"
142          */
143         union trace_eval_map_item       *next;
144         const char                      *end;   /* points to NULL */
145 };
146
147 static DEFINE_MUTEX(trace_eval_mutex);
148
149 /*
150  * The trace_eval_maps are saved in an array with two extra elements,
151  * one at the beginning, and one at the end. The beginning item contains
152  * the count of the saved maps (head.length), and the module they
153  * belong to if not built in (head.mod). The ending item contains a
154  * pointer to the next array of saved eval_map items.
155  */
156 union trace_eval_map_item {
157         struct trace_eval_map           map;
158         struct trace_eval_map_head      head;
159         struct trace_eval_map_tail      tail;
160 };
161
162 static union trace_eval_map_item *trace_eval_maps;
163 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
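/*
 * For example, a saved array describing N maps occupies N + 2 items:
 * item 0 is the head (head.mod, head.length = N), items 1..N are the
 * maps themselves, and item N + 1 is the tail whose tail.next points
 * to the next saved array (or NULL).  A minimal sketch of walking the
 * saved maps under trace_eval_mutex (purely illustrative, not a helper
 * that exists in this file) could look like:
 *
 *	union trace_eval_map_item *ptr = trace_eval_maps;
 *
 *	while (ptr) {
 *		unsigned long i, len = ptr->head.length;
 *
 *		for (i = 0; i < len; i++)
 *			pr_info("%s = %lu\n", ptr[i + 1].map.eval_string,
 *				ptr[i + 1].map.eval_value);
 *		ptr = ptr[len + 1].tail.next;
 *	}
 */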
164
165 int tracing_set_tracer(struct trace_array *tr, const char *buf);
166 static void ftrace_trace_userstack(struct trace_buffer *buffer,
167                                    unsigned long flags, int pc);
168
169 #define MAX_TRACER_SIZE         100
170 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
171 static char *default_bootup_tracer;
172
173 static bool allocate_snapshot;
174
175 static int __init set_cmdline_ftrace(char *str)
176 {
177         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
178         default_bootup_tracer = bootup_tracer_buf;
179         /* We are using ftrace early, expand it */
180         ring_buffer_expanded = true;
181         return 1;
182 }
183 __setup("ftrace=", set_cmdline_ftrace);
184
185 static int __init set_ftrace_dump_on_oops(char *str)
186 {
187         if (*str++ != '=' || !*str) {
188                 ftrace_dump_on_oops = DUMP_ALL;
189                 return 1;
190         }
191
192         if (!strcmp("orig_cpu", str)) {
193                 ftrace_dump_on_oops = DUMP_ORIG;
194                 return 1;
195         }
196
197         return 0;
198 }
199 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
200
201 static int __init stop_trace_on_warning(char *str)
202 {
203         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
204                 __disable_trace_on_warning = 1;
205         return 1;
206 }
207 __setup("traceoff_on_warning", stop_trace_on_warning);
208
209 static int __init boot_alloc_snapshot(char *str)
210 {
211         allocate_snapshot = true;
212         /* We also need the main ring buffer expanded */
213         ring_buffer_expanded = true;
214         return 1;
215 }
216 __setup("alloc_snapshot", boot_alloc_snapshot);
217
218
219 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
220
221 static int __init set_trace_boot_options(char *str)
222 {
223         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
224         return 0;
225 }
226 __setup("trace_options=", set_trace_boot_options);
227
228 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
229 static char *trace_boot_clock __initdata;
230
231 static int __init set_trace_boot_clock(char *str)
232 {
233         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
234         trace_boot_clock = trace_boot_clock_buf;
235         return 0;
236 }
237 __setup("trace_clock=", set_trace_boot_clock);
238
239 static int __init set_tracepoint_printk(char *str)
240 {
241         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
242                 tracepoint_printk = 1;
243         return 1;
244 }
245 __setup("tp_printk", set_tracepoint_printk);
246
247 unsigned long long ns2usecs(u64 nsec)
248 {
249         nsec += 500;
250         do_div(nsec, 1000);
251         return nsec;
252 }
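/*
 * For example, ns2usecs(1500) == 2 and ns2usecs(1499) == 1: the +500
 * rounds to the nearest microsecond before the divide.
 */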
253
254 /* trace_flags holds trace_options default values */
255 #define TRACE_DEFAULT_FLAGS                                             \
256         (FUNCTION_DEFAULT_FLAGS |                                       \
257          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
258          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
259          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
260          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
261
262 /* trace_options that are only supported by global_trace */
263 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
264                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
265
266 /* trace_flags that are default zero for instances */
267 #define ZEROED_TRACE_FLAGS \
268         (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
269
270 /*
271  * The global_trace is the descriptor that holds the top-level tracing
272  * buffers for the live tracing.
273  */
274 static struct trace_array global_trace = {
275         .trace_flags = TRACE_DEFAULT_FLAGS,
276 };
277
278 LIST_HEAD(ftrace_trace_arrays);
279
280 int trace_array_get(struct trace_array *this_tr)
281 {
282         struct trace_array *tr;
283         int ret = -ENODEV;
284
285         mutex_lock(&trace_types_lock);
286         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
287                 if (tr == this_tr) {
288                         tr->ref++;
289                         ret = 0;
290                         break;
291                 }
292         }
293         mutex_unlock(&trace_types_lock);
294
295         return ret;
296 }
297
298 static void __trace_array_put(struct trace_array *this_tr)
299 {
300         WARN_ON(!this_tr->ref);
301         this_tr->ref--;
302 }
303
304 /**
305  * trace_array_put - Decrement the reference counter for this trace array.
306  *
307  * NOTE: Use this when we no longer need the trace array returned by
308  * trace_array_get_by_name(). This ensures the trace array can be later
309  * destroyed.
310  *
311  */
312 void trace_array_put(struct trace_array *this_tr)
313 {
314         if (!this_tr)
315                 return;
316
317         mutex_lock(&trace_types_lock);
318         __trace_array_put(this_tr);
319         mutex_unlock(&trace_types_lock);
320 }
321 EXPORT_SYMBOL_GPL(trace_array_put);
322
323 int tracing_check_open_get_tr(struct trace_array *tr)
324 {
325         int ret;
326
327         ret = security_locked_down(LOCKDOWN_TRACEFS);
328         if (ret)
329                 return ret;
330
331         if (tracing_disabled)
332                 return -ENODEV;
333
334         if (tr && trace_array_get(tr) < 0)
335                 return -ENODEV;
336
337         return 0;
338 }
339
340 int call_filter_check_discard(struct trace_event_call *call, void *rec,
341                               struct trace_buffer *buffer,
342                               struct ring_buffer_event *event)
343 {
344         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
345             !filter_match_preds(call->filter, rec)) {
346                 __trace_event_discard_commit(buffer, event);
347                 return 1;
348         }
349
350         return 0;
351 }
352
353 void trace_free_pid_list(struct trace_pid_list *pid_list)
354 {
355         vfree(pid_list->pids);
356         kfree(pid_list);
357 }
358
359 /**
360  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
361  * @filtered_pids: The list of pids to check
362  * @search_pid: The PID to find in @filtered_pids
363  *
 364  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
365  */
366 bool
367 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
368 {
369         /*
370          * If pid_max changed after filtered_pids was created, we
371          * by default ignore all pids greater than the previous pid_max.
372          */
373         if (search_pid >= filtered_pids->pid_max)
374                 return false;
375
376         return test_bit(search_pid, filtered_pids->pids);
377 }
378
379 /**
380  * trace_ignore_this_task - should a task be ignored for tracing
381  * @filtered_pids: The list of pids to check
382  * @task: The task that should be ignored if not filtered
383  *
384  * Checks if @task should be traced or not from @filtered_pids.
385  * Returns true if @task should *NOT* be traced.
386  * Returns false if @task should be traced.
387  */
388 bool
389 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
390                        struct trace_pid_list *filtered_no_pids,
391                        struct task_struct *task)
392 {
393         /*
 394          * If filtered_no_pids is not empty, and the task's pid is listed
395          * in filtered_no_pids, then return true.
396          * Otherwise, if filtered_pids is empty, that means we can
397          * trace all tasks. If it has content, then only trace pids
398          * within filtered_pids.
399          */
400
401         return (filtered_pids &&
402                 !trace_find_filtered_pid(filtered_pids, task->pid)) ||
403                 (filtered_no_pids &&
404                  trace_find_filtered_pid(filtered_no_pids, task->pid));
405 }
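/*
 * For example, with filtered_pids = { 100 } and no filtered_no_pids,
 * only pid 100 is traced (every other task is ignored); with no
 * filtered_pids and filtered_no_pids = { 100 }, every task except
 * pid 100 is traced.
 */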
406
407 /**
408  * trace_filter_add_remove_task - Add or remove a task from a pid_list
409  * @pid_list: The list to modify
410  * @self: The current task for fork or NULL for exit
411  * @task: The task to add or remove
412  *
413  * If adding a task, if @self is defined, the task is only added if @self
414  * is also included in @pid_list. This happens on fork and tasks should
415  * only be added when the parent is listed. If @self is NULL, then the
416  * @task pid will be removed from the list, which would happen on exit
417  * of a task.
418  */
419 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
420                                   struct task_struct *self,
421                                   struct task_struct *task)
422 {
423         if (!pid_list)
424                 return;
425
426         /* For forks, we only add if the forking task is listed */
427         if (self) {
428                 if (!trace_find_filtered_pid(pid_list, self->pid))
429                         return;
430         }
431
432         /* Sorry, but we don't support pid_max changing after setting */
433         if (task->pid >= pid_list->pid_max)
434                 return;
435
436         /* "self" is set for forks, and NULL for exits */
437         if (self)
438                 set_bit(task->pid, pid_list->pids);
439         else
440                 clear_bit(task->pid, pid_list->pids);
441 }
442
443 /**
444  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
445  * @pid_list: The pid list to show
446  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
447  * @pos: The position of the file
448  *
449  * This is used by the seq_file "next" operation to iterate the pids
450  * listed in a trace_pid_list structure.
451  *
452  * Returns the pid+1 as we want to display pid of zero, but NULL would
453  * stop the iteration.
454  */
455 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
456 {
457         unsigned long pid = (unsigned long)v;
458
459         (*pos)++;
460
 461         /* pid is already +1 of the actual previous bit */
462         pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
463
464         /* Return pid + 1 to allow zero to be represented */
465         if (pid < pid_list->pid_max)
466                 return (void *)(pid + 1);
467
468         return NULL;
469 }
470
471 /**
472  * trace_pid_start - Used for seq_file to start reading pid lists
473  * @pid_list: The pid list to show
474  * @pos: The position of the file
475  *
476  * This is used by seq_file "start" operation to start the iteration
477  * of listing pids.
478  *
479  * Returns the pid+1 as we want to display pid of zero, but NULL would
480  * stop the iteration.
481  */
482 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
483 {
484         unsigned long pid;
485         loff_t l = 0;
486
487         pid = find_first_bit(pid_list->pids, pid_list->pid_max);
488         if (pid >= pid_list->pid_max)
489                 return NULL;
490
491         /* Return pid + 1 so that zero can be the exit value */
492         for (pid++; pid && l < *pos;
493              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
494                 ;
495         return (void *)pid;
496 }
497
498 /**
499  * trace_pid_show - show the current pid in seq_file processing
500  * @m: The seq_file structure to write into
501  * @v: A void pointer of the pid (+1) value to display
502  *
503  * Can be directly used by seq_file operations to display the current
504  * pid value.
505  */
506 int trace_pid_show(struct seq_file *m, void *v)
507 {
508         unsigned long pid = (unsigned long)v - 1;
509
510         seq_printf(m, "%lu\n", pid);
511         return 0;
512 }
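/*
 * The three helpers above are meant to back a seq_file interface.  A
 * minimal sketch of wiring them up (the p_* names and the pid_list
 * pointer are only illustrative; real users fetch the list under the
 * appropriate lock or RCU read section):
 *
 *	static void *p_start(struct seq_file *m, loff_t *pos)
 *	{
 *		return trace_pid_start(pid_list, pos);
 *	}
 *
 *	static void *p_next(struct seq_file *m, void *v, loff_t *pos)
 *	{
 *		return trace_pid_next(pid_list, v, pos);
 *	}
 *
 *	static void p_stop(struct seq_file *m, void *v)
 *	{
 *	}
 *
 *	static const struct seq_operations pid_seq_ops = {
 *		.start	= p_start,
 *		.next	= p_next,
 *		.stop	= p_stop,
 *		.show	= trace_pid_show,
 *	};
 */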
513
514 /* 128 should be much more than enough */
515 #define PID_BUF_SIZE            127
516
517 int trace_pid_write(struct trace_pid_list *filtered_pids,
518                     struct trace_pid_list **new_pid_list,
519                     const char __user *ubuf, size_t cnt)
520 {
521         struct trace_pid_list *pid_list;
522         struct trace_parser parser;
523         unsigned long val;
524         int nr_pids = 0;
525         ssize_t read = 0;
526         ssize_t ret = 0;
527         loff_t pos;
528         pid_t pid;
529
530         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
531                 return -ENOMEM;
532
533         /*
534          * Always recreate a new array. The write is an all or nothing
535          * operation. Always create a new array when adding new pids by
536          * the user. If the operation fails, then the current list is
537          * not modified.
538          */
539         pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
540         if (!pid_list) {
541                 trace_parser_put(&parser);
542                 return -ENOMEM;
543         }
544
545         pid_list->pid_max = READ_ONCE(pid_max);
546
547         /* Only truncating will shrink pid_max */
548         if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
549                 pid_list->pid_max = filtered_pids->pid_max;
550
551         pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
552         if (!pid_list->pids) {
553                 trace_parser_put(&parser);
554                 kfree(pid_list);
555                 return -ENOMEM;
556         }
557
558         if (filtered_pids) {
559                 /* copy the current bits to the new max */
560                 for_each_set_bit(pid, filtered_pids->pids,
561                                  filtered_pids->pid_max) {
562                         set_bit(pid, pid_list->pids);
563                         nr_pids++;
564                 }
565         }
566
567         while (cnt > 0) {
568
569                 pos = 0;
570
571                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
572                 if (ret < 0 || !trace_parser_loaded(&parser))
573                         break;
574
575                 read += ret;
576                 ubuf += ret;
577                 cnt -= ret;
578
579                 ret = -EINVAL;
580                 if (kstrtoul(parser.buffer, 0, &val))
581                         break;
582                 if (val >= pid_list->pid_max)
583                         break;
584
585                 pid = (pid_t)val;
586
587                 set_bit(pid, pid_list->pids);
588                 nr_pids++;
589
590                 trace_parser_clear(&parser);
591                 ret = 0;
592         }
593         trace_parser_put(&parser);
594
595         if (ret < 0) {
596                 trace_free_pid_list(pid_list);
597                 return ret;
598         }
599
600         if (!nr_pids) {
601                 /* Cleared the list of pids */
602                 trace_free_pid_list(pid_list);
603                 read = ret;
604                 pid_list = NULL;
605         }
606
607         *new_pid_list = pid_list;
608
609         return read;
610 }
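/*
 * A minimal sketch of how a pid filter file's write handler could use
 * trace_pid_write() (the some_pid_list and some_lock names are only
 * illustrative; real callers also wake up waiters and update *ppos):
 *
 *	struct trace_pid_list *filtered, *new_list = NULL;
 *	ssize_t ret;
 *
 *	mutex_lock(&some_lock);
 *	filtered = rcu_dereference_protected(some_pid_list,
 *					     lockdep_is_held(&some_lock));
 *	ret = trace_pid_write(filtered, &new_list, ubuf, cnt);
 *	if (ret >= 0) {
 *		rcu_assign_pointer(some_pid_list, new_list);
 *		if (filtered) {
 *			synchronize_rcu();
 *			trace_free_pid_list(filtered);
 *		}
 *	}
 *	mutex_unlock(&some_lock);
 *
 * The written text is a whitespace-separated list of decimal pids, and
 * the new pids are added on top of whatever @filtered_pids already
 * contained.
 */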
611
612 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
613 {
614         u64 ts;
615
616         /* Early boot up does not have a buffer yet */
617         if (!buf->buffer)
618                 return trace_clock_local();
619
620         ts = ring_buffer_time_stamp(buf->buffer, cpu);
621         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
622
623         return ts;
624 }
625
626 u64 ftrace_now(int cpu)
627 {
628         return buffer_ftrace_now(&global_trace.array_buffer, cpu);
629 }
630
631 /**
632  * tracing_is_enabled - Show if global_trace has been disabled
633  *
634  * Shows if the global trace has been enabled or not. It uses the
 635  * mirror flag "buffer_disabled", which is meant for fast paths such as
636  * the irqsoff tracer. But it may be inaccurate due to races. If you
637  * need to know the accurate state, use tracing_is_on() which is a little
638  * slower, but accurate.
639  */
640 int tracing_is_enabled(void)
641 {
642         /*
643          * For quick access (irqsoff uses this in fast path), just
644          * return the mirror variable of the state of the ring buffer.
645          * It's a little racy, but we don't really care.
646          */
647         smp_rmb();
648         return !global_trace.buffer_disabled;
649 }
650
651 /*
652  * trace_buf_size is the size in bytes that is allocated
653  * for a buffer. Note, the number of bytes is always rounded
654  * to page size.
655  *
 656  * This number is purposely set to a low value of 16384.
 657  * If a dump on oops happens, not having to wait for all that
 658  * output is much appreciated. In any case, this is configurable
 659  * at both boot time and run time.
660  */
661 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
662
663 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
664
665 /* trace_types holds a link list of available tracers. */
666 static struct tracer            *trace_types __read_mostly;
667
668 /*
669  * trace_types_lock is used to protect the trace_types list.
670  */
671 DEFINE_MUTEX(trace_types_lock);
672
673 /*
674  * serialize the access of the ring buffer
675  *
 676  * The ring buffer serializes readers, but that is only low-level protection.
 677  * The validity of the events (which are returned by ring_buffer_peek() etc.)
 678  * is not protected by the ring buffer.
 679  *
 680  * The content of events may become garbage if we allow other processes to
 681  * consume these events concurrently:
 682  *   A) the page of the consumed events may become a normal page
 683  *      (not a reader page) in the ring buffer, and this page will be
 684  *      rewritten by the events producer.
 685  *   B) The page of the consumed events may become a page for splice_read,
 686  *      and this page will be returned to the system.
 687  *
 688  * These primitives allow multiple processes to access different CPU ring
 689  * buffers concurrently.
 690  *
 691  * These primitives don't distinguish read-only and read-consume access.
 692  * Multiple read-only accesses are also serialized.
693  */
694
695 #ifdef CONFIG_SMP
696 static DECLARE_RWSEM(all_cpu_access_lock);
697 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
698
699 static inline void trace_access_lock(int cpu)
700 {
701         if (cpu == RING_BUFFER_ALL_CPUS) {
702                 /* gain it for accessing the whole ring buffer. */
703                 down_write(&all_cpu_access_lock);
704         } else {
705                 /* gain it for accessing a cpu ring buffer. */
706
707                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
708                 down_read(&all_cpu_access_lock);
709
710                 /* Secondly block other access to this @cpu ring buffer. */
711                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
712         }
713 }
714
715 static inline void trace_access_unlock(int cpu)
716 {
717         if (cpu == RING_BUFFER_ALL_CPUS) {
718                 up_write(&all_cpu_access_lock);
719         } else {
720                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
721                 up_read(&all_cpu_access_lock);
722         }
723 }
724
725 static inline void trace_access_lock_init(void)
726 {
727         int cpu;
728
729         for_each_possible_cpu(cpu)
730                 mutex_init(&per_cpu(cpu_access_lock, cpu));
731 }
732
733 #else
734
735 static DEFINE_MUTEX(access_lock);
736
737 static inline void trace_access_lock(int cpu)
738 {
739         (void)cpu;
740         mutex_lock(&access_lock);
741 }
742
743 static inline void trace_access_unlock(int cpu)
744 {
745         (void)cpu;
746         mutex_unlock(&access_lock);
747 }
748
749 static inline void trace_access_lock_init(void)
750 {
751 }
752
753 #endif
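/*
 * Readers bracket any consuming access with these helpers.  A minimal
 * sketch (the buffer, cpu, ts and lost_events variables are only
 * illustrative):
 *
 *	trace_access_lock(cpu);
 *	event = ring_buffer_consume(buffer, cpu, &ts, &lost_events);
 *	...
 *	trace_access_unlock(cpu);
 *
 * Passing RING_BUFFER_ALL_CPUS takes the access lock for every CPU
 * buffer at once instead of a single per-cpu lock.
 */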
754
755 #ifdef CONFIG_STACKTRACE
756 static void __ftrace_trace_stack(struct trace_buffer *buffer,
757                                  unsigned long flags,
758                                  int skip, int pc, struct pt_regs *regs);
759 static inline void ftrace_trace_stack(struct trace_array *tr,
760                                       struct trace_buffer *buffer,
761                                       unsigned long flags,
762                                       int skip, int pc, struct pt_regs *regs);
763
764 #else
765 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
766                                         unsigned long flags,
767                                         int skip, int pc, struct pt_regs *regs)
768 {
769 }
770 static inline void ftrace_trace_stack(struct trace_array *tr,
771                                       struct trace_buffer *buffer,
772                                       unsigned long flags,
773                                       int skip, int pc, struct pt_regs *regs)
774 {
775 }
776
777 #endif
778
779 static __always_inline void
780 trace_event_setup(struct ring_buffer_event *event,
781                   int type, unsigned long flags, int pc)
782 {
783         struct trace_entry *ent = ring_buffer_event_data(event);
784
785         tracing_generic_entry_update(ent, type, flags, pc);
786 }
787
788 static __always_inline struct ring_buffer_event *
789 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
790                           int type,
791                           unsigned long len,
792                           unsigned long flags, int pc)
793 {
794         struct ring_buffer_event *event;
795
796         event = ring_buffer_lock_reserve(buffer, len);
797         if (event != NULL)
798                 trace_event_setup(event, type, flags, pc);
799
800         return event;
801 }
802
803 void tracer_tracing_on(struct trace_array *tr)
804 {
805         if (tr->array_buffer.buffer)
806                 ring_buffer_record_on(tr->array_buffer.buffer);
807         /*
808          * This flag is looked at when buffers haven't been allocated
809          * yet, or by some tracers (like irqsoff), that just want to
810          * know if the ring buffer has been disabled, but it can handle
 811          * races where it gets disabled but we still do a record.
812          * As the check is in the fast path of the tracers, it is more
813          * important to be fast than accurate.
814          */
815         tr->buffer_disabled = 0;
816         /* Make the flag seen by readers */
817         smp_wmb();
818 }
819
820 /**
821  * tracing_on - enable tracing buffers
822  *
823  * This function enables tracing buffers that may have been
824  * disabled with tracing_off.
825  */
826 void tracing_on(void)
827 {
828         tracer_tracing_on(&global_trace);
829 }
830 EXPORT_SYMBOL_GPL(tracing_on);
831
832
833 static __always_inline void
834 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
835 {
836         __this_cpu_write(trace_taskinfo_save, true);
837
838         /* If this is the temp buffer, we need to commit fully */
839         if (this_cpu_read(trace_buffered_event) == event) {
840                 /* Length is in event->array[0] */
841                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
842                 /* Release the temp buffer */
843                 this_cpu_dec(trace_buffered_event_cnt);
844         } else
845                 ring_buffer_unlock_commit(buffer, event);
846 }
847
848 /**
849  * __trace_puts - write a constant string into the trace buffer.
850  * @ip:    The address of the caller
851  * @str:   The constant string to write
852  * @size:  The size of the string.
853  */
854 int __trace_puts(unsigned long ip, const char *str, int size)
855 {
856         struct ring_buffer_event *event;
857         struct trace_buffer *buffer;
858         struct print_entry *entry;
859         unsigned long irq_flags;
860         int alloc;
861         int pc;
862
863         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
864                 return 0;
865
866         pc = preempt_count();
867
868         if (unlikely(tracing_selftest_running || tracing_disabled))
869                 return 0;
870
871         alloc = sizeof(*entry) + size + 2; /* possible \n added */
872
873         local_save_flags(irq_flags);
874         buffer = global_trace.array_buffer.buffer;
875         ring_buffer_nest_start(buffer);
876         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, 
877                                             irq_flags, pc);
878         if (!event) {
879                 size = 0;
880                 goto out;
881         }
882
883         entry = ring_buffer_event_data(event);
884         entry->ip = ip;
885
886         memcpy(&entry->buf, str, size);
887
888         /* Add a newline if necessary */
889         if (entry->buf[size - 1] != '\n') {
890                 entry->buf[size] = '\n';
891                 entry->buf[size + 1] = '\0';
892         } else
893                 entry->buf[size] = '\0';
894
895         __buffer_unlock_commit(buffer, event);
896         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
897  out:
898         ring_buffer_nest_end(buffer);
899         return size;
900 }
901 EXPORT_SYMBOL_GPL(__trace_puts);
902
903 /**
904  * __trace_bputs - write the pointer to a constant string into trace buffer
905  * @ip:    The address of the caller
906  * @str:   The constant string to write to the buffer to
907  */
908 int __trace_bputs(unsigned long ip, const char *str)
909 {
910         struct ring_buffer_event *event;
911         struct trace_buffer *buffer;
912         struct bputs_entry *entry;
913         unsigned long irq_flags;
914         int size = sizeof(struct bputs_entry);
915         int ret = 0;
916         int pc;
917
918         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
919                 return 0;
920
921         pc = preempt_count();
922
923         if (unlikely(tracing_selftest_running || tracing_disabled))
924                 return 0;
925
926         local_save_flags(irq_flags);
927         buffer = global_trace.array_buffer.buffer;
928
929         ring_buffer_nest_start(buffer);
930         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
931                                             irq_flags, pc);
932         if (!event)
933                 goto out;
934
935         entry = ring_buffer_event_data(event);
936         entry->ip                       = ip;
937         entry->str                      = str;
938
939         __buffer_unlock_commit(buffer, event);
940         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
941
942         ret = 1;
943  out:
944         ring_buffer_nest_end(buffer);
945         return ret;
946 }
947 EXPORT_SYMBOL_GPL(__trace_bputs);
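/*
 * Neither helper above is normally called directly.  The trace_puts()
 * macro picks between them, using __trace_bputs() when the string is a
 * build-time constant (so only the pointer needs to be recorded) and
 * __trace_puts() otherwise.  For example:
 *
 *	trace_puts("reached the slow path\n");
 */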
948
949 #ifdef CONFIG_TRACER_SNAPSHOT
950 static void tracing_snapshot_instance_cond(struct trace_array *tr,
951                                            void *cond_data)
952 {
953         struct tracer *tracer = tr->current_trace;
954         unsigned long flags;
955
956         if (in_nmi()) {
957                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
958                 internal_trace_puts("*** snapshot is being ignored        ***\n");
959                 return;
960         }
961
962         if (!tr->allocated_snapshot) {
963                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
964                 internal_trace_puts("*** stopping trace here!   ***\n");
965                 tracing_off();
966                 return;
967         }
968
969         /* Note, snapshot can not be used when the tracer uses it */
970         if (tracer->use_max_tr) {
971                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
972                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
973                 return;
974         }
975
976         local_irq_save(flags);
977         update_max_tr(tr, current, smp_processor_id(), cond_data);
978         local_irq_restore(flags);
979 }
980
981 void tracing_snapshot_instance(struct trace_array *tr)
982 {
983         tracing_snapshot_instance_cond(tr, NULL);
984 }
985
986 /**
987  * tracing_snapshot - take a snapshot of the current buffer.
988  *
989  * This causes a swap between the snapshot buffer and the current live
990  * tracing buffer. You can use this to take snapshots of the live
991  * trace when some condition is triggered, but continue to trace.
992  *
993  * Note, make sure to allocate the snapshot with either
994  * a tracing_snapshot_alloc(), or by doing it manually
995  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
996  *
997  * If the snapshot buffer is not allocated, it will stop tracing.
998  * Basically making a permanent snapshot.
999  */
1000 void tracing_snapshot(void)
1001 {
1002         struct trace_array *tr = &global_trace;
1003
1004         tracing_snapshot_instance(tr);
1005 }
1006 EXPORT_SYMBOL_GPL(tracing_snapshot);
1007
1008 /**
1009  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1010  * @tr:         The tracing instance to snapshot
1011  * @cond_data:  The data to be tested conditionally, and possibly saved
1012  *
1013  * This is the same as tracing_snapshot() except that the snapshot is
1014  * conditional - the snapshot will only happen if the
1015  * cond_snapshot.update() implementation receiving the cond_data
1016  * returns true, which means that the trace array's cond_snapshot
1017  * update() operation used the cond_data to determine whether the
1018  * snapshot should be taken, and if it was, presumably saved it along
1019  * with the snapshot.
1020  */
1021 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1022 {
1023         tracing_snapshot_instance_cond(tr, cond_data);
1024 }
1025 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1026
1027 /**
1028  * tracing_snapshot_cond_data - get the user data associated with a snapshot
1029  * @tr:         The tracing instance
1030  *
1031  * When the user enables a conditional snapshot using
1032  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1033  * with the snapshot.  This accessor is used to retrieve it.
1034  *
1035  * Should not be called from cond_snapshot.update(), since it takes
1036  * the tr->max_lock lock, which the code calling
1037  * cond_snapshot.update() has already done.
1038  *
1039  * Returns the cond_data associated with the trace array's snapshot.
1040  */
1041 void *tracing_cond_snapshot_data(struct trace_array *tr)
1042 {
1043         void *cond_data = NULL;
1044
1045         arch_spin_lock(&tr->max_lock);
1046
1047         if (tr->cond_snapshot)
1048                 cond_data = tr->cond_snapshot->cond_data;
1049
1050         arch_spin_unlock(&tr->max_lock);
1051
1052         return cond_data;
1053 }
1054 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1055
1056 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1057                                         struct array_buffer *size_buf, int cpu_id);
1058 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1059
1060 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1061 {
1062         int ret;
1063
1064         if (!tr->allocated_snapshot) {
1065
1066                 /* allocate spare buffer */
1067                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1068                                    &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1069                 if (ret < 0)
1070                         return ret;
1071
1072                 tr->allocated_snapshot = true;
1073         }
1074
1075         return 0;
1076 }
1077
1078 static void free_snapshot(struct trace_array *tr)
1079 {
1080         /*
 1081          * We don't free the ring buffer; instead, we resize it, because
 1082          * the max_tr ring buffer has some state (e.g. ring->clock) and
 1083          * we want to preserve it.
1084          */
1085         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1086         set_buffer_entries(&tr->max_buffer, 1);
1087         tracing_reset_online_cpus(&tr->max_buffer);
1088         tr->allocated_snapshot = false;
1089 }
1090
1091 /**
1092  * tracing_alloc_snapshot - allocate snapshot buffer.
1093  *
1094  * This only allocates the snapshot buffer if it isn't already
1095  * allocated - it doesn't also take a snapshot.
1096  *
1097  * This is meant to be used in cases where the snapshot buffer needs
1098  * to be set up for events that can't sleep but need to be able to
1099  * trigger a snapshot.
1100  */
1101 int tracing_alloc_snapshot(void)
1102 {
1103         struct trace_array *tr = &global_trace;
1104         int ret;
1105
1106         ret = tracing_alloc_snapshot_instance(tr);
1107         WARN_ON(ret < 0);
1108
1109         return ret;
1110 }
1111 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1112
1113 /**
1114  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1115  *
1116  * This is similar to tracing_snapshot(), but it will allocate the
1117  * snapshot buffer if it isn't already allocated. Use this only
1118  * where it is safe to sleep, as the allocation may sleep.
1119  *
1120  * This causes a swap between the snapshot buffer and the current live
1121  * tracing buffer. You can use this to take snapshots of the live
1122  * trace when some condition is triggered, but continue to trace.
1123  */
1124 void tracing_snapshot_alloc(void)
1125 {
1126         int ret;
1127
1128         ret = tracing_alloc_snapshot();
1129         if (ret < 0)
1130                 return;
1131
1132         tracing_snapshot();
1133 }
1134 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
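/*
 * A minimal sketch of using the snapshot API from kernel code (the
 * latency/threshold condition is purely illustrative): allocate the
 * spare buffer once from a context that may sleep, then take snapshots
 * from wherever the interesting condition hits:
 *
 *	if (tracing_alloc_snapshot() < 0)
 *		return;
 *	...
 *	if (latency > threshold)
 *		tracing_snapshot();
 */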
1135
1136 /**
1137  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1138  * @tr:         The tracing instance
1139  * @cond_data:  User data to associate with the snapshot
1140  * @update:     Implementation of the cond_snapshot update function
1141  *
1142  * Check whether the conditional snapshot for the given instance has
1143  * already been enabled, or if the current tracer is already using a
1144  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1145  * save the cond_data and update function inside.
1146  *
1147  * Returns 0 if successful, error otherwise.
1148  */
1149 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1150                                  cond_update_fn_t update)
1151 {
1152         struct cond_snapshot *cond_snapshot;
1153         int ret = 0;
1154
1155         cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1156         if (!cond_snapshot)
1157                 return -ENOMEM;
1158
1159         cond_snapshot->cond_data = cond_data;
1160         cond_snapshot->update = update;
1161
1162         mutex_lock(&trace_types_lock);
1163
1164         ret = tracing_alloc_snapshot_instance(tr);
1165         if (ret)
1166                 goto fail_unlock;
1167
1168         if (tr->current_trace->use_max_tr) {
1169                 ret = -EBUSY;
1170                 goto fail_unlock;
1171         }
1172
1173         /*
1174          * The cond_snapshot can only change to NULL without the
1175          * trace_types_lock. We don't care if we race with it going
1176          * to NULL, but we want to make sure that it's not set to
1177          * something other than NULL when we get here, which we can
1178          * do safely with only holding the trace_types_lock and not
1179          * having to take the max_lock.
1180          */
1181         if (tr->cond_snapshot) {
1182                 ret = -EBUSY;
1183                 goto fail_unlock;
1184         }
1185
1186         arch_spin_lock(&tr->max_lock);
1187         tr->cond_snapshot = cond_snapshot;
1188         arch_spin_unlock(&tr->max_lock);
1189
1190         mutex_unlock(&trace_types_lock);
1191
1192         return ret;
1193
1194  fail_unlock:
1195         mutex_unlock(&trace_types_lock);
1196         kfree(cond_snapshot);
1197         return ret;
1198 }
1199 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
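/*
 * A minimal sketch of a conditional snapshot user (the callback and the
 * threshold value are purely illustrative): the update() callback is
 * handed the cond_data passed to tracing_snapshot_cond() and decides
 * whether the buffers are really swapped:
 *
 *	static bool my_update(struct trace_array *tr, void *cond_data)
 *	{
 *		return *(u64 *)cond_data > 100;
 *	}
 *
 *	tracing_snapshot_cond_enable(tr, NULL, my_update);
 *	...
 *	tracing_snapshot_cond(tr, &measured_value);
 */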
1200
1201 /**
1202  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1203  * @tr:         The tracing instance
1204  *
1205  * Check whether the conditional snapshot for the given instance is
1206  * enabled; if so, free the cond_snapshot associated with it,
1207  * otherwise return -EINVAL.
1208  *
1209  * Returns 0 if successful, error otherwise.
1210  */
1211 int tracing_snapshot_cond_disable(struct trace_array *tr)
1212 {
1213         int ret = 0;
1214
1215         arch_spin_lock(&tr->max_lock);
1216
1217         if (!tr->cond_snapshot)
1218                 ret = -EINVAL;
1219         else {
1220                 kfree(tr->cond_snapshot);
1221                 tr->cond_snapshot = NULL;
1222         }
1223
1224         arch_spin_unlock(&tr->max_lock);
1225
1226         return ret;
1227 }
1228 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1229 #else
1230 void tracing_snapshot(void)
1231 {
1232         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1233 }
1234 EXPORT_SYMBOL_GPL(tracing_snapshot);
1235 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1236 {
1237         WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1238 }
1239 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1240 int tracing_alloc_snapshot(void)
1241 {
1242         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1243         return -ENODEV;
1244 }
1245 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1246 void tracing_snapshot_alloc(void)
1247 {
1248         /* Give warning */
1249         tracing_snapshot();
1250 }
1251 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1252 void *tracing_cond_snapshot_data(struct trace_array *tr)
1253 {
1254         return NULL;
1255 }
1256 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1257 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1258 {
1259         return -ENODEV;
1260 }
1261 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1262 int tracing_snapshot_cond_disable(struct trace_array *tr)
1263 {
1264         return false;
1265 }
1266 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1267 #endif /* CONFIG_TRACER_SNAPSHOT */
1268
1269 void tracer_tracing_off(struct trace_array *tr)
1270 {
1271         if (tr->array_buffer.buffer)
1272                 ring_buffer_record_off(tr->array_buffer.buffer);
1273         /*
1274          * This flag is looked at when buffers haven't been allocated
1275          * yet, or by some tracers (like irqsoff), that just want to
1276          * know if the ring buffer has been disabled, but it can handle
 1277          * races where it gets disabled but we still do a record.
1278          * As the check is in the fast path of the tracers, it is more
1279          * important to be fast than accurate.
1280          */
1281         tr->buffer_disabled = 1;
1282         /* Make the flag seen by readers */
1283         smp_wmb();
1284 }
1285
1286 /**
1287  * tracing_off - turn off tracing buffers
1288  *
1289  * This function stops the tracing buffers from recording data.
1290  * It does not disable any overhead the tracers themselves may
1291  * be causing. This function simply causes all recording to
1292  * the ring buffers to fail.
1293  */
1294 void tracing_off(void)
1295 {
1296         tracer_tracing_off(&global_trace);
1297 }
1298 EXPORT_SYMBOL_GPL(tracing_off);
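/*
 * A common pattern (sketch) is to bracket just the region of interest
 * in the kernel code being debugged:
 *
 *	tracing_on();
 *	... code being investigated ...
 *	tracing_off();
 *
 * so that the ring buffer stops recording right after the interesting
 * activity and is not overwritten by later events.
 */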
1299
1300 void disable_trace_on_warning(void)
1301 {
1302         if (__disable_trace_on_warning) {
1303                 trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1304                         "Disabling tracing due to warning\n");
1305                 tracing_off();
1306         }
1307 }
1308
1309 /**
1310  * tracer_tracing_is_on - show real state of ring buffer enabled
1311  * @tr : the trace array to know if ring buffer is enabled
1312  *
1313  * Shows real state of the ring buffer if it is enabled or not.
1314  */
1315 bool tracer_tracing_is_on(struct trace_array *tr)
1316 {
1317         if (tr->array_buffer.buffer)
1318                 return ring_buffer_record_is_on(tr->array_buffer.buffer);
1319         return !tr->buffer_disabled;
1320 }
1321
1322 /**
1323  * tracing_is_on - show state of ring buffers enabled
1324  */
1325 int tracing_is_on(void)
1326 {
1327         return tracer_tracing_is_on(&global_trace);
1328 }
1329 EXPORT_SYMBOL_GPL(tracing_is_on);
1330
1331 static int __init set_buf_size(char *str)
1332 {
1333         unsigned long buf_size;
1334
1335         if (!str)
1336                 return 0;
1337         buf_size = memparse(str, &str);
1338         /* nr_entries can not be zero */
1339         if (buf_size == 0)
1340                 return 0;
1341         trace_buf_size = buf_size;
1342         return 1;
1343 }
1344 __setup("trace_buf_size=", set_buf_size);
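/*
 * For example, booting with "trace_buf_size=4M" asks for roughly 4 MiB
 * per CPU buffer (memparse() accepts the usual K/M/G suffixes, and the
 * ring buffer rounds the size to full pages).
 */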
1345
1346 static int __init set_tracing_thresh(char *str)
1347 {
1348         unsigned long threshold;
1349         int ret;
1350
1351         if (!str)
1352                 return 0;
1353         ret = kstrtoul(str, 0, &threshold);
1354         if (ret < 0)
1355                 return 0;
1356         tracing_thresh = threshold * 1000;
1357         return 1;
1358 }
1359 __setup("tracing_thresh=", set_tracing_thresh);
1360
1361 unsigned long nsecs_to_usecs(unsigned long nsecs)
1362 {
1363         return nsecs / 1000;
1364 }
1365
1366 /*
1367  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1368  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1369  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1370  * of strings in the order that the evals (enum) were defined.
1371  */
1372 #undef C
1373 #define C(a, b) b
1374
 1375 /* These must match the bit positions in trace_iterator_flags */
1376 static const char *trace_options[] = {
1377         TRACE_FLAGS
1378         NULL
1379 };
1380
1381 static struct {
1382         u64 (*func)(void);
1383         const char *name;
1384         int in_ns;              /* is this clock in nanoseconds? */
1385 } trace_clocks[] = {
1386         { trace_clock_local,            "local",        1 },
1387         { trace_clock_global,           "global",       1 },
1388         { trace_clock_counter,          "counter",      0 },
1389         { trace_clock_jiffies,          "uptime",       0 },
1390         { trace_clock,                  "perf",         1 },
1391         { ktime_get_mono_fast_ns,       "mono",         1 },
1392         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1393         { ktime_get_boot_fast_ns,       "boot",         1 },
1394         ARCH_TRACE_CLOCKS
1395 };
1396
1397 bool trace_clock_in_ns(struct trace_array *tr)
1398 {
1399         if (trace_clocks[tr->clock_id].in_ns)
1400                 return true;
1401
1402         return false;
1403 }
1404
1405 /*
1406  * trace_parser_get_init - gets the buffer for trace parser
1407  */
1408 int trace_parser_get_init(struct trace_parser *parser, int size)
1409 {
1410         memset(parser, 0, sizeof(*parser));
1411
1412         parser->buffer = kmalloc(size, GFP_KERNEL);
1413         if (!parser->buffer)
1414                 return 1;
1415
1416         parser->size = size;
1417         return 0;
1418 }
1419
1420 /*
1421  * trace_parser_put - frees the buffer for trace parser
1422  */
1423 void trace_parser_put(struct trace_parser *parser)
1424 {
1425         kfree(parser->buffer);
1426         parser->buffer = NULL;
1427 }
1428
1429 /*
 1430  * trace_get_user - reads the user input string separated by space
1431  * (matched by isspace(ch))
1432  *
1433  * For each string found the 'struct trace_parser' is updated,
1434  * and the function returns.
1435  *
1436  * Returns number of bytes read.
1437  *
1438  * See kernel/trace/trace.h for 'struct trace_parser' details.
1439  */
1440 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1441         size_t cnt, loff_t *ppos)
1442 {
1443         char ch;
1444         size_t read = 0;
1445         ssize_t ret;
1446
1447         if (!*ppos)
1448                 trace_parser_clear(parser);
1449
1450         ret = get_user(ch, ubuf++);
1451         if (ret)
1452                 goto out;
1453
1454         read++;
1455         cnt--;
1456
1457         /*
1458          * The parser is not finished with the last write,
1459          * continue reading the user input without skipping spaces.
1460          */
1461         if (!parser->cont) {
1462                 /* skip white space */
1463                 while (cnt && isspace(ch)) {
1464                         ret = get_user(ch, ubuf++);
1465                         if (ret)
1466                                 goto out;
1467                         read++;
1468                         cnt--;
1469                 }
1470
1471                 parser->idx = 0;
1472
1473                 /* only spaces were written */
1474                 if (isspace(ch) || !ch) {
1475                         *ppos += read;
1476                         ret = read;
1477                         goto out;
1478                 }
1479         }
1480
1481         /* read the non-space input */
1482         while (cnt && !isspace(ch) && ch) {
1483                 if (parser->idx < parser->size - 1)
1484                         parser->buffer[parser->idx++] = ch;
1485                 else {
1486                         ret = -EINVAL;
1487                         goto out;
1488                 }
1489                 ret = get_user(ch, ubuf++);
1490                 if (ret)
1491                         goto out;
1492                 read++;
1493                 cnt--;
1494         }
1495
1496         /* We either got finished input or we have to wait for another call. */
1497         if (isspace(ch) || !ch) {
1498                 parser->buffer[parser->idx] = 0;
1499                 parser->cont = false;
1500         } else if (parser->idx < parser->size - 1) {
1501                 parser->cont = true;
1502                 parser->buffer[parser->idx++] = ch;
1503                 /* Make sure the parsed string always terminates with '\0'. */
1504                 parser->buffer[parser->idx] = 0;
1505         } else {
1506                 ret = -EINVAL;
1507                 goto out;
1508         }
1509
1510         *ppos += read;
1511         ret = read;
1512
1513 out:
1514         return ret;
1515 }
1516
1517 /* TODO add a seq_buf_to_buffer() */
1518 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1519 {
1520         int len;
1521
1522         if (trace_seq_used(s) <= s->seq.readpos)
1523                 return -EBUSY;
1524
1525         len = trace_seq_used(s) - s->seq.readpos;
1526         if (cnt > len)
1527                 cnt = len;
1528         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1529
1530         s->seq.readpos += cnt;
1531         return cnt;
1532 }
1533
1534 unsigned long __read_mostly     tracing_thresh;
1535 static const struct file_operations tracing_max_lat_fops;
1536
1537 #if (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1538         defined(CONFIG_FSNOTIFY)
1539
1540 static struct workqueue_struct *fsnotify_wq;
1541
1542 static void latency_fsnotify_workfn(struct work_struct *work)
1543 {
1544         struct trace_array *tr = container_of(work, struct trace_array,
1545                                               fsnotify_work);
1546         fsnotify(tr->d_max_latency->d_inode, FS_MODIFY,
1547                  tr->d_max_latency->d_inode, FSNOTIFY_EVENT_INODE, NULL, 0);
1548 }
1549
1550 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1551 {
1552         struct trace_array *tr = container_of(iwork, struct trace_array,
1553                                               fsnotify_irqwork);
1554         queue_work(fsnotify_wq, &tr->fsnotify_work);
1555 }
1556
1557 static void trace_create_maxlat_file(struct trace_array *tr,
1558                                      struct dentry *d_tracer)
1559 {
1560         INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1561         init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1562         tr->d_max_latency = trace_create_file("tracing_max_latency", 0644,
1563                                               d_tracer, &tr->max_latency,
1564                                               &tracing_max_lat_fops);
1565 }
1566
1567 __init static int latency_fsnotify_init(void)
1568 {
1569         fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1570                                       WQ_UNBOUND | WQ_HIGHPRI, 0);
1571         if (!fsnotify_wq) {
1572                 pr_err("Unable to allocate tr_max_lat_wq\n");
1573                 return -ENOMEM;
1574         }
1575         return 0;
1576 }
1577
1578 late_initcall_sync(latency_fsnotify_init);
1579
1580 void latency_fsnotify(struct trace_array *tr)
1581 {
1582         if (!fsnotify_wq)
1583                 return;
1584         /*
1585          * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1586          * possible that we are called from __schedule() or do_idle(), which
1587          * could cause a deadlock.
1588          */
1589         irq_work_queue(&tr->fsnotify_irqwork);
1590 }
1591
1592 /*
1593  * (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1594  *  defined(CONFIG_FSNOTIFY)
1595  */
1596 #else
1597
1598 #define trace_create_maxlat_file(tr, d_tracer)                          \
1599         trace_create_file("tracing_max_latency", 0644, d_tracer,        \
1600                           &tr->max_latency, &tracing_max_lat_fops)
1601
1602 #endif
1603
1604 #ifdef CONFIG_TRACER_MAX_TRACE
1605 /*
1606  * Copy the new maximum trace into the separate maximum-trace
1607  * structure. (this way the maximum trace is permanently saved,
1608  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1609  */
1610 static void
1611 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1612 {
1613         struct array_buffer *trace_buf = &tr->array_buffer;
1614         struct array_buffer *max_buf = &tr->max_buffer;
1615         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1616         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1617
1618         max_buf->cpu = cpu;
1619         max_buf->time_start = data->preempt_timestamp;
1620
1621         max_data->saved_latency = tr->max_latency;
1622         max_data->critical_start = data->critical_start;
1623         max_data->critical_end = data->critical_end;
1624
1625         strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1626         max_data->pid = tsk->pid;
1627         /*
1628          * If tsk == current, then use current_uid(), as that does not use
1629          * RCU. The irq tracer can be called out of RCU scope.
1630          */
1631         if (tsk == current)
1632                 max_data->uid = current_uid();
1633         else
1634                 max_data->uid = task_uid(tsk);
1635
1636         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1637         max_data->policy = tsk->policy;
1638         max_data->rt_priority = tsk->rt_priority;
1639
1640         /* record this task's comm */
1641         tracing_record_cmdline(tsk);
1642         latency_fsnotify(tr);
1643 }
1644
1645 /**
1646  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1647  * @tr: tracer
1648  * @tsk: the task with the latency
1649  * @cpu: The cpu that initiated the trace.
1650  * @cond_data: User data associated with a conditional snapshot
1651  *
1652  * Flip the buffers between the @tr and the max_tr and record information
1653  * about which task was the cause of this latency.
1654  */
1655 void
1656 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1657               void *cond_data)
1658 {
1659         if (tr->stop_count)
1660                 return;
1661
1662         WARN_ON_ONCE(!irqs_disabled());
1663
1664         if (!tr->allocated_snapshot) {
1665                 /* Only the nop tracer should hit this when disabling */
1666                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1667                 return;
1668         }
1669
1670         arch_spin_lock(&tr->max_lock);
1671
1672         /* Inherit the recordable setting from array_buffer */
1673         if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1674                 ring_buffer_record_on(tr->max_buffer.buffer);
1675         else
1676                 ring_buffer_record_off(tr->max_buffer.buffer);
1677
1678 #ifdef CONFIG_TRACER_SNAPSHOT
1679         if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1680                 goto out_unlock;
1681 #endif
1682         swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1683
1684         __update_max_tr(tr, tsk, cpu);
1685
1686  out_unlock:
1687         arch_spin_unlock(&tr->max_lock);
1688 }
1689
1690 /**
1691  * update_max_tr_single - only copy one trace over, and reset the rest
1692  * @tr: tracer
1693  * @tsk: task with the latency
1694  * @cpu: the cpu of the buffer to copy.
1695  *
1696  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1697  */
1698 void
1699 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1700 {
1701         int ret;
1702
1703         if (tr->stop_count)
1704                 return;
1705
1706         WARN_ON_ONCE(!irqs_disabled());
1707         if (!tr->allocated_snapshot) {
1708                 /* Only the nop tracer should hit this when disabling */
1709                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1710                 return;
1711         }
1712
1713         arch_spin_lock(&tr->max_lock);
1714
1715         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1716
1717         if (ret == -EBUSY) {
1718                 /*
1719                  * We failed to swap the buffer due to a commit taking
1720                  * place on this CPU. We fail to record, but we reset
1721                  * the max trace buffer (no one writes directly to it)
1722                  * and flag that it failed.
1723                  */
1724                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1725                         "Failed to swap buffers due to commit in progress\n");
1726         }
1727
1728         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1729
1730         __update_max_tr(tr, tsk, cpu);
1731         arch_spin_unlock(&tr->max_lock);
1732 }
1733 #endif /* CONFIG_TRACER_MAX_TRACE */
1734
1735 static int wait_on_pipe(struct trace_iterator *iter, int full)
1736 {
1737         /* Iterators are static, they should be filled or empty */
1738         if (trace_buffer_iter(iter, iter->cpu_file))
1739                 return 0;
1740
1741         return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1742                                 full);
1743 }
1744
1745 #ifdef CONFIG_FTRACE_STARTUP_TEST
1746 static bool selftests_can_run;
1747
1748 struct trace_selftests {
1749         struct list_head                list;
1750         struct tracer                   *type;
1751 };
1752
1753 static LIST_HEAD(postponed_selftests);
1754
1755 static int save_selftest(struct tracer *type)
1756 {
1757         struct trace_selftests *selftest;
1758
1759         selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1760         if (!selftest)
1761                 return -ENOMEM;
1762
1763         selftest->type = type;
1764         list_add(&selftest->list, &postponed_selftests);
1765         return 0;
1766 }
1767
1768 static int run_tracer_selftest(struct tracer *type)
1769 {
1770         struct trace_array *tr = &global_trace;
1771         struct tracer *saved_tracer = tr->current_trace;
1772         int ret;
1773
1774         if (!type->selftest || tracing_selftest_disabled)
1775                 return 0;
1776
1777         /*
1778          * If a tracer registers early in boot up (before scheduling is
1779          * initialized and such), then do not run its selftests yet.
1780          * Instead, run it a little later in the boot process.
1781          */
1782         if (!selftests_can_run)
1783                 return save_selftest(type);
1784
1785         /*
1786          * Run a selftest on this tracer.
1787          * Here we reset the trace buffer, and set the current
1788          * tracer to be this tracer. The tracer can then run some
1789          * internal tracing to verify that everything is in order.
1790          * If we fail, we do not register this tracer.
1791          */
1792         tracing_reset_online_cpus(&tr->array_buffer);
1793
1794         tr->current_trace = type;
1795
1796 #ifdef CONFIG_TRACER_MAX_TRACE
1797         if (type->use_max_tr) {
1798                 /* If we expanded the buffers, make sure the max is expanded too */
1799                 if (ring_buffer_expanded)
1800                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1801                                            RING_BUFFER_ALL_CPUS);
1802                 tr->allocated_snapshot = true;
1803         }
1804 #endif
1805
1806         /* the test is responsible for initializing and enabling */
1807         pr_info("Testing tracer %s: ", type->name);
1808         ret = type->selftest(type, tr);
1809         /* the test is responsible for resetting too */
1810         tr->current_trace = saved_tracer;
1811         if (ret) {
1812                 printk(KERN_CONT "FAILED!\n");
1813                 /* Add the warning after printing 'FAILED' */
1814                 WARN_ON(1);
1815                 return -1;
1816         }
1817         /* Only reset on passing, to avoid touching corrupted buffers */
1818         tracing_reset_online_cpus(&tr->array_buffer);
1819
1820 #ifdef CONFIG_TRACER_MAX_TRACE
1821         if (type->use_max_tr) {
1822                 tr->allocated_snapshot = false;
1823
1824                 /* Shrink the max buffer again */
1825                 if (ring_buffer_expanded)
1826                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1827                                            RING_BUFFER_ALL_CPUS);
1828         }
1829 #endif
1830
1831         printk(KERN_CONT "PASSED\n");
1832         return 0;
1833 }
1834
1835 static __init int init_trace_selftests(void)
1836 {
1837         struct trace_selftests *p, *n;
1838         struct tracer *t, **last;
1839         int ret;
1840
1841         selftests_can_run = true;
1842
1843         mutex_lock(&trace_types_lock);
1844
1845         if (list_empty(&postponed_selftests))
1846                 goto out;
1847
1848         pr_info("Running postponed tracer tests:\n");
1849
1850         tracing_selftest_running = true;
1851         list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1852                 /* This loop can take minutes when sanitizers are enabled, so
1853          * let's make sure we allow RCU processing.
1854                  */
1855                 cond_resched();
1856                 ret = run_tracer_selftest(p->type);
1857                 /* If the test fails, then warn and remove from available_tracers */
1858                 if (ret < 0) {
1859                         WARN(1, "tracer: %s failed selftest, disabling\n",
1860                              p->type->name);
1861                         last = &trace_types;
1862                         for (t = trace_types; t; t = t->next) {
1863                                 if (t == p->type) {
1864                                         *last = t->next;
1865                                         break;
1866                                 }
1867                                 last = &t->next;
1868                         }
1869                 }
1870                 list_del(&p->list);
1871                 kfree(p);
1872         }
1873         tracing_selftest_running = false;
1874
1875  out:
1876         mutex_unlock(&trace_types_lock);
1877
1878         return 0;
1879 }
1880 core_initcall(init_trace_selftests);
1881 #else
1882 static inline int run_tracer_selftest(struct tracer *type)
1883 {
1884         return 0;
1885 }
1886 #endif /* CONFIG_FTRACE_STARTUP_TEST */
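
/*
 * A minimal sketch of how a tracer opts in to the startup selftest above:
 * it points ->selftest at one of the helpers declared in trace.h. This
 * mirrors the real function tracer in trace_functions.c; only the fields
 * relevant here are shown.
 *
 *        static struct tracer function_trace __tracer_data = {
 *                .name           = "function",
 *                .init           = function_trace_init,
 *                .reset          = function_trace_reset,
 *        #ifdef CONFIG_FTRACE_STARTUP_TEST
 *                .selftest       = trace_selftest_startup_function,
 *        #endif
 *        };
 */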
1887
1888 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1889
1890 static void __init apply_trace_boot_options(void);
1891
1892 /**
1893  * register_tracer - register a tracer with the ftrace system.
1894  * @type: the plugin for the tracer
1895  *
1896  * Register a new plugin tracer.
1897  */
1898 int __init register_tracer(struct tracer *type)
1899 {
1900         struct tracer *t;
1901         int ret = 0;
1902
1903         if (!type->name) {
1904                 pr_info("Tracer must have a name\n");
1905                 return -1;
1906         }
1907
1908         if (strlen(type->name) >= MAX_TRACER_SIZE) {
1909                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1910                 return -1;
1911         }
1912
1913         if (security_locked_down(LOCKDOWN_TRACEFS)) {
1914                 pr_warn("Can not register tracer %s due to lockdown\n",
1915                            type->name);
1916                 return -EPERM;
1917         }
1918
1919         mutex_lock(&trace_types_lock);
1920
1921         tracing_selftest_running = true;
1922
1923         for (t = trace_types; t; t = t->next) {
1924                 if (strcmp(type->name, t->name) == 0) {
1925                         /* already found */
1926                         pr_info("Tracer %s already registered\n",
1927                                 type->name);
1928                         ret = -1;
1929                         goto out;
1930                 }
1931         }
1932
1933         if (!type->set_flag)
1934                 type->set_flag = &dummy_set_flag;
1935         if (!type->flags) {
1936                 /* allocate a dummy tracer_flags */
1937                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1938                 if (!type->flags) {
1939                         ret = -ENOMEM;
1940                         goto out;
1941                 }
1942                 type->flags->val = 0;
1943                 type->flags->opts = dummy_tracer_opt;
1944         } else
1945                 if (!type->flags->opts)
1946                         type->flags->opts = dummy_tracer_opt;
1947
1948         /* store the tracer for __set_tracer_option */
1949         type->flags->trace = type;
1950
1951         ret = run_tracer_selftest(type);
1952         if (ret < 0)
1953                 goto out;
1954
1955         type->next = trace_types;
1956         trace_types = type;
1957         add_tracer_options(&global_trace, type);
1958
1959  out:
1960         tracing_selftest_running = false;
1961         mutex_unlock(&trace_types_lock);
1962
1963         if (ret || !default_bootup_tracer)
1964                 goto out_unlock;
1965
1966         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1967                 goto out_unlock;
1968
1969         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1970         /* Do we want this tracer to start on bootup? */
1971         tracing_set_tracer(&global_trace, type->name);
1972         default_bootup_tracer = NULL;
1973
1974         apply_trace_boot_options();
1975
1976         /* Disable other selftests, since the running tracer would break them. */
1977         tracing_selftest_disabled = true;
1978 #ifdef CONFIG_FTRACE_STARTUP_TEST
1979         printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1980                type->name);
1981 #endif
1982
1983  out_unlock:
1984         return ret;
1985 }
1986
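/*
 * A minimal sketch of how a built-in tracer plugin registers itself; the
 * my_tracer names are illustrative only. register_tracer() is __init, so
 * this pattern only works for code built into the kernel, typically from a
 * core_initcall():
 *
 *        static int my_tracer_init(struct trace_array *tr)
 *        {
 *                return 0;
 *        }
 *
 *        static void my_tracer_reset(struct trace_array *tr)
 *        {
 *        }
 *
 *        static struct tracer my_tracer __read_mostly = {
 *                .name   = "my_tracer",
 *                .init   = my_tracer_init,
 *                .reset  = my_tracer_reset,
 *        };
 *
 *        static __init int init_my_tracer(void)
 *        {
 *                return register_tracer(&my_tracer);
 *        }
 *        core_initcall(init_my_tracer);
 */
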
1987 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
1988 {
1989         struct trace_buffer *buffer = buf->buffer;
1990
1991         if (!buffer)
1992                 return;
1993
1994         ring_buffer_record_disable(buffer);
1995
1996         /* Make sure all commits have finished */
1997         synchronize_rcu();
1998         ring_buffer_reset_cpu(buffer, cpu);
1999
2000         ring_buffer_record_enable(buffer);
2001 }
2002
2003 void tracing_reset_online_cpus(struct array_buffer *buf)
2004 {
2005         struct trace_buffer *buffer = buf->buffer;
2006
2007         if (!buffer)
2008                 return;
2009
2010         ring_buffer_record_disable(buffer);
2011
2012         /* Make sure all commits have finished */
2013         synchronize_rcu();
2014
2015         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2016
2017         ring_buffer_reset_online_cpus(buffer);
2018
2019         ring_buffer_record_enable(buffer);
2020 }
2021
2022 /* Must have trace_types_lock held */
2023 void tracing_reset_all_online_cpus(void)
2024 {
2025         struct trace_array *tr;
2026
2027         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2028                 if (!tr->clear_trace)
2029                         continue;
2030                 tr->clear_trace = false;
2031                 tracing_reset_online_cpus(&tr->array_buffer);
2032 #ifdef CONFIG_TRACER_MAX_TRACE
2033                 tracing_reset_online_cpus(&tr->max_buffer);
2034 #endif
2035         }
2036 }
2037
2038 static int *tgid_map;
2039
2040 #define SAVED_CMDLINES_DEFAULT 128
2041 #define NO_CMDLINE_MAP UINT_MAX
2042 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2043 struct saved_cmdlines_buffer {
2044         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2045         unsigned *map_cmdline_to_pid;
2046         unsigned cmdline_num;
2047         int cmdline_idx;
2048         char *saved_cmdlines;
2049 };
2050 static struct saved_cmdlines_buffer *savedcmd;
2051
2052 /* temporarily disable recording */
2053 static atomic_t trace_record_taskinfo_disabled __read_mostly;
2054
2055 static inline char *get_saved_cmdlines(int idx)
2056 {
2057         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2058 }
2059
2060 static inline void set_cmdline(int idx, const char *cmdline)
2061 {
2062         strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2063 }
2064
2065 static int allocate_cmdlines_buffer(unsigned int val,
2066                                     struct saved_cmdlines_buffer *s)
2067 {
2068         s->map_cmdline_to_pid = kmalloc_array(val,
2069                                               sizeof(*s->map_cmdline_to_pid),
2070                                               GFP_KERNEL);
2071         if (!s->map_cmdline_to_pid)
2072                 return -ENOMEM;
2073
2074         s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2075         if (!s->saved_cmdlines) {
2076                 kfree(s->map_cmdline_to_pid);
2077                 return -ENOMEM;
2078         }
2079
2080         s->cmdline_idx = 0;
2081         s->cmdline_num = val;
2082         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2083                sizeof(s->map_pid_to_cmdline));
2084         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2085                val * sizeof(*s->map_cmdline_to_pid));
2086
2087         return 0;
2088 }
2089
2090 static int trace_create_savedcmd(void)
2091 {
2092         int ret;
2093
2094         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2095         if (!savedcmd)
2096                 return -ENOMEM;
2097
2098         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2099         if (ret < 0) {
2100                 kfree(savedcmd);
2101                 savedcmd = NULL;
2102                 return -ENOMEM;
2103         }
2104
2105         return 0;
2106 }
2107
2108 int is_tracing_stopped(void)
2109 {
2110         return global_trace.stop_count;
2111 }
2112
2113 /**
2114  * tracing_start - quick start of the tracer
2115  *
2116  * If tracing is enabled but was stopped by tracing_stop,
2117  * this will start the tracer back up.
2118  */
2119 void tracing_start(void)
2120 {
2121         struct trace_buffer *buffer;
2122         unsigned long flags;
2123
2124         if (tracing_disabled)
2125                 return;
2126
2127         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2128         if (--global_trace.stop_count) {
2129                 if (global_trace.stop_count < 0) {
2130                         /* Someone screwed up their debugging */
2131                         WARN_ON_ONCE(1);
2132                         global_trace.stop_count = 0;
2133                 }
2134                 goto out;
2135         }
2136
2137         /* Prevent the buffers from switching */
2138         arch_spin_lock(&global_trace.max_lock);
2139
2140         buffer = global_trace.array_buffer.buffer;
2141         if (buffer)
2142                 ring_buffer_record_enable(buffer);
2143
2144 #ifdef CONFIG_TRACER_MAX_TRACE
2145         buffer = global_trace.max_buffer.buffer;
2146         if (buffer)
2147                 ring_buffer_record_enable(buffer);
2148 #endif
2149
2150         arch_spin_unlock(&global_trace.max_lock);
2151
2152  out:
2153         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2154 }
2155
2156 static void tracing_start_tr(struct trace_array *tr)
2157 {
2158         struct trace_buffer *buffer;
2159         unsigned long flags;
2160
2161         if (tracing_disabled)
2162                 return;
2163
2164         /* If global, we need to also start the max tracer */
2165         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2166                 return tracing_start();
2167
2168         raw_spin_lock_irqsave(&tr->start_lock, flags);
2169
2170         if (--tr->stop_count) {
2171                 if (tr->stop_count < 0) {
2172                         /* Someone screwed up their debugging */
2173                         WARN_ON_ONCE(1);
2174                         tr->stop_count = 0;
2175                 }
2176                 goto out;
2177         }
2178
2179         buffer = tr->array_buffer.buffer;
2180         if (buffer)
2181                 ring_buffer_record_enable(buffer);
2182
2183  out:
2184         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2185 }
2186
2187 /**
2188  * tracing_stop - quick stop of the tracer
2189  *
2190  * Light weight way to stop tracing. Use in conjunction with
2191  * tracing_start.
2192  */
2193 void tracing_stop(void)
2194 {
2195         struct trace_buffer *buffer;
2196         unsigned long flags;
2197
2198         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2199         if (global_trace.stop_count++)
2200                 goto out;
2201
2202         /* Prevent the buffers from switching */
2203         arch_spin_lock(&global_trace.max_lock);
2204
2205         buffer = global_trace.array_buffer.buffer;
2206         if (buffer)
2207                 ring_buffer_record_disable(buffer);
2208
2209 #ifdef CONFIG_TRACER_MAX_TRACE
2210         buffer = global_trace.max_buffer.buffer;
2211         if (buffer)
2212                 ring_buffer_record_disable(buffer);
2213 #endif
2214
2215         arch_spin_unlock(&global_trace.max_lock);
2216
2217  out:
2218         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2219 }
2220
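/*
 * A sketch of the intended pairing: stop recording around an inspection of
 * the buffers and resume afterwards. dump_and_inspect() is a stand-in for
 * whatever debugging code needs the buffer contents to stay put:
 *
 *        tracing_stop();
 *        dump_and_inspect();
 *        tracing_start();
 */
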
2221 static void tracing_stop_tr(struct trace_array *tr)
2222 {
2223         struct trace_buffer *buffer;
2224         unsigned long flags;
2225
2226         /* If global, we need to also stop the max tracer */
2227         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2228                 return tracing_stop();
2229
2230         raw_spin_lock_irqsave(&tr->start_lock, flags);
2231         if (tr->stop_count++)
2232                 goto out;
2233
2234         buffer = tr->array_buffer.buffer;
2235         if (buffer)
2236                 ring_buffer_record_disable(buffer);
2237
2238  out:
2239         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2240 }
2241
2242 static int trace_save_cmdline(struct task_struct *tsk)
2243 {
2244         unsigned pid, idx;
2245
2246         /* treat recording of idle task as a success */
2247         if (!tsk->pid)
2248                 return 1;
2249
2250         if (unlikely(tsk->pid > PID_MAX_DEFAULT))
2251                 return 0;
2252
2253         /*
2254          * It's not the end of the world if we don't get
2255          * the lock, but we also don't want to spin
2256          * nor do we want to disable interrupts,
2257          * so if we miss here, then better luck next time.
2258          */
2259         if (!arch_spin_trylock(&trace_cmdline_lock))
2260                 return 0;
2261
2262         idx = savedcmd->map_pid_to_cmdline[tsk->pid];
2263         if (idx == NO_CMDLINE_MAP) {
2264                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2265
2266                 /*
2267                  * Check whether the cmdline buffer at idx has a pid
2268                  * mapped. We are going to overwrite that entry so we
2269                  * need to clear the map_pid_to_cmdline. Otherwise we
2270                  * would read the new comm for the old pid.
2271                  */
2272                 pid = savedcmd->map_cmdline_to_pid[idx];
2273                 if (pid != NO_CMDLINE_MAP)
2274                         savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
2275
2276                 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2277                 savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
2278
2279                 savedcmd->cmdline_idx = idx;
2280         }
2281
2282         set_cmdline(idx, tsk->comm);
2283
2284         arch_spin_unlock(&trace_cmdline_lock);
2285
2286         return 1;
2287 }
2288
2289 static void __trace_find_cmdline(int pid, char comm[])
2290 {
2291         unsigned map;
2292
2293         if (!pid) {
2294                 strcpy(comm, "<idle>");
2295                 return;
2296         }
2297
2298         if (WARN_ON_ONCE(pid < 0)) {
2299                 strcpy(comm, "<XXX>");
2300                 return;
2301         }
2302
2303         if (pid > PID_MAX_DEFAULT) {
2304                 strcpy(comm, "<...>");
2305                 return;
2306         }
2307
2308         map = savedcmd->map_pid_to_cmdline[pid];
2309         if (map != NO_CMDLINE_MAP)
2310                 strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2311         else
2312                 strcpy(comm, "<...>");
2313 }
2314
2315 void trace_find_cmdline(int pid, char comm[])
2316 {
2317         preempt_disable();
2318         arch_spin_lock(&trace_cmdline_lock);
2319
2320         __trace_find_cmdline(pid, comm);
2321
2322         arch_spin_unlock(&trace_cmdline_lock);
2323         preempt_enable();
2324 }
2325
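/*
 * A sketch of the typical lookup done by the output code when rendering an
 * event; the trace_seq_printf() format string is illustrative:
 *
 *        char comm[TASK_COMM_LEN];
 *
 *        trace_find_cmdline(entry->pid, comm);
 *        trace_seq_printf(s, "%16s-%-7d ", comm, entry->pid);
 */
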
2326 int trace_find_tgid(int pid)
2327 {
2328         if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
2329                 return 0;
2330
2331         return tgid_map[pid];
2332 }
2333
2334 static int trace_save_tgid(struct task_struct *tsk)
2335 {
2336         /* treat recording of idle task as a success */
2337         if (!tsk->pid)
2338                 return 1;
2339
2340         if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT))
2341                 return 0;
2342
2343         tgid_map[tsk->pid] = tsk->tgid;
2344         return 1;
2345 }
2346
2347 static bool tracing_record_taskinfo_skip(int flags)
2348 {
2349         if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2350                 return true;
2351         if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
2352                 return true;
2353         if (!__this_cpu_read(trace_taskinfo_save))
2354                 return true;
2355         return false;
2356 }
2357
2358 /**
2359  * tracing_record_taskinfo - record the task info of a task
2360  *
2361  * @task:  task to record
2362  * @flags: TRACE_RECORD_CMDLINE for recording comm
2363  *         TRACE_RECORD_TGID for recording tgid
2364  */
2365 void tracing_record_taskinfo(struct task_struct *task, int flags)
2366 {
2367         bool done;
2368
2369         if (tracing_record_taskinfo_skip(flags))
2370                 return;
2371
2372         /*
2373          * Record as much task information as possible. If some fail, continue
2374          * to try to record the others.
2375          */
2376         done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2377         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2378
2379         /* If recording any information failed, retry again soon. */
2380         if (!done)
2381                 return;
2382
2383         __this_cpu_write(trace_taskinfo_save, false);
2384 }
2385
2386 /**
2387  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2388  *
2389  * @prev: previous task during sched_switch
2390  * @next: next task during sched_switch
2391  * @flags: TRACE_RECORD_CMDLINE for recording comm
2392  *         TRACE_RECORD_TGID for recording tgid
2393  */
2394 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2395                                           struct task_struct *next, int flags)
2396 {
2397         bool done;
2398
2399         if (tracing_record_taskinfo_skip(flags))
2400                 return;
2401
2402         /*
2403          * Record as much task information as possible. If some fail, continue
2404          * to try to record the others.
2405          */
2406         done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2407         done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2408         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2409         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2410
2411         /* If recording any information failed, retry again soon. */
2412         if (!done)
2413                 return;
2414
2415         __this_cpu_write(trace_taskinfo_save, false);
2416 }
2417
2418 /* Helpers to record a specific task information */
2419 void tracing_record_cmdline(struct task_struct *task)
2420 {
2421         tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2422 }
2423
2424 void tracing_record_tgid(struct task_struct *task)
2425 {
2426         tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2427 }
2428
2429 /*
2430  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2431  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2432  * simplifies those functions and keeps them in sync.
2433  */
2434 enum print_line_t trace_handle_return(struct trace_seq *s)
2435 {
2436         return trace_seq_has_overflowed(s) ?
2437                 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2438 }
2439 EXPORT_SYMBOL_GPL(trace_handle_return);
2440
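/*
 * A sketch of how an event's print callback typically uses the helper
 * above; my_event_print and its output format are illustrative:
 *
 *        static enum print_line_t my_event_print(struct trace_iterator *iter,
 *                                                int flags,
 *                                                struct trace_event *event)
 *        {
 *                struct trace_entry *entry = iter->ent;
 *
 *                trace_seq_printf(&iter->seq, "my event: pid=%d\n", entry->pid);
 *                return trace_handle_return(&iter->seq);
 *        }
 */
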
2441 void
2442 tracing_generic_entry_update(struct trace_entry *entry, unsigned short type,
2443                              unsigned long flags, int pc)
2444 {
2445         struct task_struct *tsk = current;
2446
2447         entry->preempt_count            = pc & 0xff;
2448         entry->pid                      = (tsk) ? tsk->pid : 0;
2449         entry->type                     = type;
2450         entry->flags =
2451 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
2452                 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
2453 #else
2454                 TRACE_FLAG_IRQS_NOSUPPORT |
2455 #endif
2456                 ((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
2457                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
2458                 ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
2459                 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
2460                 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
2461 }
2462 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
2463
2464 struct ring_buffer_event *
2465 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2466                           int type,
2467                           unsigned long len,
2468                           unsigned long flags, int pc)
2469 {
2470         return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
2471 }
2472
2473 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2474 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2475 static int trace_buffered_event_ref;
2476
2477 /**
2478  * trace_buffered_event_enable - enable buffering events
2479  *
2480  * When events are being filtered, it is quicker to use a temporary
2481  * buffer to write the event data into if there's a likely chance
2482  * that it will not be committed. The discard of the ring buffer
2483  * is not as fast as committing, and is much slower than copying
2484  * a commit.
2485  *
2486  * When an event is to be filtered, allocate per cpu buffers to
2487  * write the event data into, and if the event is filtered and discarded
2488  * it is simply dropped, otherwise, the entire data is to be committed
2489  * in one shot.
2490  */
2491 void trace_buffered_event_enable(void)
2492 {
2493         struct ring_buffer_event *event;
2494         struct page *page;
2495         int cpu;
2496
2497         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2498
2499         if (trace_buffered_event_ref++)
2500                 return;
2501
2502         for_each_tracing_cpu(cpu) {
2503                 page = alloc_pages_node(cpu_to_node(cpu),
2504                                         GFP_KERNEL | __GFP_NORETRY, 0);
2505                 if (!page)
2506                         goto failed;
2507
2508                 event = page_address(page);
2509                 memset(event, 0, sizeof(*event));
2510
2511                 per_cpu(trace_buffered_event, cpu) = event;
2512
2513                 preempt_disable();
2514                 if (cpu == smp_processor_id() &&
2515                     this_cpu_read(trace_buffered_event) !=
2516                     per_cpu(trace_buffered_event, cpu))
2517                         WARN_ON_ONCE(1);
2518                 preempt_enable();
2519         }
2520
2521         return;
2522  failed:
2523         trace_buffered_event_disable();
2524 }
2525
2526 static void enable_trace_buffered_event(void *data)
2527 {
2528         /* Probably not needed, but do it anyway */
2529         smp_rmb();
2530         this_cpu_dec(trace_buffered_event_cnt);
2531 }
2532
2533 static void disable_trace_buffered_event(void *data)
2534 {
2535         this_cpu_inc(trace_buffered_event_cnt);
2536 }
2537
2538 /**
2539  * trace_buffered_event_disable - disable buffering events
2540  *
2541  * When a filter is removed, it is faster to not use the buffered
2542  * events, and to commit directly into the ring buffer. Free up
2543  * the temp buffers when there are no more users. This requires
2544  * special synchronization with current events.
2545  */
2546 void trace_buffered_event_disable(void)
2547 {
2548         int cpu;
2549
2550         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2551
2552         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2553                 return;
2554
2555         if (--trace_buffered_event_ref)
2556                 return;
2557
2558         preempt_disable();
2559         /* For each CPU, set the buffer as used. */
2560         smp_call_function_many(tracing_buffer_mask,
2561                                disable_trace_buffered_event, NULL, 1);
2562         preempt_enable();
2563
2564         /* Wait for all current users to finish */
2565         synchronize_rcu();
2566
2567         for_each_tracing_cpu(cpu) {
2568                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2569                 per_cpu(trace_buffered_event, cpu) = NULL;
2570         }
2571         /*
2572          * Make sure trace_buffered_event is NULL before clearing
2573          * trace_buffered_event_cnt.
2574          */
2575         smp_wmb();
2576
2577         preempt_disable();
2578         /* Do the work on each cpu */
2579         smp_call_function_many(tracing_buffer_mask,
2580                                enable_trace_buffered_event, NULL, 1);
2581         preempt_enable();
2582 }
2583
2584 static struct trace_buffer *temp_buffer;
2585
2586 struct ring_buffer_event *
2587 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2588                           struct trace_event_file *trace_file,
2589                           int type, unsigned long len,
2590                           unsigned long flags, int pc)
2591 {
2592         struct ring_buffer_event *entry;
2593         int val;
2594
2595         *current_rb = trace_file->tr->array_buffer.buffer;
2596
2597         if (!ring_buffer_time_stamp_abs(*current_rb) && (trace_file->flags &
2598              (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2599             (entry = this_cpu_read(trace_buffered_event))) {
2600                 /* Try to use the per cpu buffer first */
2601                 val = this_cpu_inc_return(trace_buffered_event_cnt);
2602                 if (val == 1) {
2603                         trace_event_setup(entry, type, flags, pc);
2604                         entry->array[0] = len;
2605                         return entry;
2606                 }
2607                 this_cpu_dec(trace_buffered_event_cnt);
2608         }
2609
2610         entry = __trace_buffer_lock_reserve(*current_rb,
2611                                             type, len, flags, pc);
2612         /*
2613          * If tracing is off, but we have triggers enabled
2614          * we still need to look at the event data. Use the temp_buffer
2615          * to store the trace event for the trigger to use. It's recursion
2616          * safe and will not be recorded anywhere.
2617          */
2618         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2619                 *current_rb = temp_buffer;
2620                 entry = __trace_buffer_lock_reserve(*current_rb,
2621                                                     type, len, flags, pc);
2622         }
2623         return entry;
2624 }
2625 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2626
2627 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2628 static DEFINE_MUTEX(tracepoint_printk_mutex);
2629
2630 static void output_printk(struct trace_event_buffer *fbuffer)
2631 {
2632         struct trace_event_call *event_call;
2633         struct trace_event_file *file;
2634         struct trace_event *event;
2635         unsigned long flags;
2636         struct trace_iterator *iter = tracepoint_print_iter;
2637
2638         /* We should never get here if iter is NULL */
2639         if (WARN_ON_ONCE(!iter))
2640                 return;
2641
2642         event_call = fbuffer->trace_file->event_call;
2643         if (!event_call || !event_call->event.funcs ||
2644             !event_call->event.funcs->trace)
2645                 return;
2646
2647         file = fbuffer->trace_file;
2648         if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2649             (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2650              !filter_match_preds(file->filter, fbuffer->entry)))
2651                 return;
2652
2653         event = &fbuffer->trace_file->event_call->event;
2654
2655         spin_lock_irqsave(&tracepoint_iter_lock, flags);
2656         trace_seq_init(&iter->seq);
2657         iter->ent = fbuffer->entry;
2658         event_call->event.funcs->trace(iter, 0, event);
2659         trace_seq_putc(&iter->seq, 0);
2660         printk("%s", iter->seq.buffer);
2661
2662         spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2663 }
2664
2665 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2666                              void *buffer, size_t *lenp,
2667                              loff_t *ppos)
2668 {
2669         int save_tracepoint_printk;
2670         int ret;
2671
2672         mutex_lock(&tracepoint_printk_mutex);
2673         save_tracepoint_printk = tracepoint_printk;
2674
2675         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2676
2677         /*
2678          * This will force exiting early, as tracepoint_printk
2679          * is always zero when tracepoint_printk_iter is not allocated
2680          */
2681         if (!tracepoint_print_iter)
2682                 tracepoint_printk = 0;
2683
2684         if (save_tracepoint_printk == tracepoint_printk)
2685                 goto out;
2686
2687         if (tracepoint_printk)
2688                 static_key_enable(&tracepoint_printk_key.key);
2689         else
2690                 static_key_disable(&tracepoint_printk_key.key);
2691
2692  out:
2693         mutex_unlock(&tracepoint_printk_mutex);
2694
2695         return ret;
2696 }
2697
2698 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2699 {
2700         if (static_key_false(&tracepoint_printk_key.key))
2701                 output_printk(fbuffer);
2702
2703         event_trigger_unlock_commit_regs(fbuffer->trace_file, fbuffer->buffer,
2704                                     fbuffer->event, fbuffer->entry,
2705                                     fbuffer->flags, fbuffer->pc, fbuffer->regs);
2706 }
2707 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2708
2709 /*
2710  * Skip 3:
2711  *
2712  *   trace_buffer_unlock_commit_regs()
2713  *   trace_event_buffer_commit()
2714  *   trace_event_raw_event_xxx()
2715  */
2716 # define STACK_SKIP 3
2717
2718 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2719                                      struct trace_buffer *buffer,
2720                                      struct ring_buffer_event *event,
2721                                      unsigned long flags, int pc,
2722                                      struct pt_regs *regs)
2723 {
2724         __buffer_unlock_commit(buffer, event);
2725
2726         /*
2727          * If regs is not set, then skip the necessary functions.
2728          * Note, we can still get here via blktrace, wakeup tracer
2729          * and mmiotrace, but that's ok if they lose a function or
2730          * two. They are not that meaningful.
2731          */
2732         ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs);
2733         ftrace_trace_userstack(buffer, flags, pc);
2734 }
2735
2736 /*
2737  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2738  */
2739 void
2740 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2741                                    struct ring_buffer_event *event)
2742 {
2743         __buffer_unlock_commit(buffer, event);
2744 }
2745
2746 static void
2747 trace_process_export(struct trace_export *export,
2748                struct ring_buffer_event *event)
2749 {
2750         struct trace_entry *entry;
2751         unsigned int size = 0;
2752
2753         entry = ring_buffer_event_data(event);
2754         size = ring_buffer_event_length(event);
2755         export->write(export, entry, size);
2756 }
2757
2758 static DEFINE_MUTEX(ftrace_export_lock);
2759
2760 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
2761
2762 static DEFINE_STATIC_KEY_FALSE(ftrace_exports_enabled);
2763
2764 static inline void ftrace_exports_enable(void)
2765 {
2766         static_branch_enable(&ftrace_exports_enabled);
2767 }
2768
2769 static inline void ftrace_exports_disable(void)
2770 {
2771         static_branch_disable(&ftrace_exports_enabled);
2772 }
2773
2774 static void ftrace_exports(struct ring_buffer_event *event)
2775 {
2776         struct trace_export *export;
2777
2778         preempt_disable_notrace();
2779
2780         export = rcu_dereference_raw_check(ftrace_exports_list);
2781         while (export) {
2782                 trace_process_export(export, event);
2783                 export = rcu_dereference_raw_check(export->next);
2784         }
2785
2786         preempt_enable_notrace();
2787 }
2788
2789 static inline void
2790 add_trace_export(struct trace_export **list, struct trace_export *export)
2791 {
2792         rcu_assign_pointer(export->next, *list);
2793         /*
2794          * We are entering export into the list but another
2795          * CPU might be walking that list. We need to make sure
2796          * the export->next pointer is valid before another CPU sees
2797          * the export pointer included into the list.
2798          */
2799         rcu_assign_pointer(*list, export);
2800 }
2801
2802 static inline int
2803 rm_trace_export(struct trace_export **list, struct trace_export *export)
2804 {
2805         struct trace_export **p;
2806
2807         for (p = list; *p != NULL; p = &(*p)->next)
2808                 if (*p == export)
2809                         break;
2810
2811         if (*p != export)
2812                 return -1;
2813
2814         rcu_assign_pointer(*p, (*p)->next);
2815
2816         return 0;
2817 }
2818
2819 static inline void
2820 add_ftrace_export(struct trace_export **list, struct trace_export *export)
2821 {
2822         if (*list == NULL)
2823                 ftrace_exports_enable();
2824
2825         add_trace_export(list, export);
2826 }
2827
2828 static inline int
2829 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
2830 {
2831         int ret;
2832
2833         ret = rm_trace_export(list, export);
2834         if (*list == NULL)
2835                 ftrace_exports_disable();
2836
2837         return ret;
2838 }
2839
2840 int register_ftrace_export(struct trace_export *export)
2841 {
2842         if (WARN_ON_ONCE(!export->write))
2843                 return -1;
2844
2845         mutex_lock(&ftrace_export_lock);
2846
2847         add_ftrace_export(&ftrace_exports_list, export);
2848
2849         mutex_unlock(&ftrace_export_lock);
2850
2851         return 0;
2852 }
2853 EXPORT_SYMBOL_GPL(register_ftrace_export);
2854
2855 int unregister_ftrace_export(struct trace_export *export)
2856 {
2857         int ret;
2858
2859         mutex_lock(&ftrace_export_lock);
2860
2861         ret = rm_ftrace_export(&ftrace_exports_list, export);
2862
2863         mutex_unlock(&ftrace_export_lock);
2864
2865         return ret;
2866 }
2867 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
2868
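/*
 * A sketch of how a client might mirror function trace events elsewhere
 * with the export hooks above. The my_export names are made up; the
 * ->write() signature matches what trace_process_export() calls:
 *
 *        static void my_export_write(struct trace_export *export,
 *                                    const void *entry, unsigned int size)
 *        {
 *                pr_debug("exporting a %u byte trace entry\n", size);
 *        }
 *
 *        static struct trace_export my_export = {
 *                .write  = my_export_write,
 *        };
 *
 *        register_ftrace_export(&my_export);
 *        ...
 *        unregister_ftrace_export(&my_export);
 */
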
2869 void
2870 trace_function(struct trace_array *tr,
2871                unsigned long ip, unsigned long parent_ip, unsigned long flags,
2872                int pc)
2873 {
2874         struct trace_event_call *call = &event_function;
2875         struct trace_buffer *buffer = tr->array_buffer.buffer;
2876         struct ring_buffer_event *event;
2877         struct ftrace_entry *entry;
2878
2879         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2880                                             flags, pc);
2881         if (!event)
2882                 return;
2883         entry   = ring_buffer_event_data(event);
2884         entry->ip                       = ip;
2885         entry->parent_ip                = parent_ip;
2886
2887         if (!call_filter_check_discard(call, entry, buffer, event)) {
2888                 if (static_branch_unlikely(&ftrace_exports_enabled))
2889                         ftrace_exports(event);
2890                 __buffer_unlock_commit(buffer, event);
2891         }
2892 }
2893
2894 #ifdef CONFIG_STACKTRACE
2895
2896 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2897 #define FTRACE_KSTACK_NESTING   4
2898
2899 #define FTRACE_KSTACK_ENTRIES   (PAGE_SIZE / FTRACE_KSTACK_NESTING)
2900
2901 struct ftrace_stack {
2902         unsigned long           calls[FTRACE_KSTACK_ENTRIES];
2903 };
2904
2906 struct ftrace_stacks {
2907         struct ftrace_stack     stacks[FTRACE_KSTACK_NESTING];
2908 };
2909
2910 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
2911 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2912
2913 static void __ftrace_trace_stack(struct trace_buffer *buffer,
2914                                  unsigned long flags,
2915                                  int skip, int pc, struct pt_regs *regs)
2916 {
2917         struct trace_event_call *call = &event_kernel_stack;
2918         struct ring_buffer_event *event;
2919         unsigned int size, nr_entries;
2920         struct ftrace_stack *fstack;
2921         struct stack_entry *entry;
2922         int stackidx;
2923
2924         /*
2925          * Add one, for this function and the call to stack_trace_save().
2926          * If regs is set, then these functions will not be in the way.
2927          */
2928 #ifndef CONFIG_UNWINDER_ORC
2929         if (!regs)
2930                 skip++;
2931 #endif
2932
2933         /*
2934          * Since events can happen in NMIs there's no safe way to
2935          * share a single per-CPU stack. Instead, each CPU has
2936          * FTRACE_KSTACK_NESTING stacks; we reserve one here, and an
2937          * interrupt or NMI that nests on top of us takes the next one.
2938          */
2939         preempt_disable_notrace();
2940
2941         stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
2942
2943         /* This should never happen. If it does, yell once and skip */
2944         if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
2945                 goto out;
2946
2947         /*
2948          * The above __this_cpu_inc_return() is 'atomic' cpu local. An
2949          * interrupt will either see the value pre increment or post
2950          * increment. If the interrupt happens pre increment it will have
2951          * restored the counter when it returns.  We just need a barrier to
2952          * keep gcc from moving things around.
2953          */
2954         barrier();
2955
2956         fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
2957         size = ARRAY_SIZE(fstack->calls);
2958
2959         if (regs) {
2960                 nr_entries = stack_trace_save_regs(regs, fstack->calls,
2961                                                    size, skip);
2962         } else {
2963                 nr_entries = stack_trace_save(fstack->calls, size, skip);
2964         }
2965
2966         size = nr_entries * sizeof(unsigned long);
2967         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2968                                             sizeof(*entry) + size, flags, pc);
2969         if (!event)
2970                 goto out;
2971         entry = ring_buffer_event_data(event);
2972
2973         memcpy(&entry->caller, fstack->calls, size);
2974         entry->size = nr_entries;
2975
2976         if (!call_filter_check_discard(call, entry, buffer, event))
2977                 __buffer_unlock_commit(buffer, event);
2978
2979  out:
2980         /* Again, don't let gcc optimize things here */
2981         barrier();
2982         __this_cpu_dec(ftrace_stack_reserve);
2983         preempt_enable_notrace();
2984
2985 }
2986
2987 static inline void ftrace_trace_stack(struct trace_array *tr,
2988                                       struct trace_buffer *buffer,
2989                                       unsigned long flags,
2990                                       int skip, int pc, struct pt_regs *regs)
2991 {
2992         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2993                 return;
2994
2995         __ftrace_trace_stack(buffer, flags, skip, pc, regs);
2996 }
2997
2998 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2999                    int pc)
3000 {
3001         struct trace_buffer *buffer = tr->array_buffer.buffer;
3002
3003         if (rcu_is_watching()) {
3004                 __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
3005                 return;
3006         }
3007
3008         /*
3009          * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
3010          * but if the above rcu_is_watching() failed, then the NMI
3011          * triggered someplace critical, and rcu_irq_enter() should
3012          * not be called from NMI.
3013          */
3014         if (unlikely(in_nmi()))
3015                 return;
3016
3017         rcu_irq_enter_irqson();
3018         __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
3019         rcu_irq_exit_irqson();
3020 }
3021
3022 /**
3023  * trace_dump_stack - record a stack back trace in the trace buffer
3024  * @skip: Number of functions to skip (helper handlers)
3025  */
3026 void trace_dump_stack(int skip)
3027 {
3028         unsigned long flags;
3029
3030         if (tracing_disabled || tracing_selftest_running)
3031                 return;
3032
3033         local_save_flags(flags);
3034
3035 #ifndef CONFIG_UNWINDER_ORC
3036         /* Skip 1 to skip this function. */
3037         skip++;
3038 #endif
3039         __ftrace_trace_stack(global_trace.array_buffer.buffer,
3040                              flags, skip, preempt_count(), NULL);
3041 }
3042 EXPORT_SYMBOL_GPL(trace_dump_stack);
3043
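/*
 * A sketch of ad-hoc use while debugging: record a kernel stack trace from
 * the point of interest into the ring buffer. With skip == 0 the trace
 * starts at the caller:
 *
 *        trace_dump_stack(0);
 */
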
3044 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3045 static DEFINE_PER_CPU(int, user_stack_count);
3046
3047 static void
3048 ftrace_trace_userstack(struct trace_buffer *buffer, unsigned long flags, int pc)
3049 {
3050         struct trace_event_call *call = &event_user_stack;
3051         struct ring_buffer_event *event;
3052         struct userstack_entry *entry;
3053
3054         if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
3055                 return;
3056
3057         /*
3058          * NMIs can not handle page faults, even with fixups.
3059          * Saving the user stack can (and often does) fault.
3060          */
3061         if (unlikely(in_nmi()))
3062                 return;
3063
3064         /*
3065          * prevent recursion, since the user stack tracing may
3066          * trigger other kernel events.
3067          */
3068         preempt_disable();
3069         if (__this_cpu_read(user_stack_count))
3070                 goto out;
3071
3072         __this_cpu_inc(user_stack_count);
3073
3074         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3075                                             sizeof(*entry), flags, pc);
3076         if (!event)
3077                 goto out_drop_count;
3078         entry   = ring_buffer_event_data(event);
3079
3080         entry->tgid             = current->tgid;
3081         memset(&entry->caller, 0, sizeof(entry->caller));
3082
3083         stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3084         if (!call_filter_check_discard(call, entry, buffer, event))
3085                 __buffer_unlock_commit(buffer, event);
3086
3087  out_drop_count:
3088         __this_cpu_dec(user_stack_count);
3089  out:
3090         preempt_enable();
3091 }
3092 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3093 static void ftrace_trace_userstack(struct trace_buffer *buffer,
3094                                    unsigned long flags, int pc)
3095 {
3096 }
3097 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3098
3099 #endif /* CONFIG_STACKTRACE */
3100
3101 /* created for use with alloc_percpu */
3102 struct trace_buffer_struct {
3103         int nesting;
3104         char buffer[4][TRACE_BUF_SIZE];
3105 };
3106
3107 static struct trace_buffer_struct *trace_percpu_buffer;
3108
3109 /*
3110  * This allows for lockless recording.  If we're nested too deeply, then
3111  * this returns NULL.
3112  */
3113 static char *get_trace_buf(void)
3114 {
3115         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3116
3117         if (!buffer || buffer->nesting >= 4)
3118                 return NULL;
3119
3120         buffer->nesting++;
3121
3122         /* Interrupts must see nesting incremented before we use the buffer */
3123         barrier();
3124         return &buffer->buffer[buffer->nesting][0];
3125 }
3126
3127 static void put_trace_buf(void)
3128 {
3129         /* Don't let the decrement of nesting leak before this */
3130         barrier();
3131         this_cpu_dec(trace_percpu_buffer->nesting);
3132 }
3133
3134 static int alloc_percpu_trace_buffer(void)
3135 {
3136         struct trace_buffer_struct *buffers;
3137
3138         buffers = alloc_percpu(struct trace_buffer_struct);
3139         if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3140                 return -ENOMEM;
3141
3142         trace_percpu_buffer = buffers;
3143         return 0;
3144 }
3145
3146 static int buffers_allocated;
3147
3148 void trace_printk_init_buffers(void)
3149 {
3150         if (buffers_allocated)
3151                 return;
3152
3153         if (alloc_percpu_trace_buffer())
3154                 return;
3155
3156         /* trace_printk() is for debug use only. Don't use it in production. */
3157
3158         pr_warn("\n");
3159         pr_warn("**********************************************************\n");
3160         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3161         pr_warn("**                                                      **\n");
3162         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3163         pr_warn("**                                                      **\n");
3164         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3165         pr_warn("** unsafe for production use.                           **\n");
3166         pr_warn("**                                                      **\n");
3167         pr_warn("** If you see this message and you are not debugging    **\n");
3168         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3169         pr_warn("**                                                      **\n");
3170         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3171         pr_warn("**********************************************************\n");
3172
3173         /* Expand the buffers to set size */
3174         tracing_update_buffers();
3175
3176         buffers_allocated = 1;
3177
3178         /*
3179          * trace_printk_init_buffers() can be called by modules.
3180          * If that happens, then we need to start cmdline recording
3181          * directly here. If the global_trace.buffer is already
3182          * allocated here, then this was called by module code.
3183          */
3184         if (global_trace.array_buffer.buffer)
3185                 tracing_start_cmdline_record();
3186 }
3187 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3188
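/*
 * A sketch of the kind of debug-only call site the banner above warns
 * about; "state" is a stand-in local variable:
 *
 *        trace_printk("reached %s with state %d\n", __func__, state);
 */
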
3189 void trace_printk_start_comm(void)
3190 {
3191         /* Start tracing comms if trace printk is set */
3192         if (!buffers_allocated)
3193                 return;
3194         tracing_start_cmdline_record();
3195 }
3196
3197 static void trace_printk_start_stop_comm(int enabled)
3198 {
3199         if (!buffers_allocated)
3200                 return;
3201
3202         if (enabled)
3203                 tracing_start_cmdline_record();
3204         else
3205                 tracing_stop_cmdline_record();
3206 }
3207
3208 /**
3209  * trace_vbprintk - write binary msg to tracing buffer
3210  * @ip:    The address of the caller
3211  * @fmt:   The string format to write to the buffer
3212  * @args:  Arguments for @fmt
3213  */
3214 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3215 {
3216         struct trace_event_call *call = &event_bprint;
3217         struct ring_buffer_event *event;
3218         struct trace_buffer *buffer;
3219         struct trace_array *tr = &global_trace;
3220         struct bprint_entry *entry;
3221         unsigned long flags;
3222         char *tbuffer;
3223         int len = 0, size, pc;
3224
3225         if (unlikely(tracing_selftest_running || tracing_disabled))
3226                 return 0;
3227
3228         /* Don't pollute graph traces with trace_vprintk internals */
3229         pause_graph_tracing();
3230
3231         pc = preempt_count();
3232         preempt_disable_notrace();
3233
3234         tbuffer = get_trace_buf();
3235         if (!tbuffer) {
3236                 len = 0;
3237                 goto out_nobuffer;
3238         }
3239
3240         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3241
3242         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3243                 goto out_put;
3244
3245         local_save_flags(flags);
3246         size = sizeof(*entry) + sizeof(u32) * len;
3247         buffer = tr->array_buffer.buffer;
3248         ring_buffer_nest_start(buffer);
3249         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3250                                             flags, pc);
3251         if (!event)
3252                 goto out;
3253         entry = ring_buffer_event_data(event);
3254         entry->ip                       = ip;
3255         entry->fmt                      = fmt;
3256
3257         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3258         if (!call_filter_check_discard(call, entry, buffer, event)) {
3259                 __buffer_unlock_commit(buffer, event);
3260                 ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
3261         }
3262
3263 out:
3264         ring_buffer_nest_end(buffer);
3265 out_put:
3266         put_trace_buf();
3267
3268 out_nobuffer:
3269         preempt_enable_notrace();
3270         unpause_graph_tracing();
3271
3272         return len;
3273 }
3274 EXPORT_SYMBOL_GPL(trace_vbprintk);
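
/*
 * Illustrative note (editor's addition, not part of the original file):
 * trace_vbprintk() is the path taken by trace_printk() when the format
 * string has arguments.  vbin_printf() packs those arguments as 32-bit
 * words, so 'len' above counts u32 words and the reserved event size is
 * sizeof(*entry) + sizeof(u32) * len.  For a hypothetical
 *
 *        trace_printk("x=%d y=%llu\n", x, y);
 *
 * vbin_printf() might return len == 3 (one word for the int, two for the
 * 64-bit value), giving size = sizeof(struct bprint_entry) + 12 bytes.
 * The exact packing is up to vbin_printf(); the point is only that 'len'
 * counts words, not bytes.
 */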
3275
3276 __printf(3, 0)
3277 static int
3278 __trace_array_vprintk(struct trace_buffer *buffer,
3279                       unsigned long ip, const char *fmt, va_list args)
3280 {
3281         struct trace_event_call *call = &event_print;
3282         struct ring_buffer_event *event;
3283         int len = 0, size, pc;
3284         struct print_entry *entry;
3285         unsigned long flags;
3286         char *tbuffer;
3287
3288         if (tracing_disabled || tracing_selftest_running)
3289                 return 0;
3290
3291         /* Don't pollute graph traces with trace_vprintk internals */
3292         pause_graph_tracing();
3293
3294         pc = preempt_count();
3295         preempt_disable_notrace();
3296
3297
3298         tbuffer = get_trace_buf();
3299         if (!tbuffer) {
3300                 len = 0;
3301                 goto out_nobuffer;
3302         }
3303
3304         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3305
3306         local_save_flags(flags);
3307         size = sizeof(*entry) + len + 1;
3308         ring_buffer_nest_start(buffer);
3309         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3310                                             flags, pc);
3311         if (!event)
3312                 goto out;
3313         entry = ring_buffer_event_data(event);
3314         entry->ip = ip;
3315
3316         memcpy(&entry->buf, tbuffer, len + 1);
3317         if (!call_filter_check_discard(call, entry, buffer, event)) {
3318                 __buffer_unlock_commit(buffer, event);
3319                 ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
3320         }
3321
3322 out:
3323         ring_buffer_nest_end(buffer);
3324         put_trace_buf();
3325
3326 out_nobuffer:
3327         preempt_enable_notrace();
3328         unpause_graph_tracing();
3329
3330         return len;
3331 }
3332
3333 __printf(3, 0)
3334 int trace_array_vprintk(struct trace_array *tr,
3335                         unsigned long ip, const char *fmt, va_list args)
3336 {
3337         return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3338 }
3339
3340 __printf(3, 0)
3341 int trace_array_printk(struct trace_array *tr,
3342                        unsigned long ip, const char *fmt, ...)
3343 {
3344         int ret;
3345         va_list ap;
3346
3347         if (!tr)
3348                 return -ENOENT;
3349
3350         /* This is only allowed for created instances */
3351         if (tr == &global_trace)
3352                 return 0;
3353
3354         if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3355                 return 0;
3356
3357         va_start(ap, fmt);
3358         ret = trace_array_vprintk(tr, ip, fmt, ap);
3359         va_end(ap);
3360         return ret;
3361 }
3362 EXPORT_SYMBOL_GPL(trace_array_printk);
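
/*
 * Usage sketch (editor's addition, not part of the original file): a
 * module that owns a trace instance can write into it roughly like this.
 * The instance name is hypothetical and error handling is minimal; this
 * assumes trace_array_get_by_name()/trace_array_put() are available in
 * this tree.
 *
 *        struct trace_array *tr = trace_array_get_by_name("my_instance");
 *
 *        if (tr) {
 *                trace_array_printk(tr, _THIS_IP_, "got event %d\n", val);
 *                trace_array_put(tr);
 *        }
 *
 * Note that trace_array_printk() returns 0 for &global_trace and only
 * writes when the instance has the TRACE_ITER_PRINTK option set, as the
 * checks above show.
 */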
3363
3364 __printf(3, 4)
3365 int trace_array_printk_buf(struct trace_buffer *buffer,
3366                            unsigned long ip, const char *fmt, ...)
3367 {
3368         int ret;
3369         va_list ap;
3370
3371         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3372                 return 0;
3373
3374         va_start(ap, fmt);
3375         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3376         va_end(ap);
3377         return ret;
3378 }
3379
3380 __printf(2, 0)
3381 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3382 {
3383         return trace_array_vprintk(&global_trace, ip, fmt, args);
3384 }
3385 EXPORT_SYMBOL_GPL(trace_vprintk);
3386
3387 static void trace_iterator_increment(struct trace_iterator *iter)
3388 {
3389         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3390
3391         iter->idx++;
3392         if (buf_iter)
3393                 ring_buffer_iter_advance(buf_iter);
3394 }
3395
3396 static struct trace_entry *
3397 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3398                 unsigned long *lost_events)
3399 {
3400         struct ring_buffer_event *event;
3401         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3402
3403         if (buf_iter) {
3404                 event = ring_buffer_iter_peek(buf_iter, ts);
3405                 if (lost_events)
3406                         *lost_events = ring_buffer_iter_dropped(buf_iter) ?
3407                                 (unsigned long)-1 : 0;
3408         } else {
3409                 event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3410                                          lost_events);
3411         }
3412
3413         if (event) {
3414                 iter->ent_size = ring_buffer_event_length(event);
3415                 return ring_buffer_event_data(event);
3416         }
3417         iter->ent_size = 0;
3418         return NULL;
3419 }
3420
3421 static struct trace_entry *
3422 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3423                   unsigned long *missing_events, u64 *ent_ts)
3424 {
3425         struct trace_buffer *buffer = iter->array_buffer->buffer;
3426         struct trace_entry *ent, *next = NULL;
3427         unsigned long lost_events = 0, next_lost = 0;
3428         int cpu_file = iter->cpu_file;
3429         u64 next_ts = 0, ts;
3430         int next_cpu = -1;
3431         int next_size = 0;
3432         int cpu;
3433
3434         /*
3435          * If we are in a per_cpu trace file, don't bother iterating over
3436          * all CPUs; peek at the requested CPU directly.
3437          */
3438         if (cpu_file > RING_BUFFER_ALL_CPUS) {
3439                 if (ring_buffer_empty_cpu(buffer, cpu_file))
3440                         return NULL;
3441                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3442                 if (ent_cpu)
3443                         *ent_cpu = cpu_file;
3444
3445                 return ent;
3446         }
3447
3448         for_each_tracing_cpu(cpu) {
3449
3450                 if (ring_buffer_empty_cpu(buffer, cpu))
3451                         continue;
3452
3453                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3454
3455                 /*
3456                  * Pick the entry with the smallest timestamp:
3457                  */
3458                 if (ent && (!next || ts < next_ts)) {
3459                         next = ent;
3460                         next_cpu = cpu;
3461                         next_ts = ts;
3462                         next_lost = lost_events;
3463                         next_size = iter->ent_size;
3464                 }
3465         }
3466
3467         iter->ent_size = next_size;
3468
3469         if (ent_cpu)
3470                 *ent_cpu = next_cpu;
3471
3472         if (ent_ts)
3473                 *ent_ts = next_ts;
3474
3475         if (missing_events)
3476                 *missing_events = next_lost;
3477
3478         return next;
3479 }
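
/*
 * Worked example (editor's addition, not part of the original file):
 * __find_next_entry() merges the per-CPU buffers by timestamp.  Suppose
 * three CPUs have pending entries with timestamps cpu0: 105, cpu1: 101
 * and cpu2: 103.  The loop above peeks at each CPU, keeps the smallest
 * (cpu1, ts 101) and reports next_cpu = 1, next_ts = 101 along with that
 * CPU's lost-event count.  Repeated calls therefore yield one stream
 * ordered by timestamp across all CPUs.
 */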
3480
3481 #define STATIC_TEMP_BUF_SIZE    128
3482 static char static_temp_buf[STATIC_TEMP_BUF_SIZE];
3483
3484 /* Find the next real entry, without updating the iterator itself */
3485 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3486                                           int *ent_cpu, u64 *ent_ts)
3487 {
3488         /* __find_next_entry will reset ent_size */
3489         int ent_size = iter->ent_size;
3490         struct trace_entry *entry;
3491
3492         /*
3493          * If called from ftrace_dump(), then the iter->temp buffer
3494          * will be the static_temp_buf and not created from kmalloc.
3495          * If the entry size is greater than the buffer, we cannot
3496          * save it. Just return NULL in that case. This is only
3497          * used to add markers when two consecutive events' time
3498          * stamps have a large delta. See trace_print_lat_context().
3499          */
3500         if (iter->temp == static_temp_buf &&
3501             STATIC_TEMP_BUF_SIZE < ent_size)
3502                 return NULL;
3503
3504         /*
3505          * __find_next_entry() may call peek_next_entry(), which in turn may
3506          * call ring_buffer_peek(); that can make the contents of iter->ent
3507          * undefined, so copy iter->ent now.
3508          */
3509         if (iter->ent && iter->ent != iter->temp) {
3510                 if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3511                     !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3512                         kfree(iter->temp);
3513                         iter->temp = kmalloc(iter->ent_size, GFP_KERNEL);
3514                         if (!iter->temp)
3515                                 return NULL;
3516                 }
3517                 memcpy(iter->temp, iter->ent, iter->ent_size);
3518                 iter->temp_size = iter->ent_size;
3519                 iter->ent = iter->temp;
3520         }
3521         entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3522         /* Put back the original ent_size */
3523         iter->ent_size = ent_size;
3524
3525         return entry;
3526 }
3527
3528 /* Find the next real entry, and increment the iterator to the next entry */
3529 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3530 {
3531         iter->ent = __find_next_entry(iter, &iter->cpu,
3532                                       &iter->lost_events, &iter->ts);
3533
3534         if (iter->ent)
3535                 trace_iterator_increment(iter);
3536
3537         return iter->ent ? iter : NULL;
3538 }
3539
3540 static void trace_consume(struct trace_iterator *iter)
3541 {
3542         ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3543                             &iter->lost_events);
3544 }
3545
3546 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3547 {
3548         struct trace_iterator *iter = m->private;
3549         int i = (int)*pos;
3550         void *ent;
3551
3552         WARN_ON_ONCE(iter->leftover);
3553
3554         (*pos)++;
3555
3556         /* can't go backwards */
3557         if (iter->idx > i)
3558                 return NULL;
3559
3560         if (iter->idx < 0)
3561                 ent = trace_find_next_entry_inc(iter);
3562         else
3563                 ent = iter;
3564
3565         while (ent && iter->idx < i)
3566                 ent = trace_find_next_entry_inc(iter);
3567
3568         iter->pos = *pos;
3569
3570         return ent;
3571 }
3572
3573 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3574 {
3575         struct ring_buffer_iter *buf_iter;
3576         unsigned long entries = 0;
3577         u64 ts;
3578
3579         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
3580
3581         buf_iter = trace_buffer_iter(iter, cpu);
3582         if (!buf_iter)
3583                 return;
3584
3585         ring_buffer_iter_reset(buf_iter);
3586
3587         /*
3588          * With the max latency tracers it is possible that a reset never
3589          * took place on a CPU. This is evident when a timestamp is earlier
3590          * than the start of the buffer.
3591          */
3592         while (ring_buffer_iter_peek(buf_iter, &ts)) {
3593                 if (ts >= iter->array_buffer->time_start)
3594                         break;
3595                 entries++;
3596                 ring_buffer_iter_advance(buf_iter);
3597         }
3598
3599         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
3600 }
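
/*
 * Worked example (editor's addition, not part of the original file):
 * if iter->array_buffer->time_start is 1000 and a CPU buffer still holds
 * two stale entries with timestamps 950 and 970 (a reset never reached
 * that CPU), the loop above skips both and records skipped_entries = 2.
 * get_total_entries_cpu() below subtracts that count so the stale entries
 * are not reported in the header.
 */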
3601
3602 /*
3603  * The current tracer is copied to avoid holding a global lock
3604  * all around.
3605  */
3606 static void *s_start(struct seq_file *m, loff_t *pos)
3607 {
3608         struct trace_iterator *iter = m->private;
3609         struct trace_array *tr = iter->tr;
3610         int cpu_file = iter->cpu_file;
3611         void *p = NULL;
3612         loff_t l = 0;
3613         int cpu;
3614
3615         /*
3616          * Copy the tracer to avoid using a global lock all around.
3617          * iter->trace is a copy of current_trace, so a pointer comparison
3618          * on the name may be used instead of strcmp(), as iter->trace->name
3619          * will point to the same string as current_trace->name.
3620          */
3621         mutex_lock(&trace_types_lock);
3622         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3623                 *iter->trace = *tr->current_trace;
3624         mutex_unlock(&trace_types_lock);
3625
3626 #ifdef CONFIG_TRACER_MAX_TRACE
3627         if (iter->snapshot && iter->trace->use_max_tr)
3628                 return ERR_PTR(-EBUSY);
3629 #endif
3630
3631         if (!iter->snapshot)
3632                 atomic_inc(&trace_record_taskinfo_disabled);
3633
3634         if (*pos != iter->pos) {
3635                 iter->ent = NULL;
3636                 iter->cpu = 0;
3637                 iter->idx = -1;
3638
3639                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
3640                         for_each_tracing_cpu(cpu)
3641                                 tracing_iter_reset(iter, cpu);
3642                 } else
3643                         tracing_iter_reset(iter, cpu_file);
3644
3645                 iter->leftover = 0;
3646                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3647                         ;
3648
3649         } else {
3650                 /*
3651                  * If we overflowed the seq_file before, then we want
3652                  * to just reuse the trace_seq buffer again.
3653                  */
3654                 if (iter->leftover)
3655                         p = iter;
3656                 else {
3657                         l = *pos - 1;
3658                         p = s_next(m, p, &l);
3659                 }
3660         }
3661
3662         trace_event_read_lock();
3663         trace_access_lock(cpu_file);
3664         return p;
3665 }
3666
3667 static void s_stop(struct seq_file *m, void *p)
3668 {
3669         struct trace_iterator *iter = m->private;
3670
3671 #ifdef CONFIG_TRACER_MAX_TRACE
3672         if (iter->snapshot && iter->trace->use_max_tr)
3673                 return;
3674 #endif
3675
3676         if (!iter->snapshot)
3677                 atomic_dec(&trace_record_taskinfo_disabled);
3678
3679         trace_access_unlock(iter->cpu_file);
3680         trace_event_read_unlock();
3681 }
3682
3683 static void
3684 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
3685                       unsigned long *entries, int cpu)
3686 {
3687         unsigned long count;
3688
3689         count = ring_buffer_entries_cpu(buf->buffer, cpu);
3690         /*
3691          * If this buffer has skipped entries, then we hold all
3692          * entries for the trace and we need to ignore the
3693          * ones before the buffer's start time stamp.
3694          */
3695         if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3696                 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3697                 /* total is the same as the entries */
3698                 *total = count;
3699         } else
3700                 *total = count +
3701                         ring_buffer_overrun_cpu(buf->buffer, cpu);
3702         *entries = count;
3703 }
3704
3705 static void
3706 get_total_entries(struct array_buffer *buf,
3707                   unsigned long *total, unsigned long *entries)
3708 {
3709         unsigned long t, e;
3710         int cpu;
3711
3712         *total = 0;
3713         *entries = 0;
3714
3715         for_each_tracing_cpu(cpu) {
3716                 get_total_entries_cpu(buf, &t, &e, cpu);
3717                 *total += t;
3718                 *entries += e;
3719         }
3720 }
3721
3722 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
3723 {
3724         unsigned long total, entries;
3725
3726         if (!tr)
3727                 tr = &global_trace;
3728
3729         get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
3730
3731         return entries;
3732 }
3733
3734 unsigned long trace_total_entries(struct trace_array *tr)
3735 {
3736         unsigned long total, entries;
3737
3738         if (!tr)
3739                 tr = &global_trace;
3740
3741         get_total_entries(&tr->array_buffer, &total, &entries);
3742
3743         return entries;
3744 }
3745
3746 static void print_lat_help_header(struct seq_file *m)
3747 {
3748         seq_puts(m, "#                  _------=> CPU#            \n"
3749                     "#                 / _-----=> irqs-off        \n"
3750                     "#                | / _----=> need-resched    \n"
3751                     "#                || / _---=> hardirq/softirq \n"
3752                     "#                ||| / _--=> preempt-depth   \n"
3753                     "#                |||| /     delay            \n"
3754                     "#  cmd     pid   ||||| time  |   caller      \n"
3755                     "#     \\   /      |||||  \\    |   /         \n");
3756 }
3757
3758 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
3759 {
3760         unsigned long total;
3761         unsigned long entries;
3762
3763         get_total_entries(buf, &total, &entries);
3764         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
3765                    entries, total, num_online_cpus());
3766         seq_puts(m, "#\n");
3767 }
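
/*
 * Example (editor's addition, not part of the original file): if
 * get_total_entries() reports entries = 100 and total = 125 (100 events
 * still in the buffer plus an overrun of 25), the line printed above on
 * an 8-CPU machine reads:
 *
 *        # entries-in-buffer/entries-written: 100/125   #P:8
 */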
3768
3769 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
3770                                    unsigned int flags)
3771 {
3772         bool tgid = flags & TRACE_ITER_RECORD_TGID;
3773
3774         print_event_info(buf, m);
3775
3776         seq_printf(m, "#           TASK-PID   %s  CPU#   TIMESTAMP  FUNCTION\n", tgid ? "TGID     " : "");
3777         seq_printf(m, "#              | |     %s    |       |         |\n",      tgid ? "  |      " : "");
3778 }
3779
3780 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
3781                                        unsigned int flags)
3782 {
3783         bool tgid = flags & TRACE_ITER_RECORD_TGID;
3784         const char *space = "          ";
3785         int prec = tgid ? 10 : 2;
3786
3787         print_event_info(buf, m);
3788
3789         seq_printf(m, "#                          %.*s  _-----=> irqs-off\n", prec, space);
3790         seq_printf(m, "#                          %.*s / _----=> need-resched\n", prec, space);
3791         seq_printf(m, "#                          %.*s| / _---=> hardirq/softirq\n", prec, space);
3792         seq_printf(m, "#                          %.*s|| / _--=> preempt-depth\n", prec, space);
3793         seq_printf(m, "#                          %.*s||| /     delay\n", prec, space);
3794         seq_printf(m, "#           TASK-PID %.*sCPU#  ||||    TIMESTAMP  FUNCTION\n", prec, "   TGID   ");
3795         seq_printf(m, "#              | |   %.*s  |   ||||       |         |\n", prec, "     |    ");
3796 }
3797
3798 void
3799 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3800 {
3801         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3802         struct array_buffer *buf = iter->array_buffer;
3803         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3804         struct tracer *type = iter->trace;
3805         unsigned long entries;
3806         unsigned long total;
3807         const char *name;
3808
3809         name = type->name;
3810
3811         get_total_entries(buf, &total, &entries);
3812
3813         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3814                    name, UTS_RELEASE);
3815         seq_puts(m, "# -----------------------------------"
3816                  "---------------------------------\n");
3817         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3818                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3819                    nsecs_to_usecs(data->saved_latency),
3820                    entries,
3821                    total,
3822                    buf->cpu,
3823 #if defined(CONFIG_PREEMPT_NONE)
3824                    "server",
3825 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3826                    "desktop",
3827 #elif defined(CONFIG_PREEMPT)
3828                    "preempt",
3829 #elif defined(CONFIG_PREEMPT_RT)
3830                    "preempt_rt",
3831 #else
3832                    "unknown",
3833 #endif
3834                    /* These are reserved for later use */
3835                    0, 0, 0, 0);
3836 #ifdef CONFIG_SMP
3837         seq_printf(m, " #P:%d)\n", num_online_cpus());
3838 #else
3839         seq_puts(m, ")\n");
3840 #endif
3841         seq_puts(m, "#    -----------------\n");
3842         seq_printf(m, "#    | task: %.16s-%d "
3843                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3844                    data->comm, data->pid,
3845                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3846                    data->policy, data->rt_priority);
3847         seq_puts(m, "#    -----------------\n");
3848
3849         if (data->critical_start) {
3850                 seq_puts(m, "#  => started at: ");
3851                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3852                 trace_print_seq(m, &iter->seq);
3853                 seq_puts(m, "\n#  => ended at:   ");
3854                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3855                 trace_print_seq(m, &iter->seq);
3856                 seq_puts(m, "\n#\n");
3857         }
3858
3859         seq_puts(m, "#\n");
3860 }
3861
3862 static void test_cpu_buff_start(struct trace_iterator *iter)
3863 {
3864         struct trace_seq *s = &iter->seq;
3865         struct trace_array *tr = iter->tr;
3866
3867         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3868                 return;
3869
3870         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3871                 return;
3872
3873         if (cpumask_available(iter->started) &&
3874             cpumask_test_cpu(iter->cpu, iter->started))
3875                 return;
3876
3877         if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
3878                 return;
3879
3880         if (cpumask_available(iter->started))
3881                 cpumask_set_cpu(iter->cpu, iter->started);
3882
3883         /* Don't print the "CPU buffer started" message for the first entry of the trace */
3884         if (iter->idx > 1)
3885                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3886                                 iter->cpu);
3887 }
3888
3889 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3890 {
3891         struct trace_array *tr = iter->tr;
3892         struct trace_seq *s = &iter->seq;
3893         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3894         struct trace_entry *entry;
3895         struct trace_event *event;
3896
3897         entry = iter->ent;
3898
3899         test_cpu_buff_start(iter);
3900
3901         event = ftrace_find_event(entry->type);
3902
3903         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3904                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3905                         trace_print_lat_context(iter);
3906                 else
3907                         trace_print_context(iter);
3908         }
3909
3910         if (trace_seq_has_overflowed(s))
3911                 return TRACE_TYPE_PARTIAL_LINE;
3912
3913         if (event)
3914                 return event->funcs->trace(iter, sym_flags, event);
3915
3916         trace_seq_printf(s, "Unknown type %d\n", entry->type);
3917
3918         return trace_handle_return(s);
3919 }
3920
3921 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3922 {
3923         struct trace_array *tr = iter->tr;
3924         struct trace_seq *s = &iter->seq;
3925         struct trace_entry *entry;
3926         struct trace_event *event;
3927
3928         entry = iter->ent;
3929
3930         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3931                 trace_seq_printf(s, "%d %d %llu ",
3932                                  entry->pid, iter->cpu, iter->ts);
3933
3934         if (trace_seq_has_overflowed(s))
3935                 return TRACE_TYPE_PARTIAL_LINE;
3936
3937         event = ftrace_find_event(entry->type);
3938         if (event)
3939                 return event->funcs->raw(iter, 0, event);
3940
3941         trace_seq_printf(s, "%d ?\n", entry->type);
3942
3943         return trace_handle_return(s);
3944 }
3945
3946 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3947 {
3948         struct trace_array *tr = iter->tr;
3949         struct trace_seq *s = &iter->seq;
3950         unsigned char newline = '\n';
3951         struct trace_entry *entry;
3952         struct trace_event *event;
3953
3954         entry = iter->ent;
3955
3956         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3957                 SEQ_PUT_HEX_FIELD(s, entry->pid);
3958                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
3959                 SEQ_PUT_HEX_FIELD(s, iter->ts);
3960                 if (trace_seq_has_overflowed(s))
3961                         return TRACE_TYPE_PARTIAL_LINE;
3962         }
3963
3964         event = ftrace_find_event(entry->type);
3965         if (event) {
3966                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
3967                 if (ret != TRACE_TYPE_HANDLED)
3968                         return ret;
3969         }
3970
3971         SEQ_PUT_FIELD(s, newline);
3972
3973         return trace_handle_return(s);
3974 }
3975
3976 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3977 {
3978         struct trace_array *tr = iter->tr;
3979         struct trace_seq *s = &iter->seq;
3980         struct trace_entry *entry;
3981         struct trace_event *event;
3982
3983         entry = iter->ent;
3984
3985         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3986                 SEQ_PUT_FIELD(s, entry->pid);
3987                 SEQ_PUT_FIELD(s, iter->cpu);
3988                 SEQ_PUT_FIELD(s, iter->ts);
3989                 if (trace_seq_has_overflowed(s))
3990                         return TRACE_TYPE_PARTIAL_LINE;
3991         }
3992
3993         event = ftrace_find_event(entry->type);
3994         return event ? event->funcs->binary(iter, 0, event) :
3995                 TRACE_TYPE_HANDLED;
3996 }
3997
3998 int trace_empty(struct trace_iterator *iter)
3999 {
4000         struct ring_buffer_iter *buf_iter;
4001         int cpu;
4002
4003         /* If we are looking at one CPU buffer, only check that one */
4004         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4005                 cpu = iter->cpu_file;
4006                 buf_iter = trace_buffer_iter(iter, cpu);
4007                 if (buf_iter) {
4008                         if (!ring_buffer_iter_empty(buf_iter))
4009                                 return 0;
4010                 } else {
4011                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4012                                 return 0;
4013                 }
4014                 return 1;
4015         }
4016
4017         for_each_tracing_cpu(cpu) {
4018                 buf_iter = trace_buffer_iter(iter, cpu);
4019                 if (buf_iter) {
4020                         if (!ring_buffer_iter_empty(buf_iter))
4021                                 return 0;
4022                 } else {
4023                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4024                                 return 0;
4025                 }
4026         }
4027
4028         return 1;
4029 }
4030
4031 /*  Called with trace_event_read_lock() held. */
4032 enum print_line_t print_trace_line(struct trace_iterator *iter)
4033 {
4034         struct trace_array *tr = iter->tr;
4035         unsigned long trace_flags = tr->trace_flags;
4036         enum print_line_t ret;
4037
4038         if (iter->lost_events) {
4039                 if (iter->lost_events == (unsigned long)-1)
4040                         trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4041                                          iter->cpu);
4042                 else
4043                         trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4044                                          iter->cpu, iter->lost_events);
4045                 if (trace_seq_has_overflowed(&iter->seq))
4046                         return TRACE_TYPE_PARTIAL_LINE;
4047         }
4048
4049         if (iter->trace && iter->trace->print_line) {
4050                 ret = iter->trace->print_line(iter);
4051                 if (ret != TRACE_TYPE_UNHANDLED)
4052                         return ret;
4053         }
4054
4055         if (iter->ent->type == TRACE_BPUTS &&
4056                         trace_flags & TRACE_ITER_PRINTK &&
4057                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4058                 return trace_print_bputs_msg_only(iter);
4059
4060         if (iter->ent->type == TRACE_BPRINT &&
4061                         trace_flags & TRACE_ITER_PRINTK &&
4062                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4063                 return trace_print_bprintk_msg_only(iter);
4064
4065         if (iter->ent->type == TRACE_PRINT &&
4066                         trace_flags & TRACE_ITER_PRINTK &&
4067                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4068                 return trace_print_printk_msg_only(iter);
4069
4070         if (trace_flags & TRACE_ITER_BIN)
4071                 return print_bin_fmt(iter);
4072
4073         if (trace_flags & TRACE_ITER_HEX)
4074                 return print_hex_fmt(iter);
4075
4076         if (trace_flags & TRACE_ITER_RAW)
4077                 return print_raw_fmt(iter);
4078
4079         return print_trace_fmt(iter);
4080 }
4081
4082 void trace_latency_header(struct seq_file *m)
4083 {
4084         struct trace_iterator *iter = m->private;
4085         struct trace_array *tr = iter->tr;
4086
4087         /* print nothing if the buffers are empty */
4088         if (trace_empty(iter))
4089                 return;
4090
4091         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4092                 print_trace_header(m, iter);
4093
4094         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4095                 print_lat_help_header(m);
4096 }
4097
4098 void trace_default_header(struct seq_file *m)
4099 {
4100         struct trace_iterator *iter = m->private;
4101         struct trace_array *tr = iter->tr;
4102         unsigned long trace_flags = tr->trace_flags;
4103
4104         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4105                 return;
4106
4107         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4108                 /* print nothing if the buffers are empty */
4109                 if (trace_empty(iter))
4110                         return;
4111                 print_trace_header(m, iter);
4112                 if (!(trace_flags & TRACE_ITER_VERBOSE))
4113                         print_lat_help_header(m);
4114         } else {
4115                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4116                         if (trace_flags & TRACE_ITER_IRQ_INFO)
4117                                 print_func_help_header_irq(iter->array_buffer,
4118                                                            m, trace_flags);
4119                         else
4120                                 print_func_help_header(iter->array_buffer, m,
4121                                                        trace_flags);
4122                 }
4123         }
4124 }
4125
4126 static void test_ftrace_alive(struct seq_file *m)
4127 {
4128         if (!ftrace_is_dead())
4129                 return;
4130         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4131                     "#          MAY BE MISSING FUNCTION EVENTS\n");
4132 }
4133
4134 #ifdef CONFIG_TRACER_MAX_TRACE
4135 static void show_snapshot_main_help(struct seq_file *m)
4136 {
4137         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4138                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4139                     "#                      Takes a snapshot of the main buffer.\n"
4140                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4141                     "#                      (Doesn't have to be '2'; works with any number that\n"
4142                     "#                       is not a '0' or '1')\n");
4143 }
4144
4145 static void show_snapshot_percpu_help(struct seq_file *m)
4146 {
4147         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4148 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4149         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4150                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
4151 #else
4152         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4153                     "#                     Must use main snapshot file to allocate.\n");
4154 #endif
4155         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4156                     "#                      (Doesn't have to be '2'; works with any number that\n"
4157                     "#                       is not a '0' or '1')\n");
4158 }
4159
4160 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4161 {
4162         if (iter->tr->allocated_snapshot)
4163                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4164         else
4165                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4166
4167         seq_puts(m, "# Snapshot commands:\n");
4168         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4169                 show_snapshot_main_help(m);
4170         else
4171                 show_snapshot_percpu_help(m);
4172 }
4173 #else
4174 /* Should never be called */
4175 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4176 #endif
4177
4178 static int s_show(struct seq_file *m, void *v)
4179 {
4180         struct trace_iterator *iter = v;
4181         int ret;
4182
4183         if (iter->ent == NULL) {
4184                 if (iter->tr) {
4185                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
4186                         seq_puts(m, "#\n");
4187                         test_ftrace_alive(m);
4188                 }
4189                 if (iter->snapshot && trace_empty(iter))
4190                         print_snapshot_help(m, iter);
4191                 else if (iter->trace && iter->trace->print_header)
4192                         iter->trace->print_header(m);
4193                 else
4194                         trace_default_header(m);
4195
4196         } else if (iter->leftover) {
4197                 /*
4198                  * If we filled the seq_file buffer earlier, we
4199                  * want to just show it now.
4200                  */
4201                 ret = trace_print_seq(m, &iter->seq);
4202
4203                 /* ret should this time be zero, but you never know */
4204                 iter->leftover = ret;
4205
4206         } else {
4207                 print_trace_line(iter);
4208                 ret = trace_print_seq(m, &iter->seq);
4209                 /*
4210                  * If we overflow the seq_file buffer, then it will
4211                  * ask us for this data again at start up.
4212                  * Use that instead.
4213                  *  ret is 0 if seq_file write succeeded.
4214                  *        -1 otherwise.
4215                  */
4216                 iter->leftover = ret;
4217         }
4218
4219         return 0;
4220 }
4221
4222 /*
4223  * Should be used after trace_array_get(); trace_types_lock
4224  * ensures that i_cdev was already initialized.
4225  */
4226 static inline int tracing_get_cpu(struct inode *inode)
4227 {
4228         if (inode->i_cdev) /* See trace_create_cpu_file() */
4229                 return (long)inode->i_cdev - 1;
4230         return RING_BUFFER_ALL_CPUS;
4231 }
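
/*
 * Example (editor's addition, not part of the original file):
 * trace_create_cpu_file() stores cpu + 1 in i_cdev so that a NULL i_cdev
 * can mean "no specific CPU".  Opening per_cpu/cpu2/trace thus sees
 * i_cdev == (void *)3 and this helper returns 2, while the top-level
 * trace file (i_cdev unset) maps to RING_BUFFER_ALL_CPUS.
 */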
4232
4233 static const struct seq_operations tracer_seq_ops = {
4234         .start          = s_start,
4235         .next           = s_next,
4236         .stop           = s_stop,
4237         .show           = s_show,
4238 };
4239
4240 static struct trace_iterator *
4241 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4242 {
4243         struct trace_array *tr = inode->i_private;
4244         struct trace_iterator *iter;
4245         int cpu;
4246
4247         if (tracing_disabled)
4248                 return ERR_PTR(-ENODEV);
4249
4250         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4251         if (!iter)
4252                 return ERR_PTR(-ENOMEM);
4253
4254         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4255                                     GFP_KERNEL);
4256         if (!iter->buffer_iter)
4257                 goto release;
4258
4259         /*
4260          * trace_find_next_entry() may need to save off iter->ent.
4261          * It will place it into the iter->temp buffer. As most
4262          * events are less than 128 bytes, allocate a buffer of that size.
4263          * If one is greater, then trace_find_next_entry() will
4264          * allocate a new buffer to adjust for the bigger iter->ent.
4265          * It's not critical if it fails to get allocated here.
4266          */
4267         iter->temp = kmalloc(128, GFP_KERNEL);
4268         if (iter->temp)
4269                 iter->temp_size = 128;
4270
4271         /*
4272          * We make a copy of the current tracer to avoid concurrent
4273          * changes on it while we are reading.
4274          */
4275         mutex_lock(&trace_types_lock);
4276         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4277         if (!iter->trace)
4278                 goto fail;
4279
4280         *iter->trace = *tr->current_trace;
4281
4282         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4283                 goto fail;
4284
4285         iter->tr = tr;
4286
4287 #ifdef CONFIG_TRACER_MAX_TRACE
4288         /* Currently only the top directory has a snapshot */
4289         if (tr->current_trace->print_max || snapshot)
4290                 iter->array_buffer = &tr->max_buffer;
4291         else
4292 #endif
4293                 iter->array_buffer = &tr->array_buffer;
4294         iter->snapshot = snapshot;
4295         iter->pos = -1;
4296         iter->cpu_file = tracing_get_cpu(inode);
4297         mutex_init(&iter->mutex);
4298
4299         /* Notify the tracer early, before we stop tracing. */
4300         if (iter->trace->open)
4301                 iter->trace->open(iter);
4302
4303         /* Annotate start of buffers if we had overruns */
4304         if (ring_buffer_overruns(iter->array_buffer->buffer))
4305                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4306
4307         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4308         if (trace_clocks[tr->clock_id].in_ns)
4309                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4310
4311         /*
4312          * If pause-on-trace is enabled, then stop the trace while
4313          * dumping, unless this is the "snapshot" file
4314          */
4315         if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4316                 tracing_stop_tr(tr);
4317
4318         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4319                 for_each_tracing_cpu(cpu) {
4320                         iter->buffer_iter[cpu] =
4321                                 ring_buffer_read_prepare(iter->array_buffer->buffer,
4322                                                          cpu, GFP_KERNEL);
4323                 }
4324                 ring_buffer_read_prepare_sync();
4325                 for_each_tracing_cpu(cpu) {
4326                         ring_buffer_read_start(iter->buffer_iter[cpu]);
4327                         tracing_iter_reset(iter, cpu);
4328                 }
4329         } else {
4330                 cpu = iter->cpu_file;
4331                 iter->buffer_iter[cpu] =
4332                         ring_buffer_read_prepare(iter->array_buffer->buffer,
4333                                                  cpu, GFP_KERNEL);
4334                 ring_buffer_read_prepare_sync();
4335                 ring_buffer_read_start(iter->buffer_iter[cpu]);
4336                 tracing_iter_reset(iter, cpu);
4337         }
4338
4339         mutex_unlock(&trace_types_lock);
4340
4341         return iter;
4342
4343  fail:
4344         mutex_unlock(&trace_types_lock);
4345         kfree(iter->trace);
4346         kfree(iter->temp);
4347         kfree(iter->buffer_iter);
4348 release:
4349         seq_release_private(inode, file);
4350         return ERR_PTR(-ENOMEM);
4351 }
4352
4353 int tracing_open_generic(struct inode *inode, struct file *filp)
4354 {
4355         int ret;
4356
4357         ret = tracing_check_open_get_tr(NULL);
4358         if (ret)
4359                 return ret;
4360
4361         filp->private_data = inode->i_private;
4362         return 0;
4363 }
4364
4365 bool tracing_is_disabled(void)
4366 {
4367         return (tracing_disabled) ? true : false;
4368 }
4369
4370 /*
4371  * Open and update trace_array ref count.
4372  * Must have the current trace_array passed to it.
4373  */
4374 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4375 {
4376         struct trace_array *tr = inode->i_private;
4377         int ret;
4378
4379         ret = tracing_check_open_get_tr(tr);
4380         if (ret)
4381                 return ret;
4382
4383         filp->private_data = inode->i_private;
4384
4385         return 0;
4386 }
4387
4388 static int tracing_release(struct inode *inode, struct file *file)
4389 {
4390         struct trace_array *tr = inode->i_private;
4391         struct seq_file *m = file->private_data;
4392         struct trace_iterator *iter;
4393         int cpu;
4394
4395         if (!(file->f_mode & FMODE_READ)) {
4396                 trace_array_put(tr);
4397                 return 0;
4398         }
4399
4400         /* Writes do not use seq_file */
4401         iter = m->private;
4402         mutex_lock(&trace_types_lock);
4403
4404         for_each_tracing_cpu(cpu) {
4405                 if (iter->buffer_iter[cpu])
4406                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
4407         }
4408
4409         if (iter->trace && iter->trace->close)
4410                 iter->trace->close(iter);
4411
4412         if (!iter->snapshot && tr->stop_count)
4413                 /* reenable tracing if it was previously enabled */
4414                 tracing_start_tr(tr);
4415
4416         __trace_array_put(tr);
4417
4418         mutex_unlock(&trace_types_lock);
4419
4420         mutex_destroy(&iter->mutex);
4421         free_cpumask_var(iter->started);
4422         kfree(iter->temp);
4423         kfree(iter->trace);
4424         kfree(iter->buffer_iter);
4425         seq_release_private(inode, file);
4426
4427         return 0;
4428 }
4429
4430 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4431 {
4432         struct trace_array *tr = inode->i_private;
4433
4434         trace_array_put(tr);
4435         return 0;
4436 }
4437
4438 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4439 {
4440         struct trace_array *tr = inode->i_private;
4441
4442         trace_array_put(tr);
4443
4444         return single_release(inode, file);
4445 }
4446
4447 static int tracing_open(struct inode *inode, struct file *file)
4448 {
4449         struct trace_array *tr = inode->i_private;
4450         struct trace_iterator *iter;
4451         int ret;
4452
4453         ret = tracing_check_open_get_tr(tr);
4454         if (ret)
4455                 return ret;
4456
4457         /* If this file was open for write, then erase contents */
4458         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4459                 int cpu = tracing_get_cpu(inode);
4460                 struct array_buffer *trace_buf = &tr->array_buffer;
4461
4462 #ifdef CONFIG_TRACER_MAX_TRACE
4463                 if (tr->current_trace->print_max)
4464                         trace_buf = &tr->max_buffer;
4465 #endif
4466
4467                 if (cpu == RING_BUFFER_ALL_CPUS)
4468                         tracing_reset_online_cpus(trace_buf);
4469                 else
4470                         tracing_reset_cpu(trace_buf, cpu);
4471         }
4472
4473         if (file->f_mode & FMODE_READ) {
4474                 iter = __tracing_open(inode, file, false);
4475                 if (IS_ERR(iter))
4476                         ret = PTR_ERR(iter);
4477                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4478                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
4479         }
4480
4481         if (ret < 0)
4482                 trace_array_put(tr);
4483
4484         return ret;
4485 }
4486
4487 /*
4488  * Some tracers are not suitable for instance buffers.
4489  * A tracer is always available for the global array (toplevel),
4490  * or for an instance if it explicitly allows it (allow_instances).
4491  */
4492 static bool
4493 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4494 {
4495         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4496 }
4497
4498 /* Find the next tracer that this trace array may use */
4499 static struct tracer *
4500 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4501 {
4502         while (t && !trace_ok_for_array(t, tr))
4503                 t = t->next;
4504
4505         return t;
4506 }
4507
4508 static void *
4509 t_next(struct seq_file *m, void *v, loff_t *pos)
4510 {
4511         struct trace_array *tr = m->private;
4512         struct tracer *t = v;
4513
4514         (*pos)++;
4515
4516         if (t)
4517                 t = get_tracer_for_array(tr, t->next);
4518
4519         return t;
4520 }
4521
4522 static void *t_start(struct seq_file *m, loff_t *pos)
4523 {
4524         struct trace_array *tr = m->private;
4525         struct tracer *t;
4526         loff_t l = 0;
4527
4528         mutex_lock(&trace_types_lock);
4529
4530         t = get_tracer_for_array(tr, trace_types);
4531         for (; t && l < *pos; t = t_next(m, t, &l))
4532                         ;
4533
4534         return t;
4535 }
4536
4537 static void t_stop(struct seq_file *m, void *p)
4538 {
4539         mutex_unlock(&trace_types_lock);
4540 }
4541
4542 static int t_show(struct seq_file *m, void *v)
4543 {
4544         struct tracer *t = v;
4545
4546         if (!t)
4547                 return 0;
4548
4549         seq_puts(m, t->name);
4550         if (t->next)
4551                 seq_putc(m, ' ');
4552         else
4553                 seq_putc(m, '\n');
4554
4555         return 0;
4556 }
4557
4558 static const struct seq_operations show_traces_seq_ops = {
4559         .start          = t_start,
4560         .next           = t_next,
4561         .stop           = t_stop,
4562         .show           = t_show,
4563 };
4564
4565 static int show_traces_open(struct inode *inode, struct file *file)
4566 {
4567         struct trace_array *tr = inode->i_private;
4568         struct seq_file *m;
4569         int ret;
4570
4571         ret = tracing_check_open_get_tr(tr);
4572         if (ret)
4573                 return ret;
4574
4575         ret = seq_open(file, &show_traces_seq_ops);
4576         if (ret) {
4577                 trace_array_put(tr);
4578                 return ret;
4579         }
4580
4581         m = file->private_data;
4582         m->private = tr;
4583
4584         return 0;
4585 }
4586
4587 static int show_traces_release(struct inode *inode, struct file *file)
4588 {
4589         struct trace_array *tr = inode->i_private;
4590
4591         trace_array_put(tr);
4592         return seq_release(inode, file);
4593 }
4594
4595 static ssize_t
4596 tracing_write_stub(struct file *filp, const char __user *ubuf,
4597                    size_t count, loff_t *ppos)
4598 {
4599         return count;
4600 }
4601
4602 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4603 {
4604         int ret;
4605
4606         if (file->f_mode & FMODE_READ)
4607                 ret = seq_lseek(file, offset, whence);
4608         else
4609                 file->f_pos = ret = 0;
4610
4611         return ret;
4612 }
4613
4614 static const struct file_operations tracing_fops = {
4615         .open           = tracing_open,
4616         .read           = seq_read,
4617         .write          = tracing_write_stub,
4618         .llseek         = tracing_lseek,
4619         .release        = tracing_release,
4620 };
4621
4622 static const struct file_operations show_traces_fops = {
4623         .open           = show_traces_open,
4624         .read           = seq_read,
4625         .llseek         = seq_lseek,
4626         .release        = show_traces_release,
4627 };
4628
4629 static ssize_t
4630 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4631                      size_t count, loff_t *ppos)
4632 {
4633         struct trace_array *tr = file_inode(filp)->i_private;
4634         char *mask_str;
4635         int len;
4636
4637         len = snprintf(NULL, 0, "%*pb\n",
4638                        cpumask_pr_args(tr->tracing_cpumask)) + 1;
4639         mask_str = kmalloc(len, GFP_KERNEL);
4640         if (!mask_str)
4641                 return -ENOMEM;
4642
4643         len = snprintf(mask_str, len, "%*pb\n",
4644                        cpumask_pr_args(tr->tracing_cpumask));
4645         if (len >= count) {
4646                 count = -EINVAL;
4647                 goto out_err;
4648         }
4649         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
4650
4651 out_err:
4652         kfree(mask_str);
4653
4654         return count;
4655 }
4656
4657 int tracing_set_cpumask(struct trace_array *tr,
4658                         cpumask_var_t tracing_cpumask_new)
4659 {
4660         int cpu;
4661
4662         if (!tr)
4663                 return -EINVAL;
4664
4665         local_irq_disable();
4666         arch_spin_lock(&tr->max_lock);
4667         for_each_tracing_cpu(cpu) {
4668                 /*
4669                  * Increase/decrease the disabled counter if we are
4670                  * about to flip a bit in the cpumask:
4671                  */
4672                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4673                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4674                         atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
4675                         ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
4676                 }
4677                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4678                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4679                         atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
4680                         ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
4681                 }
4682         }
4683         arch_spin_unlock(&tr->max_lock);
4684         local_irq_enable();
4685
4686         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4687
4688         return 0;
4689 }
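
/*
 * Usage sketch (editor's addition, not part of the original file):
 * tracing_set_cpumask() backs writes to the tracing_cpumask file.  To
 * limit tracing to CPUs 0-1 (assuming tracefs is mounted at
 * /sys/kernel/tracing):
 *
 *        # echo 3 > /sys/kernel/tracing/tracing_cpumask
 *
 * Reading the file back shows the hex mask (its width depends on the
 * number of possible CPUs).  CPUs whose bit is cleared have recording in
 * their per-CPU buffer disabled via ring_buffer_record_disable_cpu()
 * above.
 */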
4690
4691 static ssize_t
4692 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4693                       size_t count, loff_t *ppos)
4694 {
4695         struct trace_array *tr = file_inode(filp)->i_private;
4696         cpumask_var_t tracing_cpumask_new;
4697         int err;
4698
4699         if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4700                 return -ENOMEM;
4701
4702         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4703         if (err)
4704                 goto err_free;
4705
4706         err = tracing_set_cpumask(tr, tracing_cpumask_new);
4707         if (err)
4708                 goto err_free;
4709
4710         free_cpumask_var(tracing_cpumask_new);
4711
4712         return count;
4713
4714 err_free:
4715         free_cpumask_var(tracing_cpumask_new);
4716
4717         return err;
4718 }
4719
4720 static const struct file_operations tracing_cpumask_fops = {
4721         .open           = tracing_open_generic_tr,
4722         .read           = tracing_cpumask_read,
4723         .write          = tracing_cpumask_write,
4724         .release        = tracing_release_generic_tr,
4725         .llseek         = generic_file_llseek,
4726 };
4727
4728 static int tracing_trace_options_show(struct seq_file *m, void *v)
4729 {
4730         struct tracer_opt *trace_opts;
4731         struct trace_array *tr = m->private;
4732         u32 tracer_flags;
4733         int i;
4734
4735         mutex_lock(&trace_types_lock);
4736         tracer_flags = tr->current_trace->flags->val;
4737         trace_opts = tr->current_trace->flags->opts;
4738
4739         for (i = 0; trace_options[i]; i++) {
4740                 if (tr->trace_flags & (1 << i))
4741                         seq_printf(m, "%s\n", trace_options[i]);
4742                 else
4743                         seq_printf(m, "no%s\n", trace_options[i]);
4744         }
4745
4746         for (i = 0; trace_opts[i].name; i++) {
4747                 if (tracer_flags & trace_opts[i].bit)
4748                         seq_printf(m, "%s\n", trace_opts[i].name);
4749                 else
4750                         seq_printf(m, "no%s\n", trace_opts[i].name);
4751         }
4752         mutex_unlock(&trace_types_lock);
4753
4754         return 0;
4755 }
4756
4757 static int __set_tracer_option(struct trace_array *tr,
4758                                struct tracer_flags *tracer_flags,
4759                                struct tracer_opt *opts, int neg)
4760 {
4761         struct tracer *trace = tracer_flags->trace;
4762         int ret;
4763
4764         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4765         if (ret)
4766                 return ret;
4767
4768         if (neg)
4769                 tracer_flags->val &= ~opts->bit;
4770         else
4771                 tracer_flags->val |= opts->bit;
4772         return 0;
4773 }
4774
4775 /* Try to assign a tracer specific option */
4776 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4777 {
4778         struct tracer *trace = tr->current_trace;
4779         struct tracer_flags *tracer_flags = trace->flags;
4780         struct tracer_opt *opts = NULL;
4781         int i;
4782
4783         for (i = 0; tracer_flags->opts[i].name; i++) {
4784                 opts = &tracer_flags->opts[i];
4785
4786                 if (strcmp(cmp, opts->name) == 0)
4787                         return __set_tracer_option(tr, trace->flags, opts, neg);
4788         }
4789
4790         return -EINVAL;
4791 }
4792
4793 /* Some tracers require overwrite to stay enabled */
4794 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4795 {
4796         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4797                 return -1;
4798
4799         return 0;
4800 }
4801
4802 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4803 {
4804         if ((mask == TRACE_ITER_RECORD_TGID) ||
4805             (mask == TRACE_ITER_RECORD_CMD))
4806                 lockdep_assert_held(&event_mutex);
4807
4808         /* do nothing if flag is already set */
4809         if (!!(tr->trace_flags & mask) == !!enabled)
4810                 return 0;
4811
4812         /* Give the tracer a chance to approve the change */
4813         if (tr->current_trace->flag_changed)
4814                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4815                         return -EINVAL;
4816
4817         if (enabled)
4818                 tr->trace_flags |= mask;
4819         else
4820                 tr->trace_flags &= ~mask;
4821
4822         if (mask == TRACE_ITER_RECORD_CMD)
4823                 trace_event_enable_cmd_record(enabled);
4824
4825         if (mask == TRACE_ITER_RECORD_TGID) {
4826                 if (!tgid_map)
4827                         tgid_map = kvcalloc(PID_MAX_DEFAULT + 1,
4828                                            sizeof(*tgid_map),
4829                                            GFP_KERNEL);
4830                 if (!tgid_map) {
4831                         tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
4832                         return -ENOMEM;
4833                 }
4834
4835                 trace_event_enable_tgid_record(enabled);
4836         }
4837
4838         if (mask == TRACE_ITER_EVENT_FORK)
4839                 trace_event_follow_fork(tr, enabled);
4840
4841         if (mask == TRACE_ITER_FUNC_FORK)
4842                 ftrace_pid_follow_fork(tr, enabled);
4843
4844         if (mask == TRACE_ITER_OVERWRITE) {
4845                 ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
4846 #ifdef CONFIG_TRACER_MAX_TRACE
4847                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4848 #endif
4849         }
4850
4851         if (mask == TRACE_ITER_PRINTK) {
4852                 trace_printk_start_stop_comm(enabled);
4853                 trace_printk_control(enabled);
4854         }
4855
4856         return 0;
4857 }
4858
4859 int trace_set_options(struct trace_array *tr, char *option)
4860 {
4861         char *cmp;
4862         int neg = 0;
4863         int ret;
4864         size_t orig_len = strlen(option);
4865         int len;
4866
4867         cmp = strstrip(option);
4868
4869         len = str_has_prefix(cmp, "no");
4870         if (len)
4871                 neg = 1;
4872
4873         cmp += len;
4874
4875         mutex_lock(&event_mutex);
4876         mutex_lock(&trace_types_lock);
4877
4878         ret = match_string(trace_options, -1, cmp);
4879         /* If no option could be set, test the specific tracer options */
4880         if (ret < 0)
4881                 ret = set_tracer_option(tr, cmp, neg);
4882         else
4883                 ret = set_tracer_flag(tr, 1 << ret, !neg);
4884
4885         mutex_unlock(&trace_types_lock);
4886         mutex_unlock(&event_mutex);
4887
4888         /*
4889          * If the first trailing whitespace is replaced with '\0' by strstrip,
4890          * turn it back into a space.
4891          */
4892         if (orig_len > strlen(option))
4893                 option[strlen(option)] = ' ';
4894
4895         return ret;
4896 }
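
/*
 * Example (illustrative; the option name and tracefs path are examples,
 * not taken from this file): trace_set_options() above is what the
 * trace_options file feeds, so
 *
 *   # echo nooverwrite > /sys/kernel/tracing/trace_options
 *   # echo overwrite   > /sys/kernel/tracing/trace_options
 *
 * first clears and then sets TRACE_ITER_OVERWRITE.  The "no" prefix is
 * stripped with str_has_prefix(), the remaining name is resolved to a
 * bit index via match_string(), and set_tracer_flag(tr, 1 << bit, !neg)
 * applies it; names that are not generic options fall through to the
 * current tracer's own flags via set_tracer_option().
 */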
4897
4898 static void __init apply_trace_boot_options(void)
4899 {
4900         char *buf = trace_boot_options_buf;
4901         char *option;
4902
4903         while (true) {
4904                 option = strsep(&buf, ",");
4905
4906                 if (!option)
4907                         break;
4908
4909                 if (*option)
4910                         trace_set_options(&global_trace, option);
4911
4912                 /* Put back the comma to allow this to be called again */
4913                 if (buf)
4914                         *(buf - 1) = ',';
4915         }
4916 }
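
/*
 * Example (illustrative; the specific option names are examples only):
 * the buffer walked above is filled from the "trace_options=" kernel
 * command line parameter, e.g.
 *
 *   trace_options=nooverwrite,sym-addr
 *
 * strsep() terminates each entry with '\0' as it walks the list, and the
 * comma is restored afterwards so the saved boot string can be parsed
 * again later.
 */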
4917
4918 static ssize_t
4919 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4920                         size_t cnt, loff_t *ppos)
4921 {
4922         struct seq_file *m = filp->private_data;
4923         struct trace_array *tr = m->private;
4924         char buf[64];
4925         int ret;
4926
4927         if (cnt >= sizeof(buf))
4928                 return -EINVAL;
4929
4930         if (copy_from_user(buf, ubuf, cnt))
4931                 return -EFAULT;
4932
4933         buf[cnt] = 0;
4934
4935         ret = trace_set_options(tr, buf);
4936         if (ret < 0)
4937                 return ret;
4938
4939         *ppos += cnt;
4940
4941         return cnt;
4942 }
4943
4944 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4945 {
4946         struct trace_array *tr = inode->i_private;
4947         int ret;
4948
4949         ret = tracing_check_open_get_tr(tr);
4950         if (ret)
4951                 return ret;
4952
4953         ret = single_open(file, tracing_trace_options_show, inode->i_private);
4954         if (ret < 0)
4955                 trace_array_put(tr);
4956
4957         return ret;
4958 }
4959
4960 static const struct file_operations tracing_iter_fops = {
4961         .open           = tracing_trace_options_open,
4962         .read           = seq_read,
4963         .llseek         = seq_lseek,
4964         .release        = tracing_single_release_tr,
4965         .write          = tracing_trace_options_write,
4966 };
4967
4968 static const char readme_msg[] =
4969         "tracing mini-HOWTO:\n\n"
4970         "# echo 0 > tracing_on : quick way to disable tracing\n"
4971         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4972         " Important files:\n"
4973         "  trace\t\t\t- The static contents of the buffer\n"
4974         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
4975         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4976         "  current_tracer\t- function and latency tracers\n"
4977         "  available_tracers\t- list of configured tracers for current_tracer\n"
4978         "  error_log\t- error log for failed commands (that support it)\n"
4979         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
4980         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
4981         "  trace_clock\t\t- change the clock used to order events\n"
4982         "       local:   Per cpu clock but may not be synced across CPUs\n"
4983         "      global:   Synced across CPUs but slows tracing down.\n"
4984         "     counter:   Not a clock, but just an increment\n"
4985         "      uptime:   Jiffy counter from time of boot\n"
4986         "        perf:   Same clock that perf events use\n"
4987 #ifdef CONFIG_X86_64
4988         "     x86-tsc:   TSC cycle counter\n"
4989 #endif
4990         "\n  timestamp_mode\t- view the mode used to timestamp events\n"
4991         "       delta:   Delta difference against a buffer-wide timestamp\n"
4992         "    absolute:   Absolute (standalone) timestamp\n"
4993         "\n  trace_marker\t\t- Writes into this file are inserted into the kernel buffer\n"
4994         "\n  trace_marker_raw\t\t- Writes into this file insert binary data into the kernel buffer\n"
4995         "  tracing_cpumask\t- Limit which CPUs to trace\n"
4996         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4997         "\t\t\t  Remove sub-buffer with rmdir\n"
4998         "  trace_options\t\t- Set format or modify how tracing happens\n"
4999         "\t\t\t  Disable an option by prefixing 'no' to the\n"
5000         "\t\t\t  option name\n"
5001         "  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5002 #ifdef CONFIG_DYNAMIC_FTRACE
5003         "\n  available_filter_functions - list of functions that can be filtered on\n"
5004         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
5005         "\t\t\t  functions\n"
5006         "\t     accepts: func_full_name or glob-matching-pattern\n"
5007         "\t     modules: Can select a group via module\n"
5008         "\t      Format: :mod:<module-name>\n"
5009         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5010         "\t    triggers: a command to perform when function is hit\n"
5011         "\t      Format: <function>:<trigger>[:count]\n"
5012         "\t     trigger: traceon, traceoff\n"
5013         "\t\t      enable_event:<system>:<event>\n"
5014         "\t\t      disable_event:<system>:<event>\n"
5015 #ifdef CONFIG_STACKTRACE
5016         "\t\t      stacktrace\n"
5017 #endif
5018 #ifdef CONFIG_TRACER_SNAPSHOT
5019         "\t\t      snapshot\n"
5020 #endif
5021         "\t\t      dump\n"
5022         "\t\t      cpudump\n"
5023         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5024         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5025         "\t     The first one will disable tracing every time do_fault is hit\n"
5026         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5027         "\t       The first time do_trap is hit and it disables tracing, the\n"
5028         "\t       counter will decrement to 2. If tracing is already disabled,\n"
5029         "\t       the counter will not decrement. It only decrements when the\n"
5030         "\t       trigger did work\n"
5031         "\t     To remove trigger without count:\n"
5032         "\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
5033         "\t     To remove trigger with a count:\n"
5034         "\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
5035         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5036         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5037         "\t    modules: Can select a group via module command :mod:\n"
5038         "\t    Does not accept triggers\n"
5039 #endif /* CONFIG_DYNAMIC_FTRACE */
5040 #ifdef CONFIG_FUNCTION_TRACER
5041         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5042         "\t\t    (function)\n"
5043         "  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5044         "\t\t    (function)\n"
5045 #endif
5046 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5047         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5048         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5049         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5050 #endif
5051 #ifdef CONFIG_TRACER_SNAPSHOT
5052         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5053         "\t\t\t  snapshot buffer. Read the contents for more\n"
5054         "\t\t\t  information\n"
5055 #endif
5056 #ifdef CONFIG_STACK_TRACER
5057         "  stack_trace\t\t- Shows the max stack trace when active\n"
5058         "  stack_max_size\t- Shows current max stack size that was traced\n"
5059         "\t\t\t  Write into this file to reset the max size (trigger a\n"
5060         "\t\t\t  new trace)\n"
5061 #ifdef CONFIG_DYNAMIC_FTRACE
5062         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5063         "\t\t\t  traces\n"
5064 #endif
5065 #endif /* CONFIG_STACK_TRACER */
5066 #ifdef CONFIG_DYNAMIC_EVENTS
5067         "  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5068         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5069 #endif
5070 #ifdef CONFIG_KPROBE_EVENTS
5071         "  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5072         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5073 #endif
5074 #ifdef CONFIG_UPROBE_EVENTS
5075         "  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5076         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5077 #endif
5078 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5079         "\t  accepts: event-definitions (one definition per line)\n"
5080         "\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
5081         "\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
5082 #ifdef CONFIG_HIST_TRIGGERS
5083         "\t           s:[synthetic/]<event> <field> [<field>]\n"
5084 #endif
5085         "\t           -:[<group>/]<event>\n"
5086 #ifdef CONFIG_KPROBE_EVENTS
5087         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5088         "place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5089 #endif
5090 #ifdef CONFIG_UPROBE_EVENTS
5091         "   place (uprobe): <path>:<offset>[(ref_ctr_offset)]\n"
5092 #endif
5093         "\t     args: <name>=fetcharg[:type]\n"
5094         "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
5095 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5096         "\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5097 #else
5098         "\t           $stack<index>, $stack, $retval, $comm,\n"
5099 #endif
5100         "\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5101         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
5102         "\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5103         "\t           <type>\\[<array-size>\\]\n"
5104 #ifdef CONFIG_HIST_TRIGGERS
5105         "\t    field: <stype> <name>;\n"
5106         "\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5107         "\t           [unsigned] char/int/long\n"
5108 #endif
5109 #endif
5110         "  events/\t\t- Directory containing all trace event subsystems:\n"
5111         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5112         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
5113         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5114         "\t\t\t  events\n"
5115         "      filter\t\t- If set, only events passing filter are traced\n"
5116         "  events/<system>/<event>/\t- Directory containing control files for\n"
5117         "\t\t\t  <event>:\n"
5118         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5119         "      filter\t\t- If set, only events passing filter are traced\n"
5120         "      trigger\t\t- If set, a command to perform when event is hit\n"
5121         "\t    Format: <trigger>[:count][if <filter>]\n"
5122         "\t   trigger: traceon, traceoff\n"
5123         "\t            enable_event:<system>:<event>\n"
5124         "\t            disable_event:<system>:<event>\n"
5125 #ifdef CONFIG_HIST_TRIGGERS
5126         "\t            enable_hist:<system>:<event>\n"
5127         "\t            disable_hist:<system>:<event>\n"
5128 #endif
5129 #ifdef CONFIG_STACKTRACE
5130         "\t\t    stacktrace\n"
5131 #endif
5132 #ifdef CONFIG_TRACER_SNAPSHOT
5133         "\t\t    snapshot\n"
5134 #endif
5135 #ifdef CONFIG_HIST_TRIGGERS
5136         "\t\t    hist (see below)\n"
5137 #endif
5138         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5139         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5140         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5141         "\t                  events/block/block_unplug/trigger\n"
5142         "\t   The first disables tracing every time block_unplug is hit.\n"
5143         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5144         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5145         "\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5146         "\t   Like function triggers, the counter is only decremented if it\n"
5147         "\t    enabled or disabled tracing.\n"
5148         "\t   To remove a trigger without a count:\n"
5149         "\t     echo '!<trigger>' > <system>/<event>/trigger\n"
5150         "\t   To remove a trigger with a count:\n"
5151         "\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
5152         "\t   Filters can be ignored when removing a trigger.\n"
5153 #ifdef CONFIG_HIST_TRIGGERS
5154         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5155         "\t    Format: hist:keys=<field1[,field2,...]>\n"
5156         "\t            [:values=<field1[,field2,...]>]\n"
5157         "\t            [:sort=<field1[,field2,...]>]\n"
5158         "\t            [:size=#entries]\n"
5159         "\t            [:pause][:continue][:clear]\n"
5160         "\t            [:name=histname1]\n"
5161         "\t            [:<handler>.<action>]\n"
5162         "\t            [if <filter>]\n\n"
5163         "\t    When a matching event is hit, an entry is added to a hash\n"
5164         "\t    table using the key(s) and value(s) named, and the value of a\n"
5165         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
5166         "\t    correspond to fields in the event's format description.  Keys\n"
5167         "\t    can be any field, or the special string 'stacktrace'.\n"
5168         "\t    Compound keys consisting of up to two fields can be specified\n"
5169         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5170         "\t    fields.  Sort keys consisting of up to two fields can be\n"
5171         "\t    specified using the 'sort' keyword.  The sort direction can\n"
5172         "\t    be modified by appending '.descending' or '.ascending' to a\n"
5173         "\t    sort field.  The 'size' parameter can be used to specify more\n"
5174         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
5175         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
5176         "\t    its histogram data will be shared with other triggers of the\n"
5177         "\t    same name, and trigger hits will update this common data.\n\n"
5178         "\t    Reading the 'hist' file for the event will dump the hash\n"
5179         "\t    table in its entirety to stdout.  If there are multiple hist\n"
5180         "\t    triggers attached to an event, there will be a table for each\n"
5181         "\t    trigger in the output.  The table displayed for a named\n"
5182         "\t    trigger will be the same as any other instance having the\n"
5183         "\t    same name.  The default format used to display a given field\n"
5184         "\t    can be modified by appending any of the following modifiers\n"
5185         "\t    to the field name, as applicable:\n\n"
5186         "\t            .hex        display a number as a hex value\n"
5187         "\t            .sym        display an address as a symbol\n"
5188         "\t            .sym-offset display an address as a symbol and offset\n"
5189         "\t            .execname   display a common_pid as a program name\n"
5190         "\t            .syscall    display a syscall id as a syscall name\n"
5191         "\t            .log2       display log2 value rather than raw number\n"
5192         "\t            .usecs      display a common_timestamp in microseconds\n\n"
5193         "\t    The 'pause' parameter can be used to pause an existing hist\n"
5194         "\t    trigger or to start a hist trigger but not log any events\n"
5195         "\t    until told to do so.  'continue' can be used to start or\n"
5196         "\t    restart a paused hist trigger.\n\n"
5197         "\t    The 'clear' parameter will clear the contents of a running\n"
5198         "\t    hist trigger and leave its current paused/active state\n"
5199         "\t    unchanged.\n\n"
5200         "\t    The enable_hist and disable_hist triggers can be used to\n"
5201         "\t    have one event conditionally start and stop another event's\n"
5202         "\t    already-attached hist trigger.  The syntax is analogous to\n"
5203         "\t    the enable_event and disable_event triggers.\n\n"
5204         "\t    Hist trigger handlers and actions are executed whenever a\n"
5205         "\t    histogram entry is added or updated.  They take the form:\n\n"
5206         "\t        <handler>.<action>\n\n"
5207         "\t    The available handlers are:\n\n"
5208         "\t        onmatch(matching.event)  - invoke on addition or update\n"
5209         "\t        onmax(var)               - invoke if var exceeds current max\n"
5210         "\t        onchange(var)            - invoke action if var changes\n\n"
5211         "\t    The available actions are:\n\n"
5212         "\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5213         "\t        save(field,...)                      - save current event fields\n"
5214 #ifdef CONFIG_TRACER_SNAPSHOT
5215         "\t        snapshot()                           - snapshot the trace buffer\n"
5216 #endif
5217 #endif
5218 ;
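
/*
 * Worked example tying the hist-trigger syntax documented above together
 * (illustrative only; the event, its fields and the tracefs path are
 * assumptions, not taken from this file):
 *
 *   # cd /sys/kernel/tracing
 *   # echo 'hist:keys=common_pid.execname:values=bytes_req:sort=bytes_req.descending' \
 *         > events/kmem/kmalloc/trigger
 *   # cat events/kmem/kmalloc/hist
 *
 * This aggregates kmalloc request sizes per task and dumps the resulting
 * hash table sorted by bytes_req, as described in the hist section of
 * readme_msg.
 */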
5219
5220 static ssize_t
5221 tracing_readme_read(struct file *filp, char __user *ubuf,
5222                        size_t cnt, loff_t *ppos)
5223 {
5224         return simple_read_from_buffer(ubuf, cnt, ppos,
5225                                         readme_msg, strlen(readme_msg));
5226 }
5227
5228 static const struct file_operations tracing_readme_fops = {
5229         .open           = tracing_open_generic,
5230         .read           = tracing_readme_read,
5231         .llseek         = generic_file_llseek,
5232 };
5233
5234 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5235 {
5236         int *ptr = v;
5237
5238         if (*pos || m->count)
5239                 ptr++;
5240
5241         (*pos)++;
5242
5243         for (; ptr <= &tgid_map[PID_MAX_DEFAULT]; ptr++) {
5244                 if (trace_find_tgid(*ptr))
5245                         return ptr;
5246         }
5247
5248         return NULL;
5249 }
5250
5251 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5252 {
5253         void *v;
5254         loff_t l = 0;
5255
5256         if (!tgid_map)
5257                 return NULL;
5258
5259         v = &tgid_map[0];
5260         while (l <= *pos) {
5261                 v = saved_tgids_next(m, v, &l);
5262                 if (!v)
5263                         return NULL;
5264         }
5265
5266         return v;
5267 }
5268
5269 static void saved_tgids_stop(struct seq_file *m, void *v)
5270 {
5271 }
5272
5273 static int saved_tgids_show(struct seq_file *m, void *v)
5274 {
5275         int pid = (int *)v - tgid_map;
5276
5277         seq_printf(m, "%d %d\n", pid, trace_find_tgid(pid));
5278         return 0;
5279 }
5280
5281 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5282         .start          = saved_tgids_start,
5283         .stop           = saved_tgids_stop,
5284         .next           = saved_tgids_next,
5285         .show           = saved_tgids_show,
5286 };
5287
5288 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5289 {
5290         int ret;
5291
5292         ret = tracing_check_open_get_tr(NULL);
5293         if (ret)
5294                 return ret;
5295
5296         return seq_open(filp, &tracing_saved_tgids_seq_ops);
5297 }
5298
5299
5300 static const struct file_operations tracing_saved_tgids_fops = {
5301         .open           = tracing_saved_tgids_open,
5302         .read           = seq_read,
5303         .llseek         = seq_lseek,
5304         .release        = seq_release,
5305 };
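
/*
 * Example output sketch (illustrative; the pid/tgid values are made up):
 * with the record-tgid option enabled, reading saved_tgids lists one
 * "<pid> <tgid>" pair per recorded task, matching saved_tgids_show():
 *
 *   # echo 1 > options/record-tgid
 *   # cat saved_tgids
 *   1201 1201
 *   1207 1201
 */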
5306
5307 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5308 {
5309         unsigned int *ptr = v;
5310
5311         if (*pos || m->count)
5312                 ptr++;
5313
5314         (*pos)++;
5315
5316         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5317              ptr++) {
5318                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5319                         continue;
5320
5321                 return ptr;
5322         }
5323
5324         return NULL;
5325 }
5326
5327 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5328 {
5329         void *v;
5330         loff_t l = 0;
5331
5332         preempt_disable();
5333         arch_spin_lock(&trace_cmdline_lock);
5334
5335         v = &savedcmd->map_cmdline_to_pid[0];
5336         while (l <= *pos) {
5337                 v = saved_cmdlines_next(m, v, &l);
5338                 if (!v)
5339                         return NULL;
5340         }
5341
5342         return v;
5343 }
5344
5345 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5346 {
5347         arch_spin_unlock(&trace_cmdline_lock);
5348         preempt_enable();
5349 }
5350
5351 static int saved_cmdlines_show(struct seq_file *m, void *v)
5352 {
5353         char buf[TASK_COMM_LEN];
5354         unsigned int *pid = v;
5355
5356         __trace_find_cmdline(*pid, buf);
5357         seq_printf(m, "%d %s\n", *pid, buf);
5358         return 0;
5359 }
5360
5361 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5362         .start          = saved_cmdlines_start,
5363         .next           = saved_cmdlines_next,
5364         .stop           = saved_cmdlines_stop,
5365         .show           = saved_cmdlines_show,
5366 };
5367
5368 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5369 {
5370         int ret;
5371
5372         ret = tracing_check_open_get_tr(NULL);
5373         if (ret)
5374                 return ret;
5375
5376         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5377 }
5378
5379 static const struct file_operations tracing_saved_cmdlines_fops = {
5380         .open           = tracing_saved_cmdlines_open,
5381         .read           = seq_read,
5382         .llseek         = seq_lseek,
5383         .release        = seq_release,
5384 };
5385
5386 static ssize_t
5387 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5388                                  size_t cnt, loff_t *ppos)
5389 {
5390         char buf[64];
5391         int r;
5392
5393         arch_spin_lock(&trace_cmdline_lock);
5394         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5395         arch_spin_unlock(&trace_cmdline_lock);
5396
5397         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5398 }
5399
5400 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5401 {
5402         kfree(s->saved_cmdlines);
5403         kfree(s->map_cmdline_to_pid);
5404         kfree(s);
5405 }
5406
5407 static int tracing_resize_saved_cmdlines(unsigned int val)
5408 {
5409         struct saved_cmdlines_buffer *s, *savedcmd_temp;
5410
5411         s = kmalloc(sizeof(*s), GFP_KERNEL);
5412         if (!s)
5413                 return -ENOMEM;
5414
5415         if (allocate_cmdlines_buffer(val, s) < 0) {
5416                 kfree(s);
5417                 return -ENOMEM;
5418         }
5419
5420         arch_spin_lock(&trace_cmdline_lock);
5421         savedcmd_temp = savedcmd;
5422         savedcmd = s;
5423         arch_spin_unlock(&trace_cmdline_lock);
5424         free_saved_cmdlines_buffer(savedcmd_temp);
5425
5426         return 0;
5427 }
5428
5429 static ssize_t
5430 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5431                                   size_t cnt, loff_t *ppos)
5432 {
5433         unsigned long val;
5434         int ret;
5435
5436         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5437         if (ret)
5438                 return ret;
5439
5440         /* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
5441         if (!val || val > PID_MAX_DEFAULT)
5442                 return -EINVAL;
5443
5444         ret = tracing_resize_saved_cmdlines((unsigned int)val);
5445         if (ret < 0)
5446                 return ret;
5447
5448         *ppos += cnt;
5449
5450         return cnt;
5451 }
5452
5453 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5454         .open           = tracing_open_generic,
5455         .read           = tracing_saved_cmdlines_size_read,
5456         .write          = tracing_saved_cmdlines_size_write,
5457 };
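
/*
 * Usage sketch (illustrative; the default of 128 entries is stated as an
 * assumption, not taken from this file):
 *
 *   # cat saved_cmdlines_size
 *   128
 *   # echo 1024 > saved_cmdlines_size
 *
 * tracing_resize_saved_cmdlines() allocates a fresh saved_cmdlines_buffer
 * for the requested number of entries and swaps it in under
 * trace_cmdline_lock, freeing the old buffer only after the swap, so
 * readers never see a half-updated table.
 */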
5458
5459 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5460 static union trace_eval_map_item *
5461 update_eval_map(union trace_eval_map_item *ptr)
5462 {
5463         if (!ptr->map.eval_string) {
5464                 if (ptr->tail.next) {
5465                         ptr = ptr->tail.next;
5466                         /* Set ptr to the next real item (skip head) */
5467                         ptr++;
5468                 } else
5469                         return NULL;
5470         }
5471         return ptr;
5472 }
5473
5474 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5475 {
5476         union trace_eval_map_item *ptr = v;
5477
5478         /*
5479          * Paranoid! If ptr points to end, we don't want to increment past it.
5480          * This really should never happen.
5481          */
5482         (*pos)++;
5483         ptr = update_eval_map(ptr);
5484         if (WARN_ON_ONCE(!ptr))
5485                 return NULL;
5486
5487         ptr++;
5488         ptr = update_eval_map(ptr);
5489
5490         return ptr;
5491 }
5492
5493 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5494 {
5495         union trace_eval_map_item *v;
5496         loff_t l = 0;
5497
5498         mutex_lock(&trace_eval_mutex);
5499
5500         v = trace_eval_maps;
5501         if (v)
5502                 v++;
5503
5504         while (v && l < *pos) {
5505                 v = eval_map_next(m, v, &l);
5506         }
5507
5508         return v;
5509 }
5510
5511 static void eval_map_stop(struct seq_file *m, void *v)
5512 {
5513         mutex_unlock(&trace_eval_mutex);
5514 }
5515
5516 static int eval_map_show(struct seq_file *m, void *v)
5517 {
5518         union trace_eval_map_item *ptr = v;
5519
5520         seq_printf(m, "%s %ld (%s)\n",
5521                    ptr->map.eval_string, ptr->map.eval_value,
5522                    ptr->map.system);
5523
5524         return 0;
5525 }
5526
5527 static const struct seq_operations tracing_eval_map_seq_ops = {
5528         .start          = eval_map_start,
5529         .next           = eval_map_next,
5530         .stop           = eval_map_stop,
5531         .show           = eval_map_show,
5532 };
5533
5534 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5535 {
5536         int ret;
5537
5538         ret = tracing_check_open_get_tr(NULL);
5539         if (ret)
5540                 return ret;
5541
5542         return seq_open(filp, &tracing_eval_map_seq_ops);
5543 }
5544
5545 static const struct file_operations tracing_eval_map_fops = {
5546         .open           = tracing_eval_map_open,
5547         .read           = seq_read,
5548         .llseek         = seq_lseek,
5549         .release        = seq_release,
5550 };
5551
5552 static inline union trace_eval_map_item *
5553 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5554 {
5555         /* Return tail of array given the head */
5556         return ptr + ptr->head.length + 1;
5557 }
5558
5559 static void
5560 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5561                            int len)
5562 {
5563         struct trace_eval_map **stop;
5564         struct trace_eval_map **map;
5565         union trace_eval_map_item *map_array;
5566         union trace_eval_map_item *ptr;
5567
5568         stop = start + len;
5569
5570         /*
5571          * The trace_eval_maps contains the map plus a head and tail item,
5572          * where the head holds the module and length of array, and the
5573          * tail holds a pointer to the next list.
5574          */
5575         map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5576         if (!map_array) {
5577                 pr_warn("Unable to allocate trace eval mapping\n");
5578                 return;
5579         }
5580
5581         mutex_lock(&trace_eval_mutex);
5582
5583         if (!trace_eval_maps)
5584                 trace_eval_maps = map_array;
5585         else {
5586                 ptr = trace_eval_maps;
5587                 for (;;) {
5588                         ptr = trace_eval_jmp_to_tail(ptr);
5589                         if (!ptr->tail.next)
5590                                 break;
5591                         ptr = ptr->tail.next;
5593                 }
5594                 ptr->tail.next = map_array;
5595         }
5596         map_array->head.mod = mod;
5597         map_array->head.length = len;
5598         map_array++;
5599
5600         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5601                 map_array->map = **map;
5602                 map_array++;
5603         }
5604         memset(map_array, 0, sizeof(*map_array));
5605
5606         mutex_unlock(&trace_eval_mutex);
5607 }
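
/*
 * Layout sketch of one block appended by trace_insert_eval_map_file()
 * above (len == 3 shown, purely illustrative):
 *
 *   map_array[0]  head  { .mod = mod, .length = 3 }
 *   map_array[1]  map   { eval_string, eval_value, system }
 *   map_array[2]  map   { ... }
 *   map_array[3]  map   { ... }
 *   map_array[4]  tail  { .next = NULL }   <- zeroed by the memset above
 *
 * trace_eval_jmp_to_tail() jumps from the head to the tail item with
 * ptr + length + 1, and the tail's .next is later pointed at the next
 * module's block when more maps are inserted.
 */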
5608
5609 static void trace_create_eval_file(struct dentry *d_tracer)
5610 {
5611         trace_create_file("eval_map", 0444, d_tracer,
5612                           NULL, &tracing_eval_map_fops);
5613 }
5614
5615 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5616 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5617 static inline void trace_insert_eval_map_file(struct module *mod,
5618                               struct trace_eval_map **start, int len) { }
5619 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5620
5621 static void trace_insert_eval_map(struct module *mod,
5622                                   struct trace_eval_map **start, int len)
5623 {
5624         struct trace_eval_map **map;
5625
5626         if (len <= 0)
5627                 return;
5628
5629         map = start;
5630
5631         trace_event_eval_update(map, len);
5632
5633         trace_insert_eval_map_file(mod, start, len);
5634 }
5635
5636 static ssize_t
5637 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5638                        size_t cnt, loff_t *ppos)
5639 {
5640         struct trace_array *tr = filp->private_data;
5641         char buf[MAX_TRACER_SIZE+2];
5642         int r;
5643
5644         mutex_lock(&trace_types_lock);
5645         r = sprintf(buf, "%s\n", tr->current_trace->name);
5646         mutex_unlock(&trace_types_lock);
5647
5648         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5649 }
5650
5651 int tracer_init(struct tracer *t, struct trace_array *tr)
5652 {
5653         tracing_reset_online_cpus(&tr->array_buffer);
5654         return t->init(tr);
5655 }
5656
5657 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
5658 {
5659         int cpu;
5660
5661         for_each_tracing_cpu(cpu)
5662                 per_cpu_ptr(buf->data, cpu)->entries = val;
5663 }
5664
5665 #ifdef CONFIG_TRACER_MAX_TRACE
5666 /* resize @trace_buf's buffer to the size of @size_buf's entries */
5667 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
5668                                         struct array_buffer *size_buf, int cpu_id)
5669 {
5670         int cpu, ret = 0;
5671
5672         if (cpu_id == RING_BUFFER_ALL_CPUS) {
5673                 for_each_tracing_cpu(cpu) {
5674                         ret = ring_buffer_resize(trace_buf->buffer,
5675                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5676                         if (ret < 0)
5677                                 break;
5678                         per_cpu_ptr(trace_buf->data, cpu)->entries =
5679                                 per_cpu_ptr(size_buf->data, cpu)->entries;
5680                 }
5681         } else {
5682                 ret = ring_buffer_resize(trace_buf->buffer,
5683                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5684                 if (ret == 0)
5685                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5686                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
5687         }
5688
5689         return ret;
5690 }
5691 #endif /* CONFIG_TRACER_MAX_TRACE */
5692
5693 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5694                                         unsigned long size, int cpu)
5695 {
5696         int ret;
5697
5698         /*
5699          * If kernel or user changes the size of the ring buffer
5700          * we use the size that was given, and we can forget about
5701          * expanding it later.
5702          */
5703         ring_buffer_expanded = true;
5704
5705         /* May be called before buffers are initialized */
5706         if (!tr->array_buffer.buffer)
5707                 return 0;
5708
5709         ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
5710         if (ret < 0)
5711                 return ret;
5712
5713 #ifdef CONFIG_TRACER_MAX_TRACE
5714         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
5715             !tr->current_trace->use_max_tr)
5716                 goto out;
5717
5718         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5719         if (ret < 0) {
5720                 int r = resize_buffer_duplicate_size(&tr->array_buffer,
5721                                                      &tr->array_buffer, cpu);
5722                 if (r < 0) {
5723                         /*
5724                          * AARGH! We are left with different
5725                          * size max buffer!!!!
5726                          * The max buffer is our "snapshot" buffer.
5727                          * When a tracer needs a snapshot (one of the
5728                          * latency tracers), it swaps the max buffer
5729                          * with the saved snap shot. We succeeded to
5730                          * with the saved snapshot. We succeeded in
5731                          * updating the size of the main buffer, but failed to
5732                          * to reset the main buffer to the original size, we
5733                          * failed there too. This is very unlikely to
5734                          * happen, but if it does, warn and kill all
5735                          * tracing.
5736                          */
5737                         WARN_ON(1);
5738                         tracing_disabled = 1;
5739                 }
5740                 return ret;
5741         }
5742
5743         if (cpu == RING_BUFFER_ALL_CPUS)
5744                 set_buffer_entries(&tr->max_buffer, size);
5745         else
5746                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
5747
5748  out:
5749 #endif /* CONFIG_TRACER_MAX_TRACE */
5750
5751         if (cpu == RING_BUFFER_ALL_CPUS)
5752                 set_buffer_entries(&tr->array_buffer, size);
5753         else
5754                 per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size;
5755
5756         return ret;
5757 }
5758
5759 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5760                                   unsigned long size, int cpu_id)
5761 {
5762         int ret = size;
5763
5764         mutex_lock(&trace_types_lock);
5765
5766         if (cpu_id != RING_BUFFER_ALL_CPUS) {
5767                 /* make sure this cpu is enabled in the mask */
5768                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
5769                         ret = -EINVAL;
5770                         goto out;
5771                 }
5772         }
5773
5774         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5775         if (ret < 0)
5776                 ret = -ENOMEM;
5777
5778 out:
5779         mutex_unlock(&trace_types_lock);
5780
5781         return ret;
5782 }
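
/*
 * Usage sketch (illustrative; the paths assume the usual tracefs layout
 * and are not taken from this file): this resize path backs the
 * buffer_size_kb files,
 *
 *   # echo 4096 > buffer_size_kb                # every CPU
 *   # echo 1024 > per_cpu/cpu0/buffer_size_kb   # a single CPU
 *
 * where the whole-buffer variant passes RING_BUFFER_ALL_CPUS and the
 * per-CPU variant passes that CPU's id as cpu_id.
 */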
5783
5784
5785 /**
5786  * tracing_update_buffers - used by tracing facility to expand ring buffers
5787  *
5788  * To save on memory when tracing is never used on a system with it
5789  * configured in, the ring buffers are set to a minimum size. But once
5790  * a user starts to use the tracing facility, then they need to grow
5791  * to their default size.
5792  *
5793  * This function is to be called when a tracer is about to be used.
5794  */
5795 int tracing_update_buffers(void)
5796 {
5797         int ret = 0;
5798
5799         mutex_lock(&trace_types_lock);
5800         if (!ring_buffer_expanded)
5801                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
5802                                                 RING_BUFFER_ALL_CPUS);
5803         mutex_unlock(&trace_types_lock);
5804
5805         return ret;
5806 }
5807
5808 struct trace_option_dentry;
5809
5810 static void
5811 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
5812
5813 /*
5814  * Used to clear out the tracer before deletion of an instance.
5815  * Must have trace_types_lock held.
5816  */
5817 static void tracing_set_nop(struct trace_array *tr)
5818 {
5819         if (tr->current_trace == &nop_trace)
5820                 return;
5821
5822         tr->current_trace->enabled--;
5823
5824         if (tr->current_trace->reset)
5825                 tr->current_trace->reset(tr);
5826
5827         tr->current_trace = &nop_trace;
5828 }
5829
5830 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5831 {
5832         /* Only enable if the directory has been created already. */
5833         if (!tr->dir)
5834                 return;
5835
5836         create_trace_option_files(tr, t);
5837 }
5838
5839 int tracing_set_tracer(struct trace_array *tr, const char *buf)
5840 {
5841         struct tracer *t;
5842 #ifdef CONFIG_TRACER_MAX_TRACE
5843         bool had_max_tr;
5844 #endif
5845         int ret = 0;
5846
5847         mutex_lock(&trace_types_lock);
5848
5849         if (!ring_buffer_expanded) {
5850                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5851                                                 RING_BUFFER_ALL_CPUS);
5852                 if (ret < 0)
5853                         goto out;
5854                 ret = 0;
5855         }
5856
5857         for (t = trace_types; t; t = t->next) {
5858                 if (strcmp(t->name, buf) == 0)
5859                         break;
5860         }
5861         if (!t) {
5862                 ret = -EINVAL;
5863                 goto out;
5864         }
5865         if (t == tr->current_trace)
5866                 goto out;
5867
5868 #ifdef CONFIG_TRACER_SNAPSHOT
5869         if (t->use_max_tr) {
5870                 arch_spin_lock(&tr->max_lock);
5871                 if (tr->cond_snapshot)
5872                         ret = -EBUSY;
5873                 arch_spin_unlock(&tr->max_lock);
5874                 if (ret)
5875                         goto out;
5876         }
5877 #endif
5878         /* Some tracers won't work on kernel command line */
5879         if (system_state < SYSTEM_RUNNING && t->noboot) {
5880                 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
5881                         t->name);
5882                 goto out;
5883         }
5884
5885         /* Some tracers are only allowed for the top level buffer */
5886         if (!trace_ok_for_array(t, tr)) {
5887                 ret = -EINVAL;
5888                 goto out;
5889         }
5890
5891         /* If trace pipe files are being read, we can't change the tracer */
5892         if (tr->trace_ref) {
5893                 ret = -EBUSY;
5894                 goto out;
5895         }
5896
5897         trace_branch_disable();
5898
5899         tr->current_trace->enabled--;
5900
5901         if (tr->current_trace->reset)
5902                 tr->current_trace->reset(tr);
5903
5904         /* Current trace needs to be nop_trace before synchronize_rcu */
5905         tr->current_trace = &nop_trace;
5906
5907 #ifdef CONFIG_TRACER_MAX_TRACE
5908         had_max_tr = tr->allocated_snapshot;
5909
5910         if (had_max_tr && !t->use_max_tr) {
5911                 /*
5912                  * We need to make sure that the update_max_tr sees that
5913                  * current_trace changed to nop_trace to keep it from
5914                  * swapping the buffers after we resize it.
5915                  * The update_max_tr is called with interrupts disabled,
5916                  * so a synchronize_rcu() is sufficient.
5917                  */
5918                 synchronize_rcu();
5919                 free_snapshot(tr);
5920         }
5921 #endif
5922
5923 #ifdef CONFIG_TRACER_MAX_TRACE
5924         if (t->use_max_tr && !had_max_tr) {
5925                 ret = tracing_alloc_snapshot_instance(tr);
5926                 if (ret < 0)
5927                         goto out;
5928         }
5929 #endif
5930
5931         if (t->init) {
5932                 ret = tracer_init(t, tr);
5933                 if (ret)
5934                         goto out;
5935         }
5936
5937         tr->current_trace = t;
5938         tr->current_trace->enabled++;
5939         trace_branch_enable(tr);
5940  out:
5941         mutex_unlock(&trace_types_lock);
5942
5943         return ret;
5944 }
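
/*
 * Usage sketch (illustrative; the tracer names shown are examples):
 * tracing_set_tracer() is reached by writing to current_tracer,
 *
 *   # cat available_tracers
 *   function_graph function nop
 *   # echo function_graph > current_tracer
 *
 * The switch resets the old tracer and temporarily installs nop_trace,
 * frees or allocates the snapshot (max) buffer depending on whether the
 * new tracer uses it, and only then runs the new tracer's init().
 */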
5945
5946 static ssize_t
5947 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
5948                         size_t cnt, loff_t *ppos)
5949 {
5950         struct trace_array *tr = filp->private_data;
5951         char buf[MAX_TRACER_SIZE+1];
5952         int i;
5953         size_t ret;
5954         int err;
5955
5956         ret = cnt;
5957
5958         if (cnt > MAX_TRACER_SIZE)
5959                 cnt = MAX_TRACER_SIZE;
5960
5961         if (copy_from_user(buf, ubuf, cnt))
5962                 return -EFAULT;
5963
5964         buf[cnt] = 0;
5965
5966         /* strip ending whitespace. */
5967         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
5968                 buf[i] = 0;
5969
5970         err = tracing_set_tracer(tr, buf);
5971         if (err)
5972                 return err;
5973
5974         *ppos += ret;
5975
5976         return ret;
5977 }
5978
5979 static ssize_t
5980 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
5981                    size_t cnt, loff_t *ppos)
5982 {
5983         char buf[64];
5984         int r;
5985
5986         r = snprintf(buf, sizeof(buf), "%ld\n",
5987                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
5988         if (r > sizeof(buf))
5989                 r = sizeof(buf);
5990         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5991 }
5992
5993 static ssize_t
5994 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
5995                     size_t cnt, loff_t *ppos)
5996 {
5997         unsigned long val;
5998         int ret;
5999
6000         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6001         if (ret)
6002                 return ret;
6003
6004         *ptr = val * 1000;
6005
6006         return cnt;
6007 }
6008
6009 static ssize_t
6010 tracing_thresh_read(struct file *filp, char __user *ubuf,
6011                     size_t cnt, loff_t *ppos)
6012 {
6013         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6014 }
6015
6016 static ssize_t
6017 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6018                      size_t cnt, loff_t *ppos)
6019 {
6020         struct trace_array *tr = filp->private_data;
6021         int ret;
6022
6023         mutex_lock(&trace_types_lock);
6024         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6025         if (ret < 0)
6026                 goto out;
6027
6028         if (tr->current_trace->update_thresh) {
6029                 ret = tr->current_trace->update_thresh(tr);
6030                 if (ret < 0)
6031                         goto out;
6032         }
6033
6034         ret = cnt;
6035 out:
6036         mutex_unlock(&trace_types_lock);
6037
6038         return ret;
6039 }
6040
6041 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6042
6043 static ssize_t
6044 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6045                      size_t cnt, loff_t *ppos)
6046 {
6047         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
6048 }
6049
6050 static ssize_t
6051 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6052                       size_t cnt, loff_t *ppos)
6053 {
6054         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
6055 }
6056
6057 #endif
6058
6059 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6060 {
6061         struct trace_array *tr = inode->i_private;
6062         struct trace_iterator *iter;
6063         int ret;
6064
6065         ret = tracing_check_open_get_tr(tr);
6066         if (ret)
6067                 return ret;
6068
6069         mutex_lock(&trace_types_lock);
6070
6071         /* create a buffer to store the information to pass to userspace */
6072         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6073         if (!iter) {
6074                 ret = -ENOMEM;
6075                 __trace_array_put(tr);
6076                 goto out;
6077         }
6078
6079         trace_seq_init(&iter->seq);
6080         iter->trace = tr->current_trace;
6081
6082         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6083                 ret = -ENOMEM;
6084                 goto fail;
6085         }
6086
6087         /* trace pipe does not show start of buffer */
6088         cpumask_setall(iter->started);
6089
6090         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6091                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
6092
6093         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6094         if (trace_clocks[tr->clock_id].in_ns)
6095                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6096
6097         iter->tr = tr;
6098         iter->array_buffer = &tr->array_buffer;
6099         iter->cpu_file = tracing_get_cpu(inode);
6100         mutex_init(&iter->mutex);
6101         filp->private_data = iter;
6102
6103         if (iter->trace->pipe_open)
6104                 iter->trace->pipe_open(iter);
6105
6106         nonseekable_open(inode, filp);
6107
6108         tr->trace_ref++;
6109 out:
6110         mutex_unlock(&trace_types_lock);
6111         return ret;
6112
6113 fail:
6114         kfree(iter);
6115         __trace_array_put(tr);
6116         mutex_unlock(&trace_types_lock);
6117         return ret;
6118 }
6119
6120 static int tracing_release_pipe(struct inode *inode, struct file *file)
6121 {
6122         struct trace_iterator *iter = file->private_data;
6123         struct trace_array *tr = inode->i_private;
6124
6125         mutex_lock(&trace_types_lock);
6126
6127         tr->trace_ref--;
6128
6129         if (iter->trace->pipe_close)
6130                 iter->trace->pipe_close(iter);
6131
6132         mutex_unlock(&trace_types_lock);
6133
6134         free_cpumask_var(iter->started);
6135         mutex_destroy(&iter->mutex);
6136         kfree(iter);
6137
6138         trace_array_put(tr);
6139
6140         return 0;
6141 }
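
/*
 * Note: the tr->trace_ref count taken in tracing_open_pipe() and dropped
 * here is what makes tracing_set_tracer() above fail with -EBUSY while a
 * trace_pipe reader is open, so the current tracer cannot be swapped out
 * underneath an active consumer.
 */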
6142
6143 static __poll_t
6144 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6145 {
6146         struct trace_array *tr = iter->tr;
6147
6148         /* Iterators are static, they should be filled or empty */
6149         if (trace_buffer_iter(iter, iter->cpu_file))
6150                 return EPOLLIN | EPOLLRDNORM;
6151
6152         if (tr->trace_flags & TRACE_ITER_BLOCK)
6153                 /*
6154                  * Always select as readable when in blocking mode
6155                  */
6156                 return EPOLLIN | EPOLLRDNORM;
6157         else
6158                 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6159                                              filp, poll_table);
6160 }
6161
6162 static __poll_t
6163 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6164 {
6165         struct trace_iterator *iter = filp->private_data;
6166
6167         return trace_poll(iter, filp, poll_table);
6168 }
6169
6170 /* Must be called with iter->mutex held. */
6171 static int tracing_wait_pipe(struct file *filp)
6172 {
6173         struct trace_iterator *iter = filp->private_data;
6174         int ret;
6175
6176         while (trace_empty(iter)) {
6177
6178                 if ((filp->f_flags & O_NONBLOCK)) {
6179                         return -EAGAIN;
6180                 }
6181
6182                 /*
6183                  * We block until there is something to read, or until tracing is
6184                  * disabled after something has already been read. We still block if
6185                  * nothing has been read yet. This allows a user to cat this file, and
6186                  * then enable tracing. But after we have read something,
6187                  * we give an EOF when tracing is again disabled.
6188                  *
6189                  * iter->pos will be 0 if we haven't read anything.
6190                  */
6191                 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6192                         break;
6193
6194                 mutex_unlock(&iter->mutex);
6195
6196                 ret = wait_on_pipe(iter, 0);
6197
6198                 mutex_lock(&iter->mutex);
6199
6200                 if (ret)
6201                         return ret;
6202         }
6203
6204         return 1;
6205 }
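
/*
 * Behaviour sketch (illustrative): the blocking rules above are what give
 * trace_pipe its "tail -f" feel,
 *
 *   # cat trace_pipe          # blocks while the buffer is empty
 *   # echo 1 > tracing_on     # (from another shell) data starts flowing
 *
 * and once something has been read, turning tracing off again makes the
 * reader see EOF instead of blocking forever.
 */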
6206
6207 /*
6208  * Consumer reader.
6209  */
6210 static ssize_t
6211 tracing_read_pipe(struct file *filp, char __user *ubuf,
6212                   size_t cnt, loff_t *ppos)
6213 {
6214         struct trace_iterator *iter = filp->private_data;
6215         ssize_t sret;
6216
6217         /*
6218          * Avoid more than one consumer on a single file descriptor.
6219          * This is just a matter of trace coherency; the ring buffer itself
6220          * is protected.
6221          */
6222         mutex_lock(&iter->mutex);
6223
6224         /* return any leftover data */
6225         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6226         if (sret != -EBUSY)
6227                 goto out;
6228
6229         trace_seq_init(&iter->seq);
6230
6231         if (iter->trace->read) {
6232                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6233                 if (sret)
6234                         goto out;
6235         }
6236
6237 waitagain:
6238         sret = tracing_wait_pipe(filp);
6239         if (sret <= 0)
6240                 goto out;
6241
6242         /* stop when tracing is finished */
6243         if (trace_empty(iter)) {
6244                 sret = 0;
6245                 goto out;
6246         }
6247
6248         if (cnt >= PAGE_SIZE)
6249                 cnt = PAGE_SIZE - 1;
6250
6251         /* reset all but tr, trace, and overruns */
6252         memset(&iter->seq, 0,
6253                sizeof(struct trace_iterator) -
6254                offsetof(struct trace_iterator, seq));
6255         cpumask_clear(iter->started);
6256         trace_seq_init(&iter->seq);
6257         iter->pos = -1;
6258
6259         trace_event_read_lock();
6260         trace_access_lock(iter->cpu_file);
6261         while (trace_find_next_entry_inc(iter) != NULL) {
6262                 enum print_line_t ret;
6263                 int save_len = iter->seq.seq.len;
6264
6265                 ret = print_trace_line(iter);
6266                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6267                         /* don't print partial lines */
6268                         iter->seq.seq.len = save_len;
6269                         break;
6270                 }
6271                 if (ret != TRACE_TYPE_NO_CONSUME)
6272                         trace_consume(iter);
6273
6274                 if (trace_seq_used(&iter->seq) >= cnt)
6275                         break;
6276
6277                 /*
6278                  * Setting the full flag means we reached the trace_seq buffer
6279                  * size and should have left via the partial-output condition
6280                  * above. One of the trace_seq_* functions is not used properly.
6281                  */
6282                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6283                           iter->ent->type);
6284         }
6285         trace_access_unlock(iter->cpu_file);
6286         trace_event_read_unlock();
6287
6288         /* Now copy what we have to the user */
6289         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6290         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6291                 trace_seq_init(&iter->seq);
6292
6293         /*
6294          * If there was nothing to send to user, in spite of consuming trace
6295          * entries, go back to wait for more entries.
6296          */
6297         if (sret == -EBUSY)
6298                 goto waitagain;
6299
6300 out:
6301         mutex_unlock(&iter->mutex);
6302
6303         return sret;
6304 }
6305
6306 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6307                                      unsigned int idx)
6308 {
6309         __free_page(spd->pages[idx]);
6310 }
6311
6312 static size_t
6313 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6314 {
6315         size_t count;
6316         int save_len;
6317         int ret;
6318
6319         /* Seq buffer is page-sized, exactly what we need. */
6320         for (;;) {
6321                 save_len = iter->seq.seq.len;
6322                 ret = print_trace_line(iter);
6323
6324                 if (trace_seq_has_overflowed(&iter->seq)) {
6325                         iter->seq.seq.len = save_len;
6326                         break;
6327                 }
6328
6329                 /*
6330                  * This should not be hit, because TRACE_TYPE_PARTIAL_LINE should
6331                  * only be returned if iter->seq overflowed. But check it
6332                  * anyway to be safe.
6333                  */
6334                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6335                         iter->seq.seq.len = save_len;
6336                         break;
6337                 }
6338
6339                 count = trace_seq_used(&iter->seq) - save_len;
6340                 if (rem < count) {
6341                         rem = 0;
6342                         iter->seq.seq.len = save_len;
6343                         break;
6344                 }
6345
6346                 if (ret != TRACE_TYPE_NO_CONSUME)
6347                         trace_consume(iter);
6348                 rem -= count;
6349                 if (!trace_find_next_entry_inc(iter))   {
6350                         rem = 0;
6351                         iter->ent = NULL;
6352                         break;
6353                 }
6354         }
6355
6356         return rem;
6357 }
6358
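/*
 * splice() handler for trace_pipe (see tracing_pipe_fops below):
 * formats trace entries into freshly allocated pages and hands the
 * pages to the pipe.
 */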
6359 static ssize_t tracing_splice_read_pipe(struct file *filp,
6360                                         loff_t *ppos,
6361                                         struct pipe_inode_info *pipe,
6362                                         size_t len,
6363                                         unsigned int flags)
6364 {
6365         struct page *pages_def[PIPE_DEF_BUFFERS];
6366         struct partial_page partial_def[PIPE_DEF_BUFFERS];
6367         struct trace_iterator *iter = filp->private_data;
6368         struct splice_pipe_desc spd = {
6369                 .pages          = pages_def,
6370                 .partial        = partial_def,
6371                 .nr_pages       = 0, /* This gets updated below. */
6372                 .nr_pages_max   = PIPE_DEF_BUFFERS,
6373                 .ops            = &default_pipe_buf_ops,
6374                 .spd_release    = tracing_spd_release_pipe,
6375         };
6376         ssize_t ret;
6377         size_t rem;
6378         unsigned int i;
6379
6380         if (splice_grow_spd(pipe, &spd))
6381                 return -ENOMEM;
6382
6383         mutex_lock(&iter->mutex);
6384
6385         if (iter->trace->splice_read) {
6386                 ret = iter->trace->splice_read(iter, filp,
6387                                                ppos, pipe, len, flags);
6388                 if (ret)
6389                         goto out_err;
6390         }
6391
6392         ret = tracing_wait_pipe(filp);
6393         if (ret <= 0)
6394                 goto out_err;
6395
6396         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6397                 ret = -EFAULT;
6398                 goto out_err;
6399         }
6400
6401         trace_event_read_lock();
6402         trace_access_lock(iter->cpu_file);
6403
6404         /* Fill as many pages as possible. */
6405         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6406                 spd.pages[i] = alloc_page(GFP_KERNEL);
6407                 if (!spd.pages[i])
6408                         break;
6409
6410                 rem = tracing_fill_pipe_page(rem, iter);
6411
6412                 /* Copy the data into the page, so we can start over. */
6413                 ret = trace_seq_to_buffer(&iter->seq,
6414                                           page_address(spd.pages[i]),
6415                                           trace_seq_used(&iter->seq));
6416                 if (ret < 0) {
6417                         __free_page(spd.pages[i]);
6418                         break;
6419                 }
6420                 spd.partial[i].offset = 0;
6421                 spd.partial[i].len = trace_seq_used(&iter->seq);
6422
6423                 trace_seq_init(&iter->seq);
6424         }
6425
6426         trace_access_unlock(iter->cpu_file);
6427         trace_event_read_unlock();
6428         mutex_unlock(&iter->mutex);
6429
6430         spd.nr_pages = i;
6431
6432         if (i)
6433                 ret = splice_to_pipe(pipe, &spd);
6434         else
6435                 ret = 0;
6436 out:
6437         splice_shrink_spd(&spd);
6438         return ret;
6439
6440 out_err:
6441         mutex_unlock(&iter->mutex);
6442         goto out;
6443 }
6444
6445 static ssize_t
6446 tracing_entries_read(struct file *filp, char __user *ubuf,
6447                      size_t cnt, loff_t *ppos)
6448 {
6449         struct inode *inode = file_inode(filp);
6450         struct trace_array *tr = inode->i_private;
6451         int cpu = tracing_get_cpu(inode);
6452         char buf[64];
6453         int r = 0;
6454         ssize_t ret;
6455
6456         mutex_lock(&trace_types_lock);
6457
6458         if (cpu == RING_BUFFER_ALL_CPUS) {
6459                 int cpu, buf_size_same;
6460                 unsigned long size;
6461
6462                 size = 0;
6463                 buf_size_same = 1;
6464                 /* check if all CPU buffer sizes are the same */
6465                 for_each_tracing_cpu(cpu) {
6466                         /* fill in the size from the first enabled CPU */
6467                         if (size == 0)
6468                                 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6469                         if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6470                                 buf_size_same = 0;
6471                                 break;
6472                         }
6473                 }
6474
6475                 if (buf_size_same) {
6476                         if (!ring_buffer_expanded)
6477                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
6478                                             size >> 10,
6479                                             trace_buf_size >> 10);
6480                         else
6481                                 r = sprintf(buf, "%lu\n", size >> 10);
6482                 } else
6483                         r = sprintf(buf, "X\n");
6484         } else
6485                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6486
6487         mutex_unlock(&trace_types_lock);
6488
6489         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6490         return ret;
6491 }
6492
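/*
 * Write handler behind tracing_entries_fops (the ring buffer size
 * file, typically exposed as buffer_size_kb).  The value is taken in
 * KB; an illustrative use:
 *
 *	echo 4096 > buffer_size_kb	(roughly 4 MB per CPU)
 *
 * When reached through a per_cpu/cpuN/ file, only that CPU's buffer
 * is resized.
 */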
6493 static ssize_t
6494 tracing_entries_write(struct file *filp, const char __user *ubuf,
6495                       size_t cnt, loff_t *ppos)
6496 {
6497         struct inode *inode = file_inode(filp);
6498         struct trace_array *tr = inode->i_private;
6499         unsigned long val;
6500         int ret;
6501
6502         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6503         if (ret)
6504                 return ret;
6505
6506         /* must have at least 1 entry */
6507         if (!val)
6508                 return -EINVAL;
6509
6510         /* value is in KB */
6511         val <<= 10;
6512         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6513         if (ret < 0)
6514                 return ret;
6515
6516         *ppos += cnt;
6517
6518         return cnt;
6519 }
6520
6521 static ssize_t
6522 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6523                                 size_t cnt, loff_t *ppos)
6524 {
6525         struct trace_array *tr = filp->private_data;
6526         char buf[64];
6527         int r, cpu;
6528         unsigned long size = 0, expanded_size = 0;
6529
6530         mutex_lock(&trace_types_lock);
6531         for_each_tracing_cpu(cpu) {
6532                 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
6533                 if (!ring_buffer_expanded)
6534                         expanded_size += trace_buf_size >> 10;
6535         }
6536         if (ring_buffer_expanded)
6537                 r = sprintf(buf, "%lu\n", size);
6538         else
6539                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6540         mutex_unlock(&trace_types_lock);
6541
6542         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6543 }
6544
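/*
 * The free_buffer file (tracing_free_buffer_fops below): writes are
 * accepted but ignored; the work happens on release, which shrinks the
 * ring buffer to zero and, if TRACE_ITER_STOP_ON_FREE is set for the
 * instance, turns tracing off first.
 */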
6545 static ssize_t
6546 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6547                           size_t cnt, loff_t *ppos)
6548 {
6549         /*
6550          * There is no need to read what the user has written; this function
6551          * exists only so that using "echo" on this file does not return an error.
6552          */
6553
6554         *ppos += cnt;
6555
6556         return cnt;
6557 }
6558
6559 static int
6560 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6561 {
6562         struct trace_array *tr = inode->i_private;
6563
6564         /* disable tracing? */
6565         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6566                 tracer_tracing_off(tr);
6567         /* resize the ring buffer to 0 */
6568         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6569
6570         trace_array_put(tr);
6571
6572         return 0;
6573 }
6574
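/*
 * Write handler for the trace_marker file.  An illustrative userspace
 * use (path assumes tracefs is mounted at /sys/kernel/tracing):
 *
 *	int fd = open("/sys/kernel/tracing/trace_marker", O_WRONLY);
 *	write(fd, "hello from userspace\n", 21);
 *
 * The text is recorded as a TRACE_PRINT event; a trailing newline is
 * appended below if the writer did not supply one.
 */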
6575 static ssize_t
6576 tracing_mark_write(struct file *filp, const char __user *ubuf,
6577                                         size_t cnt, loff_t *fpos)
6578 {
6579         struct trace_array *tr = filp->private_data;
6580         struct ring_buffer_event *event;
6581         enum event_trigger_type tt = ETT_NONE;
6582         struct trace_buffer *buffer;
6583         struct print_entry *entry;
6584         unsigned long irq_flags;
6585         ssize_t written;
6586         int size;
6587         int len;
6588
6589 /* Used in tracing_mark_raw_write() as well */
6590 #define FAULTED_STR "<faulted>"
6591 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
6592
6593         if (tracing_disabled)
6594                 return -EINVAL;
6595
6596         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6597                 return -EINVAL;
6598
6599         if (cnt > TRACE_BUF_SIZE)
6600                 cnt = TRACE_BUF_SIZE;
6601
6602         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6603
6604         local_save_flags(irq_flags);
6605         size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
6606
6607         /* If less than "<faulted>", then make sure we can still add that */
6608         if (cnt < FAULTED_SIZE)
6609                 size += FAULTED_SIZE - cnt;
6610
6611         buffer = tr->array_buffer.buffer;
6612         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6613                                             irq_flags, preempt_count());
6614         if (unlikely(!event))
6615                 /* Ring buffer disabled, return as if not open for write */
6616                 return -EBADF;
6617
6618         entry = ring_buffer_event_data(event);
6619         entry->ip = _THIS_IP_;
6620
6621         len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6622         if (len) {
6623                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6624                 cnt = FAULTED_SIZE;
6625                 written = -EFAULT;
6626         } else
6627                 written = cnt;
6628         len = cnt;
6629
6630         if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
6631                 /* do not add \n before testing triggers, but add \0 */
6632                 entry->buf[cnt] = '\0';
6633                 tt = event_triggers_call(tr->trace_marker_file, entry, event);
6634         }
6635
6636         if (entry->buf[cnt - 1] != '\n') {
6637                 entry->buf[cnt] = '\n';
6638                 entry->buf[cnt + 1] = '\0';
6639         } else
6640                 entry->buf[cnt] = '\0';
6641
6642         __buffer_unlock_commit(buffer, event);
6643
6644         if (tt)
6645                 event_triggers_post_call(tr->trace_marker_file, tt);
6646
6647         if (written > 0)
6648                 *fpos += written;
6649
6650         return written;
6651 }
6652
6653 /* Limit it for now to 3K (including tag) */
6654 #define RAW_DATA_MAX_SIZE (1024*3)
6655
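/*
 * Write handler for the trace_marker_raw file.  The payload must begin
 * with an (unsigned int) tag id; the remainder is opaque binary data.
 * An illustrative userspace use (the struct layout is the caller's own):
 *
 *	struct { unsigned int id; char data[16]; } rec = { .id = 42 };
 *	write(fd, &rec, sizeof(rec));	(fd open on trace_marker_raw)
 */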
6656 static ssize_t
6657 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6658                                         size_t cnt, loff_t *fpos)
6659 {
6660         struct trace_array *tr = filp->private_data;
6661         struct ring_buffer_event *event;
6662         struct trace_buffer *buffer;
6663         struct raw_data_entry *entry;
6664         unsigned long irq_flags;
6665         ssize_t written;
6666         int size;
6667         int len;
6668
6669 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6670
6671         if (tracing_disabled)
6672                 return -EINVAL;
6673
6674         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6675                 return -EINVAL;
6676
6677         /* The marker must at least have a tag id */
6678         if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
6679                 return -EINVAL;
6680
6681         if (cnt > TRACE_BUF_SIZE)
6682                 cnt = TRACE_BUF_SIZE;
6683
6684         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6685
6686         local_save_flags(irq_flags);
6687         size = sizeof(*entry) + cnt;
6688         if (cnt < FAULT_SIZE_ID)
6689                 size += FAULT_SIZE_ID - cnt;
6690
6691         buffer = tr->array_buffer.buffer;
6692         event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
6693                                             irq_flags, preempt_count());
6694         if (!event)
6695                 /* Ring buffer disabled, return as if not open for write */
6696                 return -EBADF;
6697
6698         entry = ring_buffer_event_data(event);
6699
6700         len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
6701         if (len) {
6702                 entry->id = -1;
6703                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6704                 written = -EFAULT;
6705         } else
6706                 written = cnt;
6707
6708         __buffer_unlock_commit(buffer, event);
6709
6710         if (written > 0)
6711                 *fpos += written;
6712
6713         return written;
6714 }
6715
6716 static int tracing_clock_show(struct seq_file *m, void *v)
6717 {
6718         struct trace_array *tr = m->private;
6719         int i;
6720
6721         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
6722                 seq_printf(m,
6723                         "%s%s%s%s", i ? " " : "",
6724                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
6725                         i == tr->clock_id ? "]" : "");
6726         seq_putc(m, '\n');
6727
6728         return 0;
6729 }
6730
6731 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
6732 {
6733         int i;
6734
6735         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
6736                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
6737                         break;
6738         }
6739         if (i == ARRAY_SIZE(trace_clocks))
6740                 return -EINVAL;
6741
6742         mutex_lock(&trace_types_lock);
6743
6744         tr->clock_id = i;
6745
6746         ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
6747
6748         /*
6749          * New clock may not be consistent with the previous clock.
6750          * Reset the buffer so that it doesn't have incomparable timestamps.
6751          */
6752         tracing_reset_online_cpus(&tr->array_buffer);
6753
6754 #ifdef CONFIG_TRACER_MAX_TRACE
6755         if (tr->max_buffer.buffer)
6756                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
6757         tracing_reset_online_cpus(&tr->max_buffer);
6758 #endif
6759
6760         mutex_unlock(&trace_types_lock);
6761
6762         return 0;
6763 }
6764
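/*
 * Write handler for the trace_clock file.  Selecting a clock, e.g.
 *
 *	echo global > trace_clock
 *
 * also resets the ring buffers, since timestamps taken with different
 * clocks are not comparable (see tracing_set_clock() above).
 */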
6765 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6766                                    size_t cnt, loff_t *fpos)
6767 {
6768         struct seq_file *m = filp->private_data;
6769         struct trace_array *tr = m->private;
6770         char buf[64];
6771         const char *clockstr;
6772         int ret;
6773
6774         if (cnt >= sizeof(buf))
6775                 return -EINVAL;
6776
6777         if (copy_from_user(buf, ubuf, cnt))
6778                 return -EFAULT;
6779
6780         buf[cnt] = 0;
6781
6782         clockstr = strstrip(buf);
6783
6784         ret = tracing_set_clock(tr, clockstr);
6785         if (ret)
6786                 return ret;
6787
6788         *fpos += cnt;
6789
6790         return cnt;
6791 }
6792
6793 static int tracing_clock_open(struct inode *inode, struct file *file)
6794 {
6795         struct trace_array *tr = inode->i_private;
6796         int ret;
6797
6798         ret = tracing_check_open_get_tr(tr);
6799         if (ret)
6800                 return ret;
6801
6802         ret = single_open(file, tracing_clock_show, inode->i_private);
6803         if (ret < 0)
6804                 trace_array_put(tr);
6805
6806         return ret;
6807 }
6808
6809 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
6810 {
6811         struct trace_array *tr = m->private;
6812
6813         mutex_lock(&trace_types_lock);
6814
6815         if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
6816                 seq_puts(m, "delta [absolute]\n");
6817         else
6818                 seq_puts(m, "[delta] absolute\n");
6819
6820         mutex_unlock(&trace_types_lock);
6821
6822         return 0;
6823 }
6824
6825 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
6826 {
6827         struct trace_array *tr = inode->i_private;
6828         int ret;
6829
6830         ret = tracing_check_open_get_tr(tr);
6831         if (ret)
6832                 return ret;
6833
6834         ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
6835         if (ret < 0)
6836                 trace_array_put(tr);
6837
6838         return ret;
6839 }
6840
6841 int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs)
6842 {
6843         int ret = 0;
6844
6845         mutex_lock(&trace_types_lock);
6846
6847         if (abs && tr->time_stamp_abs_ref++)
6848                 goto out;
6849
6850         if (!abs) {
6851                 if (WARN_ON_ONCE(!tr->time_stamp_abs_ref)) {
6852                         ret = -EINVAL;
6853                         goto out;
6854                 }
6855
6856                 if (--tr->time_stamp_abs_ref)
6857                         goto out;
6858         }
6859
6860         ring_buffer_set_time_stamp_abs(tr->array_buffer.buffer, abs);
6861
6862 #ifdef CONFIG_TRACER_MAX_TRACE
6863         if (tr->max_buffer.buffer)
6864                 ring_buffer_set_time_stamp_abs(tr->max_buffer.buffer, abs);
6865 #endif
6866  out:
6867         mutex_unlock(&trace_types_lock);
6868
6869         return ret;
6870 }
6871
6872 struct ftrace_buffer_info {
6873         struct trace_iterator   iter;
6874         void                    *spare;
6875         unsigned int            spare_cpu;
6876         unsigned int            read;
6877 };
6878
6879 #ifdef CONFIG_TRACER_SNAPSHOT
6880 static int tracing_snapshot_open(struct inode *inode, struct file *file)
6881 {
6882         struct trace_array *tr = inode->i_private;
6883         struct trace_iterator *iter;
6884         struct seq_file *m;
6885         int ret;
6886
6887         ret = tracing_check_open_get_tr(tr);
6888         if (ret)
6889                 return ret;
6890
6891         if (file->f_mode & FMODE_READ) {
6892                 iter = __tracing_open(inode, file, true);
6893                 if (IS_ERR(iter))
6894                         ret = PTR_ERR(iter);
6895         } else {
6896                 /* Writes still need the seq_file to hold the private data */
6897                 ret = -ENOMEM;
6898                 m = kzalloc(sizeof(*m), GFP_KERNEL);
6899                 if (!m)
6900                         goto out;
6901                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6902                 if (!iter) {
6903                         kfree(m);
6904                         goto out;
6905                 }
6906                 ret = 0;
6907
6908                 iter->tr = tr;
6909                 iter->array_buffer = &tr->max_buffer;
6910                 iter->cpu_file = tracing_get_cpu(inode);
6911                 m->private = iter;
6912                 file->private_data = m;
6913         }
6914 out:
6915         if (ret < 0)
6916                 trace_array_put(tr);
6917
6918         return ret;
6919 }
6920
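/*
 * Write handler for the snapshot file.  The value written selects the
 * action, matching the switch below:
 *
 *	echo 0 > snapshot	: free the snapshot buffer
 *	echo 1 > snapshot	: allocate (if needed) and take a snapshot
 *	echo 2 > snapshot	: clear the snapshot buffer contents
 *
 * Values greater than 1 all behave like 2.
 */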
6921 static ssize_t
6922 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
6923                        loff_t *ppos)
6924 {
6925         struct seq_file *m = filp->private_data;
6926         struct trace_iterator *iter = m->private;
6927         struct trace_array *tr = iter->tr;
6928         unsigned long val;
6929         int ret;
6930
6931         ret = tracing_update_buffers();
6932         if (ret < 0)
6933                 return ret;
6934
6935         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6936         if (ret)
6937                 return ret;
6938
6939         mutex_lock(&trace_types_lock);
6940
6941         if (tr->current_trace->use_max_tr) {
6942                 ret = -EBUSY;
6943                 goto out;
6944         }
6945
6946         arch_spin_lock(&tr->max_lock);
6947         if (tr->cond_snapshot)
6948                 ret = -EBUSY;
6949         arch_spin_unlock(&tr->max_lock);
6950         if (ret)
6951                 goto out;
6952
6953         switch (val) {
6954         case 0:
6955                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6956                         ret = -EINVAL;
6957                         break;
6958                 }
6959                 if (tr->allocated_snapshot)
6960                         free_snapshot(tr);
6961                 break;
6962         case 1:
6963 /* Only allow per-cpu swap if the ring buffer supports it */
6964 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
6965                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6966                         ret = -EINVAL;
6967                         break;
6968                 }
6969 #endif
6970                 if (tr->allocated_snapshot)
6971                         ret = resize_buffer_duplicate_size(&tr->max_buffer,
6972                                         &tr->array_buffer, iter->cpu_file);
6973                 else
6974                         ret = tracing_alloc_snapshot_instance(tr);
6975                 if (ret < 0)
6976                         break;
6977                 local_irq_disable();
6978                 /* Now, we're going to swap */
6979                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6980                         update_max_tr(tr, current, smp_processor_id(), NULL);
6981                 else
6982                         update_max_tr_single(tr, current, iter->cpu_file);
6983                 local_irq_enable();
6984                 break;
6985         default:
6986                 if (tr->allocated_snapshot) {
6987                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6988                                 tracing_reset_online_cpus(&tr->max_buffer);
6989                         else
6990                                 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
6991                 }
6992                 break;
6993         }
6994
6995         if (ret >= 0) {
6996                 *ppos += cnt;
6997                 ret = cnt;
6998         }
6999 out:
7000         mutex_unlock(&trace_types_lock);
7001         return ret;
7002 }
7003
7004 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7005 {
7006         struct seq_file *m = file->private_data;
7007         int ret;
7008
7009         ret = tracing_release(inode, file);
7010
7011         if (file->f_mode & FMODE_READ)
7012                 return ret;
7013
7014         /* If write only, the seq_file is just a stub */
7015         if (m)
7016                 kfree(m->private);
7017         kfree(m);
7018
7019         return 0;
7020 }
7021
7022 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7023 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7024                                     size_t count, loff_t *ppos);
7025 static int tracing_buffers_release(struct inode *inode, struct file *file);
7026 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7027                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7028
7029 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7030 {
7031         struct ftrace_buffer_info *info;
7032         int ret;
7033
7034         /* The following checks for tracefs lockdown */
7035         ret = tracing_buffers_open(inode, filp);
7036         if (ret < 0)
7037                 return ret;
7038
7039         info = filp->private_data;
7040
7041         if (info->iter.trace->use_max_tr) {
7042                 tracing_buffers_release(inode, filp);
7043                 return -EBUSY;
7044         }
7045
7046         info->iter.snapshot = true;
7047         info->iter.array_buffer = &info->iter.tr->max_buffer;
7048
7049         return ret;
7050 }
7051
7052 #endif /* CONFIG_TRACER_SNAPSHOT */
7053
7054
7055 static const struct file_operations tracing_thresh_fops = {
7056         .open           = tracing_open_generic,
7057         .read           = tracing_thresh_read,
7058         .write          = tracing_thresh_write,
7059         .llseek         = generic_file_llseek,
7060 };
7061
7062 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7063 static const struct file_operations tracing_max_lat_fops = {
7064         .open           = tracing_open_generic,
7065         .read           = tracing_max_lat_read,
7066         .write          = tracing_max_lat_write,
7067         .llseek         = generic_file_llseek,
7068 };
7069 #endif
7070
7071 static const struct file_operations set_tracer_fops = {
7072         .open           = tracing_open_generic,
7073         .read           = tracing_set_trace_read,
7074         .write          = tracing_set_trace_write,
7075         .llseek         = generic_file_llseek,
7076 };
7077
7078 static const struct file_operations tracing_pipe_fops = {
7079         .open           = tracing_open_pipe,
7080         .poll           = tracing_poll_pipe,
7081         .read           = tracing_read_pipe,
7082         .splice_read    = tracing_splice_read_pipe,
7083         .release        = tracing_release_pipe,
7084         .llseek         = no_llseek,
7085 };
7086
7087 static const struct file_operations tracing_entries_fops = {
7088         .open           = tracing_open_generic_tr,
7089         .read           = tracing_entries_read,
7090         .write          = tracing_entries_write,
7091         .llseek         = generic_file_llseek,
7092         .release        = tracing_release_generic_tr,
7093 };
7094
7095 static const struct file_operations tracing_total_entries_fops = {
7096         .open           = tracing_open_generic_tr,
7097         .read           = tracing_total_entries_read,
7098         .llseek         = generic_file_llseek,
7099         .release        = tracing_release_generic_tr,
7100 };
7101
7102 static const struct file_operations tracing_free_buffer_fops = {
7103         .open           = tracing_open_generic_tr,
7104         .write          = tracing_free_buffer_write,
7105         .release        = tracing_free_buffer_release,
7106 };
7107
7108 static const struct file_operations tracing_mark_fops = {
7109         .open           = tracing_open_generic_tr,
7110         .write          = tracing_mark_write,
7111         .llseek         = generic_file_llseek,
7112         .release        = tracing_release_generic_tr,
7113 };
7114
7115 static const struct file_operations tracing_mark_raw_fops = {
7116         .open           = tracing_open_generic_tr,
7117         .write          = tracing_mark_raw_write,
7118         .llseek         = generic_file_llseek,
7119         .release        = tracing_release_generic_tr,
7120 };
7121
7122 static const struct file_operations trace_clock_fops = {
7123         .open           = tracing_clock_open,
7124         .read           = seq_read,
7125         .llseek         = seq_lseek,
7126         .release        = tracing_single_release_tr,
7127         .write          = tracing_clock_write,
7128 };
7129
7130 static const struct file_operations trace_time_stamp_mode_fops = {
7131         .open           = tracing_time_stamp_mode_open,
7132         .read           = seq_read,
7133         .llseek         = seq_lseek,
7134         .release        = tracing_single_release_tr,
7135 };
7136
7137 #ifdef CONFIG_TRACER_SNAPSHOT
7138 static const struct file_operations snapshot_fops = {
7139         .open           = tracing_snapshot_open,
7140         .read           = seq_read,
7141         .write          = tracing_snapshot_write,
7142         .llseek         = tracing_lseek,
7143         .release        = tracing_snapshot_release,
7144 };
7145
7146 static const struct file_operations snapshot_raw_fops = {
7147         .open           = snapshot_raw_open,
7148         .read           = tracing_buffers_read,
7149         .release        = tracing_buffers_release,
7150         .splice_read    = tracing_buffers_splice_read,
7151         .llseek         = no_llseek,
7152 };
7153
7154 #endif /* CONFIG_TRACER_SNAPSHOT */
7155
7156 #define TRACING_LOG_ERRS_MAX    8
7157 #define TRACING_LOG_LOC_MAX     128
7158
7159 #define CMD_PREFIX "  Command: "
7160
7161 struct err_info {
7162         const char      **errs; /* ptr to loc-specific array of err strings */
7163         u8              type;   /* index into errs -> specific err string */
7164         u8              pos;    /* caret position in cmd (cmd is at most MAX_FILTER_STR_VAL = 256) */
7165         u64             ts;
7166 };
7167
7168 struct tracing_log_err {
7169         struct list_head        list;
7170         struct err_info         info;
7171         char                    loc[TRACING_LOG_LOC_MAX]; /* err location */
7172         char                    cmd[MAX_FILTER_STR_VAL]; /* what caused err */
7173 };
7174
7175 static DEFINE_MUTEX(tracing_err_log_lock);
7176
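/*
 * Get a tracing_log_err entry to fill in: allocate fresh entries until
 * TRACING_LOG_ERRS_MAX exist, then recycle the oldest entry from the
 * instance's err_log list.
 */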
7177 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
7178 {
7179         struct tracing_log_err *err;
7180
7181         if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7182                 err = kzalloc(sizeof(*err), GFP_KERNEL);
7183                 if (!err)
7184                         err = ERR_PTR(-ENOMEM);
7185                 tr->n_err_log_entries++;
7186
7187                 return err;
7188         }
7189
7190         err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7191         list_del(&err->list);
7192
7193         return err;
7194 }
7195
7196 /**
7197  * err_pos - find the position of a string within a command for error careting
7198  * @cmd: The tracing command that caused the error
7199  * @str: The string to position the caret at within @cmd
7200  *
7201  * Finds the position of the first occurrence of @str within @cmd.  The
7202  * return value can be passed to tracing_log_err() for caret placement
7203  * within @cmd.
7204  *
7205  * Returns the index within @cmd of the first occurrence of @str or 0
7206  * if @str was not found.
7207  */
7208 unsigned int err_pos(char *cmd, const char *str)
7209 {
7210         char *found;
7211
7212         if (WARN_ON(!strlen(cmd)))
7213                 return 0;
7214
7215         found = strstr(cmd, str);
7216         if (found)
7217                 return found - cmd;
7218
7219         return 0;
7220 }
7221
7222 /**
7223  * tracing_log_err - write an error to the tracing error log
7224  * @tr: The associated trace array for the error (NULL for top level array)
7225  * @loc: A string describing where the error occurred
7226  * @cmd: The tracing command that caused the error
7227  * @errs: The array of loc-specific static error strings
7228  * @type: The index into errs[], which produces the specific static err string
7229  * @pos: The position the caret should be placed in the cmd
7230  *
7231  * Writes an error into tracing/error_log of the form:
7232  *
7233  * <loc>: error: <text>
7234  *   Command: <cmd>
7235  *              ^
7236  *
7237  * tracing/error_log is a small log file containing the last
7238  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7239  * unless there has been a tracing error, and the error log can be
7240  * cleared and have its memory freed by writing the empty string in
7241  * truncation mode to it, i.e. echo > tracing/error_log.
7242  *
7243  * NOTE: the @errs array along with the @type param are used to
7244  * produce a static error string - this string is not copied and saved
7245  * when the error is logged - only a pointer to it is saved.  See
7246  * existing callers for examples of how static strings are typically
7247  * defined for use with tracing_log_err().
7248  */
7249 void tracing_log_err(struct trace_array *tr,
7250                      const char *loc, const char *cmd,
7251                      const char **errs, u8 type, u8 pos)
7252 {
7253         struct tracing_log_err *err;
7254
7255         if (!tr)
7256                 tr = &global_trace;
7257
7258         mutex_lock(&tracing_err_log_lock);
7259         err = get_tracing_log_err(tr);
7260         if (PTR_ERR(err) == -ENOMEM) {
7261                 mutex_unlock(&tracing_err_log_lock);
7262                 return;
7263         }
7264
7265         snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7266         snprintf(err->cmd, MAX_FILTER_STR_VAL, "\n" CMD_PREFIX "%s\n", cmd);
7267
7268         err->info.errs = errs;
7269         err->info.type = type;
7270         err->info.pos = pos;
7271         err->info.ts = local_clock();
7272
7273         list_add_tail(&err->list, &tr->err_log);
7274         mutex_unlock(&tracing_err_log_lock);
7275 }
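
/*
 * An illustrative caller sketch (all names below are made up, not
 * taken from an existing user):
 *
 *	static const char *my_errs[] = { "Invalid field", "Bad action" };
 *
 *	tracing_log_err(tr, "my_subsys", cmd, my_errs,
 *			MY_ERR_INVALID_FIELD, err_pos(cmd, field_name));
 */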
7276
7277 static void clear_tracing_err_log(struct trace_array *tr)
7278 {
7279         struct tracing_log_err *err, *next;
7280
7281         mutex_lock(&tracing_err_log_lock);
7282         list_for_each_entry_safe(err, next, &tr->err_log, list) {
7283                 list_del(&err->list);
7284                 kfree(err);
7285         }
7286
7287         tr->n_err_log_entries = 0;
7288         mutex_unlock(&tracing_err_log_lock);
7289 }
7290
7291 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7292 {
7293         struct trace_array *tr = m->private;
7294
7295         mutex_lock(&tracing_err_log_lock);
7296
7297         return seq_list_start(&tr->err_log, *pos);
7298 }
7299
7300 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7301 {
7302         struct trace_array *tr = m->private;
7303
7304         return seq_list_next(v, &tr->err_log, pos);
7305 }
7306
7307 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7308 {
7309         mutex_unlock(&tracing_err_log_lock);
7310 }
7311
7312 static void tracing_err_log_show_pos(struct seq_file *m, u8 pos)
7313 {
7314         u8 i;
7315
7316         for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7317                 seq_putc(m, ' ');
7318         for (i = 0; i < pos; i++)
7319                 seq_putc(m, ' ');
7320         seq_puts(m, "^\n");
7321 }
7322
7323 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7324 {
7325         struct tracing_log_err *err = v;
7326
7327         if (err) {
7328                 const char *err_text = err->info.errs[err->info.type];
7329                 u64 sec = err->info.ts;
7330                 u32 nsec;
7331
7332                 nsec = do_div(sec, NSEC_PER_SEC);
7333                 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7334                            err->loc, err_text);
7335                 seq_printf(m, "%s", err->cmd);
7336                 tracing_err_log_show_pos(m, err->info.pos);
7337         }
7338
7339         return 0;
7340 }
7341
7342 static const struct seq_operations tracing_err_log_seq_ops = {
7343         .start  = tracing_err_log_seq_start,
7344         .next   = tracing_err_log_seq_next,
7345         .stop   = tracing_err_log_seq_stop,
7346         .show   = tracing_err_log_seq_show
7347 };
7348
7349 static int tracing_err_log_open(struct inode *inode, struct file *file)
7350 {
7351         struct trace_array *tr = inode->i_private;
7352         int ret = 0;
7353
7354         ret = tracing_check_open_get_tr(tr);
7355         if (ret)
7356                 return ret;
7357
7358         /* If this file was opened for write, then erase contents */
7359         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7360                 clear_tracing_err_log(tr);
7361
7362         if (file->f_mode & FMODE_READ) {
7363                 ret = seq_open(file, &tracing_err_log_seq_ops);
7364                 if (!ret) {
7365                         struct seq_file *m = file->private_data;
7366                         m->private = tr;
7367                 } else {
7368                         trace_array_put(tr);
7369                 }
7370         }
7371         return ret;
7372 }
7373
7374 static ssize_t tracing_err_log_write(struct file *file,
7375                                      const char __user *buffer,
7376                                      size_t count, loff_t *ppos)
7377 {
7378         return count;
7379 }
7380
7381 static int tracing_err_log_release(struct inode *inode, struct file *file)
7382 {
7383         struct trace_array *tr = inode->i_private;
7384
7385         trace_array_put(tr);
7386
7387         if (file->f_mode & FMODE_READ)
7388                 seq_release(inode, file);
7389
7390         return 0;
7391 }
7392
7393 static const struct file_operations tracing_err_log_fops = {
7394         .open           = tracing_err_log_open,
7395         .write          = tracing_err_log_write,
7396         .read           = seq_read,
7397         .llseek         = seq_lseek,
7398         .release        = tracing_err_log_release,
7399 };
7400
7401 static int tracing_buffers_open(struct inode *inode, struct file *filp)
7402 {
7403         struct trace_array *tr = inode->i_private;
7404         struct ftrace_buffer_info *info;
7405         int ret;
7406
7407         ret = tracing_check_open_get_tr(tr);
7408         if (ret)
7409                 return ret;
7410
7411         info = kzalloc(sizeof(*info), GFP_KERNEL);
7412         if (!info) {
7413                 trace_array_put(tr);
7414                 return -ENOMEM;
7415         }
7416
7417         mutex_lock(&trace_types_lock);
7418
7419         info->iter.tr           = tr;
7420         info->iter.cpu_file     = tracing_get_cpu(inode);
7421         info->iter.trace        = tr->current_trace;
7422         info->iter.array_buffer = &tr->array_buffer;
7423         info->spare             = NULL;
7424         /* Force reading ring buffer for first read */
7425         info->read              = (unsigned int)-1;
7426
7427         filp->private_data = info;
7428
7429         tr->trace_ref++;
7430
7431         mutex_unlock(&trace_types_lock);
7432
7433         ret = nonseekable_open(inode, filp);
7434         if (ret < 0)
7435                 trace_array_put(tr);
7436
7437         return ret;
7438 }
7439
7440 static __poll_t
7441 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
7442 {
7443         struct ftrace_buffer_info *info = filp->private_data;
7444         struct trace_iterator *iter = &info->iter;
7445
7446         return trace_poll(iter, filp, poll_table);
7447 }
7448
7449 static ssize_t
7450 tracing_buffers_read(struct file *filp, char __user *ubuf,
7451                      size_t count, loff_t *ppos)
7452 {
7453         struct ftrace_buffer_info *info = filp->private_data;
7454         struct trace_iterator *iter = &info->iter;
7455         ssize_t ret = 0;
7456         ssize_t size;
7457
7458         if (!count)
7459                 return 0;
7460
7461 #ifdef CONFIG_TRACER_MAX_TRACE
7462         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7463                 return -EBUSY;
7464 #endif
7465
7466         if (!info->spare) {
7467                 info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
7468                                                           iter->cpu_file);
7469                 if (IS_ERR(info->spare)) {
7470                         ret = PTR_ERR(info->spare);
7471                         info->spare = NULL;
7472                 } else {
7473                         info->spare_cpu = iter->cpu_file;
7474                 }
7475         }
7476         if (!info->spare)
7477                 return ret;
7478
7479         /* Do we have previous read data to read? */
7480         if (info->read < PAGE_SIZE)
7481                 goto read;
7482
7483  again:
7484         trace_access_lock(iter->cpu_file);
7485         ret = ring_buffer_read_page(iter->array_buffer->buffer,
7486                                     &info->spare,
7487                                     count,
7488                                     iter->cpu_file, 0);
7489         trace_access_unlock(iter->cpu_file);
7490
7491         if (ret < 0) {
7492                 if (trace_empty(iter)) {
7493                         if ((filp->f_flags & O_NONBLOCK))
7494                                 return -EAGAIN;
7495
7496                         ret = wait_on_pipe(iter, 0);
7497                         if (ret)
7498                                 return ret;
7499
7500                         goto again;
7501                 }
7502                 return 0;
7503         }
7504
7505         info->read = 0;
7506  read:
7507         size = PAGE_SIZE - info->read;
7508         if (size > count)
7509                 size = count;
7510
7511         ret = copy_to_user(ubuf, info->spare + info->read, size);
7512         if (ret == size)
7513                 return -EFAULT;
7514
7515         size -= ret;
7516
7517         *ppos += size;
7518         info->read += size;
7519
7520         return size;
7521 }
7522
7523 static int tracing_buffers_release(struct inode *inode, struct file *file)
7524 {
7525         struct ftrace_buffer_info *info = file->private_data;
7526         struct trace_iterator *iter = &info->iter;
7527
7528         mutex_lock(&trace_types_lock);
7529
7530         iter->tr->trace_ref--;
7531
7532         __trace_array_put(iter->tr);
7533
7534         if (info->spare)
7535                 ring_buffer_free_read_page(iter->array_buffer->buffer,
7536                                            info->spare_cpu, info->spare);
7537         kfree(info);
7538
7539         mutex_unlock(&trace_types_lock);
7540
7541         return 0;
7542 }
7543
7544 struct buffer_ref {
7545         struct trace_buffer     *buffer;
7546         void                    *page;
7547         int                     cpu;
7548         refcount_t              refcount;
7549 };
7550
7551 static void buffer_ref_release(struct buffer_ref *ref)
7552 {
7553         if (!refcount_dec_and_test(&ref->refcount))
7554                 return;
7555         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
7556         kfree(ref);
7557 }
7558
7559 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
7560                                     struct pipe_buffer *buf)
7561 {
7562         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7563
7564         buffer_ref_release(ref);
7565         buf->private = 0;
7566 }
7567
7568 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
7569                                 struct pipe_buffer *buf)
7570 {
7571         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7572
7573         if (refcount_read(&ref->refcount) > INT_MAX/2)
7574                 return false;
7575
7576         refcount_inc(&ref->refcount);
7577         return true;
7578 }
7579
7580 /* Pipe buffer operations for a buffer. */
7581 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
7582         .release                = buffer_pipe_buf_release,
7583         .get                    = buffer_pipe_buf_get,
7584 };
7585
7586 /*
7587  * Callback from splice_to_pipe(), if we need to release some pages
7588  * at the end of the spd in case we errored out while filling the pipe.
7589  */
7590 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
7591 {
7592         struct buffer_ref *ref =
7593                 (struct buffer_ref *)spd->partial[i].private;
7594
7595         buffer_ref_release(ref);
7596         spd->partial[i].private = 0;
7597 }
7598
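/*
 * splice() handler for the per-CPU trace_pipe_raw files: hands whole
 * ring-buffer pages to the pipe without copying.  Each page is wrapped
 * in a refcounted buffer_ref so it is returned to the ring buffer only
 * once every pipe reader is done with it.
 */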
7599 static ssize_t
7600 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7601                             struct pipe_inode_info *pipe, size_t len,
7602                             unsigned int flags)
7603 {
7604         struct ftrace_buffer_info *info = file->private_data;
7605         struct trace_iterator *iter = &info->iter;
7606         struct partial_page partial_def[PIPE_DEF_BUFFERS];
7607         struct page *pages_def[PIPE_DEF_BUFFERS];
7608         struct splice_pipe_desc spd = {
7609                 .pages          = pages_def,
7610                 .partial        = partial_def,
7611                 .nr_pages_max   = PIPE_DEF_BUFFERS,
7612                 .ops            = &buffer_pipe_buf_ops,
7613                 .spd_release    = buffer_spd_release,
7614         };
7615         struct buffer_ref *ref;
7616         int entries, i;
7617         ssize_t ret = 0;
7618
7619 #ifdef CONFIG_TRACER_MAX_TRACE
7620         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7621                 return -EBUSY;
7622 #endif
7623
7624         if (*ppos & (PAGE_SIZE - 1))
7625                 return -EINVAL;
7626
7627         if (len & (PAGE_SIZE - 1)) {
7628                 if (len < PAGE_SIZE)
7629                         return -EINVAL;
7630                 len &= PAGE_MASK;
7631         }
7632
7633         if (splice_grow_spd(pipe, &spd))
7634                 return -ENOMEM;
7635
7636  again:
7637         trace_access_lock(iter->cpu_file);
7638         entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
7639
7640         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
7641                 struct page *page;
7642                 int r;
7643
7644                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
7645                 if (!ref) {
7646                         ret = -ENOMEM;
7647                         break;
7648                 }
7649
7650                 refcount_set(&ref->refcount, 1);
7651                 ref->buffer = iter->array_buffer->buffer;
7652                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
7653                 if (IS_ERR(ref->page)) {
7654                         ret = PTR_ERR(ref->page);
7655                         ref->page = NULL;
7656                         kfree(ref);
7657                         break;
7658                 }
7659                 ref->cpu = iter->cpu_file;
7660
7661                 r = ring_buffer_read_page(ref->buffer, &ref->page,
7662                                           len, iter->cpu_file, 1);
7663                 if (r < 0) {
7664                         ring_buffer_free_read_page(ref->buffer, ref->cpu,
7665                                                    ref->page);
7666                         kfree(ref);
7667                         break;
7668                 }
7669
7670                 page = virt_to_page(ref->page);
7671
7672                 spd.pages[i] = page;
7673                 spd.partial[i].len = PAGE_SIZE;
7674                 spd.partial[i].offset = 0;
7675                 spd.partial[i].private = (unsigned long)ref;
7676                 spd.nr_pages++;
7677                 *ppos += PAGE_SIZE;
7678
7679                 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
7680         }
7681
7682         trace_access_unlock(iter->cpu_file);
7683         spd.nr_pages = i;
7684
7685         /* did we read anything? */
7686         if (!spd.nr_pages) {
7687                 if (ret)
7688                         goto out;
7689
7690                 ret = -EAGAIN;
7691                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
7692                         goto out;
7693
7694                 ret = wait_on_pipe(iter, iter->tr->buffer_percent);
7695                 if (ret)
7696                         goto out;
7697
7698                 goto again;
7699         }
7700
7701         ret = splice_to_pipe(pipe, &spd);
7702 out:
7703         splice_shrink_spd(&spd);
7704
7705         return ret;
7706 }
7707
7708 static const struct file_operations tracing_buffers_fops = {
7709         .open           = tracing_buffers_open,
7710         .read           = tracing_buffers_read,
7711         .poll           = tracing_buffers_poll,
7712         .release        = tracing_buffers_release,
7713         .splice_read    = tracing_buffers_splice_read,
7714         .llseek         = no_llseek,
7715 };
7716
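/*
 * Read handler for the per-CPU stats file.  The output is a small
 * key/value report, roughly:
 *
 *	entries: <n>
 *	overrun: <n>
 *	commit overrun: <n>
 *	bytes: <n>
 *	oldest event ts: <sec>.<usec>
 *	now ts: <sec>.<usec>
 *	dropped events: <n>
 *	read events: <n>
 */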
7717 static ssize_t
7718 tracing_stats_read(struct file *filp, char __user *ubuf,
7719                    size_t count, loff_t *ppos)
7720 {
7721         struct inode *inode = file_inode(filp);
7722         struct trace_array *tr = inode->i_private;
7723         struct array_buffer *trace_buf = &tr->array_buffer;
7724         int cpu = tracing_get_cpu(inode);
7725         struct trace_seq *s;
7726         unsigned long cnt;
7727         unsigned long long t;
7728         unsigned long usec_rem;
7729
7730         s = kmalloc(sizeof(*s), GFP_KERNEL);
7731         if (!s)
7732                 return -ENOMEM;
7733
7734         trace_seq_init(s);
7735
7736         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
7737         trace_seq_printf(s, "entries: %ld\n", cnt);
7738
7739         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
7740         trace_seq_printf(s, "overrun: %ld\n", cnt);
7741
7742         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
7743         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
7744
7745         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
7746         trace_seq_printf(s, "bytes: %ld\n", cnt);
7747
7748         if (trace_clocks[tr->clock_id].in_ns) {
7749                 /* local or global for trace_clock */
7750                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7751                 usec_rem = do_div(t, USEC_PER_SEC);
7752                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
7753                                                                 t, usec_rem);
7754
7755                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
7756                 usec_rem = do_div(t, USEC_PER_SEC);
7757                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
7758         } else {
7759                 /* counter or tsc mode for trace_clock */
7760                 trace_seq_printf(s, "oldest event ts: %llu\n",
7761                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7762
7763                 trace_seq_printf(s, "now ts: %llu\n",
7764                                 ring_buffer_time_stamp(trace_buf->buffer, cpu));
7765         }
7766
7767         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
7768         trace_seq_printf(s, "dropped events: %ld\n", cnt);
7769
7770         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
7771         trace_seq_printf(s, "read events: %ld\n", cnt);
7772
7773         count = simple_read_from_buffer(ubuf, count, ppos,
7774                                         s->buffer, trace_seq_used(s));
7775
7776         kfree(s);
7777
7778         return count;
7779 }
7780
7781 static const struct file_operations tracing_stats_fops = {
7782         .open           = tracing_open_generic_tr,
7783         .read           = tracing_stats_read,
7784         .llseek         = generic_file_llseek,
7785         .release        = tracing_release_generic_tr,
7786 };
7787
7788 #ifdef CONFIG_DYNAMIC_FTRACE
7789
7790 static ssize_t
7791 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
7792                   size_t cnt, loff_t *ppos)
7793 {
7794         ssize_t ret;
7795         char *buf;
7796         int r;
7797
7798         /* 256 should be plenty to hold the amount needed */
7799         buf = kmalloc(256, GFP_KERNEL);
7800         if (!buf)
7801                 return -ENOMEM;
7802
7803         r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
7804                       ftrace_update_tot_cnt,
7805                       ftrace_number_of_pages,
7806                       ftrace_number_of_groups);
7807
7808         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7809         kfree(buf);
7810         return ret;
7811 }
7812
7813 static const struct file_operations tracing_dyn_info_fops = {
7814         .open           = tracing_open_generic,
7815         .read           = tracing_read_dyn_info,
7816         .llseek         = generic_file_llseek,
7817 };
7818 #endif /* CONFIG_DYNAMIC_FTRACE */
7819
7820 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
7821 static void
7822 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
7823                 struct trace_array *tr, struct ftrace_probe_ops *ops,
7824                 void *data)
7825 {
7826         tracing_snapshot_instance(tr);
7827 }
7828
7829 static void
7830 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
7831                       struct trace_array *tr, struct ftrace_probe_ops *ops,
7832                       void *data)
7833 {
7834         struct ftrace_func_mapper *mapper = data;
7835         long *count = NULL;
7836
7837         if (mapper)
7838                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7839
7840         if (count) {
7841
7842                 if (*count <= 0)
7843                         return;
7844
7845                 (*count)--;
7846         }
7847
7848         tracing_snapshot_instance(tr);
7849 }
7850
7851 static int
7852 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
7853                       struct ftrace_probe_ops *ops, void *data)
7854 {
7855         struct ftrace_func_mapper *mapper = data;
7856         long *count = NULL;
7857
7858         seq_printf(m, "%ps:", (void *)ip);
7859
7860         seq_puts(m, "snapshot");
7861
7862         if (mapper)
7863                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7864
7865         if (count)
7866                 seq_printf(m, ":count=%ld\n", *count);
7867         else
7868                 seq_puts(m, ":unlimited\n");
7869
7870         return 0;
7871 }
7872
7873 static int
7874 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
7875                      unsigned long ip, void *init_data, void **data)
7876 {
7877         struct ftrace_func_mapper *mapper = *data;
7878
7879         if (!mapper) {
7880                 mapper = allocate_ftrace_func_mapper();
7881                 if (!mapper)
7882                         return -ENOMEM;
7883                 *data = mapper;
7884         }
7885
7886         return ftrace_func_mapper_add_ip(mapper, ip, init_data);
7887 }
7888
7889 static void
7890 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
7891                      unsigned long ip, void *data)
7892 {
7893         struct ftrace_func_mapper *mapper = data;
7894
7895         if (!ip) {
7896                 if (!mapper)
7897                         return;
7898                 free_ftrace_func_mapper(mapper, NULL);
7899                 return;
7900         }
7901
7902         ftrace_func_mapper_remove_ip(mapper, ip);
7903 }
7904
7905 static struct ftrace_probe_ops snapshot_probe_ops = {
7906         .func                   = ftrace_snapshot,
7907         .print                  = ftrace_snapshot_print,
7908 };
7909
7910 static struct ftrace_probe_ops snapshot_count_probe_ops = {
7911         .func                   = ftrace_count_snapshot,
7912         .print                  = ftrace_snapshot_print,
7913         .init                   = ftrace_snapshot_init,
7914         .free                   = ftrace_snapshot_free,
7915 };
7916
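/*
 * Parser for the "snapshot" command used with set_ftrace_filter.  An
 * illustrative use (the traced function is just an example):
 *
 *	echo 'schedule:snapshot' > set_ftrace_filter	: snapshot on every hit
 *	echo 'schedule:snapshot:5' > set_ftrace_filter	: only the first 5 hits
 *	echo '!schedule:snapshot' > set_ftrace_filter	: remove the probe
 */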
7917 static int
7918 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
7919                                char *glob, char *cmd, char *param, int enable)
7920 {
7921         struct ftrace_probe_ops *ops;
7922         void *count = (void *)-1;
7923         char *number;
7924         int ret;
7925
7926         if (!tr)
7927                 return -ENODEV;
7928
7929         /* hash funcs only work with set_ftrace_filter */
7930         if (!enable)
7931                 return -EINVAL;
7932
7933         ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
7934
7935         if (glob[0] == '!')
7936                 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
7937
7938         if (!param)
7939                 goto out_reg;
7940
7941         number = strsep(&param, ":");
7942
7943         if (!strlen(number))
7944                 goto out_reg;
7945
7946         /*
7947          * We use the callback data field (which is a pointer)
7948          * as our counter.
7949          */
7950         ret = kstrtoul(number, 0, (unsigned long *)&count);
7951         if (ret)
7952                 return ret;
7953
7954  out_reg:
7955         ret = tracing_alloc_snapshot_instance(tr);
7956         if (ret < 0)
7957                 goto out;
7958
7959         ret = register_ftrace_function_probe(glob, tr, ops, count);
7960
7961  out:
7962         return ret < 0 ? ret : 0;
7963 }
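
/*
 * Illustrative usage of the "snapshot" command parsed above (an editorial
 * sketch, not part of the upstream file). Paths assume tracefs is mounted
 * at /sys/kernel/tracing:
 *
 *	# take a snapshot every time schedule() is called:
 *	echo 'schedule:snapshot' > set_ftrace_filter
 *
 *	# only snapshot the first 5 hits, then stop:
 *	echo 'schedule:snapshot:5' > set_ftrace_filter
 *
 *	# remove the probes again (glob[0] == '!' above); a counted probe
 *	# needs a trailing count so the count ops are selected:
 *	echo '!schedule:snapshot' > set_ftrace_filter
 *	echo '!schedule:snapshot:0' > set_ftrace_filter
 */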
7964
7965 static struct ftrace_func_command ftrace_snapshot_cmd = {
7966         .name                   = "snapshot",
7967         .func                   = ftrace_trace_snapshot_callback,
7968 };
7969
7970 static __init int register_snapshot_cmd(void)
7971 {
7972         return register_ftrace_command(&ftrace_snapshot_cmd);
7973 }
7974 #else
7975 static inline __init int register_snapshot_cmd(void) { return 0; }
7976 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
7977
7978 static struct dentry *tracing_get_dentry(struct trace_array *tr)
7979 {
7980         if (WARN_ON(!tr->dir))
7981                 return ERR_PTR(-ENODEV);
7982
7983         /* Top directory uses NULL as the parent */
7984         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
7985                 return NULL;
7986
7987         /* All sub buffers have a descriptor */
7988         return tr->dir;
7989 }
7990
7991 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
7992 {
7993         struct dentry *d_tracer;
7994
7995         if (tr->percpu_dir)
7996                 return tr->percpu_dir;
7997
7998         d_tracer = tracing_get_dentry(tr);
7999         if (IS_ERR(d_tracer))
8000                 return NULL;
8001
8002         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8003
8004         MEM_FAIL(!tr->percpu_dir,
8005                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8006
8007         return tr->percpu_dir;
8008 }
8009
8010 static struct dentry *
8011 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8012                       void *data, long cpu, const struct file_operations *fops)
8013 {
8014         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8015
8016         if (ret) /* See tracing_get_cpu() */
8017                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
8018         return ret;
8019 }
8020
8021 static void
8022 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8023 {
8024         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8025         struct dentry *d_cpu;
8026         char cpu_dir[30]; /* 30 characters should be more than enough */
8027
8028         if (!d_percpu)
8029                 return;
8030
8031         snprintf(cpu_dir, 30, "cpu%ld", cpu);
8032         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8033         if (!d_cpu) {
8034                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8035                 return;
8036         }
8037
8038         /* per cpu trace_pipe */
8039         trace_create_cpu_file("trace_pipe", 0444, d_cpu,
8040                                 tr, cpu, &tracing_pipe_fops);
8041
8042         /* per cpu trace */
8043         trace_create_cpu_file("trace", 0644, d_cpu,
8044                                 tr, cpu, &tracing_fops);
8045
8046         trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
8047                                 tr, cpu, &tracing_buffers_fops);
8048
8049         trace_create_cpu_file("stats", 0444, d_cpu,
8050                                 tr, cpu, &tracing_stats_fops);
8051
8052         trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
8053                                 tr, cpu, &tracing_entries_fops);
8054
8055 #ifdef CONFIG_TRACER_SNAPSHOT
8056         trace_create_cpu_file("snapshot", 0644, d_cpu,
8057                                 tr, cpu, &snapshot_fops);
8058
8059         trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
8060                                 tr, cpu, &snapshot_raw_fops);
8061 #endif
8062 }
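
/*
 * Editorial sketch, not part of the upstream file: for each tracing CPU the
 * function above creates a per-CPU directory inside the instance, e.g. for
 * CPU 0:
 *
 *	per_cpu/cpu0/trace_pipe
 *	per_cpu/cpu0/trace
 *	per_cpu/cpu0/trace_pipe_raw
 *	per_cpu/cpu0/stats
 *	per_cpu/cpu0/buffer_size_kb
 *	per_cpu/cpu0/snapshot		(CONFIG_TRACER_SNAPSHOT only)
 *	per_cpu/cpu0/snapshot_raw	(CONFIG_TRACER_SNAPSHOT only)
 *
 * mirroring the top level files but restricted to that CPU's buffer.
 */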
8063
8064 #ifdef CONFIG_FTRACE_SELFTEST
8065 /* Let selftest have access to static functions in this file */
8066 #include "trace_selftest.c"
8067 #endif
8068
8069 static ssize_t
8070 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8071                         loff_t *ppos)
8072 {
8073         struct trace_option_dentry *topt = filp->private_data;
8074         char *buf;
8075
8076         if (topt->flags->val & topt->opt->bit)
8077                 buf = "1\n";
8078         else
8079                 buf = "0\n";
8080
8081         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8082 }
8083
8084 static ssize_t
8085 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8086                          loff_t *ppos)
8087 {
8088         struct trace_option_dentry *topt = filp->private_data;
8089         unsigned long val;
8090         int ret;
8091
8092         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8093         if (ret)
8094                 return ret;
8095
8096         if (val != 0 && val != 1)
8097                 return -EINVAL;
8098
8099         if (!!(topt->flags->val & topt->opt->bit) != val) {
8100                 mutex_lock(&trace_types_lock);
8101                 ret = __set_tracer_option(topt->tr, topt->flags,
8102                                           topt->opt, !val);
8103                 mutex_unlock(&trace_types_lock);
8104                 if (ret)
8105                         return ret;
8106         }
8107
8108         *ppos += cnt;
8109
8110         return cnt;
8111 }
8112
8113
8114 static const struct file_operations trace_options_fops = {
8115         .open = tracing_open_generic,
8116         .read = trace_options_read,
8117         .write = trace_options_write,
8118         .llseek = generic_file_llseek,
8119 };
8120
8121 /*
8122  * In order to pass in both the trace_array descriptor as well as the index
8123  * to the flag that the trace option file represents, the trace_array
8124  * has a character array of trace_flags_index[], which holds the index
8125  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8126  * The address of the corresponding element of this array is passed to
8127  * the flag option file read/write callbacks.
8128  *
8129  * In order to extract both the index and the trace_array descriptor,
8130  * get_tr_index() uses the following algorithm.
8131  *
8132  *   idx = *ptr;
8133  *
8134  * The pointer passed in points at one element of the index array,
8135  * and the value stored there is its own index (index[1] == 1).
8136  *
8137  * To recover the trace_array descriptor, subtract that index from
8138  * the pointer to land on the start of the index array:
8139  *
8140  *   ptr - idx == &index[0]
8141  *
8142  * Then a simple container_of() from that pointer gets us to the
8143  * trace_array descriptor.
8144  */
8145 static void get_tr_index(void *data, struct trace_array **ptr,
8146                          unsigned int *pindex)
8147 {
8148         *pindex = *(unsigned char *)data;
8149
8150         *ptr = container_of(data - *pindex, struct trace_array,
8151                             trace_flags_index);
8152 }
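
/*
 * Editorial sketch, not part of the upstream file, of the arithmetic
 * performed by get_tr_index(): if the file's private data points at
 * tr->trace_flags_index[3], the stored value is 3, so stepping back three
 * bytes lands on &tr->trace_flags_index[0] and container_of() recovers tr:
 *
 *	void *data = &tr->trace_flags_index[3];
 *	unsigned int idx = *(unsigned char *)data;	// idx == 3
 *	struct trace_array *t =
 *		container_of(data - idx, struct trace_array,
 *			     trace_flags_index);	// t == tr
 */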
8153
8154 static ssize_t
8155 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8156                         loff_t *ppos)
8157 {
8158         void *tr_index = filp->private_data;
8159         struct trace_array *tr;
8160         unsigned int index;
8161         char *buf;
8162
8163         get_tr_index(tr_index, &tr, &index);
8164
8165         if (tr->trace_flags & (1 << index))
8166                 buf = "1\n";
8167         else
8168                 buf = "0\n";
8169
8170         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8171 }
8172
8173 static ssize_t
8174 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8175                          loff_t *ppos)
8176 {
8177         void *tr_index = filp->private_data;
8178         struct trace_array *tr;
8179         unsigned int index;
8180         unsigned long val;
8181         int ret;
8182
8183         get_tr_index(tr_index, &tr, &index);
8184
8185         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8186         if (ret)
8187                 return ret;
8188
8189         if (val != 0 && val != 1)
8190                 return -EINVAL;
8191
8192         mutex_lock(&event_mutex);
8193         mutex_lock(&trace_types_lock);
8194         ret = set_tracer_flag(tr, 1 << index, val);
8195         mutex_unlock(&trace_types_lock);
8196         mutex_unlock(&event_mutex);
8197
8198         if (ret < 0)
8199                 return ret;
8200
8201         *ppos += cnt;
8202
8203         return cnt;
8204 }
8205
8206 static const struct file_operations trace_options_core_fops = {
8207         .open = tracing_open_generic,
8208         .read = trace_options_core_read,
8209         .write = trace_options_core_write,
8210         .llseek = generic_file_llseek,
8211 };
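
/*
 * Editorial usage sketch, not part of the upstream file: each file in the
 * per-instance options/ directory (created below) exposes one flag through
 * the fops above and accepts only "0" or "1", e.g.
 *
 *	cat options/sym-offset		# read the current state
 *	echo 1 > options/sym-offset	# set the flag
 *	echo 0 > options/sym-offset	# clear it
 *
 * Tracer-specific options registered via create_trace_option_file() appear
 * in the same directory and behave the same way.
 */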
8212
8213 struct dentry *trace_create_file(const char *name,
8214                                  umode_t mode,
8215                                  struct dentry *parent,
8216                                  void *data,
8217                                  const struct file_operations *fops)
8218 {
8219         struct dentry *ret;
8220
8221         ret = tracefs_create_file(name, mode, parent, data, fops);
8222         if (!ret)
8223                 pr_warn("Could not create tracefs '%s' entry\n", name);
8224
8225         return ret;
8226 }
8227
8228
8229 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8230 {
8231         struct dentry *d_tracer;
8232
8233         if (tr->options)
8234                 return tr->options;
8235
8236         d_tracer = tracing_get_dentry(tr);
8237         if (IS_ERR(d_tracer))
8238                 return NULL;
8239
8240         tr->options = tracefs_create_dir("options", d_tracer);
8241         if (!tr->options) {
8242                 pr_warn("Could not create tracefs directory 'options'\n");
8243                 return NULL;
8244         }
8245
8246         return tr->options;
8247 }
8248
8249 static void
8250 create_trace_option_file(struct trace_array *tr,
8251                          struct trace_option_dentry *topt,
8252                          struct tracer_flags *flags,
8253                          struct tracer_opt *opt)
8254 {
8255         struct dentry *t_options;
8256
8257         t_options = trace_options_init_dentry(tr);
8258         if (!t_options)
8259                 return;
8260
8261         topt->flags = flags;
8262         topt->opt = opt;
8263         topt->tr = tr;
8264
8265         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
8266                                     &trace_options_fops);
8267
8268 }
8269
8270 static void
8271 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8272 {
8273         struct trace_option_dentry *topts;
8274         struct trace_options *tr_topts;
8275         struct tracer_flags *flags;
8276         struct tracer_opt *opts;
8277         int cnt;
8278         int i;
8279
8280         if (!tracer)
8281                 return;
8282
8283         flags = tracer->flags;
8284
8285         if (!flags || !flags->opts)
8286                 return;
8287
8288         /*
8289          * If this is an instance, only create flags for tracers
8290          * the instance may have.
8291          */
8292         if (!trace_ok_for_array(tracer, tr))
8293                 return;
8294
8295         for (i = 0; i < tr->nr_topts; i++) {
8296                 /* Make sure there are no duplicate flags. */
8297                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8298                         return;
8299         }
8300
8301         opts = flags->opts;
8302
8303         for (cnt = 0; opts[cnt].name; cnt++)
8304                 ;
8305
8306         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8307         if (!topts)
8308                 return;
8309
8310         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8311                             GFP_KERNEL);
8312         if (!tr_topts) {
8313                 kfree(topts);
8314                 return;
8315         }
8316
8317         tr->topts = tr_topts;
8318         tr->topts[tr->nr_topts].tracer = tracer;
8319         tr->topts[tr->nr_topts].topts = topts;
8320         tr->nr_topts++;
8321
8322         for (cnt = 0; opts[cnt].name; cnt++) {
8323                 create_trace_option_file(tr, &topts[cnt], flags,
8324                                          &opts[cnt]);
8325                 MEM_FAIL(topts[cnt].entry == NULL,
8326                           "Failed to create trace option: %s",
8327                           opts[cnt].name);
8328         }
8329 }
8330
8331 static struct dentry *
8332 create_trace_option_core_file(struct trace_array *tr,
8333                               const char *option, long index)
8334 {
8335         struct dentry *t_options;
8336
8337         t_options = trace_options_init_dentry(tr);
8338         if (!t_options)
8339                 return NULL;
8340
8341         return trace_create_file(option, 0644, t_options,
8342                                  (void *)&tr->trace_flags_index[index],
8343                                  &trace_options_core_fops);
8344 }
8345
8346 static void create_trace_options_dir(struct trace_array *tr)
8347 {
8348         struct dentry *t_options;
8349         bool top_level = tr == &global_trace;
8350         int i;
8351
8352         t_options = trace_options_init_dentry(tr);
8353         if (!t_options)
8354                 return;
8355
8356         for (i = 0; trace_options[i]; i++) {
8357                 if (top_level ||
8358                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
8359                         create_trace_option_core_file(tr, trace_options[i], i);
8360         }
8361 }
8362
8363 static ssize_t
8364 rb_simple_read(struct file *filp, char __user *ubuf,
8365                size_t cnt, loff_t *ppos)
8366 {
8367         struct trace_array *tr = filp->private_data;
8368         char buf[64];
8369         int r;
8370
8371         r = tracer_tracing_is_on(tr);
8372         r = sprintf(buf, "%d\n", r);
8373
8374         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8375 }
8376
8377 static ssize_t
8378 rb_simple_write(struct file *filp, const char __user *ubuf,
8379                 size_t cnt, loff_t *ppos)
8380 {
8381         struct trace_array *tr = filp->private_data;
8382         struct trace_buffer *buffer = tr->array_buffer.buffer;
8383         unsigned long val;
8384         int ret;
8385
8386         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8387         if (ret)
8388                 return ret;
8389
8390         if (buffer) {
8391                 mutex_lock(&trace_types_lock);
8392                 if (!!val == tracer_tracing_is_on(tr)) {
8393                         val = 0; /* do nothing */
8394                 } else if (val) {
8395                         tracer_tracing_on(tr);
8396                         if (tr->current_trace->start)
8397                                 tr->current_trace->start(tr);
8398                 } else {
8399                         tracer_tracing_off(tr);
8400                         if (tr->current_trace->stop)
8401                                 tr->current_trace->stop(tr);
8402                 }
8403                 mutex_unlock(&trace_types_lock);
8404         }
8405
8406         (*ppos)++;
8407
8408         return cnt;
8409 }
8410
8411 static const struct file_operations rb_simple_fops = {
8412         .open           = tracing_open_generic_tr,
8413         .read           = rb_simple_read,
8414         .write          = rb_simple_write,
8415         .release        = tracing_release_generic_tr,
8416         .llseek         = default_llseek,
8417 };
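
/*
 * Editorial usage sketch, not part of the upstream file: rb_simple_fops
 * backs the per-instance "tracing_on" file created in init_tracer_tracefs()
 * below, e.g.
 *
 *	echo 0 > /sys/kernel/tracing/tracing_on    # stop recording
 *	echo 1 > /sys/kernel/tracing/tracing_on    # resume recording
 *	cat /sys/kernel/tracing/tracing_on         # query the state
 */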
8418
8419 static ssize_t
8420 buffer_percent_read(struct file *filp, char __user *ubuf,
8421                     size_t cnt, loff_t *ppos)
8422 {
8423         struct trace_array *tr = filp->private_data;
8424         char buf[64];
8425         int r;
8426
8427         r = tr->buffer_percent;
8428         r = sprintf(buf, "%d\n", r);
8429
8430         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8431 }
8432
8433 static ssize_t
8434 buffer_percent_write(struct file *filp, const char __user *ubuf,
8435                      size_t cnt, loff_t *ppos)
8436 {
8437         struct trace_array *tr = filp->private_data;
8438         unsigned long val;
8439         int ret;
8440
8441         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8442         if (ret)
8443                 return ret;
8444
8445         if (val > 100)
8446                 return -EINVAL;
8447
8448         if (!val)
8449                 val = 1;
8450
8451         tr->buffer_percent = val;
8452
8453         (*ppos)++;
8454
8455         return cnt;
8456 }
8457
8458 static const struct file_operations buffer_percent_fops = {
8459         .open           = tracing_open_generic_tr,
8460         .read           = buffer_percent_read,
8461         .write          = buffer_percent_write,
8462         .release        = tracing_release_generic_tr,
8463         .llseek         = default_llseek,
8464 };
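
/*
 * Editorial usage sketch, not part of the upstream file: the
 * "buffer_percent" file controls how full the ring buffer must be before
 * blocked readers are woken. The write handler above rejects values over
 * 100 and maps 0 to 1; the default of 50 is set in init_tracer_tracefs()
 * below.
 *
 *	echo 1 > /sys/kernel/tracing/buffer_percent	# wake readers early
 *	echo 100 > /sys/kernel/tracing/buffer_percent	# wake only when full
 */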
8465
8466 static struct dentry *trace_instance_dir;
8467
8468 static void
8469 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
8470
8471 static int
8472 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
8473 {
8474         enum ring_buffer_flags rb_flags;
8475
8476         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
8477
8478         buf->tr = tr;
8479
8480         buf->buffer = ring_buffer_alloc(size, rb_flags);
8481         if (!buf->buffer)
8482                 return -ENOMEM;
8483
8484         buf->data = alloc_percpu(struct trace_array_cpu);
8485         if (!buf->data) {
8486                 ring_buffer_free(buf->buffer);
8487                 buf->buffer = NULL;
8488                 return -ENOMEM;
8489         }
8490
8491         /* Allocate the first page for all buffers */
8492         set_buffer_entries(&tr->array_buffer,
8493                            ring_buffer_size(tr->array_buffer.buffer, 0));
8494
8495         return 0;
8496 }
8497
8498 static int allocate_trace_buffers(struct trace_array *tr, int size)
8499 {
8500         int ret;
8501
8502         ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
8503         if (ret)
8504                 return ret;
8505
8506 #ifdef CONFIG_TRACER_MAX_TRACE
8507         ret = allocate_trace_buffer(tr, &tr->max_buffer,
8508                                     allocate_snapshot ? size : 1);
8509         if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
8510                 ring_buffer_free(tr->array_buffer.buffer);
8511                 tr->array_buffer.buffer = NULL;
8512                 free_percpu(tr->array_buffer.data);
8513                 tr->array_buffer.data = NULL;
8514                 return -ENOMEM;
8515         }
8516         tr->allocated_snapshot = allocate_snapshot;
8517
8518         /*
8519          * Only the top level trace array gets its snapshot allocated
8520          * from the kernel command line.
8521          */
8522         allocate_snapshot = false;
8523 #endif
8524
8525         return 0;
8526 }
8527
8528 static void free_trace_buffer(struct array_buffer *buf)
8529 {
8530         if (buf->buffer) {
8531                 ring_buffer_free(buf->buffer);
8532                 buf->buffer = NULL;
8533                 free_percpu(buf->data);
8534                 buf->data = NULL;
8535         }
8536 }
8537
8538 static void free_trace_buffers(struct trace_array *tr)
8539 {
8540         if (!tr)
8541                 return;
8542
8543         free_trace_buffer(&tr->array_buffer);
8544
8545 #ifdef CONFIG_TRACER_MAX_TRACE
8546         free_trace_buffer(&tr->max_buffer);
8547 #endif
8548 }
8549
8550 static void init_trace_flags_index(struct trace_array *tr)
8551 {
8552         int i;
8553
8554         /* Used by the trace options files */
8555         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
8556                 tr->trace_flags_index[i] = i;
8557 }
8558
8559 static void __update_tracer_options(struct trace_array *tr)
8560 {
8561         struct tracer *t;
8562
8563         for (t = trace_types; t; t = t->next)
8564                 add_tracer_options(tr, t);
8565 }
8566
8567 static void update_tracer_options(struct trace_array *tr)
8568 {
8569         mutex_lock(&trace_types_lock);
8570         __update_tracer_options(tr);
8571         mutex_unlock(&trace_types_lock);
8572 }
8573
8574 /* Must have trace_types_lock held */
8575 struct trace_array *trace_array_find(const char *instance)
8576 {
8577         struct trace_array *tr, *found = NULL;
8578
8579         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8580                 if (tr->name && strcmp(tr->name, instance) == 0) {
8581                         found = tr;
8582                         break;
8583                 }
8584         }
8585
8586         return found;
8587 }
8588
8589 struct trace_array *trace_array_find_get(const char *instance)
8590 {
8591         struct trace_array *tr;
8592
8593         mutex_lock(&trace_types_lock);
8594         tr = trace_array_find(instance);
8595         if (tr)
8596                 tr->ref++;
8597         mutex_unlock(&trace_types_lock);
8598
8599         return tr;
8600 }
8601
8602 static struct trace_array *trace_array_create(const char *name)
8603 {
8604         struct trace_array *tr;
8605         int ret;
8606
8607         ret = -ENOMEM;
8608         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
8609         if (!tr)
8610                 return ERR_PTR(ret);
8611
8612         tr->name = kstrdup(name, GFP_KERNEL);
8613         if (!tr->name)
8614                 goto out_free_tr;
8615
8616         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
8617                 goto out_free_tr;
8618
8619         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
8620
8621         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
8622
8623         raw_spin_lock_init(&tr->start_lock);
8624
8625         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8626
8627         tr->current_trace = &nop_trace;
8628
8629         INIT_LIST_HEAD(&tr->systems);
8630         INIT_LIST_HEAD(&tr->events);
8631         INIT_LIST_HEAD(&tr->hist_vars);
8632         INIT_LIST_HEAD(&tr->err_log);
8633
8634         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
8635                 goto out_free_tr;
8636
8637         tr->dir = tracefs_create_dir(name, trace_instance_dir);
8638         if (!tr->dir)
8639                 goto out_free_tr;
8640
8641         ret = event_trace_add_tracer(tr->dir, tr);
8642         if (ret) {
8643                 tracefs_remove(tr->dir);
8644                 goto out_free_tr;
8645         }
8646
8647         ftrace_init_trace_array(tr);
8648
8649         init_tracer_tracefs(tr, tr->dir);
8650         init_trace_flags_index(tr);
8651         __update_tracer_options(tr);
8652
8653         list_add(&tr->list, &ftrace_trace_arrays);
8654
8655         tr->ref++;
8656
8657
8658         return tr;
8659
8660  out_free_tr:
8661         free_trace_buffers(tr);
8662         free_cpumask_var(tr->tracing_cpumask);
8663         kfree(tr->name);
8664         kfree(tr);
8665
8666         return ERR_PTR(ret);
8667 }
8668
8669 static int instance_mkdir(const char *name)
8670 {
8671         struct trace_array *tr;
8672         int ret;
8673
8674         mutex_lock(&event_mutex);
8675         mutex_lock(&trace_types_lock);
8676
8677         ret = -EEXIST;
8678         if (trace_array_find(name))
8679                 goto out_unlock;
8680
8681         tr = trace_array_create(name);
8682
8683         ret = PTR_ERR_OR_ZERO(tr);
8684
8685 out_unlock:
8686         mutex_unlock(&trace_types_lock);
8687         mutex_unlock(&event_mutex);
8688         return ret;
8689 }
8690
8691 /**
8692  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
8693  * @name: The name of the trace array to be looked up/created.
8694  *
8695  * Returns a pointer to the trace array with the given name, creating
8696  * it if it does not already exist. Returns NULL if it cannot be created.
8697  *
8698  * NOTE: This function increments the reference counter associated with the
8699  * trace array returned. This makes sure it cannot be freed while in use.
8700  * Use trace_array_put() once the trace array is no longer needed.
8701  * If the trace_array is to be freed, trace_array_destroy() needs to
8702  * be called after the trace_array_put(), or simply let user space delete
8703  * it from the tracefs instances directory. But until the
8704  * trace_array_put() is called, user space can not delete it.
8705  *
8706  */
8707 struct trace_array *trace_array_get_by_name(const char *name)
8708 {
8709         struct trace_array *tr;
8710
8711         mutex_lock(&event_mutex);
8712         mutex_lock(&trace_types_lock);
8713
8714         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8715                 if (tr->name && strcmp(tr->name, name) == 0)
8716                         goto out_unlock;
8717         }
8718
8719         tr = trace_array_create(name);
8720
8721         if (IS_ERR(tr))
8722                 tr = NULL;
8723 out_unlock:
8724         if (tr)
8725                 tr->ref++;
8726
8727         mutex_unlock(&trace_types_lock);
8728         mutex_unlock(&event_mutex);
8729         return tr;
8730 }
8731 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
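
/*
 * Editorial sketch, not part of the upstream file, of the intended in-kernel
 * usage described in the kerneldoc above (error handling trimmed, instance
 * name hypothetical):
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("my_instance");
 *	if (!tr)
 *		return -ENOMEM;
 *	...
 *	trace_array_put(tr);		// drop the reference taken above
 *	trace_array_destroy(tr);	// optionally remove the instance
 */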
8732
8733 static int __remove_instance(struct trace_array *tr)
8734 {
8735         int i;
8736
8737         /* Reference counter for a newly created trace array = 1. */
8738         if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
8739                 return -EBUSY;
8740
8741         list_del(&tr->list);
8742
8743         /* Disable all the flags that were enabled coming in */
8744         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
8745                 if ((1 << i) & ZEROED_TRACE_FLAGS)
8746                         set_tracer_flag(tr, 1 << i, 0);
8747         }
8748
8749         tracing_set_nop(tr);
8750         clear_ftrace_function_probes(tr);
8751         event_trace_del_tracer(tr);
8752         ftrace_clear_pids(tr);
8753         ftrace_destroy_function_files(tr);
8754         tracefs_remove(tr->dir);
8755         free_trace_buffers(tr);
8756
8757         for (i = 0; i < tr->nr_topts; i++) {
8758                 kfree(tr->topts[i].topts);
8759         }
8760         kfree(tr->topts);
8761
8762         free_cpumask_var(tr->tracing_cpumask);
8763         kfree(tr->name);
8764         kfree(tr);
8765         tr = NULL;
8766
8767         return 0;
8768 }
8769
8770 int trace_array_destroy(struct trace_array *this_tr)
8771 {
8772         struct trace_array *tr;
8773         int ret;
8774
8775         if (!this_tr)
8776                 return -EINVAL;
8777
8778         mutex_lock(&event_mutex);
8779         mutex_lock(&trace_types_lock);
8780
8781         ret = -ENODEV;
8782
8783         /* Make sure the trace array exists before destroying it. */
8784         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8785                 if (tr == this_tr) {
8786                         ret = __remove_instance(tr);
8787                         break;
8788                 }
8789         }
8790
8791         mutex_unlock(&trace_types_lock);
8792         mutex_unlock(&event_mutex);
8793
8794         return ret;
8795 }
8796 EXPORT_SYMBOL_GPL(trace_array_destroy);
8797
8798 static int instance_rmdir(const char *name)
8799 {
8800         struct trace_array *tr;
8801         int ret;
8802
8803         mutex_lock(&event_mutex);
8804         mutex_lock(&trace_types_lock);
8805
8806         ret = -ENODEV;
8807         tr = trace_array_find(name);
8808         if (tr)
8809                 ret = __remove_instance(tr);
8810
8811         mutex_unlock(&trace_types_lock);
8812         mutex_unlock(&event_mutex);
8813
8814         return ret;
8815 }
8816
8817 static __init void create_trace_instances(struct dentry *d_tracer)
8818 {
8819         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
8820                                                          instance_mkdir,
8821                                                          instance_rmdir);
8822         if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
8823                 return;
8824 }
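
/*
 * Editorial usage sketch, not part of the upstream file: with the instances
 * directory registered above, user space manages trace instances with plain
 * mkdir/rmdir, e.g.
 *
 *	mkdir /sys/kernel/tracing/instances/foo    # -> instance_mkdir("foo")
 *	rmdir /sys/kernel/tracing/instances/foo    # -> instance_rmdir("foo")
 *
 * rmdir fails with -EBUSY while the instance still has references (see
 * __remove_instance() above).
 */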
8825
8826 static void
8827 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
8828 {
8829         struct trace_event_file *file;
8830         int cpu;
8831
8832         trace_create_file("available_tracers", 0444, d_tracer,
8833                         tr, &show_traces_fops);
8834
8835         trace_create_file("current_tracer", 0644, d_tracer,
8836                         tr, &set_tracer_fops);
8837
8838         trace_create_file("tracing_cpumask", 0644, d_tracer,
8839                           tr, &tracing_cpumask_fops);
8840
8841         trace_create_file("trace_options", 0644, d_tracer,
8842                           tr, &tracing_iter_fops);
8843
8844         trace_create_file("trace", 0644, d_tracer,
8845                           tr, &tracing_fops);
8846
8847         trace_create_file("trace_pipe", 0444, d_tracer,
8848                           tr, &tracing_pipe_fops);
8849
8850         trace_create_file("buffer_size_kb", 0644, d_tracer,
8851                           tr, &tracing_entries_fops);
8852
8853         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
8854                           tr, &tracing_total_entries_fops);
8855
8856         trace_create_file("free_buffer", 0200, d_tracer,
8857                           tr, &tracing_free_buffer_fops);
8858
8859         trace_create_file("trace_marker", 0220, d_tracer,
8860                           tr, &tracing_mark_fops);
8861
8862         file = __find_event_file(tr, "ftrace", "print");
8863         if (file && file->dir)
8864                 trace_create_file("trigger", 0644, file->dir, file,
8865                                   &event_trigger_fops);
8866         tr->trace_marker_file = file;
8867
8868         trace_create_file("trace_marker_raw", 0220, d_tracer,
8869                           tr, &tracing_mark_raw_fops);
8870
8871         trace_create_file("trace_clock", 0644, d_tracer, tr,
8872                           &trace_clock_fops);
8873
8874         trace_create_file("tracing_on", 0644, d_tracer,
8875                           tr, &rb_simple_fops);
8876
8877         trace_create_file("timestamp_mode", 0444, d_tracer, tr,
8878                           &trace_time_stamp_mode_fops);
8879
8880         tr->buffer_percent = 50;
8881
8882         trace_create_file("buffer_percent", 0444, d_tracer,
8883                         tr, &buffer_percent_fops);
8884
8885         create_trace_options_dir(tr);
8886
8887 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
8888         trace_create_maxlat_file(tr, d_tracer);
8889 #endif
8890
8891         if (ftrace_create_function_files(tr, d_tracer))
8892                 MEM_FAIL(1, "Could not allocate function filter files");
8893
8894 #ifdef CONFIG_TRACER_SNAPSHOT
8895         trace_create_file("snapshot", 0644, d_tracer,
8896                           tr, &snapshot_fops);
8897 #endif
8898
8899         trace_create_file("error_log", 0644, d_tracer,
8900                           tr, &tracing_err_log_fops);
8901
8902         for_each_tracing_cpu(cpu)
8903                 tracing_init_tracefs_percpu(tr, cpu);
8904
8905         ftrace_init_tracefs(tr, d_tracer);
8906 }
8907
8908 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
8909 {
8910         struct vfsmount *mnt;
8911         struct file_system_type *type;
8912
8913         /*
8914          * To maintain backward compatibility for tools that mount
8915          * debugfs to get to the tracing facility, tracefs is automatically
8916          * mounted to the debugfs/tracing directory.
8917          */
8918         type = get_fs_type("tracefs");
8919         if (!type)
8920                 return NULL;
8921         mnt = vfs_submount(mntpt, type, "tracefs", NULL);
8922         put_filesystem(type);
8923         if (IS_ERR(mnt))
8924                 return NULL;
8925         mntget(mnt);
8926
8927         return mnt;
8928 }
8929
8930 /**
8931  * tracing_init_dentry - initialize top level trace array
8932  *
8933  * This is called when creating files or directories in the tracing
8934  * directory. It is called via fs_initcall() by any of the boot up code
8935  * and expects to return the dentry of the top level tracing directory.
8936  */
8937 struct dentry *tracing_init_dentry(void)
8938 {
8939         struct trace_array *tr = &global_trace;
8940
8941         if (security_locked_down(LOCKDOWN_TRACEFS)) {
8942                 pr_warn("Tracing disabled due to lockdown\n");
8943                 return ERR_PTR(-EPERM);
8944         }
8945
8946         /* The top level trace array uses NULL as parent */
8947         if (tr->dir)
8948                 return NULL;
8949
8950         if (WARN_ON(!tracefs_initialized()) ||
8951                 (IS_ENABLED(CONFIG_DEBUG_FS) &&
8952                  WARN_ON(!debugfs_initialized())))
8953                 return ERR_PTR(-ENODEV);
8954
8955         /*
8956          * As there may still be users that expect the tracing
8957          * files to exist in debugfs/tracing, we must automount
8958          * the tracefs file system there, so older tools still
8959          * work with the newer kernel.
8960          */
8961         tr->dir = debugfs_create_automount("tracing", NULL,
8962                                            trace_automount, NULL);
8963
8964         return NULL;
8965 }
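
/*
 * Editorial note, not part of the upstream file: with the automount set up
 * above, both of these paths reach the same tracefs files:
 *
 *	/sys/kernel/tracing/		(native tracefs mount)
 *	/sys/kernel/debug/tracing/	(automounted for older tools)
 */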
8966
8967 extern struct trace_eval_map *__start_ftrace_eval_maps[];
8968 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
8969
8970 static void __init trace_eval_init(void)
8971 {
8972         int len;
8973
8974         len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
8975         trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
8976 }
8977
8978 #ifdef CONFIG_MODULES
8979 static void trace_module_add_evals(struct module *mod)
8980 {
8981         if (!mod->num_trace_evals)
8982                 return;
8983
8984         /*
8985          * Modules with bad taint do not have events created, so do
8986          * not bother with their eval (enum) maps either.
8987          */
8988         if (trace_module_has_bad_taint(mod))
8989                 return;
8990
8991         trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
8992 }
8993
8994 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
8995 static void trace_module_remove_evals(struct module *mod)
8996 {
8997         union trace_eval_map_item *map;
8998         union trace_eval_map_item **last = &trace_eval_maps;
8999
9000         if (!mod->num_trace_evals)
9001                 return;
9002
9003         mutex_lock(&trace_eval_mutex);
9004
9005         map = trace_eval_maps;
9006
9007         while (map) {
9008                 if (map->head.mod == mod)
9009                         break;
9010                 map = trace_eval_jmp_to_tail(map);
9011                 last = &map->tail.next;
9012                 map = map->tail.next;
9013         }
9014         if (!map)
9015                 goto out;
9016
9017         *last = trace_eval_jmp_to_tail(map)->tail.next;
9018         kfree(map);
9019  out:
9020         mutex_unlock(&trace_eval_mutex);
9021 }
9022 #else
9023 static inline void trace_module_remove_evals(struct module *mod) { }
9024 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9025
9026 static int trace_module_notify(struct notifier_block *self,
9027                                unsigned long val, void *data)
9028 {
9029         struct module *mod = data;
9030
9031         switch (val) {
9032         case MODULE_STATE_COMING:
9033                 trace_module_add_evals(mod);
9034                 break;
9035         case MODULE_STATE_GOING:
9036                 trace_module_remove_evals(mod);
9037                 break;
9038         }
9039
9040         return 0;
9041 }
9042
9043 static struct notifier_block trace_module_nb = {
9044         .notifier_call = trace_module_notify,
9045         .priority = 0,
9046 };
9047 #endif /* CONFIG_MODULES */
9048
9049 static __init int tracer_init_tracefs(void)
9050 {
9051         struct dentry *d_tracer;
9052
9053         trace_access_lock_init();
9054
9055         d_tracer = tracing_init_dentry();
9056         if (IS_ERR(d_tracer))
9057                 return 0;
9058
9059         event_trace_init();
9060
9061         init_tracer_tracefs(&global_trace, d_tracer);
9062         ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
9063
9064         trace_create_file("tracing_thresh", 0644, d_tracer,
9065                         &global_trace, &tracing_thresh_fops);
9066
9067         trace_create_file("README", 0444, d_tracer,
9068                         NULL, &tracing_readme_fops);
9069
9070         trace_create_file("saved_cmdlines", 0444, d_tracer,
9071                         NULL, &tracing_saved_cmdlines_fops);
9072
9073         trace_create_file("saved_cmdlines_size", 0644, d_tracer,
9074                           NULL, &tracing_saved_cmdlines_size_fops);
9075
9076         trace_create_file("saved_tgids", 0444, d_tracer,
9077                         NULL, &tracing_saved_tgids_fops);
9078
9079         trace_eval_init();
9080
9081         trace_create_eval_file(d_tracer);
9082
9083 #ifdef CONFIG_MODULES
9084         register_module_notifier(&trace_module_nb);
9085 #endif
9086
9087 #ifdef CONFIG_DYNAMIC_FTRACE
9088         trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
9089                         NULL, &tracing_dyn_info_fops);
9090 #endif
9091
9092         create_trace_instances(d_tracer);
9093
9094         update_tracer_options(&global_trace);
9095
9096         return 0;
9097 }
9098
9099 static int trace_panic_handler(struct notifier_block *this,
9100                                unsigned long event, void *unused)
9101 {
9102         if (ftrace_dump_on_oops)
9103                 ftrace_dump(ftrace_dump_on_oops);
9104         return NOTIFY_OK;
9105 }
9106
9107 static struct notifier_block trace_panic_notifier = {
9108         .notifier_call  = trace_panic_handler,
9109         .next           = NULL,
9110         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
9111 };
9112
9113 static int trace_die_handler(struct notifier_block *self,
9114                              unsigned long val,
9115                              void *data)
9116 {
9117         switch (val) {
9118         case DIE_OOPS:
9119                 if (ftrace_dump_on_oops)
9120                         ftrace_dump(ftrace_dump_on_oops);
9121                 break;
9122         default:
9123                 break;
9124         }
9125         return NOTIFY_OK;
9126 }
9127
9128 static struct notifier_block trace_die_notifier = {
9129         .notifier_call = trace_die_handler,
9130         .priority = 200
9131 };
9132
9133 /*
9134  * printk is set to a max of 1024, but we really don't need it that big.
9135  * Nothing should be printing 1000 characters anyway.
9136  */
9137 #define TRACE_MAX_PRINT         1000
9138
9139 /*
9140  * Define here KERN_TRACE so that we have one place to modify
9141  * it if we decide to change what log level the ftrace dump
9142  * should be at.
9143  */
9144 #define KERN_TRACE              KERN_EMERG
9145
9146 void
9147 trace_printk_seq(struct trace_seq *s)
9148 {
9149         /* Probably should print a warning here. */
9150         if (s->seq.len >= TRACE_MAX_PRINT)
9151                 s->seq.len = TRACE_MAX_PRINT;
9152
9153         /*
9154          * More paranoid code. Although the buffer size is set to
9155          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
9156          * an extra layer of protection.
9157          */
9158         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
9159                 s->seq.len = s->seq.size - 1;
9160
9161         /* Should be zero terminated, but we are paranoid. */
9162         s->buffer[s->seq.len] = 0;
9163
9164         printk(KERN_TRACE "%s", s->buffer);
9165
9166         trace_seq_init(s);
9167 }
9168
9169 void trace_init_global_iter(struct trace_iterator *iter)
9170 {
9171         iter->tr = &global_trace;
9172         iter->trace = iter->tr->current_trace;
9173         iter->cpu_file = RING_BUFFER_ALL_CPUS;
9174         iter->array_buffer = &global_trace.array_buffer;
9175
9176         if (iter->trace && iter->trace->open)
9177                 iter->trace->open(iter);
9178
9179         /* Annotate start of buffers if we had overruns */
9180         if (ring_buffer_overruns(iter->array_buffer->buffer))
9181                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
9182
9183         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
9184         if (trace_clocks[iter->tr->clock_id].in_ns)
9185                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
9186 }
9187
9188 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
9189 {
9190         /* use static because iter can be a bit big for the stack */
9191         static struct trace_iterator iter;
9192         static atomic_t dump_running;
9193         struct trace_array *tr = &global_trace;
9194         unsigned int old_userobj;
9195         unsigned long flags;
9196         int cnt = 0, cpu;
9197
9198         /* Only allow one dump user at a time. */
9199         if (atomic_inc_return(&dump_running) != 1) {
9200                 atomic_dec(&dump_running);
9201                 return;
9202         }
9203
9204         /*
9205          * Always turn off tracing when we dump.
9206          * We don't need to show trace output of what happens
9207          * between multiple crashes.
9208          *
9209          * If the user does a sysrq-z, then they can re-enable
9210          * tracing with echo 1 > tracing_on.
9211          */
9212         tracing_off();
9213
9214         local_irq_save(flags);
9215         printk_nmi_direct_enter();
9216
9217         /* Simulate the iterator */
9218         trace_init_global_iter(&iter);
9219         /* Can not use kmalloc for iter.temp */
9220         iter.temp = static_temp_buf;
9221         iter.temp_size = STATIC_TEMP_BUF_SIZE;
9222
9223         for_each_tracing_cpu(cpu) {
9224                 atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9225         }
9226
9227         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
9228
9229         /* don't look at user memory in panic mode */
9230         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
9231
9232         switch (oops_dump_mode) {
9233         case DUMP_ALL:
9234                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9235                 break;
9236         case DUMP_ORIG:
9237                 iter.cpu_file = raw_smp_processor_id();
9238                 break;
9239         case DUMP_NONE:
9240                 goto out_enable;
9241         default:
9242                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
9243                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9244         }
9245
9246         printk(KERN_TRACE "Dumping ftrace buffer:\n");
9247
9248         /* Did function tracer already get disabled? */
9249         if (ftrace_is_dead()) {
9250                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
9251                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
9252         }
9253
9254         /*
9255          * We need to stop all tracing on all CPUs to read
9256          * the next buffer. This is a bit expensive, but is
9257          * not done often. We fill in all that we can read,
9258          * and then release the locks again.
9259          */
9260
9261         while (!trace_empty(&iter)) {
9262
9263                 if (!cnt)
9264                         printk(KERN_TRACE "---------------------------------\n");
9265
9266                 cnt++;
9267
9268                 trace_iterator_reset(&iter);
9269                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
9270
9271                 if (trace_find_next_entry_inc(&iter) != NULL) {
9272                         int ret;
9273
9274                         ret = print_trace_line(&iter);
9275                         if (ret != TRACE_TYPE_NO_CONSUME)
9276                                 trace_consume(&iter);
9277                 }
9278                 touch_nmi_watchdog();
9279
9280                 trace_printk_seq(&iter.seq);
9281         }
9282
9283         if (!cnt)
9284                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
9285         else
9286                 printk(KERN_TRACE "---------------------------------\n");
9287
9288  out_enable:
9289         tr->trace_flags |= old_userobj;
9290
9291         for_each_tracing_cpu(cpu) {
9292                 atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9293         }
9294         atomic_dec(&dump_running);
9295         printk_nmi_direct_exit();
9296         local_irq_restore(flags);
9297 }
9298 EXPORT_SYMBOL_GPL(ftrace_dump);
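
/*
 * Editorial note, not part of the upstream file: ftrace_dump() is normally
 * reached through the panic/die notifiers above when the
 * ftrace_dump_on_oops boot parameter (or the matching sysctl) is set, or
 * manually via sysrq-z, e.g. on the kernel command line:
 *
 *	ftrace_dump_on_oops		# dump all CPUs (DUMP_ALL)
 *	ftrace_dump_on_oops=orig_cpu	# dump only the oopsing CPU (DUMP_ORIG)
 */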
9299
9300 int trace_run_command(const char *buf, int (*createfn)(int, char **))
9301 {
9302         char **argv;
9303         int argc, ret;
9304
9305         argc = 0;
9306         ret = 0;
9307         argv = argv_split(GFP_KERNEL, buf, &argc);
9308         if (!argv)
9309                 return -ENOMEM;
9310
9311         if (argc)
9312                 ret = createfn(argc, argv);
9313
9314         argv_free(argv);
9315
9316         return ret;
9317 }
9318
9319 #define WRITE_BUFSIZE  4096
9320
9321 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
9322                                 size_t count, loff_t *ppos,
9323                                 int (*createfn)(int, char **))
9324 {
9325         char *kbuf, *buf, *tmp;
9326         int ret = 0;
9327         size_t done = 0;
9328         size_t size;
9329
9330         kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
9331         if (!kbuf)
9332                 return -ENOMEM;
9333
9334         while (done < count) {
9335                 size = count - done;
9336
9337                 if (size >= WRITE_BUFSIZE)
9338                         size = WRITE_BUFSIZE - 1;
9339
9340                 if (copy_from_user(kbuf, buffer + done, size)) {
9341                         ret = -EFAULT;
9342                         goto out;
9343                 }
9344                 kbuf[size] = '\0';
9345                 buf = kbuf;
9346                 do {
9347                         tmp = strchr(buf, '\n');
9348                         if (tmp) {
9349                                 *tmp = '\0';
9350                                 size = tmp - buf + 1;
9351                         } else {
9352                                 size = strlen(buf);
9353                                 if (done + size < count) {
9354                                         if (buf != kbuf)
9355                                                 break;
9356                                         /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
9357                                         pr_warn("Line length is too long: Should be less than %d\n",
9358                                                 WRITE_BUFSIZE - 2);
9359                                         ret = -EINVAL;
9360                                         goto out;
9361                                 }
9362                         }
9363                         done += size;
9364
9365                         /* Remove comments */
9366                         tmp = strchr(buf, '#');
9367
9368                         if (tmp)
9369                                 *tmp = '\0';
9370
9371                         ret = trace_run_command(buf, createfn);
9372                         if (ret)
9373                                 goto out;
9374                         buf += size;
9375
9376                 } while (done < count);
9377         }
9378         ret = done;
9379
9380 out:
9381         kfree(kbuf);
9382
9383         return ret;
9384 }
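
/*
 * Editorial sketch, not part of the upstream file: this helper backs write()
 * handlers of dynamic event files such as kprobe_events. It chops the user
 * buffer into lines, strips '#' comments, and hands each non-empty line to
 * createfn() pre-split into argv[], e.g. a write of
 *
 *	"p:myprobe do_sys_open\n"
 *
 * results in a single createfn(argc = 2, argv = { "p:myprobe",
 * "do_sys_open" }) call.
 */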
9385
9386 __init static int tracer_alloc_buffers(void)
9387 {
9388         int ring_buf_size;
9389         int ret = -ENOMEM;
9390
9391
9392         if (security_locked_down(LOCKDOWN_TRACEFS)) {
9393                 pr_warn("Tracing disabled due to lockdown\n");
9394                 return -EPERM;
9395         }
9396
9397         /*
9398          * Make sure we don't accidentally add more trace options
9399          * than we have bits for.
9400          */
9401         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
9402
9403         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
9404                 goto out;
9405
9406         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
9407                 goto out_free_buffer_mask;
9408
9409         /* Only allocate trace_printk buffers if a trace_printk exists */
9410         if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
9411                 /* Must be called before global_trace.buffer is allocated */
9412                 trace_printk_init_buffers();
9413
9414         /* To save memory, keep the ring buffer size to its minimum */
9415         if (ring_buffer_expanded)
9416                 ring_buf_size = trace_buf_size;
9417         else
9418                 ring_buf_size = 1;
9419
9420         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
9421         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
9422
9423         raw_spin_lock_init(&global_trace.start_lock);
9424
9425         /*
9426          * The prepare callback allocates some memory for the ring buffer. We
9427          * don't free the buffer if the CPU goes down. If we were to free
9428          * the buffer, then the user would lose any trace that was in the
9429          * buffer. The memory will be removed once the "instance" is removed.
9430          */
9431         ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
9432                                       "trace/RB:prepare", trace_rb_cpu_prepare,
9433                                       NULL);
9434         if (ret < 0)
9435                 goto out_free_cpumask;
9436         /* Used for event triggers */
9437         ret = -ENOMEM;
9438         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
9439         if (!temp_buffer)
9440                 goto out_rm_hp_state;
9441
9442         if (trace_create_savedcmd() < 0)
9443                 goto out_free_temp_buffer;
9444
9445         /* TODO: make the number of buffers hot pluggable with CPUS */
9446         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
9447                 MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
9448                 goto out_free_savedcmd;
9449         }
9450
9451         if (global_trace.buffer_disabled)
9452                 tracing_off();
9453
9454         if (trace_boot_clock) {
9455                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
9456                 if (ret < 0)
9457                         pr_warn("Trace clock %s not defined, going back to default\n",
9458                                 trace_boot_clock);
9459         }
9460
9461         /*
9462          * register_tracer() might reference current_trace, so it
9463          * needs to be set before we register anything. This is
9464          * just a bootstrap of current_trace anyway.
9465          */
9466         global_trace.current_trace = &nop_trace;
9467
9468         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9469
9470         ftrace_init_global_array_ops(&global_trace);
9471
9472         init_trace_flags_index(&global_trace);
9473
9474         register_tracer(&nop_trace);
9475
9476         /* Function tracing may start here (via kernel command line) */
9477         init_function_trace();
9478
9479         /* All seems OK, enable tracing */
9480         tracing_disabled = 0;
9481
9482         atomic_notifier_chain_register(&panic_notifier_list,
9483                                        &trace_panic_notifier);
9484
9485         register_die_notifier(&trace_die_notifier);
9486
9487         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
9488
9489         INIT_LIST_HEAD(&global_trace.systems);
9490         INIT_LIST_HEAD(&global_trace.events);
9491         INIT_LIST_HEAD(&global_trace.hist_vars);
9492         INIT_LIST_HEAD(&global_trace.err_log);
9493         list_add(&global_trace.list, &ftrace_trace_arrays);
9494
9495         apply_trace_boot_options();
9496
9497         register_snapshot_cmd();
9498
9499         return 0;
9500
9501 out_free_savedcmd:
9502         free_saved_cmdlines_buffer(savedcmd);
9503 out_free_temp_buffer:
9504         ring_buffer_free(temp_buffer);
9505 out_rm_hp_state:
9506         cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
9507 out_free_cpumask:
9508         free_cpumask_var(global_trace.tracing_cpumask);
9509 out_free_buffer_mask:
9510         free_cpumask_var(tracing_buffer_mask);
9511 out:
9512         return ret;
9513 }
9514
9515 void __init early_trace_init(void)
9516 {
9517         if (tracepoint_printk) {
9518                 tracepoint_print_iter =
9519                         kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
9520                 if (MEM_FAIL(!tracepoint_print_iter,
9521                              "Failed to allocate trace iterator\n"))
9522                         tracepoint_printk = 0;
9523                 else
9524                         static_key_enable(&tracepoint_printk_key.key);
9525         }
9526         tracer_alloc_buffers();
9527 }
9528
9529 void __init trace_init(void)
9530 {
9531         trace_event_init();
9532 }
9533
9534 __init static int clear_boot_tracer(void)
9535 {
9536         /*
9537          * The default bootup tracer name is stored in an init section
9538          * that is freed after boot. This function is called as a late
9539          * initcall; if the requested boot tracer was never registered,
9540          * clear the pointer so that a later tracer registration does
9541          * not access memory that is about to be freed.
9542          */
9543         if (!default_bootup_tracer)
9544                 return 0;
9545
9546         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
9547                default_bootup_tracer);
9548         default_bootup_tracer = NULL;
9549
9550         return 0;
9551 }
9552
9553 fs_initcall(tracer_init_tracefs);
9554 late_initcall_sync(clear_boot_tracer);
9555
9556 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
9557 __init static int tracing_set_default_clock(void)
9558 {
9559         /* sched_clock_stable() is determined in late_initcall */
9560         if (!trace_boot_clock && !sched_clock_stable()) {
9561                 if (security_locked_down(LOCKDOWN_TRACEFS)) {
9562                         pr_warn("Can not set tracing clock due to lockdown\n");
9563                         return -EPERM;
9564                 }
9565
9566                 printk(KERN_WARNING
9567                        "Unstable clock detected, switching default tracing clock to \"global\"\n"
9568                        "If you want to keep using the local clock, then add:\n"
9569                        "  \"trace_clock=local\"\n"
9570                        "on the kernel command line\n");
9571                 tracing_set_clock(&global_trace, "global");
9572         }
9573
9574         return 0;
9575 }
9576 late_initcall_sync(tracing_set_default_clock);
9577 #endif