Merge tag 'for-v5.9' of git://git.kernel.org/pub/scm/linux/kernel/git/jmorris/linux...
[linux-2.6-microblaze.git] / kernel / trace / trace.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/notifier.h>
23 #include <linux/irqflags.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/poll.h>
43 #include <linux/nmi.h>
44 #include <linux/fs.h>
45 #include <linux/trace.h>
46 #include <linux/sched/clock.h>
47 #include <linux/sched/rt.h>
48 #include <linux/fsnotify.h>
49 #include <linux/irq_work.h>
50 #include <linux/workqueue.h>
51
52 #include "trace.h"
53 #include "trace_output.h"
54
55 /*
56  * On boot up, the ring buffer is set to the minimum size, so that
57  * we do not waste memory on systems that are not using tracing.
58  */
59 bool ring_buffer_expanded;
60
61 /*
62  * We need to change this state when a selftest is running.
63  * A selftest will lurk into the ring-buffer to count the
64  * entries inserted during the selftest although some concurrent
65  * insertions into the ring-buffer such as trace_printk could occurred
66  * at the same time, giving false positive or negative results.
67  */
68 static bool __read_mostly tracing_selftest_running;
69
70 /*
71  * If a tracer is running, we do not want to run SELFTEST.
72  */
73 bool __read_mostly tracing_selftest_disabled;
74
75 /* Pipe tracepoints to printk */
76 struct trace_iterator *tracepoint_print_iter;
77 int tracepoint_printk;
78 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
79
80 /* For tracers that don't implement custom flags */
81 static struct tracer_opt dummy_tracer_opt[] = {
82         { }
83 };
84
85 static int
86 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
87 {
88         return 0;
89 }
90
91 /*
92  * To prevent the comm cache from being overwritten when no
93  * tracing is active, only save the comm when a trace event
94  * occurred.
95  */
96 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
97
98 /*
99  * Kill all tracing for good (never come back).
100  * It is initialized to 1 but will turn to zero if the initialization
101  * of the tracer is successful. But that is the only place that sets
102  * this back to zero.
103  */
104 static int tracing_disabled = 1;
105
106 cpumask_var_t __read_mostly     tracing_buffer_mask;
107
108 /*
109  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
110  *
111  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
112  * is set, then ftrace_dump is called. This will output the contents
113  * of the ftrace buffers to the console.  This is very useful for
114  * capturing traces that lead to crashes and outputing it to a
115  * serial console.
116  *
117  * It is default off, but you can enable it with either specifying
118  * "ftrace_dump_on_oops" in the kernel command line, or setting
119  * /proc/sys/kernel/ftrace_dump_on_oops
120  * Set 1 if you want to dump buffers of all CPUs
121  * Set 2 if you want to dump the buffer of the CPU that triggered oops
122  */
123
124 enum ftrace_dump_mode ftrace_dump_on_oops;
125
126 /* When set, tracing will stop when a WARN*() is hit */
127 int __disable_trace_on_warning;
128
129 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
130 /* Map of enums to their values, for "eval_map" file */
131 struct trace_eval_map_head {
132         struct module                   *mod;
133         unsigned long                   length;
134 };
135
136 union trace_eval_map_item;
137
138 struct trace_eval_map_tail {
139         /*
140          * "end" is first and points to NULL as it must be different
141          * than "mod" or "eval_string"
142          */
143         union trace_eval_map_item       *next;
144         const char                      *end;   /* points to NULL */
145 };
146
147 static DEFINE_MUTEX(trace_eval_mutex);
148
149 /*
150  * The trace_eval_maps are saved in an array with two extra elements,
151  * one at the beginning, and one at the end. The beginning item contains
152  * the count of the saved maps (head.length), and the module they
153  * belong to if not built in (head.mod). The ending item contains a
154  * pointer to the next array of saved eval_map items.
155  */
156 union trace_eval_map_item {
157         struct trace_eval_map           map;
158         struct trace_eval_map_head      head;
159         struct trace_eval_map_tail      tail;
160 };
161
162 static union trace_eval_map_item *trace_eval_maps;
163 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
164
165 int tracing_set_tracer(struct trace_array *tr, const char *buf);
166 static void ftrace_trace_userstack(struct trace_buffer *buffer,
167                                    unsigned long flags, int pc);
168
169 #define MAX_TRACER_SIZE         100
170 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
171 static char *default_bootup_tracer;
172
173 static bool allocate_snapshot;
174
175 static int __init set_cmdline_ftrace(char *str)
176 {
177         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
178         default_bootup_tracer = bootup_tracer_buf;
179         /* We are using ftrace early, expand it */
180         ring_buffer_expanded = true;
181         return 1;
182 }
183 __setup("ftrace=", set_cmdline_ftrace);
184
185 static int __init set_ftrace_dump_on_oops(char *str)
186 {
187         if (*str++ != '=' || !*str) {
188                 ftrace_dump_on_oops = DUMP_ALL;
189                 return 1;
190         }
191
192         if (!strcmp("orig_cpu", str)) {
193                 ftrace_dump_on_oops = DUMP_ORIG;
194                 return 1;
195         }
196
197         return 0;
198 }
199 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
200
201 static int __init stop_trace_on_warning(char *str)
202 {
203         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
204                 __disable_trace_on_warning = 1;
205         return 1;
206 }
207 __setup("traceoff_on_warning", stop_trace_on_warning);
208
209 static int __init boot_alloc_snapshot(char *str)
210 {
211         allocate_snapshot = true;
212         /* We also need the main ring buffer expanded */
213         ring_buffer_expanded = true;
214         return 1;
215 }
216 __setup("alloc_snapshot", boot_alloc_snapshot);
217
218
219 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
220
221 static int __init set_trace_boot_options(char *str)
222 {
223         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
224         return 0;
225 }
226 __setup("trace_options=", set_trace_boot_options);
227
228 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
229 static char *trace_boot_clock __initdata;
230
231 static int __init set_trace_boot_clock(char *str)
232 {
233         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
234         trace_boot_clock = trace_boot_clock_buf;
235         return 0;
236 }
237 __setup("trace_clock=", set_trace_boot_clock);
238
239 static int __init set_tracepoint_printk(char *str)
240 {
241         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
242                 tracepoint_printk = 1;
243         return 1;
244 }
245 __setup("tp_printk", set_tracepoint_printk);
246
247 unsigned long long ns2usecs(u64 nsec)
248 {
249         nsec += 500;
250         do_div(nsec, 1000);
251         return nsec;
252 }
253
254 /* trace_flags holds trace_options default values */
255 #define TRACE_DEFAULT_FLAGS                                             \
256         (FUNCTION_DEFAULT_FLAGS |                                       \
257          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
258          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
259          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
260          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
261
262 /* trace_options that are only supported by global_trace */
263 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
264                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
265
266 /* trace_flags that are default zero for instances */
267 #define ZEROED_TRACE_FLAGS \
268         (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
269
270 /*
271  * The global_trace is the descriptor that holds the top-level tracing
272  * buffers for the live tracing.
273  */
274 static struct trace_array global_trace = {
275         .trace_flags = TRACE_DEFAULT_FLAGS,
276 };
277
278 LIST_HEAD(ftrace_trace_arrays);
279
280 int trace_array_get(struct trace_array *this_tr)
281 {
282         struct trace_array *tr;
283         int ret = -ENODEV;
284
285         mutex_lock(&trace_types_lock);
286         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
287                 if (tr == this_tr) {
288                         tr->ref++;
289                         ret = 0;
290                         break;
291                 }
292         }
293         mutex_unlock(&trace_types_lock);
294
295         return ret;
296 }
297
298 static void __trace_array_put(struct trace_array *this_tr)
299 {
300         WARN_ON(!this_tr->ref);
301         this_tr->ref--;
302 }
303
304 /**
305  * trace_array_put - Decrement the reference counter for this trace array.
306  *
307  * NOTE: Use this when we no longer need the trace array returned by
308  * trace_array_get_by_name(). This ensures the trace array can be later
309  * destroyed.
310  *
311  */
312 void trace_array_put(struct trace_array *this_tr)
313 {
314         if (!this_tr)
315                 return;
316
317         mutex_lock(&trace_types_lock);
318         __trace_array_put(this_tr);
319         mutex_unlock(&trace_types_lock);
320 }
321 EXPORT_SYMBOL_GPL(trace_array_put);
322
323 int tracing_check_open_get_tr(struct trace_array *tr)
324 {
325         int ret;
326
327         ret = security_locked_down(LOCKDOWN_TRACEFS);
328         if (ret)
329                 return ret;
330
331         if (tracing_disabled)
332                 return -ENODEV;
333
334         if (tr && trace_array_get(tr) < 0)
335                 return -ENODEV;
336
337         return 0;
338 }
339
340 int call_filter_check_discard(struct trace_event_call *call, void *rec,
341                               struct trace_buffer *buffer,
342                               struct ring_buffer_event *event)
343 {
344         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
345             !filter_match_preds(call->filter, rec)) {
346                 __trace_event_discard_commit(buffer, event);
347                 return 1;
348         }
349
350         return 0;
351 }
352
353 void trace_free_pid_list(struct trace_pid_list *pid_list)
354 {
355         vfree(pid_list->pids);
356         kfree(pid_list);
357 }
358
359 /**
360  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
361  * @filtered_pids: The list of pids to check
362  * @search_pid: The PID to find in @filtered_pids
363  *
364  * Returns true if @search_pid is fonud in @filtered_pids, and false otherwis.
365  */
366 bool
367 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
368 {
369         /*
370          * If pid_max changed after filtered_pids was created, we
371          * by default ignore all pids greater than the previous pid_max.
372          */
373         if (search_pid >= filtered_pids->pid_max)
374                 return false;
375
376         return test_bit(search_pid, filtered_pids->pids);
377 }
378
379 /**
380  * trace_ignore_this_task - should a task be ignored for tracing
381  * @filtered_pids: The list of pids to check
382  * @task: The task that should be ignored if not filtered
383  *
384  * Checks if @task should be traced or not from @filtered_pids.
385  * Returns true if @task should *NOT* be traced.
386  * Returns false if @task should be traced.
387  */
388 bool
389 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
390                        struct trace_pid_list *filtered_no_pids,
391                        struct task_struct *task)
392 {
393         /*
394          * If filterd_no_pids is not empty, and the task's pid is listed
395          * in filtered_no_pids, then return true.
396          * Otherwise, if filtered_pids is empty, that means we can
397          * trace all tasks. If it has content, then only trace pids
398          * within filtered_pids.
399          */
400
401         return (filtered_pids &&
402                 !trace_find_filtered_pid(filtered_pids, task->pid)) ||
403                 (filtered_no_pids &&
404                  trace_find_filtered_pid(filtered_no_pids, task->pid));
405 }
406
407 /**
408  * trace_filter_add_remove_task - Add or remove a task from a pid_list
409  * @pid_list: The list to modify
410  * @self: The current task for fork or NULL for exit
411  * @task: The task to add or remove
412  *
413  * If adding a task, if @self is defined, the task is only added if @self
414  * is also included in @pid_list. This happens on fork and tasks should
415  * only be added when the parent is listed. If @self is NULL, then the
416  * @task pid will be removed from the list, which would happen on exit
417  * of a task.
418  */
419 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
420                                   struct task_struct *self,
421                                   struct task_struct *task)
422 {
423         if (!pid_list)
424                 return;
425
426         /* For forks, we only add if the forking task is listed */
427         if (self) {
428                 if (!trace_find_filtered_pid(pid_list, self->pid))
429                         return;
430         }
431
432         /* Sorry, but we don't support pid_max changing after setting */
433         if (task->pid >= pid_list->pid_max)
434                 return;
435
436         /* "self" is set for forks, and NULL for exits */
437         if (self)
438                 set_bit(task->pid, pid_list->pids);
439         else
440                 clear_bit(task->pid, pid_list->pids);
441 }
442
443 /**
444  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
445  * @pid_list: The pid list to show
446  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
447  * @pos: The position of the file
448  *
449  * This is used by the seq_file "next" operation to iterate the pids
450  * listed in a trace_pid_list structure.
451  *
452  * Returns the pid+1 as we want to display pid of zero, but NULL would
453  * stop the iteration.
454  */
455 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
456 {
457         unsigned long pid = (unsigned long)v;
458
459         (*pos)++;
460
461         /* pid already is +1 of the actual prevous bit */
462         pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
463
464         /* Return pid + 1 to allow zero to be represented */
465         if (pid < pid_list->pid_max)
466                 return (void *)(pid + 1);
467
468         return NULL;
469 }
470
471 /**
472  * trace_pid_start - Used for seq_file to start reading pid lists
473  * @pid_list: The pid list to show
474  * @pos: The position of the file
475  *
476  * This is used by seq_file "start" operation to start the iteration
477  * of listing pids.
478  *
479  * Returns the pid+1 as we want to display pid of zero, but NULL would
480  * stop the iteration.
481  */
482 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
483 {
484         unsigned long pid;
485         loff_t l = 0;
486
487         pid = find_first_bit(pid_list->pids, pid_list->pid_max);
488         if (pid >= pid_list->pid_max)
489                 return NULL;
490
491         /* Return pid + 1 so that zero can be the exit value */
492         for (pid++; pid && l < *pos;
493              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
494                 ;
495         return (void *)pid;
496 }
497
498 /**
499  * trace_pid_show - show the current pid in seq_file processing
500  * @m: The seq_file structure to write into
501  * @v: A void pointer of the pid (+1) value to display
502  *
503  * Can be directly used by seq_file operations to display the current
504  * pid value.
505  */
506 int trace_pid_show(struct seq_file *m, void *v)
507 {
508         unsigned long pid = (unsigned long)v - 1;
509
510         seq_printf(m, "%lu\n", pid);
511         return 0;
512 }
513
514 /* 128 should be much more than enough */
515 #define PID_BUF_SIZE            127
516
517 int trace_pid_write(struct trace_pid_list *filtered_pids,
518                     struct trace_pid_list **new_pid_list,
519                     const char __user *ubuf, size_t cnt)
520 {
521         struct trace_pid_list *pid_list;
522         struct trace_parser parser;
523         unsigned long val;
524         int nr_pids = 0;
525         ssize_t read = 0;
526         ssize_t ret = 0;
527         loff_t pos;
528         pid_t pid;
529
530         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
531                 return -ENOMEM;
532
533         /*
534          * Always recreate a new array. The write is an all or nothing
535          * operation. Always create a new array when adding new pids by
536          * the user. If the operation fails, then the current list is
537          * not modified.
538          */
539         pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
540         if (!pid_list) {
541                 trace_parser_put(&parser);
542                 return -ENOMEM;
543         }
544
545         pid_list->pid_max = READ_ONCE(pid_max);
546
547         /* Only truncating will shrink pid_max */
548         if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
549                 pid_list->pid_max = filtered_pids->pid_max;
550
551         pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
552         if (!pid_list->pids) {
553                 trace_parser_put(&parser);
554                 kfree(pid_list);
555                 return -ENOMEM;
556         }
557
558         if (filtered_pids) {
559                 /* copy the current bits to the new max */
560                 for_each_set_bit(pid, filtered_pids->pids,
561                                  filtered_pids->pid_max) {
562                         set_bit(pid, pid_list->pids);
563                         nr_pids++;
564                 }
565         }
566
567         while (cnt > 0) {
568
569                 pos = 0;
570
571                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
572                 if (ret < 0 || !trace_parser_loaded(&parser))
573                         break;
574
575                 read += ret;
576                 ubuf += ret;
577                 cnt -= ret;
578
579                 ret = -EINVAL;
580                 if (kstrtoul(parser.buffer, 0, &val))
581                         break;
582                 if (val >= pid_list->pid_max)
583                         break;
584
585                 pid = (pid_t)val;
586
587                 set_bit(pid, pid_list->pids);
588                 nr_pids++;
589
590                 trace_parser_clear(&parser);
591                 ret = 0;
592         }
593         trace_parser_put(&parser);
594
595         if (ret < 0) {
596                 trace_free_pid_list(pid_list);
597                 return ret;
598         }
599
600         if (!nr_pids) {
601                 /* Cleared the list of pids */
602                 trace_free_pid_list(pid_list);
603                 read = ret;
604                 pid_list = NULL;
605         }
606
607         *new_pid_list = pid_list;
608
609         return read;
610 }
611
612 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
613 {
614         u64 ts;
615
616         /* Early boot up does not have a buffer yet */
617         if (!buf->buffer)
618                 return trace_clock_local();
619
620         ts = ring_buffer_time_stamp(buf->buffer, cpu);
621         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
622
623         return ts;
624 }
625
626 u64 ftrace_now(int cpu)
627 {
628         return buffer_ftrace_now(&global_trace.array_buffer, cpu);
629 }
630
631 /**
632  * tracing_is_enabled - Show if global_trace has been disabled
633  *
634  * Shows if the global trace has been enabled or not. It uses the
635  * mirror flag "buffer_disabled" to be used in fast paths such as for
636  * the irqsoff tracer. But it may be inaccurate due to races. If you
637  * need to know the accurate state, use tracing_is_on() which is a little
638  * slower, but accurate.
639  */
640 int tracing_is_enabled(void)
641 {
642         /*
643          * For quick access (irqsoff uses this in fast path), just
644          * return the mirror variable of the state of the ring buffer.
645          * It's a little racy, but we don't really care.
646          */
647         smp_rmb();
648         return !global_trace.buffer_disabled;
649 }
650
651 /*
652  * trace_buf_size is the size in bytes that is allocated
653  * for a buffer. Note, the number of bytes is always rounded
654  * to page size.
655  *
656  * This number is purposely set to a low number of 16384.
657  * If the dump on oops happens, it will be much appreciated
658  * to not have to wait for all that output. Anyway this can be
659  * boot time and run time configurable.
660  */
661 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
662
663 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
664
665 /* trace_types holds a link list of available tracers. */
666 static struct tracer            *trace_types __read_mostly;
667
668 /*
669  * trace_types_lock is used to protect the trace_types list.
670  */
671 DEFINE_MUTEX(trace_types_lock);
672
673 /*
674  * serialize the access of the ring buffer
675  *
676  * ring buffer serializes readers, but it is low level protection.
677  * The validity of the events (which returns by ring_buffer_peek() ..etc)
678  * are not protected by ring buffer.
679  *
680  * The content of events may become garbage if we allow other process consumes
681  * these events concurrently:
682  *   A) the page of the consumed events may become a normal page
683  *      (not reader page) in ring buffer, and this page will be rewrited
684  *      by events producer.
685  *   B) The page of the consumed events may become a page for splice_read,
686  *      and this page will be returned to system.
687  *
688  * These primitives allow multi process access to different cpu ring buffer
689  * concurrently.
690  *
691  * These primitives don't distinguish read-only and read-consume access.
692  * Multi read-only access are also serialized.
693  */
694
695 #ifdef CONFIG_SMP
696 static DECLARE_RWSEM(all_cpu_access_lock);
697 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
698
699 static inline void trace_access_lock(int cpu)
700 {
701         if (cpu == RING_BUFFER_ALL_CPUS) {
702                 /* gain it for accessing the whole ring buffer. */
703                 down_write(&all_cpu_access_lock);
704         } else {
705                 /* gain it for accessing a cpu ring buffer. */
706
707                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
708                 down_read(&all_cpu_access_lock);
709
710                 /* Secondly block other access to this @cpu ring buffer. */
711                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
712         }
713 }
714
715 static inline void trace_access_unlock(int cpu)
716 {
717         if (cpu == RING_BUFFER_ALL_CPUS) {
718                 up_write(&all_cpu_access_lock);
719         } else {
720                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
721                 up_read(&all_cpu_access_lock);
722         }
723 }
724
725 static inline void trace_access_lock_init(void)
726 {
727         int cpu;
728
729         for_each_possible_cpu(cpu)
730                 mutex_init(&per_cpu(cpu_access_lock, cpu));
731 }
732
733 #else
734
735 static DEFINE_MUTEX(access_lock);
736
737 static inline void trace_access_lock(int cpu)
738 {
739         (void)cpu;
740         mutex_lock(&access_lock);
741 }
742
743 static inline void trace_access_unlock(int cpu)
744 {
745         (void)cpu;
746         mutex_unlock(&access_lock);
747 }
748
749 static inline void trace_access_lock_init(void)
750 {
751 }
752
753 #endif
754
755 #ifdef CONFIG_STACKTRACE
756 static void __ftrace_trace_stack(struct trace_buffer *buffer,
757                                  unsigned long flags,
758                                  int skip, int pc, struct pt_regs *regs);
759 static inline void ftrace_trace_stack(struct trace_array *tr,
760                                       struct trace_buffer *buffer,
761                                       unsigned long flags,
762                                       int skip, int pc, struct pt_regs *regs);
763
764 #else
765 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
766                                         unsigned long flags,
767                                         int skip, int pc, struct pt_regs *regs)
768 {
769 }
770 static inline void ftrace_trace_stack(struct trace_array *tr,
771                                       struct trace_buffer *buffer,
772                                       unsigned long flags,
773                                       int skip, int pc, struct pt_regs *regs)
774 {
775 }
776
777 #endif
778
779 static __always_inline void
780 trace_event_setup(struct ring_buffer_event *event,
781                   int type, unsigned long flags, int pc)
782 {
783         struct trace_entry *ent = ring_buffer_event_data(event);
784
785         tracing_generic_entry_update(ent, type, flags, pc);
786 }
787
788 static __always_inline struct ring_buffer_event *
789 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
790                           int type,
791                           unsigned long len,
792                           unsigned long flags, int pc)
793 {
794         struct ring_buffer_event *event;
795
796         event = ring_buffer_lock_reserve(buffer, len);
797         if (event != NULL)
798                 trace_event_setup(event, type, flags, pc);
799
800         return event;
801 }
802
803 void tracer_tracing_on(struct trace_array *tr)
804 {
805         if (tr->array_buffer.buffer)
806                 ring_buffer_record_on(tr->array_buffer.buffer);
807         /*
808          * This flag is looked at when buffers haven't been allocated
809          * yet, or by some tracers (like irqsoff), that just want to
810          * know if the ring buffer has been disabled, but it can handle
811          * races of where it gets disabled but we still do a record.
812          * As the check is in the fast path of the tracers, it is more
813          * important to be fast than accurate.
814          */
815         tr->buffer_disabled = 0;
816         /* Make the flag seen by readers */
817         smp_wmb();
818 }
819
820 /**
821  * tracing_on - enable tracing buffers
822  *
823  * This function enables tracing buffers that may have been
824  * disabled with tracing_off.
825  */
826 void tracing_on(void)
827 {
828         tracer_tracing_on(&global_trace);
829 }
830 EXPORT_SYMBOL_GPL(tracing_on);
831
832
833 static __always_inline void
834 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
835 {
836         __this_cpu_write(trace_taskinfo_save, true);
837
838         /* If this is the temp buffer, we need to commit fully */
839         if (this_cpu_read(trace_buffered_event) == event) {
840                 /* Length is in event->array[0] */
841                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
842                 /* Release the temp buffer */
843                 this_cpu_dec(trace_buffered_event_cnt);
844         } else
845                 ring_buffer_unlock_commit(buffer, event);
846 }
847
848 /**
849  * __trace_puts - write a constant string into the trace buffer.
850  * @ip:    The address of the caller
851  * @str:   The constant string to write
852  * @size:  The size of the string.
853  */
854 int __trace_puts(unsigned long ip, const char *str, int size)
855 {
856         struct ring_buffer_event *event;
857         struct trace_buffer *buffer;
858         struct print_entry *entry;
859         unsigned long irq_flags;
860         int alloc;
861         int pc;
862
863         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
864                 return 0;
865
866         pc = preempt_count();
867
868         if (unlikely(tracing_selftest_running || tracing_disabled))
869                 return 0;
870
871         alloc = sizeof(*entry) + size + 2; /* possible \n added */
872
873         local_save_flags(irq_flags);
874         buffer = global_trace.array_buffer.buffer;
875         ring_buffer_nest_start(buffer);
876         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, 
877                                             irq_flags, pc);
878         if (!event) {
879                 size = 0;
880                 goto out;
881         }
882
883         entry = ring_buffer_event_data(event);
884         entry->ip = ip;
885
886         memcpy(&entry->buf, str, size);
887
888         /* Add a newline if necessary */
889         if (entry->buf[size - 1] != '\n') {
890                 entry->buf[size] = '\n';
891                 entry->buf[size + 1] = '\0';
892         } else
893                 entry->buf[size] = '\0';
894
895         __buffer_unlock_commit(buffer, event);
896         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
897  out:
898         ring_buffer_nest_end(buffer);
899         return size;
900 }
901 EXPORT_SYMBOL_GPL(__trace_puts);
902
903 /**
904  * __trace_bputs - write the pointer to a constant string into trace buffer
905  * @ip:    The address of the caller
906  * @str:   The constant string to write to the buffer to
907  */
908 int __trace_bputs(unsigned long ip, const char *str)
909 {
910         struct ring_buffer_event *event;
911         struct trace_buffer *buffer;
912         struct bputs_entry *entry;
913         unsigned long irq_flags;
914         int size = sizeof(struct bputs_entry);
915         int ret = 0;
916         int pc;
917
918         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
919                 return 0;
920
921         pc = preempt_count();
922
923         if (unlikely(tracing_selftest_running || tracing_disabled))
924                 return 0;
925
926         local_save_flags(irq_flags);
927         buffer = global_trace.array_buffer.buffer;
928
929         ring_buffer_nest_start(buffer);
930         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
931                                             irq_flags, pc);
932         if (!event)
933                 goto out;
934
935         entry = ring_buffer_event_data(event);
936         entry->ip                       = ip;
937         entry->str                      = str;
938
939         __buffer_unlock_commit(buffer, event);
940         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
941
942         ret = 1;
943  out:
944         ring_buffer_nest_end(buffer);
945         return ret;
946 }
947 EXPORT_SYMBOL_GPL(__trace_bputs);
948
949 #ifdef CONFIG_TRACER_SNAPSHOT
950 static void tracing_snapshot_instance_cond(struct trace_array *tr,
951                                            void *cond_data)
952 {
953         struct tracer *tracer = tr->current_trace;
954         unsigned long flags;
955
956         if (in_nmi()) {
957                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
958                 internal_trace_puts("*** snapshot is being ignored        ***\n");
959                 return;
960         }
961
962         if (!tr->allocated_snapshot) {
963                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
964                 internal_trace_puts("*** stopping trace here!   ***\n");
965                 tracing_off();
966                 return;
967         }
968
969         /* Note, snapshot can not be used when the tracer uses it */
970         if (tracer->use_max_tr) {
971                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
972                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
973                 return;
974         }
975
976         local_irq_save(flags);
977         update_max_tr(tr, current, smp_processor_id(), cond_data);
978         local_irq_restore(flags);
979 }
980
981 void tracing_snapshot_instance(struct trace_array *tr)
982 {
983         tracing_snapshot_instance_cond(tr, NULL);
984 }
985
986 /**
987  * tracing_snapshot - take a snapshot of the current buffer.
988  *
989  * This causes a swap between the snapshot buffer and the current live
990  * tracing buffer. You can use this to take snapshots of the live
991  * trace when some condition is triggered, but continue to trace.
992  *
993  * Note, make sure to allocate the snapshot with either
994  * a tracing_snapshot_alloc(), or by doing it manually
995  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
996  *
997  * If the snapshot buffer is not allocated, it will stop tracing.
998  * Basically making a permanent snapshot.
999  */
1000 void tracing_snapshot(void)
1001 {
1002         struct trace_array *tr = &global_trace;
1003
1004         tracing_snapshot_instance(tr);
1005 }
1006 EXPORT_SYMBOL_GPL(tracing_snapshot);
1007
1008 /**
1009  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1010  * @tr:         The tracing instance to snapshot
1011  * @cond_data:  The data to be tested conditionally, and possibly saved
1012  *
1013  * This is the same as tracing_snapshot() except that the snapshot is
1014  * conditional - the snapshot will only happen if the
1015  * cond_snapshot.update() implementation receiving the cond_data
1016  * returns true, which means that the trace array's cond_snapshot
1017  * update() operation used the cond_data to determine whether the
1018  * snapshot should be taken, and if it was, presumably saved it along
1019  * with the snapshot.
1020  */
1021 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1022 {
1023         tracing_snapshot_instance_cond(tr, cond_data);
1024 }
1025 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1026
1027 /**
1028  * tracing_snapshot_cond_data - get the user data associated with a snapshot
1029  * @tr:         The tracing instance
1030  *
1031  * When the user enables a conditional snapshot using
1032  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1033  * with the snapshot.  This accessor is used to retrieve it.
1034  *
1035  * Should not be called from cond_snapshot.update(), since it takes
1036  * the tr->max_lock lock, which the code calling
1037  * cond_snapshot.update() has already done.
1038  *
1039  * Returns the cond_data associated with the trace array's snapshot.
1040  */
1041 void *tracing_cond_snapshot_data(struct trace_array *tr)
1042 {
1043         void *cond_data = NULL;
1044
1045         arch_spin_lock(&tr->max_lock);
1046
1047         if (tr->cond_snapshot)
1048                 cond_data = tr->cond_snapshot->cond_data;
1049
1050         arch_spin_unlock(&tr->max_lock);
1051
1052         return cond_data;
1053 }
1054 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1055
1056 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1057                                         struct array_buffer *size_buf, int cpu_id);
1058 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1059
1060 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1061 {
1062         int ret;
1063
1064         if (!tr->allocated_snapshot) {
1065
1066                 /* allocate spare buffer */
1067                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1068                                    &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1069                 if (ret < 0)
1070                         return ret;
1071
1072                 tr->allocated_snapshot = true;
1073         }
1074
1075         return 0;
1076 }
1077
1078 static void free_snapshot(struct trace_array *tr)
1079 {
1080         /*
1081          * We don't free the ring buffer. instead, resize it because
1082          * The max_tr ring buffer has some state (e.g. ring->clock) and
1083          * we want preserve it.
1084          */
1085         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1086         set_buffer_entries(&tr->max_buffer, 1);
1087         tracing_reset_online_cpus(&tr->max_buffer);
1088         tr->allocated_snapshot = false;
1089 }
1090
1091 /**
1092  * tracing_alloc_snapshot - allocate snapshot buffer.
1093  *
1094  * This only allocates the snapshot buffer if it isn't already
1095  * allocated - it doesn't also take a snapshot.
1096  *
1097  * This is meant to be used in cases where the snapshot buffer needs
1098  * to be set up for events that can't sleep but need to be able to
1099  * trigger a snapshot.
1100  */
1101 int tracing_alloc_snapshot(void)
1102 {
1103         struct trace_array *tr = &global_trace;
1104         int ret;
1105
1106         ret = tracing_alloc_snapshot_instance(tr);
1107         WARN_ON(ret < 0);
1108
1109         return ret;
1110 }
1111 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1112
1113 /**
1114  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1115  *
1116  * This is similar to tracing_snapshot(), but it will allocate the
1117  * snapshot buffer if it isn't already allocated. Use this only
1118  * where it is safe to sleep, as the allocation may sleep.
1119  *
1120  * This causes a swap between the snapshot buffer and the current live
1121  * tracing buffer. You can use this to take snapshots of the live
1122  * trace when some condition is triggered, but continue to trace.
1123  */
1124 void tracing_snapshot_alloc(void)
1125 {
1126         int ret;
1127
1128         ret = tracing_alloc_snapshot();
1129         if (ret < 0)
1130                 return;
1131
1132         tracing_snapshot();
1133 }
1134 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1135
1136 /**
1137  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1138  * @tr:         The tracing instance
1139  * @cond_data:  User data to associate with the snapshot
1140  * @update:     Implementation of the cond_snapshot update function
1141  *
1142  * Check whether the conditional snapshot for the given instance has
1143  * already been enabled, or if the current tracer is already using a
1144  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1145  * save the cond_data and update function inside.
1146  *
1147  * Returns 0 if successful, error otherwise.
1148  */
1149 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1150                                  cond_update_fn_t update)
1151 {
1152         struct cond_snapshot *cond_snapshot;
1153         int ret = 0;
1154
1155         cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1156         if (!cond_snapshot)
1157                 return -ENOMEM;
1158
1159         cond_snapshot->cond_data = cond_data;
1160         cond_snapshot->update = update;
1161
1162         mutex_lock(&trace_types_lock);
1163
1164         ret = tracing_alloc_snapshot_instance(tr);
1165         if (ret)
1166                 goto fail_unlock;
1167
1168         if (tr->current_trace->use_max_tr) {
1169                 ret = -EBUSY;
1170                 goto fail_unlock;
1171         }
1172
1173         /*
1174          * The cond_snapshot can only change to NULL without the
1175          * trace_types_lock. We don't care if we race with it going
1176          * to NULL, but we want to make sure that it's not set to
1177          * something other than NULL when we get here, which we can
1178          * do safely with only holding the trace_types_lock and not
1179          * having to take the max_lock.
1180          */
1181         if (tr->cond_snapshot) {
1182                 ret = -EBUSY;
1183                 goto fail_unlock;
1184         }
1185
1186         arch_spin_lock(&tr->max_lock);
1187         tr->cond_snapshot = cond_snapshot;
1188         arch_spin_unlock(&tr->max_lock);
1189
1190         mutex_unlock(&trace_types_lock);
1191
1192         return ret;
1193
1194  fail_unlock:
1195         mutex_unlock(&trace_types_lock);
1196         kfree(cond_snapshot);
1197         return ret;
1198 }
1199 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1200
1201 /**
1202  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1203  * @tr:         The tracing instance
1204  *
1205  * Check whether the conditional snapshot for the given instance is
1206  * enabled; if so, free the cond_snapshot associated with it,
1207  * otherwise return -EINVAL.
1208  *
1209  * Returns 0 if successful, error otherwise.
1210  */
1211 int tracing_snapshot_cond_disable(struct trace_array *tr)
1212 {
1213         int ret = 0;
1214
1215         arch_spin_lock(&tr->max_lock);
1216
1217         if (!tr->cond_snapshot)
1218                 ret = -EINVAL;
1219         else {
1220                 kfree(tr->cond_snapshot);
1221                 tr->cond_snapshot = NULL;
1222         }
1223
1224         arch_spin_unlock(&tr->max_lock);
1225
1226         return ret;
1227 }
1228 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1229 #else
1230 void tracing_snapshot(void)
1231 {
1232         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1233 }
1234 EXPORT_SYMBOL_GPL(tracing_snapshot);
1235 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1236 {
1237         WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1238 }
1239 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1240 int tracing_alloc_snapshot(void)
1241 {
1242         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1243         return -ENODEV;
1244 }
1245 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1246 void tracing_snapshot_alloc(void)
1247 {
1248         /* Give warning */
1249         tracing_snapshot();
1250 }
1251 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1252 void *tracing_cond_snapshot_data(struct trace_array *tr)
1253 {
1254         return NULL;
1255 }
1256 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1257 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1258 {
1259         return -ENODEV;
1260 }
1261 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1262 int tracing_snapshot_cond_disable(struct trace_array *tr)
1263 {
1264         return false;
1265 }
1266 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1267 #endif /* CONFIG_TRACER_SNAPSHOT */
1268
1269 void tracer_tracing_off(struct trace_array *tr)
1270 {
1271         if (tr->array_buffer.buffer)
1272                 ring_buffer_record_off(tr->array_buffer.buffer);
1273         /*
1274          * This flag is looked at when buffers haven't been allocated
1275          * yet, or by some tracers (like irqsoff), that just want to
1276          * know if the ring buffer has been disabled, but it can handle
1277          * races of where it gets disabled but we still do a record.
1278          * As the check is in the fast path of the tracers, it is more
1279          * important to be fast than accurate.
1280          */
1281         tr->buffer_disabled = 1;
1282         /* Make the flag seen by readers */
1283         smp_wmb();
1284 }
1285
1286 /**
1287  * tracing_off - turn off tracing buffers
1288  *
1289  * This function stops the tracing buffers from recording data.
1290  * It does not disable any overhead the tracers themselves may
1291  * be causing. This function simply causes all recording to
1292  * the ring buffers to fail.
1293  */
1294 void tracing_off(void)
1295 {
1296         tracer_tracing_off(&global_trace);
1297 }
1298 EXPORT_SYMBOL_GPL(tracing_off);
1299
1300 void disable_trace_on_warning(void)
1301 {
1302         if (__disable_trace_on_warning) {
1303                 trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1304                         "Disabling tracing due to warning\n");
1305                 tracing_off();
1306         }
1307 }
1308
1309 /**
1310  * tracer_tracing_is_on - show real state of ring buffer enabled
1311  * @tr : the trace array to know if ring buffer is enabled
1312  *
1313  * Shows real state of the ring buffer if it is enabled or not.
1314  */
1315 bool tracer_tracing_is_on(struct trace_array *tr)
1316 {
1317         if (tr->array_buffer.buffer)
1318                 return ring_buffer_record_is_on(tr->array_buffer.buffer);
1319         return !tr->buffer_disabled;
1320 }
1321
1322 /**
1323  * tracing_is_on - show state of ring buffers enabled
1324  */
1325 int tracing_is_on(void)
1326 {
1327         return tracer_tracing_is_on(&global_trace);
1328 }
1329 EXPORT_SYMBOL_GPL(tracing_is_on);
1330
1331 static int __init set_buf_size(char *str)
1332 {
1333         unsigned long buf_size;
1334
1335         if (!str)
1336                 return 0;
1337         buf_size = memparse(str, &str);
1338         /* nr_entries can not be zero */
1339         if (buf_size == 0)
1340                 return 0;
1341         trace_buf_size = buf_size;
1342         return 1;
1343 }
1344 __setup("trace_buf_size=", set_buf_size);
1345
1346 static int __init set_tracing_thresh(char *str)
1347 {
1348         unsigned long threshold;
1349         int ret;
1350
1351         if (!str)
1352                 return 0;
1353         ret = kstrtoul(str, 0, &threshold);
1354         if (ret < 0)
1355                 return 0;
1356         tracing_thresh = threshold * 1000;
1357         return 1;
1358 }
1359 __setup("tracing_thresh=", set_tracing_thresh);
1360
1361 unsigned long nsecs_to_usecs(unsigned long nsecs)
1362 {
1363         return nsecs / 1000;
1364 }
1365
1366 /*
1367  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1368  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1369  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1370  * of strings in the order that the evals (enum) were defined.
1371  */
1372 #undef C
1373 #define C(a, b) b
1374
1375 /* These must match the bit postions in trace_iterator_flags */
1376 static const char *trace_options[] = {
1377         TRACE_FLAGS
1378         NULL
1379 };
1380
1381 static struct {
1382         u64 (*func)(void);
1383         const char *name;
1384         int in_ns;              /* is this clock in nanoseconds? */
1385 } trace_clocks[] = {
1386         { trace_clock_local,            "local",        1 },
1387         { trace_clock_global,           "global",       1 },
1388         { trace_clock_counter,          "counter",      0 },
1389         { trace_clock_jiffies,          "uptime",       0 },
1390         { trace_clock,                  "perf",         1 },
1391         { ktime_get_mono_fast_ns,       "mono",         1 },
1392         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1393         { ktime_get_boot_fast_ns,       "boot",         1 },
1394         ARCH_TRACE_CLOCKS
1395 };
1396
1397 bool trace_clock_in_ns(struct trace_array *tr)
1398 {
1399         if (trace_clocks[tr->clock_id].in_ns)
1400                 return true;
1401
1402         return false;
1403 }
1404
1405 /*
1406  * trace_parser_get_init - gets the buffer for trace parser
1407  */
1408 int trace_parser_get_init(struct trace_parser *parser, int size)
1409 {
1410         memset(parser, 0, sizeof(*parser));
1411
1412         parser->buffer = kmalloc(size, GFP_KERNEL);
1413         if (!parser->buffer)
1414                 return 1;
1415
1416         parser->size = size;
1417         return 0;
1418 }
1419
1420 /*
1421  * trace_parser_put - frees the buffer for trace parser
1422  */
1423 void trace_parser_put(struct trace_parser *parser)
1424 {
1425         kfree(parser->buffer);
1426         parser->buffer = NULL;
1427 }
1428
1429 /*
1430  * trace_get_user - reads the user input string separated by  space
1431  * (matched by isspace(ch))
1432  *
1433  * For each string found the 'struct trace_parser' is updated,
1434  * and the function returns.
1435  *
1436  * Returns number of bytes read.
1437  *
1438  * See kernel/trace/trace.h for 'struct trace_parser' details.
1439  */
1440 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1441         size_t cnt, loff_t *ppos)
1442 {
1443         char ch;
1444         size_t read = 0;
1445         ssize_t ret;
1446
1447         if (!*ppos)
1448                 trace_parser_clear(parser);
1449
1450         ret = get_user(ch, ubuf++);
1451         if (ret)
1452                 goto out;
1453
1454         read++;
1455         cnt--;
1456
1457         /*
1458          * The parser is not finished with the last write,
1459          * continue reading the user input without skipping spaces.
1460          */
1461         if (!parser->cont) {
1462                 /* skip white space */
1463                 while (cnt && isspace(ch)) {
1464                         ret = get_user(ch, ubuf++);
1465                         if (ret)
1466                                 goto out;
1467                         read++;
1468                         cnt--;
1469                 }
1470
1471                 parser->idx = 0;
1472
1473                 /* only spaces were written */
1474                 if (isspace(ch) || !ch) {
1475                         *ppos += read;
1476                         ret = read;
1477                         goto out;
1478                 }
1479         }
1480
1481         /* read the non-space input */
1482         while (cnt && !isspace(ch) && ch) {
1483                 if (parser->idx < parser->size - 1)
1484                         parser->buffer[parser->idx++] = ch;
1485                 else {
1486                         ret = -EINVAL;
1487                         goto out;
1488                 }
1489                 ret = get_user(ch, ubuf++);
1490                 if (ret)
1491                         goto out;
1492                 read++;
1493                 cnt--;
1494         }
1495
1496         /* We either got finished input or we have to wait for another call. */
1497         if (isspace(ch) || !ch) {
1498                 parser->buffer[parser->idx] = 0;
1499                 parser->cont = false;
1500         } else if (parser->idx < parser->size - 1) {
1501                 parser->cont = true;
1502                 parser->buffer[parser->idx++] = ch;
1503                 /* Make sure the parsed string always terminates with '\0'. */
1504                 parser->buffer[parser->idx] = 0;
1505         } else {
1506                 ret = -EINVAL;
1507                 goto out;
1508         }
1509
1510         *ppos += read;
1511         ret = read;
1512
1513 out:
1514         return ret;
1515 }
1516
1517 /* TODO add a seq_buf_to_buffer() */
1518 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1519 {
1520         int len;
1521
1522         if (trace_seq_used(s) <= s->seq.readpos)
1523                 return -EBUSY;
1524
1525         len = trace_seq_used(s) - s->seq.readpos;
1526         if (cnt > len)
1527                 cnt = len;
1528         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1529
1530         s->seq.readpos += cnt;
1531         return cnt;
1532 }
1533
1534 unsigned long __read_mostly     tracing_thresh;
1535 static const struct file_operations tracing_max_lat_fops;
1536
1537 #if (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1538         defined(CONFIG_FSNOTIFY)
1539
1540 static struct workqueue_struct *fsnotify_wq;
1541
1542 static void latency_fsnotify_workfn(struct work_struct *work)
1543 {
1544         struct trace_array *tr = container_of(work, struct trace_array,
1545                                               fsnotify_work);
1546         fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1547 }
1548
1549 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1550 {
1551         struct trace_array *tr = container_of(iwork, struct trace_array,
1552                                               fsnotify_irqwork);
1553         queue_work(fsnotify_wq, &tr->fsnotify_work);
1554 }
1555
1556 static void trace_create_maxlat_file(struct trace_array *tr,
1557                                      struct dentry *d_tracer)
1558 {
1559         INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1560         init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1561         tr->d_max_latency = trace_create_file("tracing_max_latency", 0644,
1562                                               d_tracer, &tr->max_latency,
1563                                               &tracing_max_lat_fops);
1564 }
1565
1566 __init static int latency_fsnotify_init(void)
1567 {
1568         fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1569                                       WQ_UNBOUND | WQ_HIGHPRI, 0);
1570         if (!fsnotify_wq) {
1571                 pr_err("Unable to allocate tr_max_lat_wq\n");
1572                 return -ENOMEM;
1573         }
1574         return 0;
1575 }
1576
1577 late_initcall_sync(latency_fsnotify_init);
1578
1579 void latency_fsnotify(struct trace_array *tr)
1580 {
1581         if (!fsnotify_wq)
1582                 return;
1583         /*
1584          * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1585          * possible that we are called from __schedule() or do_idle(), which
1586          * could cause a deadlock.
1587          */
1588         irq_work_queue(&tr->fsnotify_irqwork);
1589 }
1590
1591 /*
1592  * (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1593  *  defined(CONFIG_FSNOTIFY)
1594  */
1595 #else
1596
1597 #define trace_create_maxlat_file(tr, d_tracer)                          \
1598         trace_create_file("tracing_max_latency", 0644, d_tracer,        \
1599                           &tr->max_latency, &tracing_max_lat_fops)
1600
1601 #endif
1602
1603 #ifdef CONFIG_TRACER_MAX_TRACE
1604 /*
1605  * Copy the new maximum trace into the separate maximum-trace
1606  * structure. (this way the maximum trace is permanently saved,
1607  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1608  */
1609 static void
1610 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1611 {
1612         struct array_buffer *trace_buf = &tr->array_buffer;
1613         struct array_buffer *max_buf = &tr->max_buffer;
1614         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1615         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1616
1617         max_buf->cpu = cpu;
1618         max_buf->time_start = data->preempt_timestamp;
1619
1620         max_data->saved_latency = tr->max_latency;
1621         max_data->critical_start = data->critical_start;
1622         max_data->critical_end = data->critical_end;
1623
1624         strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1625         max_data->pid = tsk->pid;
1626         /*
1627          * If tsk == current, then use current_uid(), as that does not use
1628          * RCU. The irq tracer can be called out of RCU scope.
1629          */
1630         if (tsk == current)
1631                 max_data->uid = current_uid();
1632         else
1633                 max_data->uid = task_uid(tsk);
1634
1635         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1636         max_data->policy = tsk->policy;
1637         max_data->rt_priority = tsk->rt_priority;
1638
1639         /* record this tasks comm */
1640         tracing_record_cmdline(tsk);
1641         latency_fsnotify(tr);
1642 }
1643
1644 /**
1645  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1646  * @tr: tracer
1647  * @tsk: the task with the latency
1648  * @cpu: The cpu that initiated the trace.
1649  * @cond_data: User data associated with a conditional snapshot
1650  *
1651  * Flip the buffers between the @tr and the max_tr and record information
1652  * about which task was the cause of this latency.
1653  */
1654 void
1655 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1656               void *cond_data)
1657 {
1658         if (tr->stop_count)
1659                 return;
1660
1661         WARN_ON_ONCE(!irqs_disabled());
1662
1663         if (!tr->allocated_snapshot) {
1664                 /* Only the nop tracer should hit this when disabling */
1665                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1666                 return;
1667         }
1668
1669         arch_spin_lock(&tr->max_lock);
1670
1671         /* Inherit the recordable setting from array_buffer */
1672         if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1673                 ring_buffer_record_on(tr->max_buffer.buffer);
1674         else
1675                 ring_buffer_record_off(tr->max_buffer.buffer);
1676
1677 #ifdef CONFIG_TRACER_SNAPSHOT
1678         if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1679                 goto out_unlock;
1680 #endif
1681         swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1682
1683         __update_max_tr(tr, tsk, cpu);
1684
1685  out_unlock:
1686         arch_spin_unlock(&tr->max_lock);
1687 }
1688
1689 /**
1690  * update_max_tr_single - only copy one trace over, and reset the rest
1691  * @tr: tracer
1692  * @tsk: task with the latency
1693  * @cpu: the cpu of the buffer to copy.
1694  *
1695  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1696  */
1697 void
1698 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1699 {
1700         int ret;
1701
1702         if (tr->stop_count)
1703                 return;
1704
1705         WARN_ON_ONCE(!irqs_disabled());
1706         if (!tr->allocated_snapshot) {
1707                 /* Only the nop tracer should hit this when disabling */
1708                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1709                 return;
1710         }
1711
1712         arch_spin_lock(&tr->max_lock);
1713
1714         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1715
1716         if (ret == -EBUSY) {
1717                 /*
1718                  * We failed to swap the buffer due to a commit taking
1719                  * place on this CPU. We fail to record, but we reset
1720                  * the max trace buffer (no one writes directly to it)
1721                  * and flag that it failed.
1722                  */
1723                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1724                         "Failed to swap buffers due to commit in progress\n");
1725         }
1726
1727         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1728
1729         __update_max_tr(tr, tsk, cpu);
1730         arch_spin_unlock(&tr->max_lock);
1731 }
1732 #endif /* CONFIG_TRACER_MAX_TRACE */
1733
1734 static int wait_on_pipe(struct trace_iterator *iter, int full)
1735 {
1736         /* Iterators are static, they should be filled or empty */
1737         if (trace_buffer_iter(iter, iter->cpu_file))
1738                 return 0;
1739
1740         return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1741                                 full);
1742 }
1743
1744 #ifdef CONFIG_FTRACE_STARTUP_TEST
1745 static bool selftests_can_run;
1746
1747 struct trace_selftests {
1748         struct list_head                list;
1749         struct tracer                   *type;
1750 };
1751
1752 static LIST_HEAD(postponed_selftests);
1753
1754 static int save_selftest(struct tracer *type)
1755 {
1756         struct trace_selftests *selftest;
1757
1758         selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1759         if (!selftest)
1760                 return -ENOMEM;
1761
1762         selftest->type = type;
1763         list_add(&selftest->list, &postponed_selftests);
1764         return 0;
1765 }
1766
1767 static int run_tracer_selftest(struct tracer *type)
1768 {
1769         struct trace_array *tr = &global_trace;
1770         struct tracer *saved_tracer = tr->current_trace;
1771         int ret;
1772
1773         if (!type->selftest || tracing_selftest_disabled)
1774                 return 0;
1775
1776         /*
1777          * If a tracer registers early in boot up (before scheduling is
1778          * initialized and such), then do not run its selftests yet.
1779          * Instead, run it a little later in the boot process.
1780          */
1781         if (!selftests_can_run)
1782                 return save_selftest(type);
1783
1784         /*
1785          * Run a selftest on this tracer.
1786          * Here we reset the trace buffer, and set the current
1787          * tracer to be this tracer. The tracer can then run some
1788          * internal tracing to verify that everything is in order.
1789          * If we fail, we do not register this tracer.
1790          */
1791         tracing_reset_online_cpus(&tr->array_buffer);
1792
1793         tr->current_trace = type;
1794
1795 #ifdef CONFIG_TRACER_MAX_TRACE
1796         if (type->use_max_tr) {
1797                 /* If we expanded the buffers, make sure the max is expanded too */
1798                 if (ring_buffer_expanded)
1799                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1800                                            RING_BUFFER_ALL_CPUS);
1801                 tr->allocated_snapshot = true;
1802         }
1803 #endif
1804
1805         /* the test is responsible for initializing and enabling */
1806         pr_info("Testing tracer %s: ", type->name);
1807         ret = type->selftest(type, tr);
1808         /* the test is responsible for resetting too */
1809         tr->current_trace = saved_tracer;
1810         if (ret) {
1811                 printk(KERN_CONT "FAILED!\n");
1812                 /* Add the warning after printing 'FAILED' */
1813                 WARN_ON(1);
1814                 return -1;
1815         }
1816         /* Only reset on passing, to avoid touching corrupted buffers */
1817         tracing_reset_online_cpus(&tr->array_buffer);
1818
1819 #ifdef CONFIG_TRACER_MAX_TRACE
1820         if (type->use_max_tr) {
1821                 tr->allocated_snapshot = false;
1822
1823                 /* Shrink the max buffer again */
1824                 if (ring_buffer_expanded)
1825                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1826                                            RING_BUFFER_ALL_CPUS);
1827         }
1828 #endif
1829
1830         printk(KERN_CONT "PASSED\n");
1831         return 0;
1832 }
1833
1834 static __init int init_trace_selftests(void)
1835 {
1836         struct trace_selftests *p, *n;
1837         struct tracer *t, **last;
1838         int ret;
1839
1840         selftests_can_run = true;
1841
1842         mutex_lock(&trace_types_lock);
1843
1844         if (list_empty(&postponed_selftests))
1845                 goto out;
1846
1847         pr_info("Running postponed tracer tests:\n");
1848
1849         tracing_selftest_running = true;
1850         list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1851                 /* This loop can take minutes when sanitizers are enabled, so
1852                  * lets make sure we allow RCU processing.
1853                  */
1854                 cond_resched();
1855                 ret = run_tracer_selftest(p->type);
1856                 /* If the test fails, then warn and remove from available_tracers */
1857                 if (ret < 0) {
1858                         WARN(1, "tracer: %s failed selftest, disabling\n",
1859                              p->type->name);
1860                         last = &trace_types;
1861                         for (t = trace_types; t; t = t->next) {
1862                                 if (t == p->type) {
1863                                         *last = t->next;
1864                                         break;
1865                                 }
1866                                 last = &t->next;
1867                         }
1868                 }
1869                 list_del(&p->list);
1870                 kfree(p);
1871         }
1872         tracing_selftest_running = false;
1873
1874  out:
1875         mutex_unlock(&trace_types_lock);
1876
1877         return 0;
1878 }
1879 core_initcall(init_trace_selftests);
1880 #else
1881 static inline int run_tracer_selftest(struct tracer *type)
1882 {
1883         return 0;
1884 }
1885 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1886
1887 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1888
1889 static void __init apply_trace_boot_options(void);
1890
1891 /**
1892  * register_tracer - register a tracer with the ftrace system.
1893  * @type: the plugin for the tracer
1894  *
1895  * Register a new plugin tracer.
1896  */
1897 int __init register_tracer(struct tracer *type)
1898 {
1899         struct tracer *t;
1900         int ret = 0;
1901
1902         if (!type->name) {
1903                 pr_info("Tracer must have a name\n");
1904                 return -1;
1905         }
1906
1907         if (strlen(type->name) >= MAX_TRACER_SIZE) {
1908                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1909                 return -1;
1910         }
1911
1912         if (security_locked_down(LOCKDOWN_TRACEFS)) {
1913                 pr_warn("Can not register tracer %s due to lockdown\n",
1914                            type->name);
1915                 return -EPERM;
1916         }
1917
1918         mutex_lock(&trace_types_lock);
1919
1920         tracing_selftest_running = true;
1921
1922         for (t = trace_types; t; t = t->next) {
1923                 if (strcmp(type->name, t->name) == 0) {
1924                         /* already found */
1925                         pr_info("Tracer %s already registered\n",
1926                                 type->name);
1927                         ret = -1;
1928                         goto out;
1929                 }
1930         }
1931
1932         if (!type->set_flag)
1933                 type->set_flag = &dummy_set_flag;
1934         if (!type->flags) {
1935                 /*allocate a dummy tracer_flags*/
1936                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1937                 if (!type->flags) {
1938                         ret = -ENOMEM;
1939                         goto out;
1940                 }
1941                 type->flags->val = 0;
1942                 type->flags->opts = dummy_tracer_opt;
1943         } else
1944                 if (!type->flags->opts)
1945                         type->flags->opts = dummy_tracer_opt;
1946
1947         /* store the tracer for __set_tracer_option */
1948         type->flags->trace = type;
1949
1950         ret = run_tracer_selftest(type);
1951         if (ret < 0)
1952                 goto out;
1953
1954         type->next = trace_types;
1955         trace_types = type;
1956         add_tracer_options(&global_trace, type);
1957
1958  out:
1959         tracing_selftest_running = false;
1960         mutex_unlock(&trace_types_lock);
1961
1962         if (ret || !default_bootup_tracer)
1963                 goto out_unlock;
1964
1965         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1966                 goto out_unlock;
1967
1968         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1969         /* Do we want this tracer to start on bootup? */
1970         tracing_set_tracer(&global_trace, type->name);
1971         default_bootup_tracer = NULL;
1972
1973         apply_trace_boot_options();
1974
1975         /* disable other selftests, since this will break it. */
1976         tracing_selftest_disabled = true;
1977 #ifdef CONFIG_FTRACE_STARTUP_TEST
1978         printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1979                type->name);
1980 #endif
1981
1982  out_unlock:
1983         return ret;
1984 }
1985
1986 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
1987 {
1988         struct trace_buffer *buffer = buf->buffer;
1989
1990         if (!buffer)
1991                 return;
1992
1993         ring_buffer_record_disable(buffer);
1994
1995         /* Make sure all commits have finished */
1996         synchronize_rcu();
1997         ring_buffer_reset_cpu(buffer, cpu);
1998
1999         ring_buffer_record_enable(buffer);
2000 }
2001
2002 void tracing_reset_online_cpus(struct array_buffer *buf)
2003 {
2004         struct trace_buffer *buffer = buf->buffer;
2005
2006         if (!buffer)
2007                 return;
2008
2009         ring_buffer_record_disable(buffer);
2010
2011         /* Make sure all commits have finished */
2012         synchronize_rcu();
2013
2014         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2015
2016         ring_buffer_reset_online_cpus(buffer);
2017
2018         ring_buffer_record_enable(buffer);
2019 }
2020
2021 /* Must have trace_types_lock held */
2022 void tracing_reset_all_online_cpus(void)
2023 {
2024         struct trace_array *tr;
2025
2026         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2027                 if (!tr->clear_trace)
2028                         continue;
2029                 tr->clear_trace = false;
2030                 tracing_reset_online_cpus(&tr->array_buffer);
2031 #ifdef CONFIG_TRACER_MAX_TRACE
2032                 tracing_reset_online_cpus(&tr->max_buffer);
2033 #endif
2034         }
2035 }
2036
2037 static int *tgid_map;
2038
2039 #define SAVED_CMDLINES_DEFAULT 128
2040 #define NO_CMDLINE_MAP UINT_MAX
2041 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2042 struct saved_cmdlines_buffer {
2043         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2044         unsigned *map_cmdline_to_pid;
2045         unsigned cmdline_num;
2046         int cmdline_idx;
2047         char *saved_cmdlines;
2048 };
2049 static struct saved_cmdlines_buffer *savedcmd;
2050
2051 /* temporary disable recording */
2052 static atomic_t trace_record_taskinfo_disabled __read_mostly;
2053
2054 static inline char *get_saved_cmdlines(int idx)
2055 {
2056         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2057 }
2058
2059 static inline void set_cmdline(int idx, const char *cmdline)
2060 {
2061         strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2062 }
2063
2064 static int allocate_cmdlines_buffer(unsigned int val,
2065                                     struct saved_cmdlines_buffer *s)
2066 {
2067         s->map_cmdline_to_pid = kmalloc_array(val,
2068                                               sizeof(*s->map_cmdline_to_pid),
2069                                               GFP_KERNEL);
2070         if (!s->map_cmdline_to_pid)
2071                 return -ENOMEM;
2072
2073         s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2074         if (!s->saved_cmdlines) {
2075                 kfree(s->map_cmdline_to_pid);
2076                 return -ENOMEM;
2077         }
2078
2079         s->cmdline_idx = 0;
2080         s->cmdline_num = val;
2081         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2082                sizeof(s->map_pid_to_cmdline));
2083         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2084                val * sizeof(*s->map_cmdline_to_pid));
2085
2086         return 0;
2087 }
2088
2089 static int trace_create_savedcmd(void)
2090 {
2091         int ret;
2092
2093         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2094         if (!savedcmd)
2095                 return -ENOMEM;
2096
2097         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2098         if (ret < 0) {
2099                 kfree(savedcmd);
2100                 savedcmd = NULL;
2101                 return -ENOMEM;
2102         }
2103
2104         return 0;
2105 }
2106
2107 int is_tracing_stopped(void)
2108 {
2109         return global_trace.stop_count;
2110 }
2111
2112 /**
2113  * tracing_start - quick start of the tracer
2114  *
2115  * If tracing is enabled but was stopped by tracing_stop,
2116  * this will start the tracer back up.
2117  */
2118 void tracing_start(void)
2119 {
2120         struct trace_buffer *buffer;
2121         unsigned long flags;
2122
2123         if (tracing_disabled)
2124                 return;
2125
2126         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2127         if (--global_trace.stop_count) {
2128                 if (global_trace.stop_count < 0) {
2129                         /* Someone screwed up their debugging */
2130                         WARN_ON_ONCE(1);
2131                         global_trace.stop_count = 0;
2132                 }
2133                 goto out;
2134         }
2135
2136         /* Prevent the buffers from switching */
2137         arch_spin_lock(&global_trace.max_lock);
2138
2139         buffer = global_trace.array_buffer.buffer;
2140         if (buffer)
2141                 ring_buffer_record_enable(buffer);
2142
2143 #ifdef CONFIG_TRACER_MAX_TRACE
2144         buffer = global_trace.max_buffer.buffer;
2145         if (buffer)
2146                 ring_buffer_record_enable(buffer);
2147 #endif
2148
2149         arch_spin_unlock(&global_trace.max_lock);
2150
2151  out:
2152         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2153 }
2154
2155 static void tracing_start_tr(struct trace_array *tr)
2156 {
2157         struct trace_buffer *buffer;
2158         unsigned long flags;
2159
2160         if (tracing_disabled)
2161                 return;
2162
2163         /* If global, we need to also start the max tracer */
2164         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2165                 return tracing_start();
2166
2167         raw_spin_lock_irqsave(&tr->start_lock, flags);
2168
2169         if (--tr->stop_count) {
2170                 if (tr->stop_count < 0) {
2171                         /* Someone screwed up their debugging */
2172                         WARN_ON_ONCE(1);
2173                         tr->stop_count = 0;
2174                 }
2175                 goto out;
2176         }
2177
2178         buffer = tr->array_buffer.buffer;
2179         if (buffer)
2180                 ring_buffer_record_enable(buffer);
2181
2182  out:
2183         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2184 }
2185
2186 /**
2187  * tracing_stop - quick stop of the tracer
2188  *
2189  * Light weight way to stop tracing. Use in conjunction with
2190  * tracing_start.
2191  */
2192 void tracing_stop(void)
2193 {
2194         struct trace_buffer *buffer;
2195         unsigned long flags;
2196
2197         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2198         if (global_trace.stop_count++)
2199                 goto out;
2200
2201         /* Prevent the buffers from switching */
2202         arch_spin_lock(&global_trace.max_lock);
2203
2204         buffer = global_trace.array_buffer.buffer;
2205         if (buffer)
2206                 ring_buffer_record_disable(buffer);
2207
2208 #ifdef CONFIG_TRACER_MAX_TRACE
2209         buffer = global_trace.max_buffer.buffer;
2210         if (buffer)
2211                 ring_buffer_record_disable(buffer);
2212 #endif
2213
2214         arch_spin_unlock(&global_trace.max_lock);
2215
2216  out:
2217         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2218 }
2219
2220 static void tracing_stop_tr(struct trace_array *tr)
2221 {
2222         struct trace_buffer *buffer;
2223         unsigned long flags;
2224
2225         /* If global, we need to also stop the max tracer */
2226         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2227                 return tracing_stop();
2228
2229         raw_spin_lock_irqsave(&tr->start_lock, flags);
2230         if (tr->stop_count++)
2231                 goto out;
2232
2233         buffer = tr->array_buffer.buffer;
2234         if (buffer)
2235                 ring_buffer_record_disable(buffer);
2236
2237  out:
2238         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2239 }
2240
2241 static int trace_save_cmdline(struct task_struct *tsk)
2242 {
2243         unsigned pid, idx;
2244
2245         /* treat recording of idle task as a success */
2246         if (!tsk->pid)
2247                 return 1;
2248
2249         if (unlikely(tsk->pid > PID_MAX_DEFAULT))
2250                 return 0;
2251
2252         /*
2253          * It's not the end of the world if we don't get
2254          * the lock, but we also don't want to spin
2255          * nor do we want to disable interrupts,
2256          * so if we miss here, then better luck next time.
2257          */
2258         if (!arch_spin_trylock(&trace_cmdline_lock))
2259                 return 0;
2260
2261         idx = savedcmd->map_pid_to_cmdline[tsk->pid];
2262         if (idx == NO_CMDLINE_MAP) {
2263                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2264
2265                 /*
2266                  * Check whether the cmdline buffer at idx has a pid
2267                  * mapped. We are going to overwrite that entry so we
2268                  * need to clear the map_pid_to_cmdline. Otherwise we
2269                  * would read the new comm for the old pid.
2270                  */
2271                 pid = savedcmd->map_cmdline_to_pid[idx];
2272                 if (pid != NO_CMDLINE_MAP)
2273                         savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
2274
2275                 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2276                 savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
2277
2278                 savedcmd->cmdline_idx = idx;
2279         }
2280
2281         set_cmdline(idx, tsk->comm);
2282
2283         arch_spin_unlock(&trace_cmdline_lock);
2284
2285         return 1;
2286 }
2287
2288 static void __trace_find_cmdline(int pid, char comm[])
2289 {
2290         unsigned map;
2291
2292         if (!pid) {
2293                 strcpy(comm, "<idle>");
2294                 return;
2295         }
2296
2297         if (WARN_ON_ONCE(pid < 0)) {
2298                 strcpy(comm, "<XXX>");
2299                 return;
2300         }
2301
2302         if (pid > PID_MAX_DEFAULT) {
2303                 strcpy(comm, "<...>");
2304                 return;
2305         }
2306
2307         map = savedcmd->map_pid_to_cmdline[pid];
2308         if (map != NO_CMDLINE_MAP)
2309                 strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2310         else
2311                 strcpy(comm, "<...>");
2312 }
2313
2314 void trace_find_cmdline(int pid, char comm[])
2315 {
2316         preempt_disable();
2317         arch_spin_lock(&trace_cmdline_lock);
2318
2319         __trace_find_cmdline(pid, comm);
2320
2321         arch_spin_unlock(&trace_cmdline_lock);
2322         preempt_enable();
2323 }
2324
2325 int trace_find_tgid(int pid)
2326 {
2327         if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
2328                 return 0;
2329
2330         return tgid_map[pid];
2331 }
2332
2333 static int trace_save_tgid(struct task_struct *tsk)
2334 {
2335         /* treat recording of idle task as a success */
2336         if (!tsk->pid)
2337                 return 1;
2338
2339         if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT))
2340                 return 0;
2341
2342         tgid_map[tsk->pid] = tsk->tgid;
2343         return 1;
2344 }
2345
2346 static bool tracing_record_taskinfo_skip(int flags)
2347 {
2348         if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2349                 return true;
2350         if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
2351                 return true;
2352         if (!__this_cpu_read(trace_taskinfo_save))
2353                 return true;
2354         return false;
2355 }
2356
2357 /**
2358  * tracing_record_taskinfo - record the task info of a task
2359  *
2360  * @task:  task to record
2361  * @flags: TRACE_RECORD_CMDLINE for recording comm
2362  *         TRACE_RECORD_TGID for recording tgid
2363  */
2364 void tracing_record_taskinfo(struct task_struct *task, int flags)
2365 {
2366         bool done;
2367
2368         if (tracing_record_taskinfo_skip(flags))
2369                 return;
2370
2371         /*
2372          * Record as much task information as possible. If some fail, continue
2373          * to try to record the others.
2374          */
2375         done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2376         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2377
2378         /* If recording any information failed, retry again soon. */
2379         if (!done)
2380                 return;
2381
2382         __this_cpu_write(trace_taskinfo_save, false);
2383 }
2384
2385 /**
2386  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2387  *
2388  * @prev: previous task during sched_switch
2389  * @next: next task during sched_switch
2390  * @flags: TRACE_RECORD_CMDLINE for recording comm
2391  *         TRACE_RECORD_TGID for recording tgid
2392  */
2393 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2394                                           struct task_struct *next, int flags)
2395 {
2396         bool done;
2397
2398         if (tracing_record_taskinfo_skip(flags))
2399                 return;
2400
2401         /*
2402          * Record as much task information as possible. If some fail, continue
2403          * to try to record the others.
2404          */
2405         done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2406         done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2407         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2408         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2409
2410         /* If recording any information failed, retry again soon. */
2411         if (!done)
2412                 return;
2413
2414         __this_cpu_write(trace_taskinfo_save, false);
2415 }
2416
2417 /* Helpers to record a specific task information */
2418 void tracing_record_cmdline(struct task_struct *task)
2419 {
2420         tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2421 }
2422
2423 void tracing_record_tgid(struct task_struct *task)
2424 {
2425         tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2426 }
2427
2428 /*
2429  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2430  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2431  * simplifies those functions and keeps them in sync.
2432  */
2433 enum print_line_t trace_handle_return(struct trace_seq *s)
2434 {
2435         return trace_seq_has_overflowed(s) ?
2436                 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2437 }
2438 EXPORT_SYMBOL_GPL(trace_handle_return);
2439
2440 void
2441 tracing_generic_entry_update(struct trace_entry *entry, unsigned short type,
2442                              unsigned long flags, int pc)
2443 {
2444         struct task_struct *tsk = current;
2445
2446         entry->preempt_count            = pc & 0xff;
2447         entry->pid                      = (tsk) ? tsk->pid : 0;
2448         entry->type                     = type;
2449         entry->flags =
2450 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
2451                 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
2452 #else
2453                 TRACE_FLAG_IRQS_NOSUPPORT |
2454 #endif
2455                 ((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
2456                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
2457                 ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
2458                 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
2459                 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
2460 }
2461 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
2462
2463 struct ring_buffer_event *
2464 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2465                           int type,
2466                           unsigned long len,
2467                           unsigned long flags, int pc)
2468 {
2469         return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
2470 }
2471
2472 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2473 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2474 static int trace_buffered_event_ref;
2475
2476 /**
2477  * trace_buffered_event_enable - enable buffering events
2478  *
2479  * When events are being filtered, it is quicker to use a temporary
2480  * buffer to write the event data into if there's a likely chance
2481  * that it will not be committed. The discard of the ring buffer
2482  * is not as fast as committing, and is much slower than copying
2483  * a commit.
2484  *
2485  * When an event is to be filtered, allocate per cpu buffers to
2486  * write the event data into, and if the event is filtered and discarded
2487  * it is simply dropped, otherwise, the entire data is to be committed
2488  * in one shot.
2489  */
2490 void trace_buffered_event_enable(void)
2491 {
2492         struct ring_buffer_event *event;
2493         struct page *page;
2494         int cpu;
2495
2496         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2497
2498         if (trace_buffered_event_ref++)
2499                 return;
2500
2501         for_each_tracing_cpu(cpu) {
2502                 page = alloc_pages_node(cpu_to_node(cpu),
2503                                         GFP_KERNEL | __GFP_NORETRY, 0);
2504                 if (!page)
2505                         goto failed;
2506
2507                 event = page_address(page);
2508                 memset(event, 0, sizeof(*event));
2509
2510                 per_cpu(trace_buffered_event, cpu) = event;
2511
2512                 preempt_disable();
2513                 if (cpu == smp_processor_id() &&
2514                     this_cpu_read(trace_buffered_event) !=
2515                     per_cpu(trace_buffered_event, cpu))
2516                         WARN_ON_ONCE(1);
2517                 preempt_enable();
2518         }
2519
2520         return;
2521  failed:
2522         trace_buffered_event_disable();
2523 }
2524
2525 static void enable_trace_buffered_event(void *data)
2526 {
2527         /* Probably not needed, but do it anyway */
2528         smp_rmb();
2529         this_cpu_dec(trace_buffered_event_cnt);
2530 }
2531
2532 static void disable_trace_buffered_event(void *data)
2533 {
2534         this_cpu_inc(trace_buffered_event_cnt);
2535 }
2536
2537 /**
2538  * trace_buffered_event_disable - disable buffering events
2539  *
2540  * When a filter is removed, it is faster to not use the buffered
2541  * events, and to commit directly into the ring buffer. Free up
2542  * the temp buffers when there are no more users. This requires
2543  * special synchronization with current events.
2544  */
2545 void trace_buffered_event_disable(void)
2546 {
2547         int cpu;
2548
2549         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2550
2551         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2552                 return;
2553
2554         if (--trace_buffered_event_ref)
2555                 return;
2556
2557         preempt_disable();
2558         /* For each CPU, set the buffer as used. */
2559         smp_call_function_many(tracing_buffer_mask,
2560                                disable_trace_buffered_event, NULL, 1);
2561         preempt_enable();
2562
2563         /* Wait for all current users to finish */
2564         synchronize_rcu();
2565
2566         for_each_tracing_cpu(cpu) {
2567                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2568                 per_cpu(trace_buffered_event, cpu) = NULL;
2569         }
2570         /*
2571          * Make sure trace_buffered_event is NULL before clearing
2572          * trace_buffered_event_cnt.
2573          */
2574         smp_wmb();
2575
2576         preempt_disable();
2577         /* Do the work on each cpu */
2578         smp_call_function_many(tracing_buffer_mask,
2579                                enable_trace_buffered_event, NULL, 1);
2580         preempt_enable();
2581 }
2582
2583 static struct trace_buffer *temp_buffer;
2584
2585 struct ring_buffer_event *
2586 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2587                           struct trace_event_file *trace_file,
2588                           int type, unsigned long len,
2589                           unsigned long flags, int pc)
2590 {
2591         struct ring_buffer_event *entry;
2592         int val;
2593
2594         *current_rb = trace_file->tr->array_buffer.buffer;
2595
2596         if (!ring_buffer_time_stamp_abs(*current_rb) && (trace_file->flags &
2597              (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2598             (entry = this_cpu_read(trace_buffered_event))) {
2599                 /* Try to use the per cpu buffer first */
2600                 val = this_cpu_inc_return(trace_buffered_event_cnt);
2601                 if (val == 1) {
2602                         trace_event_setup(entry, type, flags, pc);
2603                         entry->array[0] = len;
2604                         return entry;
2605                 }
2606                 this_cpu_dec(trace_buffered_event_cnt);
2607         }
2608
2609         entry = __trace_buffer_lock_reserve(*current_rb,
2610                                             type, len, flags, pc);
2611         /*
2612          * If tracing is off, but we have triggers enabled
2613          * we still need to look at the event data. Use the temp_buffer
2614          * to store the trace event for the tigger to use. It's recusive
2615          * safe and will not be recorded anywhere.
2616          */
2617         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2618                 *current_rb = temp_buffer;
2619                 entry = __trace_buffer_lock_reserve(*current_rb,
2620                                                     type, len, flags, pc);
2621         }
2622         return entry;
2623 }
2624 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2625
2626 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2627 static DEFINE_MUTEX(tracepoint_printk_mutex);
2628
2629 static void output_printk(struct trace_event_buffer *fbuffer)
2630 {
2631         struct trace_event_call *event_call;
2632         struct trace_event_file *file;
2633         struct trace_event *event;
2634         unsigned long flags;
2635         struct trace_iterator *iter = tracepoint_print_iter;
2636
2637         /* We should never get here if iter is NULL */
2638         if (WARN_ON_ONCE(!iter))
2639                 return;
2640
2641         event_call = fbuffer->trace_file->event_call;
2642         if (!event_call || !event_call->event.funcs ||
2643             !event_call->event.funcs->trace)
2644                 return;
2645
2646         file = fbuffer->trace_file;
2647         if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2648             (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2649              !filter_match_preds(file->filter, fbuffer->entry)))
2650                 return;
2651
2652         event = &fbuffer->trace_file->event_call->event;
2653
2654         spin_lock_irqsave(&tracepoint_iter_lock, flags);
2655         trace_seq_init(&iter->seq);
2656         iter->ent = fbuffer->entry;
2657         event_call->event.funcs->trace(iter, 0, event);
2658         trace_seq_putc(&iter->seq, 0);
2659         printk("%s", iter->seq.buffer);
2660
2661         spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2662 }
2663
2664 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2665                              void *buffer, size_t *lenp,
2666                              loff_t *ppos)
2667 {
2668         int save_tracepoint_printk;
2669         int ret;
2670
2671         mutex_lock(&tracepoint_printk_mutex);
2672         save_tracepoint_printk = tracepoint_printk;
2673
2674         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2675
2676         /*
2677          * This will force exiting early, as tracepoint_printk
2678          * is always zero when tracepoint_printk_iter is not allocated
2679          */
2680         if (!tracepoint_print_iter)
2681                 tracepoint_printk = 0;
2682
2683         if (save_tracepoint_printk == tracepoint_printk)
2684                 goto out;
2685
2686         if (tracepoint_printk)
2687                 static_key_enable(&tracepoint_printk_key.key);
2688         else
2689                 static_key_disable(&tracepoint_printk_key.key);
2690
2691  out:
2692         mutex_unlock(&tracepoint_printk_mutex);
2693
2694         return ret;
2695 }
2696
2697 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2698 {
2699         if (static_key_false(&tracepoint_printk_key.key))
2700                 output_printk(fbuffer);
2701
2702         event_trigger_unlock_commit_regs(fbuffer->trace_file, fbuffer->buffer,
2703                                     fbuffer->event, fbuffer->entry,
2704                                     fbuffer->flags, fbuffer->pc, fbuffer->regs);
2705 }
2706 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2707
2708 /*
2709  * Skip 3:
2710  *
2711  *   trace_buffer_unlock_commit_regs()
2712  *   trace_event_buffer_commit()
2713  *   trace_event_raw_event_xxx()
2714  */
2715 # define STACK_SKIP 3
2716
2717 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2718                                      struct trace_buffer *buffer,
2719                                      struct ring_buffer_event *event,
2720                                      unsigned long flags, int pc,
2721                                      struct pt_regs *regs)
2722 {
2723         __buffer_unlock_commit(buffer, event);
2724
2725         /*
2726          * If regs is not set, then skip the necessary functions.
2727          * Note, we can still get here via blktrace, wakeup tracer
2728          * and mmiotrace, but that's ok if they lose a function or
2729          * two. They are not that meaningful.
2730          */
2731         ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs);
2732         ftrace_trace_userstack(buffer, flags, pc);
2733 }
2734
2735 /*
2736  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2737  */
2738 void
2739 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2740                                    struct ring_buffer_event *event)
2741 {
2742         __buffer_unlock_commit(buffer, event);
2743 }
2744
2745 static void
2746 trace_process_export(struct trace_export *export,
2747                struct ring_buffer_event *event)
2748 {
2749         struct trace_entry *entry;
2750         unsigned int size = 0;
2751
2752         entry = ring_buffer_event_data(event);
2753         size = ring_buffer_event_length(event);
2754         export->write(export, entry, size);
2755 }
2756
2757 static DEFINE_MUTEX(ftrace_export_lock);
2758
2759 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
2760
2761 static DEFINE_STATIC_KEY_FALSE(ftrace_exports_enabled);
2762
2763 static inline void ftrace_exports_enable(void)
2764 {
2765         static_branch_enable(&ftrace_exports_enabled);
2766 }
2767
2768 static inline void ftrace_exports_disable(void)
2769 {
2770         static_branch_disable(&ftrace_exports_enabled);
2771 }
2772
2773 static void ftrace_exports(struct ring_buffer_event *event)
2774 {
2775         struct trace_export *export;
2776
2777         preempt_disable_notrace();
2778
2779         export = rcu_dereference_raw_check(ftrace_exports_list);
2780         while (export) {
2781                 trace_process_export(export, event);
2782                 export = rcu_dereference_raw_check(export->next);
2783         }
2784
2785         preempt_enable_notrace();
2786 }
2787
2788 static inline void
2789 add_trace_export(struct trace_export **list, struct trace_export *export)
2790 {
2791         rcu_assign_pointer(export->next, *list);
2792         /*
2793          * We are entering export into the list but another
2794          * CPU might be walking that list. We need to make sure
2795          * the export->next pointer is valid before another CPU sees
2796          * the export pointer included into the list.
2797          */
2798         rcu_assign_pointer(*list, export);
2799 }
2800
2801 static inline int
2802 rm_trace_export(struct trace_export **list, struct trace_export *export)
2803 {
2804         struct trace_export **p;
2805
2806         for (p = list; *p != NULL; p = &(*p)->next)
2807                 if (*p == export)
2808                         break;
2809
2810         if (*p != export)
2811                 return -1;
2812
2813         rcu_assign_pointer(*p, (*p)->next);
2814
2815         return 0;
2816 }
2817
2818 static inline void
2819 add_ftrace_export(struct trace_export **list, struct trace_export *export)
2820 {
2821         if (*list == NULL)
2822                 ftrace_exports_enable();
2823
2824         add_trace_export(list, export);
2825 }
2826
2827 static inline int
2828 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
2829 {
2830         int ret;
2831
2832         ret = rm_trace_export(list, export);
2833         if (*list == NULL)
2834                 ftrace_exports_disable();
2835
2836         return ret;
2837 }
2838
2839 int register_ftrace_export(struct trace_export *export)
2840 {
2841         if (WARN_ON_ONCE(!export->write))
2842                 return -1;
2843
2844         mutex_lock(&ftrace_export_lock);
2845
2846         add_ftrace_export(&ftrace_exports_list, export);
2847
2848         mutex_unlock(&ftrace_export_lock);
2849
2850         return 0;
2851 }
2852 EXPORT_SYMBOL_GPL(register_ftrace_export);
2853
2854 int unregister_ftrace_export(struct trace_export *export)
2855 {
2856         int ret;
2857
2858         mutex_lock(&ftrace_export_lock);
2859
2860         ret = rm_ftrace_export(&ftrace_exports_list, export);
2861
2862         mutex_unlock(&ftrace_export_lock);
2863
2864         return ret;
2865 }
2866 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
2867
2868 void
2869 trace_function(struct trace_array *tr,
2870                unsigned long ip, unsigned long parent_ip, unsigned long flags,
2871                int pc)
2872 {
2873         struct trace_event_call *call = &event_function;
2874         struct trace_buffer *buffer = tr->array_buffer.buffer;
2875         struct ring_buffer_event *event;
2876         struct ftrace_entry *entry;
2877
2878         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2879                                             flags, pc);
2880         if (!event)
2881                 return;
2882         entry   = ring_buffer_event_data(event);
2883         entry->ip                       = ip;
2884         entry->parent_ip                = parent_ip;
2885
2886         if (!call_filter_check_discard(call, entry, buffer, event)) {
2887                 if (static_branch_unlikely(&ftrace_exports_enabled))
2888                         ftrace_exports(event);
2889                 __buffer_unlock_commit(buffer, event);
2890         }
2891 }
2892
2893 #ifdef CONFIG_STACKTRACE
2894
2895 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2896 #define FTRACE_KSTACK_NESTING   4
2897
2898 #define FTRACE_KSTACK_ENTRIES   (PAGE_SIZE / FTRACE_KSTACK_NESTING)
2899
2900 struct ftrace_stack {
2901         unsigned long           calls[FTRACE_KSTACK_ENTRIES];
2902 };
2903
2904
2905 struct ftrace_stacks {
2906         struct ftrace_stack     stacks[FTRACE_KSTACK_NESTING];
2907 };
2908
2909 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
2910 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2911
2912 static void __ftrace_trace_stack(struct trace_buffer *buffer,
2913                                  unsigned long flags,
2914                                  int skip, int pc, struct pt_regs *regs)
2915 {
2916         struct trace_event_call *call = &event_kernel_stack;
2917         struct ring_buffer_event *event;
2918         unsigned int size, nr_entries;
2919         struct ftrace_stack *fstack;
2920         struct stack_entry *entry;
2921         int stackidx;
2922
2923         /*
2924          * Add one, for this function and the call to save_stack_trace()
2925          * If regs is set, then these functions will not be in the way.
2926          */
2927 #ifndef CONFIG_UNWINDER_ORC
2928         if (!regs)
2929                 skip++;
2930 #endif
2931
2932         preempt_disable_notrace();
2933
2934         stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
2935
2936         /* This should never happen. If it does, yell once and skip */
2937         if (WARN_ON_ONCE(stackidx > FTRACE_KSTACK_NESTING))
2938                 goto out;
2939
2940         /*
2941          * The above __this_cpu_inc_return() is 'atomic' cpu local. An
2942          * interrupt will either see the value pre increment or post
2943          * increment. If the interrupt happens pre increment it will have
2944          * restored the counter when it returns.  We just need a barrier to
2945          * keep gcc from moving things around.
2946          */
2947         barrier();
2948
2949         fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
2950         size = ARRAY_SIZE(fstack->calls);
2951
2952         if (regs) {
2953                 nr_entries = stack_trace_save_regs(regs, fstack->calls,
2954                                                    size, skip);
2955         } else {
2956                 nr_entries = stack_trace_save(fstack->calls, size, skip);
2957         }
2958
2959         size = nr_entries * sizeof(unsigned long);
2960         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2961                                             sizeof(*entry) + size, flags, pc);
2962         if (!event)
2963                 goto out;
2964         entry = ring_buffer_event_data(event);
2965
2966         memcpy(&entry->caller, fstack->calls, size);
2967         entry->size = nr_entries;
2968
2969         if (!call_filter_check_discard(call, entry, buffer, event))
2970                 __buffer_unlock_commit(buffer, event);
2971
2972  out:
2973         /* Again, don't let gcc optimize things here */
2974         barrier();
2975         __this_cpu_dec(ftrace_stack_reserve);
2976         preempt_enable_notrace();
2977
2978 }
2979
2980 static inline void ftrace_trace_stack(struct trace_array *tr,
2981                                       struct trace_buffer *buffer,
2982                                       unsigned long flags,
2983                                       int skip, int pc, struct pt_regs *regs)
2984 {
2985         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2986                 return;
2987
2988         __ftrace_trace_stack(buffer, flags, skip, pc, regs);
2989 }
2990
2991 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2992                    int pc)
2993 {
2994         struct trace_buffer *buffer = tr->array_buffer.buffer;
2995
2996         if (rcu_is_watching()) {
2997                 __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2998                 return;
2999         }
3000
3001         /*
3002          * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
3003          * but if the above rcu_is_watching() failed, then the NMI
3004          * triggered someplace critical, and rcu_irq_enter() should
3005          * not be called from NMI.
3006          */
3007         if (unlikely(in_nmi()))
3008                 return;
3009
3010         rcu_irq_enter_irqson();
3011         __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
3012         rcu_irq_exit_irqson();
3013 }
3014
3015 /**
3016  * trace_dump_stack - record a stack back trace in the trace buffer
3017  * @skip: Number of functions to skip (helper handlers)
3018  */
3019 void trace_dump_stack(int skip)
3020 {
3021         unsigned long flags;
3022
3023         if (tracing_disabled || tracing_selftest_running)
3024                 return;
3025
3026         local_save_flags(flags);
3027
3028 #ifndef CONFIG_UNWINDER_ORC
3029         /* Skip 1 to skip this function. */
3030         skip++;
3031 #endif
3032         __ftrace_trace_stack(global_trace.array_buffer.buffer,
3033                              flags, skip, preempt_count(), NULL);
3034 }
3035 EXPORT_SYMBOL_GPL(trace_dump_stack);
3036
3037 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3038 static DEFINE_PER_CPU(int, user_stack_count);
3039
3040 static void
3041 ftrace_trace_userstack(struct trace_buffer *buffer, unsigned long flags, int pc)
3042 {
3043         struct trace_event_call *call = &event_user_stack;
3044         struct ring_buffer_event *event;
3045         struct userstack_entry *entry;
3046
3047         if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
3048                 return;
3049
3050         /*
3051          * NMIs can not handle page faults, even with fix ups.
3052          * The save user stack can (and often does) fault.
3053          */
3054         if (unlikely(in_nmi()))
3055                 return;
3056
3057         /*
3058          * prevent recursion, since the user stack tracing may
3059          * trigger other kernel events.
3060          */
3061         preempt_disable();
3062         if (__this_cpu_read(user_stack_count))
3063                 goto out;
3064
3065         __this_cpu_inc(user_stack_count);
3066
3067         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3068                                             sizeof(*entry), flags, pc);
3069         if (!event)
3070                 goto out_drop_count;
3071         entry   = ring_buffer_event_data(event);
3072
3073         entry->tgid             = current->tgid;
3074         memset(&entry->caller, 0, sizeof(entry->caller));
3075
3076         stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3077         if (!call_filter_check_discard(call, entry, buffer, event))
3078                 __buffer_unlock_commit(buffer, event);
3079
3080  out_drop_count:
3081         __this_cpu_dec(user_stack_count);
3082  out:
3083         preempt_enable();
3084 }
3085 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3086 static void ftrace_trace_userstack(struct trace_buffer *buffer,
3087                                    unsigned long flags, int pc)
3088 {
3089 }
3090 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3091
3092 #endif /* CONFIG_STACKTRACE */
3093
3094 /* created for use with alloc_percpu */
3095 struct trace_buffer_struct {
3096         int nesting;
3097         char buffer[4][TRACE_BUF_SIZE];
3098 };
3099
3100 static struct trace_buffer_struct *trace_percpu_buffer;
3101
3102 /*
3103  * Thise allows for lockless recording.  If we're nested too deeply, then
3104  * this returns NULL.
3105  */
3106 static char *get_trace_buf(void)
3107 {
3108         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3109
3110         if (!buffer || buffer->nesting >= 4)
3111                 return NULL;
3112
3113         buffer->nesting++;
3114
3115         /* Interrupts must see nesting incremented before we use the buffer */
3116         barrier();
3117         return &buffer->buffer[buffer->nesting][0];
3118 }
3119
3120 static void put_trace_buf(void)
3121 {
3122         /* Don't let the decrement of nesting leak before this */
3123         barrier();
3124         this_cpu_dec(trace_percpu_buffer->nesting);
3125 }
3126
3127 static int alloc_percpu_trace_buffer(void)
3128 {
3129         struct trace_buffer_struct *buffers;
3130
3131         if (trace_percpu_buffer)
3132                 return 0;
3133
3134         buffers = alloc_percpu(struct trace_buffer_struct);
3135         if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3136                 return -ENOMEM;
3137
3138         trace_percpu_buffer = buffers;
3139         return 0;
3140 }
3141
3142 static int buffers_allocated;
3143
3144 void trace_printk_init_buffers(void)
3145 {
3146         if (buffers_allocated)
3147                 return;
3148
3149         if (alloc_percpu_trace_buffer())
3150                 return;
3151
3152         /* trace_printk() is for debug use only. Don't use it in production. */
3153
3154         pr_warn("\n");
3155         pr_warn("**********************************************************\n");
3156         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3157         pr_warn("**                                                      **\n");
3158         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3159         pr_warn("**                                                      **\n");
3160         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3161         pr_warn("** unsafe for production use.                           **\n");
3162         pr_warn("**                                                      **\n");
3163         pr_warn("** If you see this message and you are not debugging    **\n");
3164         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3165         pr_warn("**                                                      **\n");
3166         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3167         pr_warn("**********************************************************\n");
3168
3169         /* Expand the buffers to set size */
3170         tracing_update_buffers();
3171
3172         buffers_allocated = 1;
3173
3174         /*
3175          * trace_printk_init_buffers() can be called by modules.
3176          * If that happens, then we need to start cmdline recording
3177          * directly here. If the global_trace.buffer is already
3178          * allocated here, then this was called by module code.
3179          */
3180         if (global_trace.array_buffer.buffer)
3181                 tracing_start_cmdline_record();
3182 }
3183 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3184
3185 void trace_printk_start_comm(void)
3186 {
3187         /* Start tracing comms if trace printk is set */
3188         if (!buffers_allocated)
3189                 return;
3190         tracing_start_cmdline_record();
3191 }
3192
3193 static void trace_printk_start_stop_comm(int enabled)
3194 {
3195         if (!buffers_allocated)
3196                 return;
3197
3198         if (enabled)
3199                 tracing_start_cmdline_record();
3200         else
3201                 tracing_stop_cmdline_record();
3202 }
3203
3204 /**
3205  * trace_vbprintk - write binary msg to tracing buffer
3206  * @ip:    The address of the caller
3207  * @fmt:   The string format to write to the buffer
3208  * @args:  Arguments for @fmt
3209  */
3210 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3211 {
3212         struct trace_event_call *call = &event_bprint;
3213         struct ring_buffer_event *event;
3214         struct trace_buffer *buffer;
3215         struct trace_array *tr = &global_trace;
3216         struct bprint_entry *entry;
3217         unsigned long flags;
3218         char *tbuffer;
3219         int len = 0, size, pc;
3220
3221         if (unlikely(tracing_selftest_running || tracing_disabled))
3222                 return 0;
3223
3224         /* Don't pollute graph traces with trace_vprintk internals */
3225         pause_graph_tracing();
3226
3227         pc = preempt_count();
3228         preempt_disable_notrace();
3229
3230         tbuffer = get_trace_buf();
3231         if (!tbuffer) {
3232                 len = 0;
3233                 goto out_nobuffer;
3234         }
3235
3236         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3237
3238         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3239                 goto out_put;
3240
3241         local_save_flags(flags);
3242         size = sizeof(*entry) + sizeof(u32) * len;
3243         buffer = tr->array_buffer.buffer;
3244         ring_buffer_nest_start(buffer);
3245         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3246                                             flags, pc);
3247         if (!event)
3248                 goto out;
3249         entry = ring_buffer_event_data(event);
3250         entry->ip                       = ip;
3251         entry->fmt                      = fmt;
3252
3253         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3254         if (!call_filter_check_discard(call, entry, buffer, event)) {
3255                 __buffer_unlock_commit(buffer, event);
3256                 ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
3257         }
3258
3259 out:
3260         ring_buffer_nest_end(buffer);
3261 out_put:
3262         put_trace_buf();
3263
3264 out_nobuffer:
3265         preempt_enable_notrace();
3266         unpause_graph_tracing();
3267
3268         return len;
3269 }
3270 EXPORT_SYMBOL_GPL(trace_vbprintk);
3271
3272 __printf(3, 0)
3273 static int
3274 __trace_array_vprintk(struct trace_buffer *buffer,
3275                       unsigned long ip, const char *fmt, va_list args)
3276 {
3277         struct trace_event_call *call = &event_print;
3278         struct ring_buffer_event *event;
3279         int len = 0, size, pc;
3280         struct print_entry *entry;
3281         unsigned long flags;
3282         char *tbuffer;
3283
3284         if (tracing_disabled || tracing_selftest_running)
3285                 return 0;
3286
3287         /* Don't pollute graph traces with trace_vprintk internals */
3288         pause_graph_tracing();
3289
3290         pc = preempt_count();
3291         preempt_disable_notrace();
3292
3293
3294         tbuffer = get_trace_buf();
3295         if (!tbuffer) {
3296                 len = 0;
3297                 goto out_nobuffer;
3298         }
3299
3300         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3301
3302         local_save_flags(flags);
3303         size = sizeof(*entry) + len + 1;
3304         ring_buffer_nest_start(buffer);
3305         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3306                                             flags, pc);
3307         if (!event)
3308                 goto out;
3309         entry = ring_buffer_event_data(event);
3310         entry->ip = ip;
3311
3312         memcpy(&entry->buf, tbuffer, len + 1);
3313         if (!call_filter_check_discard(call, entry, buffer, event)) {
3314                 __buffer_unlock_commit(buffer, event);
3315                 ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
3316         }
3317
3318 out:
3319         ring_buffer_nest_end(buffer);
3320         put_trace_buf();
3321
3322 out_nobuffer:
3323         preempt_enable_notrace();
3324         unpause_graph_tracing();
3325
3326         return len;
3327 }
3328
3329 __printf(3, 0)
3330 int trace_array_vprintk(struct trace_array *tr,
3331                         unsigned long ip, const char *fmt, va_list args)
3332 {
3333         return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3334 }
3335
3336 /**
3337  * trace_array_printk - Print a message to a specific instance
3338  * @tr: The instance trace_array descriptor
3339  * @ip: The instruction pointer that this is called from.
3340  * @fmt: The format to print (printf format)
3341  *
3342  * If a subsystem sets up its own instance, they have the right to
3343  * printk strings into their tracing instance buffer using this
3344  * function. Note, this function will not write into the top level
3345  * buffer (use trace_printk() for that), as writing into the top level
3346  * buffer should only have events that can be individually disabled.
3347  * trace_printk() is only used for debugging a kernel, and should not
3348  * be ever encorporated in normal use.
3349  *
3350  * trace_array_printk() can be used, as it will not add noise to the
3351  * top level tracing buffer.
3352  *
3353  * Note, trace_array_init_printk() must be called on @tr before this
3354  * can be used.
3355  */
3356 __printf(3, 0)
3357 int trace_array_printk(struct trace_array *tr,
3358                        unsigned long ip, const char *fmt, ...)
3359 {
3360         int ret;
3361         va_list ap;
3362
3363         if (!tr)
3364                 return -ENOENT;
3365
3366         /* This is only allowed for created instances */
3367         if (tr == &global_trace)
3368                 return 0;
3369
3370         if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3371                 return 0;
3372
3373         va_start(ap, fmt);
3374         ret = trace_array_vprintk(tr, ip, fmt, ap);
3375         va_end(ap);
3376         return ret;
3377 }
3378 EXPORT_SYMBOL_GPL(trace_array_printk);
3379
3380 /**
3381  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3382  * @tr: The trace array to initialize the buffers for
3383  *
3384  * As trace_array_printk() only writes into instances, they are OK to
3385  * have in the kernel (unlike trace_printk()). This needs to be called
3386  * before trace_array_printk() can be used on a trace_array.
3387  */
3388 int trace_array_init_printk(struct trace_array *tr)
3389 {
3390         if (!tr)
3391                 return -ENOENT;
3392
3393         /* This is only allowed for created instances */
3394         if (tr == &global_trace)
3395                 return -EINVAL;
3396
3397         return alloc_percpu_trace_buffer();
3398 }
3399 EXPORT_SYMBOL_GPL(trace_array_init_printk);
3400
3401 __printf(3, 4)
3402 int trace_array_printk_buf(struct trace_buffer *buffer,
3403                            unsigned long ip, const char *fmt, ...)
3404 {
3405         int ret;
3406         va_list ap;
3407
3408         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3409                 return 0;
3410
3411         va_start(ap, fmt);
3412         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3413         va_end(ap);
3414         return ret;
3415 }
3416
3417 __printf(2, 0)
3418 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3419 {
3420         return trace_array_vprintk(&global_trace, ip, fmt, args);
3421 }
3422 EXPORT_SYMBOL_GPL(trace_vprintk);
3423
3424 static void trace_iterator_increment(struct trace_iterator *iter)
3425 {
3426         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3427
3428         iter->idx++;
3429         if (buf_iter)
3430                 ring_buffer_iter_advance(buf_iter);
3431 }
3432
3433 static struct trace_entry *
3434 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3435                 unsigned long *lost_events)
3436 {
3437         struct ring_buffer_event *event;
3438         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3439
3440         if (buf_iter) {
3441                 event = ring_buffer_iter_peek(buf_iter, ts);
3442                 if (lost_events)
3443                         *lost_events = ring_buffer_iter_dropped(buf_iter) ?
3444                                 (unsigned long)-1 : 0;
3445         } else {
3446                 event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3447                                          lost_events);
3448         }
3449
3450         if (event) {
3451                 iter->ent_size = ring_buffer_event_length(event);
3452                 return ring_buffer_event_data(event);
3453         }
3454         iter->ent_size = 0;
3455         return NULL;
3456 }
3457
3458 static struct trace_entry *
3459 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3460                   unsigned long *missing_events, u64 *ent_ts)
3461 {
3462         struct trace_buffer *buffer = iter->array_buffer->buffer;
3463         struct trace_entry *ent, *next = NULL;
3464         unsigned long lost_events = 0, next_lost = 0;
3465         int cpu_file = iter->cpu_file;
3466         u64 next_ts = 0, ts;
3467         int next_cpu = -1;
3468         int next_size = 0;
3469         int cpu;
3470
3471         /*
3472          * If we are in a per_cpu trace file, don't bother by iterating over
3473          * all cpu and peek directly.
3474          */
3475         if (cpu_file > RING_BUFFER_ALL_CPUS) {
3476                 if (ring_buffer_empty_cpu(buffer, cpu_file))
3477                         return NULL;
3478                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3479                 if (ent_cpu)
3480                         *ent_cpu = cpu_file;
3481
3482                 return ent;
3483         }
3484
3485         for_each_tracing_cpu(cpu) {
3486
3487                 if (ring_buffer_empty_cpu(buffer, cpu))
3488                         continue;
3489
3490                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3491
3492                 /*
3493                  * Pick the entry with the smallest timestamp:
3494                  */
3495                 if (ent && (!next || ts < next_ts)) {
3496                         next = ent;
3497                         next_cpu = cpu;
3498                         next_ts = ts;
3499                         next_lost = lost_events;
3500                         next_size = iter->ent_size;
3501                 }
3502         }
3503
3504         iter->ent_size = next_size;
3505
3506         if (ent_cpu)
3507                 *ent_cpu = next_cpu;
3508
3509         if (ent_ts)
3510                 *ent_ts = next_ts;
3511
3512         if (missing_events)
3513                 *missing_events = next_lost;
3514
3515         return next;
3516 }
3517
3518 #define STATIC_TEMP_BUF_SIZE    128
3519 static char static_temp_buf[STATIC_TEMP_BUF_SIZE];
3520
3521 /* Find the next real entry, without updating the iterator itself */
3522 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3523                                           int *ent_cpu, u64 *ent_ts)
3524 {
3525         /* __find_next_entry will reset ent_size */
3526         int ent_size = iter->ent_size;
3527         struct trace_entry *entry;
3528
3529         /*
3530          * If called from ftrace_dump(), then the iter->temp buffer
3531          * will be the static_temp_buf and not created from kmalloc.
3532          * If the entry size is greater than the buffer, we can
3533          * not save it. Just return NULL in that case. This is only
3534          * used to add markers when two consecutive events' time
3535          * stamps have a large delta. See trace_print_lat_context()
3536          */
3537         if (iter->temp == static_temp_buf &&
3538             STATIC_TEMP_BUF_SIZE < ent_size)
3539                 return NULL;
3540
3541         /*
3542          * The __find_next_entry() may call peek_next_entry(), which may
3543          * call ring_buffer_peek() that may make the contents of iter->ent
3544          * undefined. Need to copy iter->ent now.
3545          */
3546         if (iter->ent && iter->ent != iter->temp) {
3547                 if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3548                     !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3549                         kfree(iter->temp);
3550                         iter->temp = kmalloc(iter->ent_size, GFP_KERNEL);
3551                         if (!iter->temp)
3552                                 return NULL;
3553                 }
3554                 memcpy(iter->temp, iter->ent, iter->ent_size);
3555                 iter->temp_size = iter->ent_size;
3556                 iter->ent = iter->temp;
3557         }
3558         entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3559         /* Put back the original ent_size */
3560         iter->ent_size = ent_size;
3561
3562         return entry;
3563 }
3564
3565 /* Find the next real entry, and increment the iterator to the next entry */
3566 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3567 {
3568         iter->ent = __find_next_entry(iter, &iter->cpu,
3569                                       &iter->lost_events, &iter->ts);
3570
3571         if (iter->ent)
3572                 trace_iterator_increment(iter);
3573
3574         return iter->ent ? iter : NULL;
3575 }
3576
3577 static void trace_consume(struct trace_iterator *iter)
3578 {
3579         ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3580                             &iter->lost_events);
3581 }
3582
3583 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3584 {
3585         struct trace_iterator *iter = m->private;
3586         int i = (int)*pos;
3587         void *ent;
3588
3589         WARN_ON_ONCE(iter->leftover);
3590
3591         (*pos)++;
3592
3593         /* can't go backwards */
3594         if (iter->idx > i)
3595                 return NULL;
3596
3597         if (iter->idx < 0)
3598                 ent = trace_find_next_entry_inc(iter);
3599         else
3600                 ent = iter;
3601
3602         while (ent && iter->idx < i)
3603                 ent = trace_find_next_entry_inc(iter);
3604
3605         iter->pos = *pos;
3606
3607         return ent;
3608 }
3609
3610 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3611 {
3612         struct ring_buffer_iter *buf_iter;
3613         unsigned long entries = 0;
3614         u64 ts;
3615
3616         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
3617
3618         buf_iter = trace_buffer_iter(iter, cpu);
3619         if (!buf_iter)
3620                 return;
3621
3622         ring_buffer_iter_reset(buf_iter);
3623
3624         /*
3625          * We could have the case with the max latency tracers
3626          * that a reset never took place on a cpu. This is evident
3627          * by the timestamp being before the start of the buffer.
3628          */
3629         while (ring_buffer_iter_peek(buf_iter, &ts)) {
3630                 if (ts >= iter->array_buffer->time_start)
3631                         break;
3632                 entries++;
3633                 ring_buffer_iter_advance(buf_iter);
3634         }
3635
3636         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
3637 }
3638
3639 /*
3640  * The current tracer is copied to avoid a global locking
3641  * all around.
3642  */
3643 static void *s_start(struct seq_file *m, loff_t *pos)
3644 {
3645         struct trace_iterator *iter = m->private;
3646         struct trace_array *tr = iter->tr;
3647         int cpu_file = iter->cpu_file;
3648         void *p = NULL;
3649         loff_t l = 0;
3650         int cpu;
3651
3652         /*
3653          * copy the tracer to avoid using a global lock all around.
3654          * iter->trace is a copy of current_trace, the pointer to the
3655          * name may be used instead of a strcmp(), as iter->trace->name
3656          * will point to the same string as current_trace->name.
3657          */
3658         mutex_lock(&trace_types_lock);
3659         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3660                 *iter->trace = *tr->current_trace;
3661         mutex_unlock(&trace_types_lock);
3662
3663 #ifdef CONFIG_TRACER_MAX_TRACE
3664         if (iter->snapshot && iter->trace->use_max_tr)
3665                 return ERR_PTR(-EBUSY);
3666 #endif
3667
3668         if (!iter->snapshot)
3669                 atomic_inc(&trace_record_taskinfo_disabled);
3670
3671         if (*pos != iter->pos) {
3672                 iter->ent = NULL;
3673                 iter->cpu = 0;
3674                 iter->idx = -1;
3675
3676                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
3677                         for_each_tracing_cpu(cpu)
3678                                 tracing_iter_reset(iter, cpu);
3679                 } else
3680                         tracing_iter_reset(iter, cpu_file);
3681
3682                 iter->leftover = 0;
3683                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3684                         ;
3685
3686         } else {
3687                 /*
3688                  * If we overflowed the seq_file before, then we want
3689                  * to just reuse the trace_seq buffer again.
3690                  */
3691                 if (iter->leftover)
3692                         p = iter;
3693                 else {
3694                         l = *pos - 1;
3695                         p = s_next(m, p, &l);
3696                 }
3697         }
3698
3699         trace_event_read_lock();
3700         trace_access_lock(cpu_file);
3701         return p;
3702 }
3703
3704 static void s_stop(struct seq_file *m, void *p)
3705 {
3706         struct trace_iterator *iter = m->private;
3707
3708 #ifdef CONFIG_TRACER_MAX_TRACE
3709         if (iter->snapshot && iter->trace->use_max_tr)
3710                 return;
3711 #endif
3712
3713         if (!iter->snapshot)
3714                 atomic_dec(&trace_record_taskinfo_disabled);
3715
3716         trace_access_unlock(iter->cpu_file);
3717         trace_event_read_unlock();
3718 }
3719
3720 static void
3721 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
3722                       unsigned long *entries, int cpu)
3723 {
3724         unsigned long count;
3725
3726         count = ring_buffer_entries_cpu(buf->buffer, cpu);
3727         /*
3728          * If this buffer has skipped entries, then we hold all
3729          * entries for the trace and we need to ignore the
3730          * ones before the time stamp.
3731          */
3732         if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3733                 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3734                 /* total is the same as the entries */
3735                 *total = count;
3736         } else
3737                 *total = count +
3738                         ring_buffer_overrun_cpu(buf->buffer, cpu);
3739         *entries = count;
3740 }
3741
3742 static void
3743 get_total_entries(struct array_buffer *buf,
3744                   unsigned long *total, unsigned long *entries)
3745 {
3746         unsigned long t, e;
3747         int cpu;
3748
3749         *total = 0;
3750         *entries = 0;
3751
3752         for_each_tracing_cpu(cpu) {
3753                 get_total_entries_cpu(buf, &t, &e, cpu);
3754                 *total += t;
3755                 *entries += e;
3756         }
3757 }
3758
3759 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
3760 {
3761         unsigned long total, entries;
3762
3763         if (!tr)
3764                 tr = &global_trace;
3765
3766         get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
3767
3768         return entries;
3769 }
3770
3771 unsigned long trace_total_entries(struct trace_array *tr)
3772 {
3773         unsigned long total, entries;
3774
3775         if (!tr)
3776                 tr = &global_trace;
3777
3778         get_total_entries(&tr->array_buffer, &total, &entries);
3779
3780         return entries;
3781 }
3782
3783 static void print_lat_help_header(struct seq_file *m)
3784 {
3785         seq_puts(m, "#                  _------=> CPU#            \n"
3786                     "#                 / _-----=> irqs-off        \n"
3787                     "#                | / _----=> need-resched    \n"
3788                     "#                || / _---=> hardirq/softirq \n"
3789                     "#                ||| / _--=> preempt-depth   \n"
3790                     "#                |||| /     delay            \n"
3791                     "#  cmd     pid   ||||| time  |   caller      \n"
3792                     "#     \\   /      |||||  \\    |   /         \n");
3793 }
3794
3795 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
3796 {
3797         unsigned long total;
3798         unsigned long entries;
3799
3800         get_total_entries(buf, &total, &entries);
3801         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
3802                    entries, total, num_online_cpus());
3803         seq_puts(m, "#\n");
3804 }
3805
3806 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
3807                                    unsigned int flags)
3808 {
3809         bool tgid = flags & TRACE_ITER_RECORD_TGID;
3810
3811         print_event_info(buf, m);
3812
3813         seq_printf(m, "#           TASK-PID   %s  CPU#   TIMESTAMP  FUNCTION\n", tgid ? "TGID     " : "");
3814         seq_printf(m, "#              | |     %s    |       |         |\n",      tgid ? "  |      " : "");
3815 }
3816
3817 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
3818                                        unsigned int flags)
3819 {
3820         bool tgid = flags & TRACE_ITER_RECORD_TGID;
3821         const char *space = "          ";
3822         int prec = tgid ? 10 : 2;
3823
3824         print_event_info(buf, m);
3825
3826         seq_printf(m, "#                          %.*s  _-----=> irqs-off\n", prec, space);
3827         seq_printf(m, "#                          %.*s / _----=> need-resched\n", prec, space);
3828         seq_printf(m, "#                          %.*s| / _---=> hardirq/softirq\n", prec, space);
3829         seq_printf(m, "#                          %.*s|| / _--=> preempt-depth\n", prec, space);
3830         seq_printf(m, "#                          %.*s||| /     delay\n", prec, space);
3831         seq_printf(m, "#           TASK-PID %.*sCPU#  ||||    TIMESTAMP  FUNCTION\n", prec, "   TGID   ");
3832         seq_printf(m, "#              | |   %.*s  |   ||||       |         |\n", prec, "     |    ");
3833 }
3834
3835 void
3836 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3837 {
3838         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3839         struct array_buffer *buf = iter->array_buffer;
3840         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3841         struct tracer *type = iter->trace;
3842         unsigned long entries;
3843         unsigned long total;
3844         const char *name = "preemption";
3845
3846         name = type->name;
3847
3848         get_total_entries(buf, &total, &entries);
3849
3850         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3851                    name, UTS_RELEASE);
3852         seq_puts(m, "# -----------------------------------"
3853                  "---------------------------------\n");
3854         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3855                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3856                    nsecs_to_usecs(data->saved_latency),
3857                    entries,
3858                    total,
3859                    buf->cpu,
3860 #if defined(CONFIG_PREEMPT_NONE)
3861                    "server",
3862 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3863                    "desktop",
3864 #elif defined(CONFIG_PREEMPT)
3865                    "preempt",
3866 #elif defined(CONFIG_PREEMPT_RT)
3867                    "preempt_rt",
3868 #else
3869                    "unknown",
3870 #endif
3871                    /* These are reserved for later use */
3872                    0, 0, 0, 0);
3873 #ifdef CONFIG_SMP
3874         seq_printf(m, " #P:%d)\n", num_online_cpus());
3875 #else
3876         seq_puts(m, ")\n");
3877 #endif
3878         seq_puts(m, "#    -----------------\n");
3879         seq_printf(m, "#    | task: %.16s-%d "
3880                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3881                    data->comm, data->pid,
3882                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3883                    data->policy, data->rt_priority);
3884         seq_puts(m, "#    -----------------\n");
3885
3886         if (data->critical_start) {
3887                 seq_puts(m, "#  => started at: ");
3888                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3889                 trace_print_seq(m, &iter->seq);
3890                 seq_puts(m, "\n#  => ended at:   ");
3891                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3892                 trace_print_seq(m, &iter->seq);
3893                 seq_puts(m, "\n#\n");
3894         }
3895
3896         seq_puts(m, "#\n");
3897 }
3898
3899 static void test_cpu_buff_start(struct trace_iterator *iter)
3900 {
3901         struct trace_seq *s = &iter->seq;
3902         struct trace_array *tr = iter->tr;
3903
3904         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3905                 return;
3906
3907         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3908                 return;
3909
3910         if (cpumask_available(iter->started) &&
3911             cpumask_test_cpu(iter->cpu, iter->started))
3912                 return;
3913
3914         if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
3915                 return;
3916
3917         if (cpumask_available(iter->started))
3918                 cpumask_set_cpu(iter->cpu, iter->started);
3919
3920         /* Don't print started cpu buffer for the first entry of the trace */
3921         if (iter->idx > 1)
3922                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3923                                 iter->cpu);
3924 }
3925
3926 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3927 {
3928         struct trace_array *tr = iter->tr;
3929         struct trace_seq *s = &iter->seq;
3930         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3931         struct trace_entry *entry;
3932         struct trace_event *event;
3933
3934         entry = iter->ent;
3935
3936         test_cpu_buff_start(iter);
3937
3938         event = ftrace_find_event(entry->type);
3939
3940         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3941                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3942                         trace_print_lat_context(iter);
3943                 else
3944                         trace_print_context(iter);
3945         }
3946
3947         if (trace_seq_has_overflowed(s))
3948                 return TRACE_TYPE_PARTIAL_LINE;
3949
3950         if (event)
3951                 return event->funcs->trace(iter, sym_flags, event);
3952
3953         trace_seq_printf(s, "Unknown type %d\n", entry->type);
3954
3955         return trace_handle_return(s);
3956 }
3957
3958 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3959 {
3960         struct trace_array *tr = iter->tr;
3961         struct trace_seq *s = &iter->seq;
3962         struct trace_entry *entry;
3963         struct trace_event *event;
3964
3965         entry = iter->ent;
3966
3967         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3968                 trace_seq_printf(s, "%d %d %llu ",
3969                                  entry->pid, iter->cpu, iter->ts);
3970
3971         if (trace_seq_has_overflowed(s))
3972                 return TRACE_TYPE_PARTIAL_LINE;
3973
3974         event = ftrace_find_event(entry->type);
3975         if (event)
3976                 return event->funcs->raw(iter, 0, event);
3977
3978         trace_seq_printf(s, "%d ?\n", entry->type);
3979
3980         return trace_handle_return(s);
3981 }
3982
3983 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3984 {
3985         struct trace_array *tr = iter->tr;
3986         struct trace_seq *s = &iter->seq;
3987         unsigned char newline = '\n';
3988         struct trace_entry *entry;
3989         struct trace_event *event;
3990
3991         entry = iter->ent;
3992
3993         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3994                 SEQ_PUT_HEX_FIELD(s, entry->pid);
3995                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
3996                 SEQ_PUT_HEX_FIELD(s, iter->ts);
3997                 if (trace_seq_has_overflowed(s))
3998                         return TRACE_TYPE_PARTIAL_LINE;
3999         }
4000
4001         event = ftrace_find_event(entry->type);
4002         if (event) {
4003                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
4004                 if (ret != TRACE_TYPE_HANDLED)
4005                         return ret;
4006         }
4007
4008         SEQ_PUT_FIELD(s, newline);
4009
4010         return trace_handle_return(s);
4011 }
4012
4013 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4014 {
4015         struct trace_array *tr = iter->tr;
4016         struct trace_seq *s = &iter->seq;
4017         struct trace_entry *entry;
4018         struct trace_event *event;
4019
4020         entry = iter->ent;
4021
4022         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4023                 SEQ_PUT_FIELD(s, entry->pid);
4024                 SEQ_PUT_FIELD(s, iter->cpu);
4025                 SEQ_PUT_FIELD(s, iter->ts);
4026                 if (trace_seq_has_overflowed(s))
4027                         return TRACE_TYPE_PARTIAL_LINE;
4028         }
4029
4030         event = ftrace_find_event(entry->type);
4031         return event ? event->funcs->binary(iter, 0, event) :
4032                 TRACE_TYPE_HANDLED;
4033 }
4034
4035 int trace_empty(struct trace_iterator *iter)
4036 {
4037         struct ring_buffer_iter *buf_iter;
4038         int cpu;
4039
4040         /* If we are looking at one CPU buffer, only check that one */
4041         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4042                 cpu = iter->cpu_file;
4043                 buf_iter = trace_buffer_iter(iter, cpu);
4044                 if (buf_iter) {
4045                         if (!ring_buffer_iter_empty(buf_iter))
4046                                 return 0;
4047                 } else {
4048                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4049                                 return 0;
4050                 }
4051                 return 1;
4052         }
4053
4054         for_each_tracing_cpu(cpu) {
4055                 buf_iter = trace_buffer_iter(iter, cpu);
4056                 if (buf_iter) {
4057                         if (!ring_buffer_iter_empty(buf_iter))
4058                                 return 0;
4059                 } else {
4060                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4061                                 return 0;
4062                 }
4063         }
4064
4065         return 1;
4066 }
4067
4068 /*  Called with trace_event_read_lock() held. */
4069 enum print_line_t print_trace_line(struct trace_iterator *iter)
4070 {
4071         struct trace_array *tr = iter->tr;
4072         unsigned long trace_flags = tr->trace_flags;
4073         enum print_line_t ret;
4074
4075         if (iter->lost_events) {
4076                 if (iter->lost_events == (unsigned long)-1)
4077                         trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4078                                          iter->cpu);
4079                 else
4080                         trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4081                                          iter->cpu, iter->lost_events);
4082                 if (trace_seq_has_overflowed(&iter->seq))
4083                         return TRACE_TYPE_PARTIAL_LINE;
4084         }
4085
4086         if (iter->trace && iter->trace->print_line) {
4087                 ret = iter->trace->print_line(iter);
4088                 if (ret != TRACE_TYPE_UNHANDLED)
4089                         return ret;
4090         }
4091
4092         if (iter->ent->type == TRACE_BPUTS &&
4093                         trace_flags & TRACE_ITER_PRINTK &&
4094                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4095                 return trace_print_bputs_msg_only(iter);
4096
4097         if (iter->ent->type == TRACE_BPRINT &&
4098                         trace_flags & TRACE_ITER_PRINTK &&
4099                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4100                 return trace_print_bprintk_msg_only(iter);
4101
4102         if (iter->ent->type == TRACE_PRINT &&
4103                         trace_flags & TRACE_ITER_PRINTK &&
4104                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4105                 return trace_print_printk_msg_only(iter);
4106
4107         if (trace_flags & TRACE_ITER_BIN)
4108                 return print_bin_fmt(iter);
4109
4110         if (trace_flags & TRACE_ITER_HEX)
4111                 return print_hex_fmt(iter);
4112
4113         if (trace_flags & TRACE_ITER_RAW)
4114                 return print_raw_fmt(iter);
4115
4116         return print_trace_fmt(iter);
4117 }
4118
4119 void trace_latency_header(struct seq_file *m)
4120 {
4121         struct trace_iterator *iter = m->private;
4122         struct trace_array *tr = iter->tr;
4123
4124         /* print nothing if the buffers are empty */
4125         if (trace_empty(iter))
4126                 return;
4127
4128         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4129                 print_trace_header(m, iter);
4130
4131         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4132                 print_lat_help_header(m);
4133 }
4134
4135 void trace_default_header(struct seq_file *m)
4136 {
4137         struct trace_iterator *iter = m->private;
4138         struct trace_array *tr = iter->tr;
4139         unsigned long trace_flags = tr->trace_flags;
4140
4141         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4142                 return;
4143
4144         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4145                 /* print nothing if the buffers are empty */
4146                 if (trace_empty(iter))
4147                         return;
4148                 print_trace_header(m, iter);
4149                 if (!(trace_flags & TRACE_ITER_VERBOSE))
4150                         print_lat_help_header(m);
4151         } else {
4152                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4153                         if (trace_flags & TRACE_ITER_IRQ_INFO)
4154                                 print_func_help_header_irq(iter->array_buffer,
4155                                                            m, trace_flags);
4156                         else
4157                                 print_func_help_header(iter->array_buffer, m,
4158                                                        trace_flags);
4159                 }
4160         }
4161 }
4162
4163 static void test_ftrace_alive(struct seq_file *m)
4164 {
4165         if (!ftrace_is_dead())
4166                 return;
4167         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4168                     "#          MAY BE MISSING FUNCTION EVENTS\n");
4169 }
4170
4171 #ifdef CONFIG_TRACER_MAX_TRACE
4172 static void show_snapshot_main_help(struct seq_file *m)
4173 {
4174         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4175                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4176                     "#                      Takes a snapshot of the main buffer.\n"
4177                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4178                     "#                      (Doesn't have to be '2' works with any number that\n"
4179                     "#                       is not a '0' or '1')\n");
4180 }
4181
4182 static void show_snapshot_percpu_help(struct seq_file *m)
4183 {
4184         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4185 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4186         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4187                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
4188 #else
4189         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4190                     "#                     Must use main snapshot file to allocate.\n");
4191 #endif
4192         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4193                     "#                      (Doesn't have to be '2' works with any number that\n"
4194                     "#                       is not a '0' or '1')\n");
4195 }
4196
4197 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4198 {
4199         if (iter->tr->allocated_snapshot)
4200                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4201         else
4202                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4203
4204         seq_puts(m, "# Snapshot commands:\n");
4205         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4206                 show_snapshot_main_help(m);
4207         else
4208                 show_snapshot_percpu_help(m);
4209 }
4210 #else
4211 /* Should never be called */
4212 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4213 #endif
4214
4215 static int s_show(struct seq_file *m, void *v)
4216 {
4217         struct trace_iterator *iter = v;
4218         int ret;
4219
4220         if (iter->ent == NULL) {
4221                 if (iter->tr) {
4222                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
4223                         seq_puts(m, "#\n");
4224                         test_ftrace_alive(m);
4225                 }
4226                 if (iter->snapshot && trace_empty(iter))
4227                         print_snapshot_help(m, iter);
4228                 else if (iter->trace && iter->trace->print_header)
4229                         iter->trace->print_header(m);
4230                 else
4231                         trace_default_header(m);
4232
4233         } else if (iter->leftover) {
4234                 /*
4235                  * If we filled the seq_file buffer earlier, we
4236                  * want to just show it now.
4237                  */
4238                 ret = trace_print_seq(m, &iter->seq);
4239
4240                 /* ret should this time be zero, but you never know */
4241                 iter->leftover = ret;
4242
4243         } else {
4244                 print_trace_line(iter);
4245                 ret = trace_print_seq(m, &iter->seq);
4246                 /*
4247                  * If we overflow the seq_file buffer, then it will
4248                  * ask us for this data again at start up.
4249                  * Use that instead.
4250                  *  ret is 0 if seq_file write succeeded.
4251                  *        -1 otherwise.
4252                  */
4253                 iter->leftover = ret;
4254         }
4255
4256         return 0;
4257 }
4258
4259 /*
4260  * Should be used after trace_array_get(), trace_types_lock
4261  * ensures that i_cdev was already initialized.
4262  */
4263 static inline int tracing_get_cpu(struct inode *inode)
4264 {
4265         if (inode->i_cdev) /* See trace_create_cpu_file() */
4266                 return (long)inode->i_cdev - 1;
4267         return RING_BUFFER_ALL_CPUS;
4268 }
4269
4270 static const struct seq_operations tracer_seq_ops = {
4271         .start          = s_start,
4272         .next           = s_next,
4273         .stop           = s_stop,
4274         .show           = s_show,
4275 };
4276
4277 static struct trace_iterator *
4278 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4279 {
4280         struct trace_array *tr = inode->i_private;
4281         struct trace_iterator *iter;
4282         int cpu;
4283
4284         if (tracing_disabled)
4285                 return ERR_PTR(-ENODEV);
4286
4287         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4288         if (!iter)
4289                 return ERR_PTR(-ENOMEM);
4290
4291         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4292                                     GFP_KERNEL);
4293         if (!iter->buffer_iter)
4294                 goto release;
4295
4296         /*
4297          * trace_find_next_entry() may need to save off iter->ent.
4298          * It will place it into the iter->temp buffer. As most
4299          * events are less than 128, allocate a buffer of that size.
4300          * If one is greater, then trace_find_next_entry() will
4301          * allocate a new buffer to adjust for the bigger iter->ent.
4302          * It's not critical if it fails to get allocated here.
4303          */
4304         iter->temp = kmalloc(128, GFP_KERNEL);
4305         if (iter->temp)
4306                 iter->temp_size = 128;
4307
4308         /*
4309          * We make a copy of the current tracer to avoid concurrent
4310          * changes on it while we are reading.
4311          */
4312         mutex_lock(&trace_types_lock);
4313         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4314         if (!iter->trace)
4315                 goto fail;
4316
4317         *iter->trace = *tr->current_trace;
4318
4319         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4320                 goto fail;
4321
4322         iter->tr = tr;
4323
4324 #ifdef CONFIG_TRACER_MAX_TRACE
4325         /* Currently only the top directory has a snapshot */
4326         if (tr->current_trace->print_max || snapshot)
4327                 iter->array_buffer = &tr->max_buffer;
4328         else
4329 #endif
4330                 iter->array_buffer = &tr->array_buffer;
4331         iter->snapshot = snapshot;
4332         iter->pos = -1;
4333         iter->cpu_file = tracing_get_cpu(inode);
4334         mutex_init(&iter->mutex);
4335
4336         /* Notify the tracer early; before we stop tracing. */
4337         if (iter->trace->open)
4338                 iter->trace->open(iter);
4339
4340         /* Annotate start of buffers if we had overruns */
4341         if (ring_buffer_overruns(iter->array_buffer->buffer))
4342                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4343
4344         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4345         if (trace_clocks[tr->clock_id].in_ns)
4346                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4347
4348         /*
4349          * If pause-on-trace is enabled, then stop the trace while
4350          * dumping, unless this is the "snapshot" file
4351          */
4352         if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4353                 tracing_stop_tr(tr);
4354
4355         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4356                 for_each_tracing_cpu(cpu) {
4357                         iter->buffer_iter[cpu] =
4358                                 ring_buffer_read_prepare(iter->array_buffer->buffer,
4359                                                          cpu, GFP_KERNEL);
4360                 }
4361                 ring_buffer_read_prepare_sync();
4362                 for_each_tracing_cpu(cpu) {
4363                         ring_buffer_read_start(iter->buffer_iter[cpu]);
4364                         tracing_iter_reset(iter, cpu);
4365                 }
4366         } else {
4367                 cpu = iter->cpu_file;
4368                 iter->buffer_iter[cpu] =
4369                         ring_buffer_read_prepare(iter->array_buffer->buffer,
4370                                                  cpu, GFP_KERNEL);
4371                 ring_buffer_read_prepare_sync();
4372                 ring_buffer_read_start(iter->buffer_iter[cpu]);
4373                 tracing_iter_reset(iter, cpu);
4374         }
4375
4376         mutex_unlock(&trace_types_lock);
4377
4378         return iter;
4379
4380  fail:
4381         mutex_unlock(&trace_types_lock);
4382         kfree(iter->trace);
4383         kfree(iter->temp);
4384         kfree(iter->buffer_iter);
4385 release:
4386         seq_release_private(inode, file);
4387         return ERR_PTR(-ENOMEM);
4388 }
4389
4390 int tracing_open_generic(struct inode *inode, struct file *filp)
4391 {
4392         int ret;
4393
4394         ret = tracing_check_open_get_tr(NULL);
4395         if (ret)
4396                 return ret;
4397
4398         filp->private_data = inode->i_private;
4399         return 0;
4400 }
4401
4402 bool tracing_is_disabled(void)
4403 {
4404         return (tracing_disabled) ? true: false;
4405 }
4406
4407 /*
4408  * Open and update trace_array ref count.
4409  * Must have the current trace_array passed to it.
4410  */
4411 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4412 {
4413         struct trace_array *tr = inode->i_private;
4414         int ret;
4415
4416         ret = tracing_check_open_get_tr(tr);
4417         if (ret)
4418                 return ret;
4419
4420         filp->private_data = inode->i_private;
4421
4422         return 0;
4423 }
4424
4425 static int tracing_release(struct inode *inode, struct file *file)
4426 {
4427         struct trace_array *tr = inode->i_private;
4428         struct seq_file *m = file->private_data;
4429         struct trace_iterator *iter;
4430         int cpu;
4431
4432         if (!(file->f_mode & FMODE_READ)) {
4433                 trace_array_put(tr);
4434                 return 0;
4435         }
4436
4437         /* Writes do not use seq_file */
4438         iter = m->private;
4439         mutex_lock(&trace_types_lock);
4440
4441         for_each_tracing_cpu(cpu) {
4442                 if (iter->buffer_iter[cpu])
4443                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
4444         }
4445
4446         if (iter->trace && iter->trace->close)
4447                 iter->trace->close(iter);
4448
4449         if (!iter->snapshot && tr->stop_count)
4450                 /* reenable tracing if it was previously enabled */
4451                 tracing_start_tr(tr);
4452
4453         __trace_array_put(tr);
4454
4455         mutex_unlock(&trace_types_lock);
4456
4457         mutex_destroy(&iter->mutex);
4458         free_cpumask_var(iter->started);
4459         kfree(iter->temp);
4460         kfree(iter->trace);
4461         kfree(iter->buffer_iter);
4462         seq_release_private(inode, file);
4463
4464         return 0;
4465 }
4466
4467 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4468 {
4469         struct trace_array *tr = inode->i_private;
4470
4471         trace_array_put(tr);
4472         return 0;
4473 }
4474
4475 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4476 {
4477         struct trace_array *tr = inode->i_private;
4478
4479         trace_array_put(tr);
4480
4481         return single_release(inode, file);
4482 }
4483
4484 static int tracing_open(struct inode *inode, struct file *file)
4485 {
4486         struct trace_array *tr = inode->i_private;
4487         struct trace_iterator *iter;
4488         int ret;
4489
4490         ret = tracing_check_open_get_tr(tr);
4491         if (ret)
4492                 return ret;
4493
4494         /* If this file was open for write, then erase contents */
4495         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4496                 int cpu = tracing_get_cpu(inode);
4497                 struct array_buffer *trace_buf = &tr->array_buffer;
4498
4499 #ifdef CONFIG_TRACER_MAX_TRACE
4500                 if (tr->current_trace->print_max)
4501                         trace_buf = &tr->max_buffer;
4502 #endif
4503
4504                 if (cpu == RING_BUFFER_ALL_CPUS)
4505                         tracing_reset_online_cpus(trace_buf);
4506                 else
4507                         tracing_reset_cpu(trace_buf, cpu);
4508         }
4509
4510         if (file->f_mode & FMODE_READ) {
4511                 iter = __tracing_open(inode, file, false);
4512                 if (IS_ERR(iter))
4513                         ret = PTR_ERR(iter);
4514                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4515                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
4516         }
4517
4518         if (ret < 0)
4519                 trace_array_put(tr);
4520
4521         return ret;
4522 }
4523
4524 /*
4525  * Some tracers are not suitable for instance buffers.
4526  * A tracer is always available for the global array (toplevel)
4527  * or if it explicitly states that it is.
4528  */
4529 static bool
4530 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4531 {
4532         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4533 }
4534
4535 /* Find the next tracer that this trace array may use */
4536 static struct tracer *
4537 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4538 {
4539         while (t && !trace_ok_for_array(t, tr))
4540                 t = t->next;
4541
4542         return t;
4543 }
4544
4545 static void *
4546 t_next(struct seq_file *m, void *v, loff_t *pos)
4547 {
4548         struct trace_array *tr = m->private;
4549         struct tracer *t = v;
4550
4551         (*pos)++;
4552
4553         if (t)
4554                 t = get_tracer_for_array(tr, t->next);
4555
4556         return t;
4557 }
4558
4559 static void *t_start(struct seq_file *m, loff_t *pos)
4560 {
4561         struct trace_array *tr = m->private;
4562         struct tracer *t;
4563         loff_t l = 0;
4564
4565         mutex_lock(&trace_types_lock);
4566
4567         t = get_tracer_for_array(tr, trace_types);
4568         for (; t && l < *pos; t = t_next(m, t, &l))
4569                         ;
4570
4571         return t;
4572 }
4573
4574 static void t_stop(struct seq_file *m, void *p)
4575 {
4576         mutex_unlock(&trace_types_lock);
4577 }
4578
4579 static int t_show(struct seq_file *m, void *v)
4580 {
4581         struct tracer *t = v;
4582
4583         if (!t)
4584                 return 0;
4585
4586         seq_puts(m, t->name);
4587         if (t->next)
4588                 seq_putc(m, ' ');
4589         else
4590                 seq_putc(m, '\n');
4591
4592         return 0;
4593 }
4594
4595 static const struct seq_operations show_traces_seq_ops = {
4596         .start          = t_start,
4597         .next           = t_next,
4598         .stop           = t_stop,
4599         .show           = t_show,
4600 };
4601
4602 static int show_traces_open(struct inode *inode, struct file *file)
4603 {
4604         struct trace_array *tr = inode->i_private;
4605         struct seq_file *m;
4606         int ret;
4607
4608         ret = tracing_check_open_get_tr(tr);
4609         if (ret)
4610                 return ret;
4611
4612         ret = seq_open(file, &show_traces_seq_ops);
4613         if (ret) {
4614                 trace_array_put(tr);
4615                 return ret;
4616         }
4617
4618         m = file->private_data;
4619         m->private = tr;
4620
4621         return 0;
4622 }
4623
4624 static int show_traces_release(struct inode *inode, struct file *file)
4625 {
4626         struct trace_array *tr = inode->i_private;
4627
4628         trace_array_put(tr);
4629         return seq_release(inode, file);
4630 }
4631
4632 static ssize_t
4633 tracing_write_stub(struct file *filp, const char __user *ubuf,
4634                    size_t count, loff_t *ppos)
4635 {
4636         return count;
4637 }
4638
4639 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4640 {
4641         int ret;
4642
4643         if (file->f_mode & FMODE_READ)
4644                 ret = seq_lseek(file, offset, whence);
4645         else
4646                 file->f_pos = ret = 0;
4647
4648         return ret;
4649 }
4650
4651 static const struct file_operations tracing_fops = {
4652         .open           = tracing_open,
4653         .read           = seq_read,
4654         .write          = tracing_write_stub,
4655         .llseek         = tracing_lseek,
4656         .release        = tracing_release,
4657 };
4658
4659 static const struct file_operations show_traces_fops = {
4660         .open           = show_traces_open,
4661         .read           = seq_read,
4662         .llseek         = seq_lseek,
4663         .release        = show_traces_release,
4664 };
4665
4666 static ssize_t
4667 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4668                      size_t count, loff_t *ppos)
4669 {
4670         struct trace_array *tr = file_inode(filp)->i_private;
4671         char *mask_str;
4672         int len;
4673
4674         len = snprintf(NULL, 0, "%*pb\n",
4675                        cpumask_pr_args(tr->tracing_cpumask)) + 1;
4676         mask_str = kmalloc(len, GFP_KERNEL);
4677         if (!mask_str)
4678                 return -ENOMEM;
4679
4680         len = snprintf(mask_str, len, "%*pb\n",
4681                        cpumask_pr_args(tr->tracing_cpumask));
4682         if (len >= count) {
4683                 count = -EINVAL;
4684                 goto out_err;
4685         }
4686         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
4687
4688 out_err:
4689         kfree(mask_str);
4690
4691         return count;
4692 }
4693
4694 int tracing_set_cpumask(struct trace_array *tr,
4695                         cpumask_var_t tracing_cpumask_new)
4696 {
4697         int cpu;
4698
4699         if (!tr)
4700                 return -EINVAL;
4701
4702         local_irq_disable();
4703         arch_spin_lock(&tr->max_lock);
4704         for_each_tracing_cpu(cpu) {
4705                 /*
4706                  * Increase/decrease the disabled counter if we are
4707                  * about to flip a bit in the cpumask:
4708                  */
4709                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4710                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4711                         atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
4712                         ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
4713                 }
4714                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4715                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4716                         atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
4717                         ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
4718                 }
4719         }
4720         arch_spin_unlock(&tr->max_lock);
4721         local_irq_enable();
4722
4723         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4724
4725         return 0;
4726 }
4727
4728 static ssize_t
4729 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4730                       size_t count, loff_t *ppos)
4731 {
4732         struct trace_array *tr = file_inode(filp)->i_private;
4733         cpumask_var_t tracing_cpumask_new;
4734         int err;
4735
4736         if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4737                 return -ENOMEM;
4738
4739         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4740         if (err)
4741                 goto err_free;
4742
4743         err = tracing_set_cpumask(tr, tracing_cpumask_new);
4744         if (err)
4745                 goto err_free;
4746
4747         free_cpumask_var(tracing_cpumask_new);
4748
4749         return count;
4750
4751 err_free:
4752         free_cpumask_var(tracing_cpumask_new);
4753
4754         return err;
4755 }
4756
4757 static const struct file_operations tracing_cpumask_fops = {
4758         .open           = tracing_open_generic_tr,
4759         .read           = tracing_cpumask_read,
4760         .write          = tracing_cpumask_write,
4761         .release        = tracing_release_generic_tr,
4762         .llseek         = generic_file_llseek,
4763 };
4764
4765 static int tracing_trace_options_show(struct seq_file *m, void *v)
4766 {
4767         struct tracer_opt *trace_opts;
4768         struct trace_array *tr = m->private;
4769         u32 tracer_flags;
4770         int i;
4771
4772         mutex_lock(&trace_types_lock);
4773         tracer_flags = tr->current_trace->flags->val;
4774         trace_opts = tr->current_trace->flags->opts;
4775
4776         for (i = 0; trace_options[i]; i++) {
4777                 if (tr->trace_flags & (1 << i))
4778                         seq_printf(m, "%s\n", trace_options[i]);
4779                 else
4780                         seq_printf(m, "no%s\n", trace_options[i]);
4781         }
4782
4783         for (i = 0; trace_opts[i].name; i++) {
4784                 if (tracer_flags & trace_opts[i].bit)
4785                         seq_printf(m, "%s\n", trace_opts[i].name);
4786                 else
4787                         seq_printf(m, "no%s\n", trace_opts[i].name);
4788         }
4789         mutex_unlock(&trace_types_lock);
4790
4791         return 0;
4792 }
4793
4794 static int __set_tracer_option(struct trace_array *tr,
4795                                struct tracer_flags *tracer_flags,
4796                                struct tracer_opt *opts, int neg)
4797 {
4798         struct tracer *trace = tracer_flags->trace;
4799         int ret;
4800
4801         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4802         if (ret)
4803                 return ret;
4804
4805         if (neg)
4806                 tracer_flags->val &= ~opts->bit;
4807         else
4808                 tracer_flags->val |= opts->bit;
4809         return 0;
4810 }
4811
4812 /* Try to assign a tracer specific option */
4813 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4814 {
4815         struct tracer *trace = tr->current_trace;
4816         struct tracer_flags *tracer_flags = trace->flags;
4817         struct tracer_opt *opts = NULL;
4818         int i;
4819
4820         for (i = 0; tracer_flags->opts[i].name; i++) {
4821                 opts = &tracer_flags->opts[i];
4822
4823                 if (strcmp(cmp, opts->name) == 0)
4824                         return __set_tracer_option(tr, trace->flags, opts, neg);
4825         }
4826
4827         return -EINVAL;
4828 }
4829
4830 /* Some tracers require overwrite to stay enabled */
4831 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4832 {
4833         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4834                 return -1;
4835
4836         return 0;
4837 }
4838
4839 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4840 {
4841         if ((mask == TRACE_ITER_RECORD_TGID) ||
4842             (mask == TRACE_ITER_RECORD_CMD))
4843                 lockdep_assert_held(&event_mutex);
4844
4845         /* do nothing if flag is already set */
4846         if (!!(tr->trace_flags & mask) == !!enabled)
4847                 return 0;
4848
4849         /* Give the tracer a chance to approve the change */
4850         if (tr->current_trace->flag_changed)
4851                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4852                         return -EINVAL;
4853
4854         if (enabled)
4855                 tr->trace_flags |= mask;
4856         else
4857                 tr->trace_flags &= ~mask;
4858
4859         if (mask == TRACE_ITER_RECORD_CMD)
4860                 trace_event_enable_cmd_record(enabled);
4861
4862         if (mask == TRACE_ITER_RECORD_TGID) {
4863                 if (!tgid_map)
4864                         tgid_map = kvcalloc(PID_MAX_DEFAULT + 1,
4865                                            sizeof(*tgid_map),
4866                                            GFP_KERNEL);
4867                 if (!tgid_map) {
4868                         tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
4869                         return -ENOMEM;
4870                 }
4871
4872                 trace_event_enable_tgid_record(enabled);
4873         }
4874
4875         if (mask == TRACE_ITER_EVENT_FORK)
4876                 trace_event_follow_fork(tr, enabled);
4877
4878         if (mask == TRACE_ITER_FUNC_FORK)
4879                 ftrace_pid_follow_fork(tr, enabled);
4880
4881         if (mask == TRACE_ITER_OVERWRITE) {
4882                 ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
4883 #ifdef CONFIG_TRACER_MAX_TRACE
4884                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4885 #endif
4886         }
4887
4888         if (mask == TRACE_ITER_PRINTK) {
4889                 trace_printk_start_stop_comm(enabled);
4890                 trace_printk_control(enabled);
4891         }
4892
4893         return 0;
4894 }
4895
4896 int trace_set_options(struct trace_array *tr, char *option)
4897 {
4898         char *cmp;
4899         int neg = 0;
4900         int ret;
4901         size_t orig_len = strlen(option);
4902         int len;
4903
4904         cmp = strstrip(option);
4905
4906         len = str_has_prefix(cmp, "no");
4907         if (len)
4908                 neg = 1;
4909
4910         cmp += len;
4911
4912         mutex_lock(&event_mutex);
4913         mutex_lock(&trace_types_lock);
4914
4915         ret = match_string(trace_options, -1, cmp);
4916         /* If no option could be set, test the specific tracer options */
4917         if (ret < 0)
4918                 ret = set_tracer_option(tr, cmp, neg);
4919         else
4920                 ret = set_tracer_flag(tr, 1 << ret, !neg);
4921
4922         mutex_unlock(&trace_types_lock);
4923         mutex_unlock(&event_mutex);
4924
4925         /*
4926          * If the first trailing whitespace is replaced with '\0' by strstrip,
4927          * turn it back into a space.
4928          */
4929         if (orig_len > strlen(option))
4930                 option[strlen(option)] = ' ';
4931
4932         return ret;
4933 }
4934
4935 static void __init apply_trace_boot_options(void)
4936 {
4937         char *buf = trace_boot_options_buf;
4938         char *option;
4939
4940         while (true) {
4941                 option = strsep(&buf, ",");
4942
4943                 if (!option)
4944                         break;
4945
4946                 if (*option)
4947                         trace_set_options(&global_trace, option);
4948
4949                 /* Put back the comma to allow this to be called again */
4950                 if (buf)
4951                         *(buf - 1) = ',';
4952         }
4953 }
4954
4955 static ssize_t
4956 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4957                         size_t cnt, loff_t *ppos)
4958 {
4959         struct seq_file *m = filp->private_data;
4960         struct trace_array *tr = m->private;
4961         char buf[64];
4962         int ret;
4963
4964         if (cnt >= sizeof(buf))
4965                 return -EINVAL;
4966
4967         if (copy_from_user(buf, ubuf, cnt))
4968                 return -EFAULT;
4969
4970         buf[cnt] = 0;
4971
4972         ret = trace_set_options(tr, buf);
4973         if (ret < 0)
4974                 return ret;
4975
4976         *ppos += cnt;
4977
4978         return cnt;
4979 }
4980
4981 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4982 {
4983         struct trace_array *tr = inode->i_private;
4984         int ret;
4985
4986         ret = tracing_check_open_get_tr(tr);
4987         if (ret)
4988                 return ret;
4989
4990         ret = single_open(file, tracing_trace_options_show, inode->i_private);
4991         if (ret < 0)
4992                 trace_array_put(tr);
4993
4994         return ret;
4995 }
4996
4997 static const struct file_operations tracing_iter_fops = {
4998         .open           = tracing_trace_options_open,
4999         .read           = seq_read,
5000         .llseek         = seq_lseek,
5001         .release        = tracing_single_release_tr,
5002         .write          = tracing_trace_options_write,
5003 };
5004
5005 static const char readme_msg[] =
5006         "tracing mini-HOWTO:\n\n"
5007         "# echo 0 > tracing_on : quick way to disable tracing\n"
5008         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5009         " Important files:\n"
5010         "  trace\t\t\t- The static contents of the buffer\n"
5011         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
5012         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5013         "  current_tracer\t- function and latency tracers\n"
5014         "  available_tracers\t- list of configured tracers for current_tracer\n"
5015         "  error_log\t- error log for failed commands (that support it)\n"
5016         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5017         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5018         "  trace_clock\t\t-change the clock used to order events\n"
5019         "       local:   Per cpu clock but may not be synced across CPUs\n"
5020         "      global:   Synced across CPUs but slows tracing down.\n"
5021         "     counter:   Not a clock, but just an increment\n"
5022         "      uptime:   Jiffy counter from time of boot\n"
5023         "        perf:   Same clock that perf events use\n"
5024 #ifdef CONFIG_X86_64
5025         "     x86-tsc:   TSC cycle counter\n"
5026 #endif
5027         "\n  timestamp_mode\t-view the mode used to timestamp events\n"
5028         "       delta:   Delta difference against a buffer-wide timestamp\n"
5029         "    absolute:   Absolute (standalone) timestamp\n"
5030         "\n  trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
5031         "\n  trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
5032         "  tracing_cpumask\t- Limit which CPUs to trace\n"
5033         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5034         "\t\t\t  Remove sub-buffer with rmdir\n"
5035         "  trace_options\t\t- Set format or modify how tracing happens\n"
5036         "\t\t\t  Disable an option by prefixing 'no' to the\n"
5037         "\t\t\t  option name\n"
5038         "  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5039 #ifdef CONFIG_DYNAMIC_FTRACE
5040         "\n  available_filter_functions - list of functions that can be filtered on\n"
5041         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
5042         "\t\t\t  functions\n"
5043         "\t     accepts: func_full_name or glob-matching-pattern\n"
5044         "\t     modules: Can select a group via module\n"
5045         "\t      Format: :mod:<module-name>\n"
5046         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5047         "\t    triggers: a command to perform when function is hit\n"
5048         "\t      Format: <function>:<trigger>[:count]\n"
5049         "\t     trigger: traceon, traceoff\n"
5050         "\t\t      enable_event:<system>:<event>\n"
5051         "\t\t      disable_event:<system>:<event>\n"
5052 #ifdef CONFIG_STACKTRACE
5053         "\t\t      stacktrace\n"
5054 #endif
5055 #ifdef CONFIG_TRACER_SNAPSHOT
5056         "\t\t      snapshot\n"
5057 #endif
5058         "\t\t      dump\n"
5059         "\t\t      cpudump\n"
5060         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5061         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5062         "\t     The first one will disable tracing every time do_fault is hit\n"
5063         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5064         "\t       The first time do trap is hit and it disables tracing, the\n"
5065         "\t       counter will decrement to 2. If tracing is already disabled,\n"
5066         "\t       the counter will not decrement. It only decrements when the\n"
5067         "\t       trigger did work\n"
5068         "\t     To remove trigger without count:\n"
5069         "\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
5070         "\t     To remove trigger with a count:\n"
5071         "\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
5072         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5073         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5074         "\t    modules: Can select a group via module command :mod:\n"
5075         "\t    Does not accept triggers\n"
5076 #endif /* CONFIG_DYNAMIC_FTRACE */
5077 #ifdef CONFIG_FUNCTION_TRACER
5078         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5079         "\t\t    (function)\n"
5080         "  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5081         "\t\t    (function)\n"
5082 #endif
5083 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5084         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5085         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5086         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5087 #endif
5088 #ifdef CONFIG_TRACER_SNAPSHOT
5089         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5090         "\t\t\t  snapshot buffer. Read the contents for more\n"
5091         "\t\t\t  information\n"
5092 #endif
5093 #ifdef CONFIG_STACK_TRACER
5094         "  stack_trace\t\t- Shows the max stack trace when active\n"
5095         "  stack_max_size\t- Shows current max stack size that was traced\n"
5096         "\t\t\t  Write into this file to reset the max size (trigger a\n"
5097         "\t\t\t  new trace)\n"
5098 #ifdef CONFIG_DYNAMIC_FTRACE
5099         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5100         "\t\t\t  traces\n"
5101 #endif
5102 #endif /* CONFIG_STACK_TRACER */
5103 #ifdef CONFIG_DYNAMIC_EVENTS
5104         "  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5105         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5106 #endif
5107 #ifdef CONFIG_KPROBE_EVENTS
5108         "  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5109         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5110 #endif
5111 #ifdef CONFIG_UPROBE_EVENTS
5112         "  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5113         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5114 #endif
5115 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5116         "\t  accepts: event-definitions (one definition per line)\n"
5117         "\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
5118         "\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
5119 #ifdef CONFIG_HIST_TRIGGERS
5120         "\t           s:[synthetic/]<event> <field> [<field>]\n"
5121 #endif
5122         "\t           -:[<group>/]<event>\n"
5123 #ifdef CONFIG_KPROBE_EVENTS
5124         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5125   "place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5126 #endif
5127 #ifdef CONFIG_UPROBE_EVENTS
5128   "   place (uprobe): <path>:<offset>[(ref_ctr_offset)]\n"
5129 #endif
5130         "\t     args: <name>=fetcharg[:type]\n"
5131         "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
5132 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5133         "\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5134 #else
5135         "\t           $stack<index>, $stack, $retval, $comm,\n"
5136 #endif
5137         "\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5138         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
5139         "\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5140         "\t           <type>\\[<array-size>\\]\n"
5141 #ifdef CONFIG_HIST_TRIGGERS
5142         "\t    field: <stype> <name>;\n"
5143         "\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5144         "\t           [unsigned] char/int/long\n"
5145 #endif
5146 #endif
5147         "  events/\t\t- Directory containing all trace event subsystems:\n"
5148         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5149         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
5150         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5151         "\t\t\t  events\n"
5152         "      filter\t\t- If set, only events passing filter are traced\n"
5153         "  events/<system>/<event>/\t- Directory containing control files for\n"
5154         "\t\t\t  <event>:\n"
5155         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5156         "      filter\t\t- If set, only events passing filter are traced\n"
5157         "      trigger\t\t- If set, a command to perform when event is hit\n"
5158         "\t    Format: <trigger>[:count][if <filter>]\n"
5159         "\t   trigger: traceon, traceoff\n"
5160         "\t            enable_event:<system>:<event>\n"
5161         "\t            disable_event:<system>:<event>\n"
5162 #ifdef CONFIG_HIST_TRIGGERS
5163         "\t            enable_hist:<system>:<event>\n"
5164         "\t            disable_hist:<system>:<event>\n"
5165 #endif
5166 #ifdef CONFIG_STACKTRACE
5167         "\t\t    stacktrace\n"
5168 #endif
5169 #ifdef CONFIG_TRACER_SNAPSHOT
5170         "\t\t    snapshot\n"
5171 #endif
5172 #ifdef CONFIG_HIST_TRIGGERS
5173         "\t\t    hist (see below)\n"
5174 #endif
5175         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5176         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5177         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5178         "\t                  events/block/block_unplug/trigger\n"
5179         "\t   The first disables tracing every time block_unplug is hit.\n"
5180         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5181         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5182         "\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5183         "\t   Like function triggers, the counter is only decremented if it\n"
5184         "\t    enabled or disabled tracing.\n"
5185         "\t   To remove a trigger without a count:\n"
5186         "\t     echo '!<trigger> > <system>/<event>/trigger\n"
5187         "\t   To remove a trigger with a count:\n"
5188         "\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
5189         "\t   Filters can be ignored when removing a trigger.\n"
5190 #ifdef CONFIG_HIST_TRIGGERS
5191         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5192         "\t    Format: hist:keys=<field1[,field2,...]>\n"
5193         "\t            [:values=<field1[,field2,...]>]\n"
5194         "\t            [:sort=<field1[,field2,...]>]\n"
5195         "\t            [:size=#entries]\n"
5196         "\t            [:pause][:continue][:clear]\n"
5197         "\t            [:name=histname1]\n"
5198         "\t            [:<handler>.<action>]\n"
5199         "\t            [if <filter>]\n\n"
5200         "\t    When a matching event is hit, an entry is added to a hash\n"
5201         "\t    table using the key(s) and value(s) named, and the value of a\n"
5202         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
5203         "\t    correspond to fields in the event's format description.  Keys\n"
5204         "\t    can be any field, or the special string 'stacktrace'.\n"
5205         "\t    Compound keys consisting of up to two fields can be specified\n"
5206         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5207         "\t    fields.  Sort keys consisting of up to two fields can be\n"
5208         "\t    specified using the 'sort' keyword.  The sort direction can\n"
5209         "\t    be modified by appending '.descending' or '.ascending' to a\n"
5210         "\t    sort field.  The 'size' parameter can be used to specify more\n"
5211         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
5212         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
5213         "\t    its histogram data will be shared with other triggers of the\n"
5214         "\t    same name, and trigger hits will update this common data.\n\n"
5215         "\t    Reading the 'hist' file for the event will dump the hash\n"
5216         "\t    table in its entirety to stdout.  If there are multiple hist\n"
5217         "\t    triggers attached to an event, there will be a table for each\n"
5218         "\t    trigger in the output.  The table displayed for a named\n"
5219         "\t    trigger will be the same as any other instance having the\n"
5220         "\t    same name.  The default format used to display a given field\n"
5221         "\t    can be modified by appending any of the following modifiers\n"
5222         "\t    to the field name, as applicable:\n\n"
5223         "\t            .hex        display a number as a hex value\n"
5224         "\t            .sym        display an address as a symbol\n"
5225         "\t            .sym-offset display an address as a symbol and offset\n"
5226         "\t            .execname   display a common_pid as a program name\n"
5227         "\t            .syscall    display a syscall id as a syscall name\n"
5228         "\t            .log2       display log2 value rather than raw number\n"
5229         "\t            .usecs      display a common_timestamp in microseconds\n\n"
5230         "\t    The 'pause' parameter can be used to pause an existing hist\n"
5231         "\t    trigger or to start a hist trigger but not log any events\n"
5232         "\t    until told to do so.  'continue' can be used to start or\n"
5233         "\t    restart a paused hist trigger.\n\n"
5234         "\t    The 'clear' parameter will clear the contents of a running\n"
5235         "\t    hist trigger and leave its current paused/active state\n"
5236         "\t    unchanged.\n\n"
5237         "\t    The enable_hist and disable_hist triggers can be used to\n"
5238         "\t    have one event conditionally start and stop another event's\n"
5239         "\t    already-attached hist trigger.  The syntax is analogous to\n"
5240         "\t    the enable_event and disable_event triggers.\n\n"
5241         "\t    Hist trigger handlers and actions are executed whenever a\n"
5242         "\t    a histogram entry is added or updated.  They take the form:\n\n"
5243         "\t        <handler>.<action>\n\n"
5244         "\t    The available handlers are:\n\n"
5245         "\t        onmatch(matching.event)  - invoke on addition or update\n"
5246         "\t        onmax(var)               - invoke if var exceeds current max\n"
5247         "\t        onchange(var)            - invoke action if var changes\n\n"
5248         "\t    The available actions are:\n\n"
5249         "\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5250         "\t        save(field,...)                      - save current event fields\n"
5251 #ifdef CONFIG_TRACER_SNAPSHOT
5252         "\t        snapshot()                           - snapshot the trace buffer\n"
5253 #endif
5254 #endif
5255 ;
5256
5257 static ssize_t
5258 tracing_readme_read(struct file *filp, char __user *ubuf,
5259                        size_t cnt, loff_t *ppos)
5260 {
5261         return simple_read_from_buffer(ubuf, cnt, ppos,
5262                                         readme_msg, strlen(readme_msg));
5263 }
5264
5265 static const struct file_operations tracing_readme_fops = {
5266         .open           = tracing_open_generic,
5267         .read           = tracing_readme_read,
5268         .llseek         = generic_file_llseek,
5269 };
5270
5271 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5272 {
5273         int *ptr = v;
5274
5275         if (*pos || m->count)
5276                 ptr++;
5277
5278         (*pos)++;
5279
5280         for (; ptr <= &tgid_map[PID_MAX_DEFAULT]; ptr++) {
5281                 if (trace_find_tgid(*ptr))
5282                         return ptr;
5283         }
5284
5285         return NULL;
5286 }
5287
5288 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5289 {
5290         void *v;
5291         loff_t l = 0;
5292
5293         if (!tgid_map)
5294                 return NULL;
5295
5296         v = &tgid_map[0];
5297         while (l <= *pos) {
5298                 v = saved_tgids_next(m, v, &l);
5299                 if (!v)
5300                         return NULL;
5301         }
5302
5303         return v;
5304 }
5305
5306 static void saved_tgids_stop(struct seq_file *m, void *v)
5307 {
5308 }
5309
5310 static int saved_tgids_show(struct seq_file *m, void *v)
5311 {
5312         int pid = (int *)v - tgid_map;
5313
5314         seq_printf(m, "%d %d\n", pid, trace_find_tgid(pid));
5315         return 0;
5316 }
5317
5318 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5319         .start          = saved_tgids_start,
5320         .stop           = saved_tgids_stop,
5321         .next           = saved_tgids_next,
5322         .show           = saved_tgids_show,
5323 };
5324
5325 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5326 {
5327         int ret;
5328
5329         ret = tracing_check_open_get_tr(NULL);
5330         if (ret)
5331                 return ret;
5332
5333         return seq_open(filp, &tracing_saved_tgids_seq_ops);
5334 }
5335
5336
5337 static const struct file_operations tracing_saved_tgids_fops = {
5338         .open           = tracing_saved_tgids_open,
5339         .read           = seq_read,
5340         .llseek         = seq_lseek,
5341         .release        = seq_release,
5342 };
5343
5344 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5345 {
5346         unsigned int *ptr = v;
5347
5348         if (*pos || m->count)
5349                 ptr++;
5350
5351         (*pos)++;
5352
5353         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5354              ptr++) {
5355                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5356                         continue;
5357
5358                 return ptr;
5359         }
5360
5361         return NULL;
5362 }
5363
5364 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5365 {
5366         void *v;
5367         loff_t l = 0;
5368
5369         preempt_disable();
5370         arch_spin_lock(&trace_cmdline_lock);
5371
5372         v = &savedcmd->map_cmdline_to_pid[0];
5373         while (l <= *pos) {
5374                 v = saved_cmdlines_next(m, v, &l);
5375                 if (!v)
5376                         return NULL;
5377         }
5378
5379         return v;
5380 }
5381
5382 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5383 {
5384         arch_spin_unlock(&trace_cmdline_lock);
5385         preempt_enable();
5386 }
5387
5388 static int saved_cmdlines_show(struct seq_file *m, void *v)
5389 {
5390         char buf[TASK_COMM_LEN];
5391         unsigned int *pid = v;
5392
5393         __trace_find_cmdline(*pid, buf);
5394         seq_printf(m, "%d %s\n", *pid, buf);
5395         return 0;
5396 }
5397
5398 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5399         .start          = saved_cmdlines_start,
5400         .next           = saved_cmdlines_next,
5401         .stop           = saved_cmdlines_stop,
5402         .show           = saved_cmdlines_show,
5403 };
5404
5405 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5406 {
5407         int ret;
5408
5409         ret = tracing_check_open_get_tr(NULL);
5410         if (ret)
5411                 return ret;
5412
5413         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5414 }
5415
5416 static const struct file_operations tracing_saved_cmdlines_fops = {
5417         .open           = tracing_saved_cmdlines_open,
5418         .read           = seq_read,
5419         .llseek         = seq_lseek,
5420         .release        = seq_release,
5421 };
5422
5423 static ssize_t
5424 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5425                                  size_t cnt, loff_t *ppos)
5426 {
5427         char buf[64];
5428         int r;
5429
5430         arch_spin_lock(&trace_cmdline_lock);
5431         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5432         arch_spin_unlock(&trace_cmdline_lock);
5433
5434         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5435 }
5436
5437 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5438 {
5439         kfree(s->saved_cmdlines);
5440         kfree(s->map_cmdline_to_pid);
5441         kfree(s);
5442 }
5443
5444 static int tracing_resize_saved_cmdlines(unsigned int val)
5445 {
5446         struct saved_cmdlines_buffer *s, *savedcmd_temp;
5447
5448         s = kmalloc(sizeof(*s), GFP_KERNEL);
5449         if (!s)
5450                 return -ENOMEM;
5451
5452         if (allocate_cmdlines_buffer(val, s) < 0) {
5453                 kfree(s);
5454                 return -ENOMEM;
5455         }
5456
5457         arch_spin_lock(&trace_cmdline_lock);
5458         savedcmd_temp = savedcmd;
5459         savedcmd = s;
5460         arch_spin_unlock(&trace_cmdline_lock);
5461         free_saved_cmdlines_buffer(savedcmd_temp);
5462
5463         return 0;
5464 }
5465
5466 static ssize_t
5467 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5468                                   size_t cnt, loff_t *ppos)
5469 {
5470         unsigned long val;
5471         int ret;
5472
5473         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5474         if (ret)
5475                 return ret;
5476
5477         /* must have at least 1 entry or less than PID_MAX_DEFAULT */
5478         if (!val || val > PID_MAX_DEFAULT)
5479                 return -EINVAL;
5480
5481         ret = tracing_resize_saved_cmdlines((unsigned int)val);
5482         if (ret < 0)
5483                 return ret;
5484
5485         *ppos += cnt;
5486
5487         return cnt;
5488 }
5489
5490 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5491         .open           = tracing_open_generic,
5492         .read           = tracing_saved_cmdlines_size_read,
5493         .write          = tracing_saved_cmdlines_size_write,
5494 };
5495
5496 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5497 static union trace_eval_map_item *
5498 update_eval_map(union trace_eval_map_item *ptr)
5499 {
5500         if (!ptr->map.eval_string) {
5501                 if (ptr->tail.next) {
5502                         ptr = ptr->tail.next;
5503                         /* Set ptr to the next real item (skip head) */
5504                         ptr++;
5505                 } else
5506                         return NULL;
5507         }
5508         return ptr;
5509 }
5510
5511 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5512 {
5513         union trace_eval_map_item *ptr = v;
5514
5515         /*
5516          * Paranoid! If ptr points to end, we don't want to increment past it.
5517          * This really should never happen.
5518          */
5519         (*pos)++;
5520         ptr = update_eval_map(ptr);
5521         if (WARN_ON_ONCE(!ptr))
5522                 return NULL;
5523
5524         ptr++;
5525         ptr = update_eval_map(ptr);
5526
5527         return ptr;
5528 }
5529
5530 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5531 {
5532         union trace_eval_map_item *v;
5533         loff_t l = 0;
5534
5535         mutex_lock(&trace_eval_mutex);
5536
5537         v = trace_eval_maps;
5538         if (v)
5539                 v++;
5540
5541         while (v && l < *pos) {
5542                 v = eval_map_next(m, v, &l);
5543         }
5544
5545         return v;
5546 }
5547
5548 static void eval_map_stop(struct seq_file *m, void *v)
5549 {
5550         mutex_unlock(&trace_eval_mutex);
5551 }
5552
5553 static int eval_map_show(struct seq_file *m, void *v)
5554 {
5555         union trace_eval_map_item *ptr = v;
5556
5557         seq_printf(m, "%s %ld (%s)\n",
5558                    ptr->map.eval_string, ptr->map.eval_value,
5559                    ptr->map.system);
5560
5561         return 0;
5562 }
5563
5564 static const struct seq_operations tracing_eval_map_seq_ops = {
5565         .start          = eval_map_start,
5566         .next           = eval_map_next,
5567         .stop           = eval_map_stop,
5568         .show           = eval_map_show,
5569 };
5570
5571 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5572 {
5573         int ret;
5574
5575         ret = tracing_check_open_get_tr(NULL);
5576         if (ret)
5577                 return ret;
5578
5579         return seq_open(filp, &tracing_eval_map_seq_ops);
5580 }
5581
5582 static const struct file_operations tracing_eval_map_fops = {
5583         .open           = tracing_eval_map_open,
5584         .read           = seq_read,
5585         .llseek         = seq_lseek,
5586         .release        = seq_release,
5587 };
5588
5589 static inline union trace_eval_map_item *
5590 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5591 {
5592         /* Return tail of array given the head */
5593         return ptr + ptr->head.length + 1;
5594 }
5595
5596 static void
5597 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5598                            int len)
5599 {
5600         struct trace_eval_map **stop;
5601         struct trace_eval_map **map;
5602         union trace_eval_map_item *map_array;
5603         union trace_eval_map_item *ptr;
5604
5605         stop = start + len;
5606
5607         /*
5608          * The trace_eval_maps contains the map plus a head and tail item,
5609          * where the head holds the module and length of array, and the
5610          * tail holds a pointer to the next list.
5611          */
5612         map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5613         if (!map_array) {
5614                 pr_warn("Unable to allocate trace eval mapping\n");
5615                 return;
5616         }
5617
5618         mutex_lock(&trace_eval_mutex);
5619
5620         if (!trace_eval_maps)
5621                 trace_eval_maps = map_array;
5622         else {
5623                 ptr = trace_eval_maps;
5624                 for (;;) {
5625                         ptr = trace_eval_jmp_to_tail(ptr);
5626                         if (!ptr->tail.next)
5627                                 break;
5628                         ptr = ptr->tail.next;
5629
5630                 }
5631                 ptr->tail.next = map_array;
5632         }
5633         map_array->head.mod = mod;
5634         map_array->head.length = len;
5635         map_array++;
5636
5637         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5638                 map_array->map = **map;
5639                 map_array++;
5640         }
5641         memset(map_array, 0, sizeof(*map_array));
5642
5643         mutex_unlock(&trace_eval_mutex);
5644 }
5645
5646 static void trace_create_eval_file(struct dentry *d_tracer)
5647 {
5648         trace_create_file("eval_map", 0444, d_tracer,
5649                           NULL, &tracing_eval_map_fops);
5650 }
5651
5652 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5653 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5654 static inline void trace_insert_eval_map_file(struct module *mod,
5655                               struct trace_eval_map **start, int len) { }
5656 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5657
5658 static void trace_insert_eval_map(struct module *mod,
5659                                   struct trace_eval_map **start, int len)
5660 {
5661         struct trace_eval_map **map;
5662
5663         if (len <= 0)
5664                 return;
5665
5666         map = start;
5667
5668         trace_event_eval_update(map, len);
5669
5670         trace_insert_eval_map_file(mod, start, len);
5671 }
5672
5673 static ssize_t
5674 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5675                        size_t cnt, loff_t *ppos)
5676 {
5677         struct trace_array *tr = filp->private_data;
5678         char buf[MAX_TRACER_SIZE+2];
5679         int r;
5680
5681         mutex_lock(&trace_types_lock);
5682         r = sprintf(buf, "%s\n", tr->current_trace->name);
5683         mutex_unlock(&trace_types_lock);
5684
5685         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5686 }
5687
5688 int tracer_init(struct tracer *t, struct trace_array *tr)
5689 {
5690         tracing_reset_online_cpus(&tr->array_buffer);
5691         return t->init(tr);
5692 }
5693
5694 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
5695 {
5696         int cpu;
5697
5698         for_each_tracing_cpu(cpu)
5699                 per_cpu_ptr(buf->data, cpu)->entries = val;
5700 }
5701
5702 #ifdef CONFIG_TRACER_MAX_TRACE
5703 /* resize @tr's buffer to the size of @size_tr's entries */
5704 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
5705                                         struct array_buffer *size_buf, int cpu_id)
5706 {
5707         int cpu, ret = 0;
5708
5709         if (cpu_id == RING_BUFFER_ALL_CPUS) {
5710                 for_each_tracing_cpu(cpu) {
5711                         ret = ring_buffer_resize(trace_buf->buffer,
5712                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5713                         if (ret < 0)
5714                                 break;
5715                         per_cpu_ptr(trace_buf->data, cpu)->entries =
5716                                 per_cpu_ptr(size_buf->data, cpu)->entries;
5717                 }
5718         } else {
5719                 ret = ring_buffer_resize(trace_buf->buffer,
5720                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5721                 if (ret == 0)
5722                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5723                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
5724         }
5725
5726         return ret;
5727 }
5728 #endif /* CONFIG_TRACER_MAX_TRACE */
5729
5730 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5731                                         unsigned long size, int cpu)
5732 {
5733         int ret;
5734
5735         /*
5736          * If kernel or user changes the size of the ring buffer
5737          * we use the size that was given, and we can forget about
5738          * expanding it later.
5739          */
5740         ring_buffer_expanded = true;
5741
5742         /* May be called before buffers are initialized */
5743         if (!tr->array_buffer.buffer)
5744                 return 0;
5745
5746         ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
5747         if (ret < 0)
5748                 return ret;
5749
5750 #ifdef CONFIG_TRACER_MAX_TRACE
5751         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
5752             !tr->current_trace->use_max_tr)
5753                 goto out;
5754
5755         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5756         if (ret < 0) {
5757                 int r = resize_buffer_duplicate_size(&tr->array_buffer,
5758                                                      &tr->array_buffer, cpu);
5759                 if (r < 0) {
5760                         /*
5761                          * AARGH! We are left with different
5762                          * size max buffer!!!!
5763                          * The max buffer is our "snapshot" buffer.
5764                          * When a tracer needs a snapshot (one of the
5765                          * latency tracers), it swaps the max buffer
5766                          * with the saved snap shot. We succeeded to
5767                          * update the size of the main buffer, but failed to
5768                          * update the size of the max buffer. But when we tried
5769                          * to reset the main buffer to the original size, we
5770                          * failed there too. This is very unlikely to
5771                          * happen, but if it does, warn and kill all
5772                          * tracing.
5773                          */
5774                         WARN_ON(1);
5775                         tracing_disabled = 1;
5776                 }
5777                 return ret;
5778         }
5779
5780         if (cpu == RING_BUFFER_ALL_CPUS)
5781                 set_buffer_entries(&tr->max_buffer, size);
5782         else
5783                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
5784
5785  out:
5786 #endif /* CONFIG_TRACER_MAX_TRACE */
5787
5788         if (cpu == RING_BUFFER_ALL_CPUS)
5789                 set_buffer_entries(&tr->array_buffer, size);
5790         else
5791                 per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size;
5792
5793         return ret;
5794 }
5795
5796 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5797                                   unsigned long size, int cpu_id)
5798 {
5799         int ret = size;
5800
5801         mutex_lock(&trace_types_lock);
5802
5803         if (cpu_id != RING_BUFFER_ALL_CPUS) {
5804                 /* make sure, this cpu is enabled in the mask */
5805                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
5806                         ret = -EINVAL;
5807                         goto out;
5808                 }
5809         }
5810
5811         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5812         if (ret < 0)
5813                 ret = -ENOMEM;
5814
5815 out:
5816         mutex_unlock(&trace_types_lock);
5817
5818         return ret;
5819 }
5820
5821
5822 /**
5823  * tracing_update_buffers - used by tracing facility to expand ring buffers
5824  *
5825  * To save on memory when the tracing is never used on a system with it
5826  * configured in. The ring buffers are set to a minimum size. But once
5827  * a user starts to use the tracing facility, then they need to grow
5828  * to their default size.
5829  *
5830  * This function is to be called when a tracer is about to be used.
5831  */
5832 int tracing_update_buffers(void)
5833 {
5834         int ret = 0;
5835
5836         mutex_lock(&trace_types_lock);
5837         if (!ring_buffer_expanded)
5838                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
5839                                                 RING_BUFFER_ALL_CPUS);
5840         mutex_unlock(&trace_types_lock);
5841
5842         return ret;
5843 }
5844
5845 struct trace_option_dentry;
5846
5847 static void
5848 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
5849
5850 /*
5851  * Used to clear out the tracer before deletion of an instance.
5852  * Must have trace_types_lock held.
5853  */
5854 static void tracing_set_nop(struct trace_array *tr)
5855 {
5856         if (tr->current_trace == &nop_trace)
5857                 return;
5858         
5859         tr->current_trace->enabled--;
5860
5861         if (tr->current_trace->reset)
5862                 tr->current_trace->reset(tr);
5863
5864         tr->current_trace = &nop_trace;
5865 }
5866
5867 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5868 {
5869         /* Only enable if the directory has been created already. */
5870         if (!tr->dir)
5871                 return;
5872
5873         create_trace_option_files(tr, t);
5874 }
5875
5876 int tracing_set_tracer(struct trace_array *tr, const char *buf)
5877 {
5878         struct tracer *t;
5879 #ifdef CONFIG_TRACER_MAX_TRACE
5880         bool had_max_tr;
5881 #endif
5882         int ret = 0;
5883
5884         mutex_lock(&trace_types_lock);
5885
5886         if (!ring_buffer_expanded) {
5887                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5888                                                 RING_BUFFER_ALL_CPUS);
5889                 if (ret < 0)
5890                         goto out;
5891                 ret = 0;
5892         }
5893
5894         for (t = trace_types; t; t = t->next) {
5895                 if (strcmp(t->name, buf) == 0)
5896                         break;
5897         }
5898         if (!t) {
5899                 ret = -EINVAL;
5900                 goto out;
5901         }
5902         if (t == tr->current_trace)
5903                 goto out;
5904
5905 #ifdef CONFIG_TRACER_SNAPSHOT
5906         if (t->use_max_tr) {
5907                 arch_spin_lock(&tr->max_lock);
5908                 if (tr->cond_snapshot)
5909                         ret = -EBUSY;
5910                 arch_spin_unlock(&tr->max_lock);
5911                 if (ret)
5912                         goto out;
5913         }
5914 #endif
5915         /* Some tracers won't work on kernel command line */
5916         if (system_state < SYSTEM_RUNNING && t->noboot) {
5917                 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
5918                         t->name);
5919                 goto out;
5920         }
5921
5922         /* Some tracers are only allowed for the top level buffer */
5923         if (!trace_ok_for_array(t, tr)) {
5924                 ret = -EINVAL;
5925                 goto out;
5926         }
5927
5928         /* If trace pipe files are being read, we can't change the tracer */
5929         if (tr->trace_ref) {
5930                 ret = -EBUSY;
5931                 goto out;
5932         }
5933
5934         trace_branch_disable();
5935
5936         tr->current_trace->enabled--;
5937
5938         if (tr->current_trace->reset)
5939                 tr->current_trace->reset(tr);
5940
5941         /* Current trace needs to be nop_trace before synchronize_rcu */
5942         tr->current_trace = &nop_trace;
5943
5944 #ifdef CONFIG_TRACER_MAX_TRACE
5945         had_max_tr = tr->allocated_snapshot;
5946
5947         if (had_max_tr && !t->use_max_tr) {
5948                 /*
5949                  * We need to make sure that the update_max_tr sees that
5950                  * current_trace changed to nop_trace to keep it from
5951                  * swapping the buffers after we resize it.
5952                  * The update_max_tr is called from interrupts disabled
5953                  * so a synchronized_sched() is sufficient.
5954                  */
5955                 synchronize_rcu();
5956                 free_snapshot(tr);
5957         }
5958 #endif
5959
5960 #ifdef CONFIG_TRACER_MAX_TRACE
5961         if (t->use_max_tr && !had_max_tr) {
5962                 ret = tracing_alloc_snapshot_instance(tr);
5963                 if (ret < 0)
5964                         goto out;
5965         }
5966 #endif
5967
5968         if (t->init) {
5969                 ret = tracer_init(t, tr);
5970                 if (ret)
5971                         goto out;
5972         }
5973
5974         tr->current_trace = t;
5975         tr->current_trace->enabled++;
5976         trace_branch_enable(tr);
5977  out:
5978         mutex_unlock(&trace_types_lock);
5979
5980         return ret;
5981 }
5982
5983 static ssize_t
5984 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
5985                         size_t cnt, loff_t *ppos)
5986 {
5987         struct trace_array *tr = filp->private_data;
5988         char buf[MAX_TRACER_SIZE+1];
5989         int i;
5990         size_t ret;
5991         int err;
5992
5993         ret = cnt;
5994
5995         if (cnt > MAX_TRACER_SIZE)
5996                 cnt = MAX_TRACER_SIZE;
5997
5998         if (copy_from_user(buf, ubuf, cnt))
5999                 return -EFAULT;
6000
6001         buf[cnt] = 0;
6002
6003         /* strip ending whitespace. */
6004         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
6005                 buf[i] = 0;
6006
6007         err = tracing_set_tracer(tr, buf);
6008         if (err)
6009                 return err;
6010
6011         *ppos += ret;
6012
6013         return ret;
6014 }
6015
6016 static ssize_t
6017 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6018                    size_t cnt, loff_t *ppos)
6019 {
6020         char buf[64];
6021         int r;
6022
6023         r = snprintf(buf, sizeof(buf), "%ld\n",
6024                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6025         if (r > sizeof(buf))
6026                 r = sizeof(buf);
6027         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6028 }
6029
6030 static ssize_t
6031 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6032                     size_t cnt, loff_t *ppos)
6033 {
6034         unsigned long val;
6035         int ret;
6036
6037         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6038         if (ret)
6039                 return ret;
6040
6041         *ptr = val * 1000;
6042
6043         return cnt;
6044 }
6045
6046 static ssize_t
6047 tracing_thresh_read(struct file *filp, char __user *ubuf,
6048                     size_t cnt, loff_t *ppos)
6049 {
6050         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6051 }
6052
6053 static ssize_t
6054 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6055                      size_t cnt, loff_t *ppos)
6056 {
6057         struct trace_array *tr = filp->private_data;
6058         int ret;
6059
6060         mutex_lock(&trace_types_lock);
6061         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6062         if (ret < 0)
6063                 goto out;
6064
6065         if (tr->current_trace->update_thresh) {
6066                 ret = tr->current_trace->update_thresh(tr);
6067                 if (ret < 0)
6068                         goto out;
6069         }
6070
6071         ret = cnt;
6072 out:
6073         mutex_unlock(&trace_types_lock);
6074
6075         return ret;
6076 }
6077
6078 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6079
6080 static ssize_t
6081 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6082                      size_t cnt, loff_t *ppos)
6083 {
6084         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
6085 }
6086
6087 static ssize_t
6088 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6089                       size_t cnt, loff_t *ppos)
6090 {
6091         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
6092 }
6093
6094 #endif
6095
6096 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6097 {
6098         struct trace_array *tr = inode->i_private;
6099         struct trace_iterator *iter;
6100         int ret;
6101
6102         ret = tracing_check_open_get_tr(tr);
6103         if (ret)
6104                 return ret;
6105
6106         mutex_lock(&trace_types_lock);
6107
6108         /* create a buffer to store the information to pass to userspace */
6109         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6110         if (!iter) {
6111                 ret = -ENOMEM;
6112                 __trace_array_put(tr);
6113                 goto out;
6114         }
6115
6116         trace_seq_init(&iter->seq);
6117         iter->trace = tr->current_trace;
6118
6119         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6120                 ret = -ENOMEM;
6121                 goto fail;
6122         }
6123
6124         /* trace pipe does not show start of buffer */
6125         cpumask_setall(iter->started);
6126
6127         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6128                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
6129
6130         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6131         if (trace_clocks[tr->clock_id].in_ns)
6132                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6133
6134         iter->tr = tr;
6135         iter->array_buffer = &tr->array_buffer;
6136         iter->cpu_file = tracing_get_cpu(inode);
6137         mutex_init(&iter->mutex);
6138         filp->private_data = iter;
6139
6140         if (iter->trace->pipe_open)
6141                 iter->trace->pipe_open(iter);
6142
6143         nonseekable_open(inode, filp);
6144
6145         tr->trace_ref++;
6146 out:
6147         mutex_unlock(&trace_types_lock);
6148         return ret;
6149
6150 fail:
6151         kfree(iter);
6152         __trace_array_put(tr);
6153         mutex_unlock(&trace_types_lock);
6154         return ret;
6155 }
6156
6157 static int tracing_release_pipe(struct inode *inode, struct file *file)
6158 {
6159         struct trace_iterator *iter = file->private_data;
6160         struct trace_array *tr = inode->i_private;
6161
6162         mutex_lock(&trace_types_lock);
6163
6164         tr->trace_ref--;
6165
6166         if (iter->trace->pipe_close)
6167                 iter->trace->pipe_close(iter);
6168
6169         mutex_unlock(&trace_types_lock);
6170
6171         free_cpumask_var(iter->started);
6172         mutex_destroy(&iter->mutex);
6173         kfree(iter);
6174
6175         trace_array_put(tr);
6176
6177         return 0;
6178 }
6179
6180 static __poll_t
6181 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6182 {
6183         struct trace_array *tr = iter->tr;
6184
6185         /* Iterators are static, they should be filled or empty */
6186         if (trace_buffer_iter(iter, iter->cpu_file))
6187                 return EPOLLIN | EPOLLRDNORM;
6188
6189         if (tr->trace_flags & TRACE_ITER_BLOCK)
6190                 /*
6191                  * Always select as readable when in blocking mode
6192                  */
6193                 return EPOLLIN | EPOLLRDNORM;
6194         else
6195                 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6196                                              filp, poll_table);
6197 }
6198
6199 static __poll_t
6200 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6201 {
6202         struct trace_iterator *iter = filp->private_data;
6203
6204         return trace_poll(iter, filp, poll_table);
6205 }
6206
6207 /* Must be called with iter->mutex held. */
6208 static int tracing_wait_pipe(struct file *filp)
6209 {
6210         struct trace_iterator *iter = filp->private_data;
6211         int ret;
6212
6213         while (trace_empty(iter)) {
6214
6215                 if ((filp->f_flags & O_NONBLOCK)) {
6216                         return -EAGAIN;
6217                 }
6218
6219                 /*
6220                  * We block until we read something and tracing is disabled.
6221                  * We still block if tracing is disabled, but we have never
6222                  * read anything. This allows a user to cat this file, and
6223                  * then enable tracing. But after we have read something,
6224                  * we give an EOF when tracing is again disabled.
6225                  *
6226                  * iter->pos will be 0 if we haven't read anything.
6227                  */
6228                 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6229                         break;
6230
6231                 mutex_unlock(&iter->mutex);
6232
6233                 ret = wait_on_pipe(iter, 0);
6234
6235                 mutex_lock(&iter->mutex);
6236
6237                 if (ret)
6238                         return ret;
6239         }
6240
6241         return 1;
6242 }
6243
6244 /*
6245  * Consumer reader.
6246  */
6247 static ssize_t
6248 tracing_read_pipe(struct file *filp, char __user *ubuf,
6249                   size_t cnt, loff_t *ppos)
6250 {
6251         struct trace_iterator *iter = filp->private_data;
6252         ssize_t sret;
6253
6254         /*
6255          * Avoid more than one consumer on a single file descriptor
6256          * This is just a matter of traces coherency, the ring buffer itself
6257          * is protected.
6258          */
6259         mutex_lock(&iter->mutex);
6260
6261         /* return any leftover data */
6262         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6263         if (sret != -EBUSY)
6264                 goto out;
6265
6266         trace_seq_init(&iter->seq);
6267
6268         if (iter->trace->read) {
6269                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6270                 if (sret)
6271                         goto out;
6272         }
6273
6274 waitagain:
6275         sret = tracing_wait_pipe(filp);
6276         if (sret <= 0)
6277                 goto out;
6278
6279         /* stop when tracing is finished */
6280         if (trace_empty(iter)) {
6281                 sret = 0;
6282                 goto out;
6283         }
6284
6285         if (cnt >= PAGE_SIZE)
6286                 cnt = PAGE_SIZE - 1;
6287
6288         /* reset all but tr, trace, and overruns */
6289         memset(&iter->seq, 0,
6290                sizeof(struct trace_iterator) -
6291                offsetof(struct trace_iterator, seq));
6292         cpumask_clear(iter->started);
6293         trace_seq_init(&iter->seq);
6294         iter->pos = -1;
6295
6296         trace_event_read_lock();
6297         trace_access_lock(iter->cpu_file);
6298         while (trace_find_next_entry_inc(iter) != NULL) {
6299                 enum print_line_t ret;
6300                 int save_len = iter->seq.seq.len;
6301
6302                 ret = print_trace_line(iter);
6303                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6304                         /* don't print partial lines */
6305                         iter->seq.seq.len = save_len;
6306                         break;
6307                 }
6308                 if (ret != TRACE_TYPE_NO_CONSUME)
6309                         trace_consume(iter);
6310
6311                 if (trace_seq_used(&iter->seq) >= cnt)
6312                         break;
6313
6314                 /*
6315                  * Setting the full flag means we reached the trace_seq buffer
6316                  * size and we should leave by partial output condition above.
6317                  * One of the trace_seq_* functions is not used properly.
6318                  */
6319                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6320                           iter->ent->type);
6321         }
6322         trace_access_unlock(iter->cpu_file);
6323         trace_event_read_unlock();
6324
6325         /* Now copy what we have to the user */
6326         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6327         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6328                 trace_seq_init(&iter->seq);
6329
6330         /*
6331          * If there was nothing to send to user, in spite of consuming trace
6332          * entries, go back to wait for more entries.
6333          */
6334         if (sret == -EBUSY)
6335                 goto waitagain;
6336
6337 out:
6338         mutex_unlock(&iter->mutex);
6339
6340         return sret;
6341 }
6342
6343 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6344                                      unsigned int idx)
6345 {
6346         __free_page(spd->pages[idx]);
6347 }
6348
6349 static size_t
6350 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6351 {
6352         size_t count;
6353         int save_len;
6354         int ret;
6355
6356         /* Seq buffer is page-sized, exactly what we need. */
6357         for (;;) {
6358                 save_len = iter->seq.seq.len;
6359                 ret = print_trace_line(iter);
6360
6361                 if (trace_seq_has_overflowed(&iter->seq)) {
6362                         iter->seq.seq.len = save_len;
6363                         break;
6364                 }
6365
6366                 /*
6367                  * This should not be hit, because it should only
6368                  * be set if the iter->seq overflowed. But check it
6369                  * anyway to be safe.
6370                  */
6371                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6372                         iter->seq.seq.len = save_len;
6373                         break;
6374                 }
6375
6376                 count = trace_seq_used(&iter->seq) - save_len;
6377                 if (rem < count) {
6378                         rem = 0;
6379                         iter->seq.seq.len = save_len;
6380                         break;
6381                 }
6382
6383                 if (ret != TRACE_TYPE_NO_CONSUME)
6384                         trace_consume(iter);
6385                 rem -= count;
6386                 if (!trace_find_next_entry_inc(iter))   {
6387                         rem = 0;
6388                         iter->ent = NULL;
6389                         break;
6390                 }
6391         }
6392
6393         return rem;
6394 }
6395
6396 static ssize_t tracing_splice_read_pipe(struct file *filp,
6397                                         loff_t *ppos,
6398                                         struct pipe_inode_info *pipe,
6399                                         size_t len,
6400                                         unsigned int flags)
6401 {
6402         struct page *pages_def[PIPE_DEF_BUFFERS];
6403         struct partial_page partial_def[PIPE_DEF_BUFFERS];
6404         struct trace_iterator *iter = filp->private_data;
6405         struct splice_pipe_desc spd = {
6406                 .pages          = pages_def,
6407                 .partial        = partial_def,
6408                 .nr_pages       = 0, /* This gets updated below. */
6409                 .nr_pages_max   = PIPE_DEF_BUFFERS,
6410                 .ops            = &default_pipe_buf_ops,
6411                 .spd_release    = tracing_spd_release_pipe,
6412         };
6413         ssize_t ret;
6414         size_t rem;
6415         unsigned int i;
6416
6417         if (splice_grow_spd(pipe, &spd))
6418                 return -ENOMEM;
6419
6420         mutex_lock(&iter->mutex);
6421
6422         if (iter->trace->splice_read) {
6423                 ret = iter->trace->splice_read(iter, filp,
6424                                                ppos, pipe, len, flags);
6425                 if (ret)
6426                         goto out_err;
6427         }
6428
6429         ret = tracing_wait_pipe(filp);
6430         if (ret <= 0)
6431                 goto out_err;
6432
6433         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6434                 ret = -EFAULT;
6435                 goto out_err;
6436         }
6437
6438         trace_event_read_lock();
6439         trace_access_lock(iter->cpu_file);
6440
6441         /* Fill as many pages as possible. */
6442         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6443                 spd.pages[i] = alloc_page(GFP_KERNEL);
6444                 if (!spd.pages[i])
6445                         break;
6446
6447                 rem = tracing_fill_pipe_page(rem, iter);
6448
6449                 /* Copy the data into the page, so we can start over. */
6450                 ret = trace_seq_to_buffer(&iter->seq,
6451                                           page_address(spd.pages[i]),
6452                                           trace_seq_used(&iter->seq));
6453                 if (ret < 0) {
6454                         __free_page(spd.pages[i]);
6455                         break;
6456                 }
6457                 spd.partial[i].offset = 0;
6458                 spd.partial[i].len = trace_seq_used(&iter->seq);
6459
6460                 trace_seq_init(&iter->seq);
6461         }
6462
6463         trace_access_unlock(iter->cpu_file);
6464         trace_event_read_unlock();
6465         mutex_unlock(&iter->mutex);
6466
6467         spd.nr_pages = i;
6468
6469         if (i)
6470                 ret = splice_to_pipe(pipe, &spd);
6471         else
6472                 ret = 0;
6473 out:
6474         splice_shrink_spd(&spd);
6475         return ret;
6476
6477 out_err:
6478         mutex_unlock(&iter->mutex);
6479         goto out;
6480 }
6481
6482 static ssize_t
6483 tracing_entries_read(struct file *filp, char __user *ubuf,
6484                      size_t cnt, loff_t *ppos)
6485 {
6486         struct inode *inode = file_inode(filp);
6487         struct trace_array *tr = inode->i_private;
6488         int cpu = tracing_get_cpu(inode);
6489         char buf[64];
6490         int r = 0;
6491         ssize_t ret;
6492
6493         mutex_lock(&trace_types_lock);
6494
6495         if (cpu == RING_BUFFER_ALL_CPUS) {
6496                 int cpu, buf_size_same;
6497                 unsigned long size;
6498
6499                 size = 0;
6500                 buf_size_same = 1;
6501                 /* check if all cpu sizes are same */
6502                 for_each_tracing_cpu(cpu) {
6503                         /* fill in the size from first enabled cpu */
6504                         if (size == 0)
6505                                 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6506                         if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6507                                 buf_size_same = 0;
6508                                 break;
6509                         }
6510                 }
6511
6512                 if (buf_size_same) {
6513                         if (!ring_buffer_expanded)
6514                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
6515                                             size >> 10,
6516                                             trace_buf_size >> 10);
6517                         else
6518                                 r = sprintf(buf, "%lu\n", size >> 10);
6519                 } else
6520                         r = sprintf(buf, "X\n");
6521         } else
6522                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6523
6524         mutex_unlock(&trace_types_lock);
6525
6526         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6527         return ret;
6528 }
6529
6530 static ssize_t
6531 tracing_entries_write(struct file *filp, const char __user *ubuf,
6532                       size_t cnt, loff_t *ppos)
6533 {
6534         struct inode *inode = file_inode(filp);
6535         struct trace_array *tr = inode->i_private;
6536         unsigned long val;
6537         int ret;
6538
6539         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6540         if (ret)
6541                 return ret;
6542
6543         /* must have at least 1 entry */
6544         if (!val)
6545                 return -EINVAL;
6546
6547         /* value is in KB */
6548         val <<= 10;
6549         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6550         if (ret < 0)
6551                 return ret;
6552
6553         *ppos += cnt;
6554
6555         return cnt;
6556 }
6557
6558 static ssize_t
6559 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6560                                 size_t cnt, loff_t *ppos)
6561 {
6562         struct trace_array *tr = filp->private_data;
6563         char buf[64];
6564         int r, cpu;
6565         unsigned long size = 0, expanded_size = 0;
6566
6567         mutex_lock(&trace_types_lock);
6568         for_each_tracing_cpu(cpu) {
6569                 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
6570                 if (!ring_buffer_expanded)
6571                         expanded_size += trace_buf_size >> 10;
6572         }
6573         if (ring_buffer_expanded)
6574                 r = sprintf(buf, "%lu\n", size);
6575         else
6576                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6577         mutex_unlock(&trace_types_lock);
6578
6579         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6580 }
6581
6582 static ssize_t
6583 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6584                           size_t cnt, loff_t *ppos)
6585 {
6586         /*
6587          * There is no need to read what the user has written, this function
6588          * is just to make sure that there is no error when "echo" is used
6589          */
6590
6591         *ppos += cnt;
6592
6593         return cnt;
6594 }
6595
6596 static int
6597 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6598 {
6599         struct trace_array *tr = inode->i_private;
6600
6601         /* disable tracing ? */
6602         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6603                 tracer_tracing_off(tr);
6604         /* resize the ring buffer to 0 */
6605         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6606
6607         trace_array_put(tr);
6608
6609         return 0;
6610 }
6611
6612 static ssize_t
6613 tracing_mark_write(struct file *filp, const char __user *ubuf,
6614                                         size_t cnt, loff_t *fpos)
6615 {
6616         struct trace_array *tr = filp->private_data;
6617         struct ring_buffer_event *event;
6618         enum event_trigger_type tt = ETT_NONE;
6619         struct trace_buffer *buffer;
6620         struct print_entry *entry;
6621         unsigned long irq_flags;
6622         ssize_t written;
6623         int size;
6624         int len;
6625
6626 /* Used in tracing_mark_raw_write() as well */
6627 #define FAULTED_STR "<faulted>"
6628 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
6629
6630         if (tracing_disabled)
6631                 return -EINVAL;
6632
6633         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6634                 return -EINVAL;
6635
6636         if (cnt > TRACE_BUF_SIZE)
6637                 cnt = TRACE_BUF_SIZE;
6638
6639         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6640
6641         local_save_flags(irq_flags);
6642         size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
6643
6644         /* If less than "<faulted>", then make sure we can still add that */
6645         if (cnt < FAULTED_SIZE)
6646                 size += FAULTED_SIZE - cnt;
6647
6648         buffer = tr->array_buffer.buffer;
6649         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6650                                             irq_flags, preempt_count());
6651         if (unlikely(!event))
6652                 /* Ring buffer disabled, return as if not open for write */
6653                 return -EBADF;
6654
6655         entry = ring_buffer_event_data(event);
6656         entry->ip = _THIS_IP_;
6657
6658         len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6659         if (len) {
6660                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6661                 cnt = FAULTED_SIZE;
6662                 written = -EFAULT;
6663         } else
6664                 written = cnt;
6665         len = cnt;
6666
6667         if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
6668                 /* do not add \n before testing triggers, but add \0 */
6669                 entry->buf[cnt] = '\0';
6670                 tt = event_triggers_call(tr->trace_marker_file, entry, event);
6671         }
6672
6673         if (entry->buf[cnt - 1] != '\n') {
6674                 entry->buf[cnt] = '\n';
6675                 entry->buf[cnt + 1] = '\0';
6676         } else
6677                 entry->buf[cnt] = '\0';
6678
6679         __buffer_unlock_commit(buffer, event);
6680
6681         if (tt)
6682                 event_triggers_post_call(tr->trace_marker_file, tt);
6683
6684         if (written > 0)
6685                 *fpos += written;
6686
6687         return written;
6688 }
6689
6690 /* Limit it for now to 3K (including tag) */
6691 #define RAW_DATA_MAX_SIZE (1024*3)
6692
6693 static ssize_t
6694 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6695                                         size_t cnt, loff_t *fpos)
6696 {
6697         struct trace_array *tr = filp->private_data;
6698         struct ring_buffer_event *event;
6699         struct trace_buffer *buffer;
6700         struct raw_data_entry *entry;
6701         unsigned long irq_flags;
6702         ssize_t written;
6703         int size;
6704         int len;
6705
6706 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6707
6708         if (tracing_disabled)
6709                 return -EINVAL;
6710
6711         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6712                 return -EINVAL;
6713
6714         /* The marker must at least have a tag id */
6715         if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
6716                 return -EINVAL;
6717
6718         if (cnt > TRACE_BUF_SIZE)
6719                 cnt = TRACE_BUF_SIZE;
6720
6721         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6722
6723         local_save_flags(irq_flags);
6724         size = sizeof(*entry) + cnt;
6725         if (cnt < FAULT_SIZE_ID)
6726                 size += FAULT_SIZE_ID - cnt;
6727
6728         buffer = tr->array_buffer.buffer;
6729         event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
6730                                             irq_flags, preempt_count());
6731         if (!event)
6732                 /* Ring buffer disabled, return as if not open for write */
6733                 return -EBADF;
6734
6735         entry = ring_buffer_event_data(event);
6736
6737         len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
6738         if (len) {
6739                 entry->id = -1;
6740                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6741                 written = -EFAULT;
6742         } else
6743                 written = cnt;
6744
6745         __buffer_unlock_commit(buffer, event);
6746
6747         if (written > 0)
6748                 *fpos += written;
6749
6750         return written;
6751 }
6752
6753 static int tracing_clock_show(struct seq_file *m, void *v)
6754 {
6755         struct trace_array *tr = m->private;
6756         int i;
6757
6758         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
6759                 seq_printf(m,
6760                         "%s%s%s%s", i ? " " : "",
6761                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
6762                         i == tr->clock_id ? "]" : "");
6763         seq_putc(m, '\n');
6764
6765         return 0;
6766 }
6767
6768 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
6769 {
6770         int i;
6771
6772         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
6773                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
6774                         break;
6775         }
6776         if (i == ARRAY_SIZE(trace_clocks))
6777                 return -EINVAL;
6778
6779         mutex_lock(&trace_types_lock);
6780
6781         tr->clock_id = i;
6782
6783         ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
6784
6785         /*
6786          * New clock may not be consistent with the previous clock.
6787          * Reset the buffer so that it doesn't have incomparable timestamps.
6788          */
6789         tracing_reset_online_cpus(&tr->array_buffer);
6790
6791 #ifdef CONFIG_TRACER_MAX_TRACE
6792         if (tr->max_buffer.buffer)
6793                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
6794         tracing_reset_online_cpus(&tr->max_buffer);
6795 #endif
6796
6797         mutex_unlock(&trace_types_lock);
6798
6799         return 0;
6800 }
6801
6802 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6803                                    size_t cnt, loff_t *fpos)
6804 {
6805         struct seq_file *m = filp->private_data;
6806         struct trace_array *tr = m->private;
6807         char buf[64];
6808         const char *clockstr;
6809         int ret;
6810
6811         if (cnt >= sizeof(buf))
6812                 return -EINVAL;
6813
6814         if (copy_from_user(buf, ubuf, cnt))
6815                 return -EFAULT;
6816
6817         buf[cnt] = 0;
6818
6819         clockstr = strstrip(buf);
6820
6821         ret = tracing_set_clock(tr, clockstr);
6822         if (ret)
6823                 return ret;
6824
6825         *fpos += cnt;
6826
6827         return cnt;
6828 }
6829
6830 static int tracing_clock_open(struct inode *inode, struct file *file)
6831 {
6832         struct trace_array *tr = inode->i_private;
6833         int ret;
6834
6835         ret = tracing_check_open_get_tr(tr);
6836         if (ret)
6837                 return ret;
6838
6839         ret = single_open(file, tracing_clock_show, inode->i_private);
6840         if (ret < 0)
6841                 trace_array_put(tr);
6842
6843         return ret;
6844 }
6845
6846 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
6847 {
6848         struct trace_array *tr = m->private;
6849
6850         mutex_lock(&trace_types_lock);
6851
6852         if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
6853                 seq_puts(m, "delta [absolute]\n");
6854         else
6855                 seq_puts(m, "[delta] absolute\n");
6856
6857         mutex_unlock(&trace_types_lock);
6858
6859         return 0;
6860 }
6861
6862 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
6863 {
6864         struct trace_array *tr = inode->i_private;
6865         int ret;
6866
6867         ret = tracing_check_open_get_tr(tr);
6868         if (ret)
6869                 return ret;
6870
6871         ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
6872         if (ret < 0)
6873                 trace_array_put(tr);
6874
6875         return ret;
6876 }
6877
6878 int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs)
6879 {
6880         int ret = 0;
6881
6882         mutex_lock(&trace_types_lock);
6883
6884         if (abs && tr->time_stamp_abs_ref++)
6885                 goto out;
6886
6887         if (!abs) {
6888                 if (WARN_ON_ONCE(!tr->time_stamp_abs_ref)) {
6889                         ret = -EINVAL;
6890                         goto out;
6891                 }
6892
6893                 if (--tr->time_stamp_abs_ref)
6894                         goto out;
6895         }
6896
6897         ring_buffer_set_time_stamp_abs(tr->array_buffer.buffer, abs);
6898
6899 #ifdef CONFIG_TRACER_MAX_TRACE
6900         if (tr->max_buffer.buffer)
6901                 ring_buffer_set_time_stamp_abs(tr->max_buffer.buffer, abs);
6902 #endif
6903  out:
6904         mutex_unlock(&trace_types_lock);
6905
6906         return ret;
6907 }
6908
6909 struct ftrace_buffer_info {
6910         struct trace_iterator   iter;
6911         void                    *spare;
6912         unsigned int            spare_cpu;
6913         unsigned int            read;
6914 };
6915
6916 #ifdef CONFIG_TRACER_SNAPSHOT
6917 static int tracing_snapshot_open(struct inode *inode, struct file *file)
6918 {
6919         struct trace_array *tr = inode->i_private;
6920         struct trace_iterator *iter;
6921         struct seq_file *m;
6922         int ret;
6923
6924         ret = tracing_check_open_get_tr(tr);
6925         if (ret)
6926                 return ret;
6927
6928         if (file->f_mode & FMODE_READ) {
6929                 iter = __tracing_open(inode, file, true);
6930                 if (IS_ERR(iter))
6931                         ret = PTR_ERR(iter);
6932         } else {
6933                 /* Writes still need the seq_file to hold the private data */
6934                 ret = -ENOMEM;
6935                 m = kzalloc(sizeof(*m), GFP_KERNEL);
6936                 if (!m)
6937                         goto out;
6938                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6939                 if (!iter) {
6940                         kfree(m);
6941                         goto out;
6942                 }
6943                 ret = 0;
6944
6945                 iter->tr = tr;
6946                 iter->array_buffer = &tr->max_buffer;
6947                 iter->cpu_file = tracing_get_cpu(inode);
6948                 m->private = iter;
6949                 file->private_data = m;
6950         }
6951 out:
6952         if (ret < 0)
6953                 trace_array_put(tr);
6954
6955         return ret;
6956 }
6957
6958 static ssize_t
6959 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
6960                        loff_t *ppos)
6961 {
6962         struct seq_file *m = filp->private_data;
6963         struct trace_iterator *iter = m->private;
6964         struct trace_array *tr = iter->tr;
6965         unsigned long val;
6966         int ret;
6967
6968         ret = tracing_update_buffers();
6969         if (ret < 0)
6970                 return ret;
6971
6972         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6973         if (ret)
6974                 return ret;
6975
6976         mutex_lock(&trace_types_lock);
6977
6978         if (tr->current_trace->use_max_tr) {
6979                 ret = -EBUSY;
6980                 goto out;
6981         }
6982
6983         arch_spin_lock(&tr->max_lock);
6984         if (tr->cond_snapshot)
6985                 ret = -EBUSY;
6986         arch_spin_unlock(&tr->max_lock);
6987         if (ret)
6988                 goto out;
6989
6990         switch (val) {
6991         case 0:
6992                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6993                         ret = -EINVAL;
6994                         break;
6995                 }
6996                 if (tr->allocated_snapshot)
6997                         free_snapshot(tr);
6998                 break;
6999         case 1:
7000 /* Only allow per-cpu swap if the ring buffer supports it */
7001 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7002                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7003                         ret = -EINVAL;
7004                         break;
7005                 }
7006 #endif
7007                 if (tr->allocated_snapshot)
7008                         ret = resize_buffer_duplicate_size(&tr->max_buffer,
7009                                         &tr->array_buffer, iter->cpu_file);
7010                 else
7011                         ret = tracing_alloc_snapshot_instance(tr);
7012                 if (ret < 0)
7013                         break;
7014                 local_irq_disable();
7015                 /* Now, we're going to swap */
7016                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7017                         update_max_tr(tr, current, smp_processor_id(), NULL);
7018                 else
7019                         update_max_tr_single(tr, current, iter->cpu_file);
7020                 local_irq_enable();
7021                 break;
7022         default:
7023                 if (tr->allocated_snapshot) {
7024                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7025                                 tracing_reset_online_cpus(&tr->max_buffer);
7026                         else
7027                                 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7028                 }
7029                 break;
7030         }
7031
7032         if (ret >= 0) {
7033                 *ppos += cnt;
7034                 ret = cnt;
7035         }
7036 out:
7037         mutex_unlock(&trace_types_lock);
7038         return ret;
7039 }
7040
7041 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7042 {
7043         struct seq_file *m = file->private_data;
7044         int ret;
7045
7046         ret = tracing_release(inode, file);
7047
7048         if (file->f_mode & FMODE_READ)
7049                 return ret;
7050
7051         /* If write only, the seq_file is just a stub */
7052         if (m)
7053                 kfree(m->private);
7054         kfree(m);
7055
7056         return 0;
7057 }
7058
7059 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7060 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7061                                     size_t count, loff_t *ppos);
7062 static int tracing_buffers_release(struct inode *inode, struct file *file);
7063 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7064                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7065
7066 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7067 {
7068         struct ftrace_buffer_info *info;
7069         int ret;
7070
7071         /* The following checks for tracefs lockdown */
7072         ret = tracing_buffers_open(inode, filp);
7073         if (ret < 0)
7074                 return ret;
7075
7076         info = filp->private_data;
7077
7078         if (info->iter.trace->use_max_tr) {
7079                 tracing_buffers_release(inode, filp);
7080                 return -EBUSY;
7081         }
7082
7083         info->iter.snapshot = true;
7084         info->iter.array_buffer = &info->iter.tr->max_buffer;
7085
7086         return ret;
7087 }
7088
7089 #endif /* CONFIG_TRACER_SNAPSHOT */
7090
7091
7092 static const struct file_operations tracing_thresh_fops = {
7093         .open           = tracing_open_generic,
7094         .read           = tracing_thresh_read,
7095         .write          = tracing_thresh_write,
7096         .llseek         = generic_file_llseek,
7097 };
7098
7099 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7100 static const struct file_operations tracing_max_lat_fops = {
7101         .open           = tracing_open_generic,
7102         .read           = tracing_max_lat_read,
7103         .write          = tracing_max_lat_write,
7104         .llseek         = generic_file_llseek,
7105 };
7106 #endif
7107
7108 static const struct file_operations set_tracer_fops = {
7109         .open           = tracing_open_generic,
7110         .read           = tracing_set_trace_read,
7111         .write          = tracing_set_trace_write,
7112         .llseek         = generic_file_llseek,
7113 };
7114
7115 static const struct file_operations tracing_pipe_fops = {
7116         .open           = tracing_open_pipe,
7117         .poll           = tracing_poll_pipe,
7118         .read           = tracing_read_pipe,
7119         .splice_read    = tracing_splice_read_pipe,
7120         .release        = tracing_release_pipe,
7121         .llseek         = no_llseek,
7122 };
7123
7124 static const struct file_operations tracing_entries_fops = {
7125         .open           = tracing_open_generic_tr,
7126         .read           = tracing_entries_read,
7127         .write          = tracing_entries_write,
7128         .llseek         = generic_file_llseek,
7129         .release        = tracing_release_generic_tr,
7130 };
7131
7132 static const struct file_operations tracing_total_entries_fops = {
7133         .open           = tracing_open_generic_tr,
7134         .read           = tracing_total_entries_read,
7135         .llseek         = generic_file_llseek,
7136         .release        = tracing_release_generic_tr,
7137 };
7138
7139 static const struct file_operations tracing_free_buffer_fops = {
7140         .open           = tracing_open_generic_tr,
7141         .write          = tracing_free_buffer_write,
7142         .release        = tracing_free_buffer_release,
7143 };
7144
7145 static const struct file_operations tracing_mark_fops = {
7146         .open           = tracing_open_generic_tr,
7147         .write          = tracing_mark_write,
7148         .llseek         = generic_file_llseek,
7149         .release        = tracing_release_generic_tr,
7150 };
7151
7152 static const struct file_operations tracing_mark_raw_fops = {
7153         .open           = tracing_open_generic_tr,
7154         .write          = tracing_mark_raw_write,
7155         .llseek         = generic_file_llseek,
7156         .release        = tracing_release_generic_tr,
7157 };
7158
7159 static const struct file_operations trace_clock_fops = {
7160         .open           = tracing_clock_open,
7161         .read           = seq_read,
7162         .llseek         = seq_lseek,
7163         .release        = tracing_single_release_tr,
7164         .write          = tracing_clock_write,
7165 };
7166
7167 static const struct file_operations trace_time_stamp_mode_fops = {
7168         .open           = tracing_time_stamp_mode_open,
7169         .read           = seq_read,
7170         .llseek         = seq_lseek,
7171         .release        = tracing_single_release_tr,
7172 };
7173
7174 #ifdef CONFIG_TRACER_SNAPSHOT
7175 static const struct file_operations snapshot_fops = {
7176         .open           = tracing_snapshot_open,
7177         .read           = seq_read,
7178         .write          = tracing_snapshot_write,
7179         .llseek         = tracing_lseek,
7180         .release        = tracing_snapshot_release,
7181 };
7182
7183 static const struct file_operations snapshot_raw_fops = {
7184         .open           = snapshot_raw_open,
7185         .read           = tracing_buffers_read,
7186         .release        = tracing_buffers_release,
7187         .splice_read    = tracing_buffers_splice_read,
7188         .llseek         = no_llseek,
7189 };
7190
7191 #endif /* CONFIG_TRACER_SNAPSHOT */
7192
7193 #define TRACING_LOG_ERRS_MAX    8
7194 #define TRACING_LOG_LOC_MAX     128
7195
7196 #define CMD_PREFIX "  Command: "
7197
7198 struct err_info {
7199         const char      **errs; /* ptr to loc-specific array of err strings */
7200         u8              type;   /* index into errs -> specific err string */
7201         u8              pos;    /* MAX_FILTER_STR_VAL = 256 */
7202         u64             ts;
7203 };
7204
7205 struct tracing_log_err {
7206         struct list_head        list;
7207         struct err_info         info;
7208         char                    loc[TRACING_LOG_LOC_MAX]; /* err location */
7209         char                    cmd[MAX_FILTER_STR_VAL]; /* what caused err */
7210 };
7211
7212 static DEFINE_MUTEX(tracing_err_log_lock);
7213
7214 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
7215 {
7216         struct tracing_log_err *err;
7217
7218         if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7219                 err = kzalloc(sizeof(*err), GFP_KERNEL);
7220                 if (!err)
7221                         err = ERR_PTR(-ENOMEM);
7222                 tr->n_err_log_entries++;
7223
7224                 return err;
7225         }
7226
7227         err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7228         list_del(&err->list);
7229
7230         return err;
7231 }
7232
7233 /**
7234  * err_pos - find the position of a string within a command for error careting
7235  * @cmd: The tracing command that caused the error
7236  * @str: The string to position the caret at within @cmd
7237  *
7238  * Finds the position of the first occurence of @str within @cmd.  The
7239  * return value can be passed to tracing_log_err() for caret placement
7240  * within @cmd.
7241  *
7242  * Returns the index within @cmd of the first occurence of @str or 0
7243  * if @str was not found.
7244  */
7245 unsigned int err_pos(char *cmd, const char *str)
7246 {
7247         char *found;
7248
7249         if (WARN_ON(!strlen(cmd)))
7250                 return 0;
7251
7252         found = strstr(cmd, str);
7253         if (found)
7254                 return found - cmd;
7255
7256         return 0;
7257 }
7258
7259 /**
7260  * tracing_log_err - write an error to the tracing error log
7261  * @tr: The associated trace array for the error (NULL for top level array)
7262  * @loc: A string describing where the error occurred
7263  * @cmd: The tracing command that caused the error
7264  * @errs: The array of loc-specific static error strings
7265  * @type: The index into errs[], which produces the specific static err string
7266  * @pos: The position the caret should be placed in the cmd
7267  *
7268  * Writes an error into tracing/error_log of the form:
7269  *
7270  * <loc>: error: <text>
7271  *   Command: <cmd>
7272  *              ^
7273  *
7274  * tracing/error_log is a small log file containing the last
7275  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7276  * unless there has been a tracing error, and the error log can be
7277  * cleared and have its memory freed by writing the empty string in
7278  * truncation mode to it i.e. echo > tracing/error_log.
7279  *
7280  * NOTE: the @errs array along with the @type param are used to
7281  * produce a static error string - this string is not copied and saved
7282  * when the error is logged - only a pointer to it is saved.  See
7283  * existing callers for examples of how static strings are typically
7284  * defined for use with tracing_log_err().
7285  */
7286 void tracing_log_err(struct trace_array *tr,
7287                      const char *loc, const char *cmd,
7288                      const char **errs, u8 type, u8 pos)
7289 {
7290         struct tracing_log_err *err;
7291
7292         if (!tr)
7293                 tr = &global_trace;
7294
7295         mutex_lock(&tracing_err_log_lock);
7296         err = get_tracing_log_err(tr);
7297         if (PTR_ERR(err) == -ENOMEM) {
7298                 mutex_unlock(&tracing_err_log_lock);
7299                 return;
7300         }
7301
7302         snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7303         snprintf(err->cmd, MAX_FILTER_STR_VAL,"\n" CMD_PREFIX "%s\n", cmd);
7304
7305         err->info.errs = errs;
7306         err->info.type = type;
7307         err->info.pos = pos;
7308         err->info.ts = local_clock();
7309
7310         list_add_tail(&err->list, &tr->err_log);
7311         mutex_unlock(&tracing_err_log_lock);
7312 }
7313
7314 static void clear_tracing_err_log(struct trace_array *tr)
7315 {
7316         struct tracing_log_err *err, *next;
7317
7318         mutex_lock(&tracing_err_log_lock);
7319         list_for_each_entry_safe(err, next, &tr->err_log, list) {
7320                 list_del(&err->list);
7321                 kfree(err);
7322         }
7323
7324         tr->n_err_log_entries = 0;
7325         mutex_unlock(&tracing_err_log_lock);
7326 }
7327
7328 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7329 {
7330         struct trace_array *tr = m->private;
7331
7332         mutex_lock(&tracing_err_log_lock);
7333
7334         return seq_list_start(&tr->err_log, *pos);
7335 }
7336
7337 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7338 {
7339         struct trace_array *tr = m->private;
7340
7341         return seq_list_next(v, &tr->err_log, pos);
7342 }
7343
7344 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7345 {
7346         mutex_unlock(&tracing_err_log_lock);
7347 }
7348
7349 static void tracing_err_log_show_pos(struct seq_file *m, u8 pos)
7350 {
7351         u8 i;
7352
7353         for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7354                 seq_putc(m, ' ');
7355         for (i = 0; i < pos; i++)
7356                 seq_putc(m, ' ');
7357         seq_puts(m, "^\n");
7358 }
7359
7360 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7361 {
7362         struct tracing_log_err *err = v;
7363
7364         if (err) {
7365                 const char *err_text = err->info.errs[err->info.type];
7366                 u64 sec = err->info.ts;
7367                 u32 nsec;
7368
7369                 nsec = do_div(sec, NSEC_PER_SEC);
7370                 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7371                            err->loc, err_text);
7372                 seq_printf(m, "%s", err->cmd);
7373                 tracing_err_log_show_pos(m, err->info.pos);
7374         }
7375
7376         return 0;
7377 }
7378
7379 static const struct seq_operations tracing_err_log_seq_ops = {
7380         .start  = tracing_err_log_seq_start,
7381         .next   = tracing_err_log_seq_next,
7382         .stop   = tracing_err_log_seq_stop,
7383         .show   = tracing_err_log_seq_show
7384 };
7385
7386 static int tracing_err_log_open(struct inode *inode, struct file *file)
7387 {
7388         struct trace_array *tr = inode->i_private;
7389         int ret = 0;
7390
7391         ret = tracing_check_open_get_tr(tr);
7392         if (ret)
7393                 return ret;
7394
7395         /* If this file was opened for write, then erase contents */
7396         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7397                 clear_tracing_err_log(tr);
7398
7399         if (file->f_mode & FMODE_READ) {
7400                 ret = seq_open(file, &tracing_err_log_seq_ops);
7401                 if (!ret) {
7402                         struct seq_file *m = file->private_data;
7403                         m->private = tr;
7404                 } else {
7405                         trace_array_put(tr);
7406                 }
7407         }
7408         return ret;
7409 }
7410
7411 static ssize_t tracing_err_log_write(struct file *file,
7412                                      const char __user *buffer,
7413                                      size_t count, loff_t *ppos)
7414 {
7415         return count;
7416 }
7417
7418 static int tracing_err_log_release(struct inode *inode, struct file *file)
7419 {
7420         struct trace_array *tr = inode->i_private;
7421
7422         trace_array_put(tr);
7423
7424         if (file->f_mode & FMODE_READ)
7425                 seq_release(inode, file);
7426
7427         return 0;
7428 }
7429
7430 static const struct file_operations tracing_err_log_fops = {
7431         .open           = tracing_err_log_open,
7432         .write          = tracing_err_log_write,
7433         .read           = seq_read,
7434         .llseek         = seq_lseek,
7435         .release        = tracing_err_log_release,
7436 };
7437
7438 static int tracing_buffers_open(struct inode *inode, struct file *filp)
7439 {
7440         struct trace_array *tr = inode->i_private;
7441         struct ftrace_buffer_info *info;
7442         int ret;
7443
7444         ret = tracing_check_open_get_tr(tr);
7445         if (ret)
7446                 return ret;
7447
7448         info = kvzalloc(sizeof(*info), GFP_KERNEL);
7449         if (!info) {
7450                 trace_array_put(tr);
7451                 return -ENOMEM;
7452         }
7453
7454         mutex_lock(&trace_types_lock);
7455
7456         info->iter.tr           = tr;
7457         info->iter.cpu_file     = tracing_get_cpu(inode);
7458         info->iter.trace        = tr->current_trace;
7459         info->iter.array_buffer = &tr->array_buffer;
7460         info->spare             = NULL;
7461         /* Force reading ring buffer for first read */
7462         info->read              = (unsigned int)-1;
7463
7464         filp->private_data = info;
7465
7466         tr->trace_ref++;
7467
7468         mutex_unlock(&trace_types_lock);
7469
7470         ret = nonseekable_open(inode, filp);
7471         if (ret < 0)
7472                 trace_array_put(tr);
7473
7474         return ret;
7475 }
7476
7477 static __poll_t
7478 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
7479 {
7480         struct ftrace_buffer_info *info = filp->private_data;
7481         struct trace_iterator *iter = &info->iter;
7482
7483         return trace_poll(iter, filp, poll_table);
7484 }
7485
7486 static ssize_t
7487 tracing_buffers_read(struct file *filp, char __user *ubuf,
7488                      size_t count, loff_t *ppos)
7489 {
7490         struct ftrace_buffer_info *info = filp->private_data;
7491         struct trace_iterator *iter = &info->iter;
7492         ssize_t ret = 0;
7493         ssize_t size;
7494
7495         if (!count)
7496                 return 0;
7497
7498 #ifdef CONFIG_TRACER_MAX_TRACE
7499         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7500                 return -EBUSY;
7501 #endif
7502
7503         if (!info->spare) {
7504                 info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
7505                                                           iter->cpu_file);
7506                 if (IS_ERR(info->spare)) {
7507                         ret = PTR_ERR(info->spare);
7508                         info->spare = NULL;
7509                 } else {
7510                         info->spare_cpu = iter->cpu_file;
7511                 }
7512         }
7513         if (!info->spare)
7514                 return ret;
7515
7516         /* Do we have previous read data to read? */
7517         if (info->read < PAGE_SIZE)
7518                 goto read;
7519
7520  again:
7521         trace_access_lock(iter->cpu_file);
7522         ret = ring_buffer_read_page(iter->array_buffer->buffer,
7523                                     &info->spare,
7524                                     count,
7525                                     iter->cpu_file, 0);
7526         trace_access_unlock(iter->cpu_file);
7527
7528         if (ret < 0) {
7529                 if (trace_empty(iter)) {
7530                         if ((filp->f_flags & O_NONBLOCK))
7531                                 return -EAGAIN;
7532
7533                         ret = wait_on_pipe(iter, 0);
7534                         if (ret)
7535                                 return ret;
7536
7537                         goto again;
7538                 }
7539                 return 0;
7540         }
7541
7542         info->read = 0;
7543  read:
7544         size = PAGE_SIZE - info->read;
7545         if (size > count)
7546                 size = count;
7547
7548         ret = copy_to_user(ubuf, info->spare + info->read, size);
7549         if (ret == size)
7550                 return -EFAULT;
7551
7552         size -= ret;
7553
7554         *ppos += size;
7555         info->read += size;
7556
7557         return size;
7558 }
7559
7560 static int tracing_buffers_release(struct inode *inode, struct file *file)
7561 {
7562         struct ftrace_buffer_info *info = file->private_data;
7563         struct trace_iterator *iter = &info->iter;
7564
7565         mutex_lock(&trace_types_lock);
7566
7567         iter->tr->trace_ref--;
7568
7569         __trace_array_put(iter->tr);
7570
7571         if (info->spare)
7572                 ring_buffer_free_read_page(iter->array_buffer->buffer,
7573                                            info->spare_cpu, info->spare);
7574         kvfree(info);
7575
7576         mutex_unlock(&trace_types_lock);
7577
7578         return 0;
7579 }
7580
7581 struct buffer_ref {
7582         struct trace_buffer     *buffer;
7583         void                    *page;
7584         int                     cpu;
7585         refcount_t              refcount;
7586 };
7587
7588 static void buffer_ref_release(struct buffer_ref *ref)
7589 {
7590         if (!refcount_dec_and_test(&ref->refcount))
7591                 return;
7592         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
7593         kfree(ref);
7594 }
7595
7596 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
7597                                     struct pipe_buffer *buf)
7598 {
7599         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7600
7601         buffer_ref_release(ref);
7602         buf->private = 0;
7603 }
7604
7605 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
7606                                 struct pipe_buffer *buf)
7607 {
7608         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7609
7610         if (refcount_read(&ref->refcount) > INT_MAX/2)
7611                 return false;
7612
7613         refcount_inc(&ref->refcount);
7614         return true;
7615 }
7616
7617 /* Pipe buffer operations for a buffer. */
7618 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
7619         .release                = buffer_pipe_buf_release,
7620         .get                    = buffer_pipe_buf_get,
7621 };
7622
7623 /*
7624  * Callback from splice_to_pipe(), if we need to release some pages
7625  * at the end of the spd in case we error'ed out in filling the pipe.
7626  */
7627 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
7628 {
7629         struct buffer_ref *ref =
7630                 (struct buffer_ref *)spd->partial[i].private;
7631
7632         buffer_ref_release(ref);
7633         spd->partial[i].private = 0;
7634 }
7635
7636 static ssize_t
7637 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7638                             struct pipe_inode_info *pipe, size_t len,
7639                             unsigned int flags)
7640 {
7641         struct ftrace_buffer_info *info = file->private_data;
7642         struct trace_iterator *iter = &info->iter;
7643         struct partial_page partial_def[PIPE_DEF_BUFFERS];
7644         struct page *pages_def[PIPE_DEF_BUFFERS];
7645         struct splice_pipe_desc spd = {
7646                 .pages          = pages_def,
7647                 .partial        = partial_def,
7648                 .nr_pages_max   = PIPE_DEF_BUFFERS,
7649                 .ops            = &buffer_pipe_buf_ops,
7650                 .spd_release    = buffer_spd_release,
7651         };
7652         struct buffer_ref *ref;
7653         int entries, i;
7654         ssize_t ret = 0;
7655
7656 #ifdef CONFIG_TRACER_MAX_TRACE
7657         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7658                 return -EBUSY;
7659 #endif
7660
7661         if (*ppos & (PAGE_SIZE - 1))
7662                 return -EINVAL;
7663
7664         if (len & (PAGE_SIZE - 1)) {
7665                 if (len < PAGE_SIZE)
7666                         return -EINVAL;
7667                 len &= PAGE_MASK;
7668         }
7669
7670         if (splice_grow_spd(pipe, &spd))
7671                 return -ENOMEM;
7672
7673  again:
7674         trace_access_lock(iter->cpu_file);
7675         entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
7676
7677         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
7678                 struct page *page;
7679                 int r;
7680
7681                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
7682                 if (!ref) {
7683                         ret = -ENOMEM;
7684                         break;
7685                 }
7686
7687                 refcount_set(&ref->refcount, 1);
7688                 ref->buffer = iter->array_buffer->buffer;
7689                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
7690                 if (IS_ERR(ref->page)) {
7691                         ret = PTR_ERR(ref->page);
7692                         ref->page = NULL;
7693                         kfree(ref);
7694                         break;
7695                 }
7696                 ref->cpu = iter->cpu_file;
7697
7698                 r = ring_buffer_read_page(ref->buffer, &ref->page,
7699                                           len, iter->cpu_file, 1);
7700                 if (r < 0) {
7701                         ring_buffer_free_read_page(ref->buffer, ref->cpu,
7702                                                    ref->page);
7703                         kfree(ref);
7704                         break;
7705                 }
7706
7707                 page = virt_to_page(ref->page);
7708
7709                 spd.pages[i] = page;
7710                 spd.partial[i].len = PAGE_SIZE;
7711                 spd.partial[i].offset = 0;
7712                 spd.partial[i].private = (unsigned long)ref;
7713                 spd.nr_pages++;
7714                 *ppos += PAGE_SIZE;
7715
7716                 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
7717         }
7718
7719         trace_access_unlock(iter->cpu_file);
7720         spd.nr_pages = i;
7721
7722         /* did we read anything? */
7723         if (!spd.nr_pages) {
7724                 if (ret)
7725                         goto out;
7726
7727                 ret = -EAGAIN;
7728                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
7729                         goto out;
7730
7731                 ret = wait_on_pipe(iter, iter->tr->buffer_percent);
7732                 if (ret)
7733                         goto out;
7734
7735                 goto again;
7736         }
7737
7738         ret = splice_to_pipe(pipe, &spd);
7739 out:
7740         splice_shrink_spd(&spd);
7741
7742         return ret;
7743 }
7744
7745 static const struct file_operations tracing_buffers_fops = {
7746         .open           = tracing_buffers_open,
7747         .read           = tracing_buffers_read,
7748         .poll           = tracing_buffers_poll,
7749         .release        = tracing_buffers_release,
7750         .splice_read    = tracing_buffers_splice_read,
7751         .llseek         = no_llseek,
7752 };
7753
7754 static ssize_t
7755 tracing_stats_read(struct file *filp, char __user *ubuf,
7756                    size_t count, loff_t *ppos)
7757 {
7758         struct inode *inode = file_inode(filp);
7759         struct trace_array *tr = inode->i_private;
7760         struct array_buffer *trace_buf = &tr->array_buffer;
7761         int cpu = tracing_get_cpu(inode);
7762         struct trace_seq *s;
7763         unsigned long cnt;
7764         unsigned long long t;
7765         unsigned long usec_rem;
7766
7767         s = kmalloc(sizeof(*s), GFP_KERNEL);
7768         if (!s)
7769                 return -ENOMEM;
7770
7771         trace_seq_init(s);
7772
7773         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
7774         trace_seq_printf(s, "entries: %ld\n", cnt);
7775
7776         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
7777         trace_seq_printf(s, "overrun: %ld\n", cnt);
7778
7779         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
7780         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
7781
7782         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
7783         trace_seq_printf(s, "bytes: %ld\n", cnt);
7784
7785         if (trace_clocks[tr->clock_id].in_ns) {
7786                 /* local or global for trace_clock */
7787                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7788                 usec_rem = do_div(t, USEC_PER_SEC);
7789                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
7790                                                                 t, usec_rem);
7791
7792                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
7793                 usec_rem = do_div(t, USEC_PER_SEC);
7794                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
7795         } else {
7796                 /* counter or tsc mode for trace_clock */
7797                 trace_seq_printf(s, "oldest event ts: %llu\n",
7798                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7799
7800                 trace_seq_printf(s, "now ts: %llu\n",
7801                                 ring_buffer_time_stamp(trace_buf->buffer, cpu));
7802         }
7803
7804         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
7805         trace_seq_printf(s, "dropped events: %ld\n", cnt);
7806
7807         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
7808         trace_seq_printf(s, "read events: %ld\n", cnt);
7809
7810         count = simple_read_from_buffer(ubuf, count, ppos,
7811                                         s->buffer, trace_seq_used(s));
7812
7813         kfree(s);
7814
7815         return count;
7816 }
7817
7818 static const struct file_operations tracing_stats_fops = {
7819         .open           = tracing_open_generic_tr,
7820         .read           = tracing_stats_read,
7821         .llseek         = generic_file_llseek,
7822         .release        = tracing_release_generic_tr,
7823 };
7824
7825 #ifdef CONFIG_DYNAMIC_FTRACE
7826
7827 static ssize_t
7828 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
7829                   size_t cnt, loff_t *ppos)
7830 {
7831         ssize_t ret;
7832         char *buf;
7833         int r;
7834
7835         /* 256 should be plenty to hold the amount needed */
7836         buf = kmalloc(256, GFP_KERNEL);
7837         if (!buf)
7838                 return -ENOMEM;
7839
7840         r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
7841                       ftrace_update_tot_cnt,
7842                       ftrace_number_of_pages,
7843                       ftrace_number_of_groups);
7844
7845         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7846         kfree(buf);
7847         return ret;
7848 }
7849
7850 static const struct file_operations tracing_dyn_info_fops = {
7851         .open           = tracing_open_generic,
7852         .read           = tracing_read_dyn_info,
7853         .llseek         = generic_file_llseek,
7854 };
7855 #endif /* CONFIG_DYNAMIC_FTRACE */
7856
7857 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
7858 static void
7859 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
7860                 struct trace_array *tr, struct ftrace_probe_ops *ops,
7861                 void *data)
7862 {
7863         tracing_snapshot_instance(tr);
7864 }
7865
7866 static void
7867 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
7868                       struct trace_array *tr, struct ftrace_probe_ops *ops,
7869                       void *data)
7870 {
7871         struct ftrace_func_mapper *mapper = data;
7872         long *count = NULL;
7873
7874         if (mapper)
7875                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7876
7877         if (count) {
7878
7879                 if (*count <= 0)
7880                         return;
7881
7882                 (*count)--;
7883         }
7884
7885         tracing_snapshot_instance(tr);
7886 }
7887
7888 static int
7889 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
7890                       struct ftrace_probe_ops *ops, void *data)
7891 {
7892         struct ftrace_func_mapper *mapper = data;
7893         long *count = NULL;
7894
7895         seq_printf(m, "%ps:", (void *)ip);
7896
7897         seq_puts(m, "snapshot");
7898
7899         if (mapper)
7900                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7901
7902         if (count)
7903                 seq_printf(m, ":count=%ld\n", *count);
7904         else
7905                 seq_puts(m, ":unlimited\n");
7906
7907         return 0;
7908 }
7909
7910 static int
7911 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
7912                      unsigned long ip, void *init_data, void **data)
7913 {
7914         struct ftrace_func_mapper *mapper = *data;
7915
7916         if (!mapper) {
7917                 mapper = allocate_ftrace_func_mapper();
7918                 if (!mapper)
7919                         return -ENOMEM;
7920                 *data = mapper;
7921         }
7922
7923         return ftrace_func_mapper_add_ip(mapper, ip, init_data);
7924 }
7925
7926 static void
7927 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
7928                      unsigned long ip, void *data)
7929 {
7930         struct ftrace_func_mapper *mapper = data;
7931
7932         if (!ip) {
7933                 if (!mapper)
7934                         return;
7935                 free_ftrace_func_mapper(mapper, NULL);
7936                 return;
7937         }
7938
7939         ftrace_func_mapper_remove_ip(mapper, ip);
7940 }
7941
7942 static struct ftrace_probe_ops snapshot_probe_ops = {
7943         .func                   = ftrace_snapshot,
7944         .print                  = ftrace_snapshot_print,
7945 };
7946
7947 static struct ftrace_probe_ops snapshot_count_probe_ops = {
7948         .func                   = ftrace_count_snapshot,
7949         .print                  = ftrace_snapshot_print,
7950         .init                   = ftrace_snapshot_init,
7951         .free                   = ftrace_snapshot_free,
7952 };
7953
7954 static int
7955 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
7956                                char *glob, char *cmd, char *param, int enable)
7957 {
7958         struct ftrace_probe_ops *ops;
7959         void *count = (void *)-1;
7960         char *number;
7961         int ret;
7962
7963         if (!tr)
7964                 return -ENODEV;
7965
7966         /* hash funcs only work with set_ftrace_filter */
7967         if (!enable)
7968                 return -EINVAL;
7969
7970         ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
7971
7972         if (glob[0] == '!')
7973                 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
7974
7975         if (!param)
7976                 goto out_reg;
7977
7978         number = strsep(&param, ":");
7979
7980         if (!strlen(number))
7981                 goto out_reg;
7982
7983         /*
7984          * We use the callback data field (which is a pointer)
7985          * as our counter.
7986          */
7987         ret = kstrtoul(number, 0, (unsigned long *)&count);
7988         if (ret)
7989                 return ret;
7990
7991  out_reg:
7992         ret = tracing_alloc_snapshot_instance(tr);
7993         if (ret < 0)
7994                 goto out;
7995
7996         ret = register_ftrace_function_probe(glob, tr, ops, count);
7997
7998  out:
7999         return ret < 0 ? ret : 0;
8000 }
8001
8002 static struct ftrace_func_command ftrace_snapshot_cmd = {
8003         .name                   = "snapshot",
8004         .func                   = ftrace_trace_snapshot_callback,
8005 };
8006
8007 static __init int register_snapshot_cmd(void)
8008 {
8009         return register_ftrace_command(&ftrace_snapshot_cmd);
8010 }
8011 #else
8012 static inline __init int register_snapshot_cmd(void) { return 0; }
8013 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8014
8015 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8016 {
8017         if (WARN_ON(!tr->dir))
8018                 return ERR_PTR(-ENODEV);
8019
8020         /* Top directory uses NULL as the parent */
8021         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8022                 return NULL;
8023
8024         /* All sub buffers have a descriptor */
8025         return tr->dir;
8026 }
8027
8028 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8029 {
8030         struct dentry *d_tracer;
8031
8032         if (tr->percpu_dir)
8033                 return tr->percpu_dir;
8034
8035         d_tracer = tracing_get_dentry(tr);
8036         if (IS_ERR(d_tracer))
8037                 return NULL;
8038
8039         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8040
8041         MEM_FAIL(!tr->percpu_dir,
8042                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8043
8044         return tr->percpu_dir;
8045 }
8046
8047 static struct dentry *
8048 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8049                       void *data, long cpu, const struct file_operations *fops)
8050 {
8051         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8052
8053         if (ret) /* See tracing_get_cpu() */
8054                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
8055         return ret;
8056 }
8057
8058 static void
8059 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8060 {
8061         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8062         struct dentry *d_cpu;
8063         char cpu_dir[30]; /* 30 characters should be more than enough */
8064
8065         if (!d_percpu)
8066                 return;
8067
8068         snprintf(cpu_dir, 30, "cpu%ld", cpu);
8069         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8070         if (!d_cpu) {
8071                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8072                 return;
8073         }
8074
8075         /* per cpu trace_pipe */
8076         trace_create_cpu_file("trace_pipe", 0444, d_cpu,
8077                                 tr, cpu, &tracing_pipe_fops);
8078
8079         /* per cpu trace */
8080         trace_create_cpu_file("trace", 0644, d_cpu,
8081                                 tr, cpu, &tracing_fops);
8082
8083         trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
8084                                 tr, cpu, &tracing_buffers_fops);
8085
8086         trace_create_cpu_file("stats", 0444, d_cpu,
8087                                 tr, cpu, &tracing_stats_fops);
8088
8089         trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
8090                                 tr, cpu, &tracing_entries_fops);
8091
8092 #ifdef CONFIG_TRACER_SNAPSHOT
8093         trace_create_cpu_file("snapshot", 0644, d_cpu,
8094                                 tr, cpu, &snapshot_fops);
8095
8096         trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
8097                                 tr, cpu, &snapshot_raw_fops);
8098 #endif
8099 }
8100
8101 #ifdef CONFIG_FTRACE_SELFTEST
8102 /* Let selftest have access to static functions in this file */
8103 #include "trace_selftest.c"
8104 #endif
8105
8106 static ssize_t
8107 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8108                         loff_t *ppos)
8109 {
8110         struct trace_option_dentry *topt = filp->private_data;
8111         char *buf;
8112
8113         if (topt->flags->val & topt->opt->bit)
8114                 buf = "1\n";
8115         else
8116                 buf = "0\n";
8117
8118         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8119 }
8120
8121 static ssize_t
8122 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8123                          loff_t *ppos)
8124 {
8125         struct trace_option_dentry *topt = filp->private_data;
8126         unsigned long val;
8127         int ret;
8128
8129         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8130         if (ret)
8131                 return ret;
8132
8133         if (val != 0 && val != 1)
8134                 return -EINVAL;
8135
8136         if (!!(topt->flags->val & topt->opt->bit) != val) {
8137                 mutex_lock(&trace_types_lock);
8138                 ret = __set_tracer_option(topt->tr, topt->flags,
8139                                           topt->opt, !val);
8140                 mutex_unlock(&trace_types_lock);
8141                 if (ret)
8142                         return ret;
8143         }
8144
8145         *ppos += cnt;
8146
8147         return cnt;
8148 }
8149
8150
8151 static const struct file_operations trace_options_fops = {
8152         .open = tracing_open_generic,
8153         .read = trace_options_read,
8154         .write = trace_options_write,
8155         .llseek = generic_file_llseek,
8156 };
8157
8158 /*
8159  * In order to pass in both the trace_array descriptor as well as the index
8160  * to the flag that the trace option file represents, the trace_array
8161  * has a character array of trace_flags_index[], which holds the index
8162  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8163  * The address of this character array is passed to the flag option file
8164  * read/write callbacks.
8165  *
8166  * In order to extract both the index and the trace_array descriptor,
8167  * get_tr_index() uses the following algorithm.
8168  *
8169  *   idx = *ptr;
8170  *
8171  * As the pointer itself contains the address of the index (remember
8172  * index[1] == 1).
8173  *
8174  * Then to get the trace_array descriptor, by subtracting that index
8175  * from the ptr, we get to the start of the index itself.
8176  *
8177  *   ptr - idx == &index[0]
8178  *
8179  * Then a simple container_of() from that pointer gets us to the
8180  * trace_array descriptor.
8181  */
8182 static void get_tr_index(void *data, struct trace_array **ptr,
8183                          unsigned int *pindex)
8184 {
8185         *pindex = *(unsigned char *)data;
8186
8187         *ptr = container_of(data - *pindex, struct trace_array,
8188                             trace_flags_index);
8189 }
8190
8191 static ssize_t
8192 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8193                         loff_t *ppos)
8194 {
8195         void *tr_index = filp->private_data;
8196         struct trace_array *tr;
8197         unsigned int index;
8198         char *buf;
8199
8200         get_tr_index(tr_index, &tr, &index);
8201
8202         if (tr->trace_flags & (1 << index))
8203                 buf = "1\n";
8204         else
8205                 buf = "0\n";
8206
8207         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8208 }
8209
8210 static ssize_t
8211 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8212                          loff_t *ppos)
8213 {
8214         void *tr_index = filp->private_data;
8215         struct trace_array *tr;
8216         unsigned int index;
8217         unsigned long val;
8218         int ret;
8219
8220         get_tr_index(tr_index, &tr, &index);
8221
8222         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8223         if (ret)
8224                 return ret;
8225
8226         if (val != 0 && val != 1)
8227                 return -EINVAL;
8228
8229         mutex_lock(&event_mutex);
8230         mutex_lock(&trace_types_lock);
8231         ret = set_tracer_flag(tr, 1 << index, val);
8232         mutex_unlock(&trace_types_lock);
8233         mutex_unlock(&event_mutex);
8234
8235         if (ret < 0)
8236                 return ret;
8237
8238         *ppos += cnt;
8239
8240         return cnt;
8241 }
8242
8243 static const struct file_operations trace_options_core_fops = {
8244         .open = tracing_open_generic,
8245         .read = trace_options_core_read,
8246         .write = trace_options_core_write,
8247         .llseek = generic_file_llseek,
8248 };
8249
8250 struct dentry *trace_create_file(const char *name,
8251                                  umode_t mode,
8252                                  struct dentry *parent,
8253                                  void *data,
8254                                  const struct file_operations *fops)
8255 {
8256         struct dentry *ret;
8257
8258         ret = tracefs_create_file(name, mode, parent, data, fops);
8259         if (!ret)
8260                 pr_warn("Could not create tracefs '%s' entry\n", name);
8261
8262         return ret;
8263 }
8264
8265
8266 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8267 {
8268         struct dentry *d_tracer;
8269
8270         if (tr->options)
8271                 return tr->options;
8272
8273         d_tracer = tracing_get_dentry(tr);
8274         if (IS_ERR(d_tracer))
8275                 return NULL;
8276
8277         tr->options = tracefs_create_dir("options", d_tracer);
8278         if (!tr->options) {
8279                 pr_warn("Could not create tracefs directory 'options'\n");
8280                 return NULL;
8281         }
8282
8283         return tr->options;
8284 }
8285
8286 static void
8287 create_trace_option_file(struct trace_array *tr,
8288                          struct trace_option_dentry *topt,
8289                          struct tracer_flags *flags,
8290                          struct tracer_opt *opt)
8291 {
8292         struct dentry *t_options;
8293
8294         t_options = trace_options_init_dentry(tr);
8295         if (!t_options)
8296                 return;
8297
8298         topt->flags = flags;
8299         topt->opt = opt;
8300         topt->tr = tr;
8301
8302         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
8303                                     &trace_options_fops);
8304
8305 }
8306
8307 static void
8308 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8309 {
8310         struct trace_option_dentry *topts;
8311         struct trace_options *tr_topts;
8312         struct tracer_flags *flags;
8313         struct tracer_opt *opts;
8314         int cnt;
8315         int i;
8316
8317         if (!tracer)
8318                 return;
8319
8320         flags = tracer->flags;
8321
8322         if (!flags || !flags->opts)
8323                 return;
8324
8325         /*
8326          * If this is an instance, only create flags for tracers
8327          * the instance may have.
8328          */
8329         if (!trace_ok_for_array(tracer, tr))
8330                 return;
8331
8332         for (i = 0; i < tr->nr_topts; i++) {
8333                 /* Make sure there's no duplicate flags. */
8334                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8335                         return;
8336         }
8337
8338         opts = flags->opts;
8339
8340         for (cnt = 0; opts[cnt].name; cnt++)
8341                 ;
8342
8343         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8344         if (!topts)
8345                 return;
8346
8347         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8348                             GFP_KERNEL);
8349         if (!tr_topts) {
8350                 kfree(topts);
8351                 return;
8352         }
8353
8354         tr->topts = tr_topts;
8355         tr->topts[tr->nr_topts].tracer = tracer;
8356         tr->topts[tr->nr_topts].topts = topts;
8357         tr->nr_topts++;
8358
8359         for (cnt = 0; opts[cnt].name; cnt++) {
8360                 create_trace_option_file(tr, &topts[cnt], flags,
8361                                          &opts[cnt]);
8362                 MEM_FAIL(topts[cnt].entry == NULL,
8363                           "Failed to create trace option: %s",
8364                           opts[cnt].name);
8365         }
8366 }
8367
8368 static struct dentry *
8369 create_trace_option_core_file(struct trace_array *tr,
8370                               const char *option, long index)
8371 {
8372         struct dentry *t_options;
8373
8374         t_options = trace_options_init_dentry(tr);
8375         if (!t_options)
8376                 return NULL;
8377
8378         return trace_create_file(option, 0644, t_options,
8379                                  (void *)&tr->trace_flags_index[index],
8380                                  &trace_options_core_fops);
8381 }
8382
8383 static void create_trace_options_dir(struct trace_array *tr)
8384 {
8385         struct dentry *t_options;
8386         bool top_level = tr == &global_trace;
8387         int i;
8388
8389         t_options = trace_options_init_dentry(tr);
8390         if (!t_options)
8391                 return;
8392
8393         for (i = 0; trace_options[i]; i++) {
8394                 if (top_level ||
8395                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
8396                         create_trace_option_core_file(tr, trace_options[i], i);
8397         }
8398 }
8399
8400 static ssize_t
8401 rb_simple_read(struct file *filp, char __user *ubuf,
8402                size_t cnt, loff_t *ppos)
8403 {
8404         struct trace_array *tr = filp->private_data;
8405         char buf[64];
8406         int r;
8407
8408         r = tracer_tracing_is_on(tr);
8409         r = sprintf(buf, "%d\n", r);
8410
8411         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8412 }
8413
8414 static ssize_t
8415 rb_simple_write(struct file *filp, const char __user *ubuf,
8416                 size_t cnt, loff_t *ppos)
8417 {
8418         struct trace_array *tr = filp->private_data;
8419         struct trace_buffer *buffer = tr->array_buffer.buffer;
8420         unsigned long val;
8421         int ret;
8422
8423         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8424         if (ret)
8425                 return ret;
8426
8427         if (buffer) {
8428                 mutex_lock(&trace_types_lock);
8429                 if (!!val == tracer_tracing_is_on(tr)) {
8430                         val = 0; /* do nothing */
8431                 } else if (val) {
8432                         tracer_tracing_on(tr);
8433                         if (tr->current_trace->start)
8434                                 tr->current_trace->start(tr);
8435                 } else {
8436                         tracer_tracing_off(tr);
8437                         if (tr->current_trace->stop)
8438                                 tr->current_trace->stop(tr);
8439                 }
8440                 mutex_unlock(&trace_types_lock);
8441         }
8442
8443         (*ppos)++;
8444
8445         return cnt;
8446 }
8447
8448 static const struct file_operations rb_simple_fops = {
8449         .open           = tracing_open_generic_tr,
8450         .read           = rb_simple_read,
8451         .write          = rb_simple_write,
8452         .release        = tracing_release_generic_tr,
8453         .llseek         = default_llseek,
8454 };
8455
8456 static ssize_t
8457 buffer_percent_read(struct file *filp, char __user *ubuf,
8458                     size_t cnt, loff_t *ppos)
8459 {
8460         struct trace_array *tr = filp->private_data;
8461         char buf[64];
8462         int r;
8463
8464         r = tr->buffer_percent;
8465         r = sprintf(buf, "%d\n", r);
8466
8467         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8468 }
8469
8470 static ssize_t
8471 buffer_percent_write(struct file *filp, const char __user *ubuf,
8472                      size_t cnt, loff_t *ppos)
8473 {
8474         struct trace_array *tr = filp->private_data;
8475         unsigned long val;
8476         int ret;
8477
8478         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8479         if (ret)
8480                 return ret;
8481
8482         if (val > 100)
8483                 return -EINVAL;
8484
8485         if (!val)
8486                 val = 1;
8487
8488         tr->buffer_percent = val;
8489
8490         (*ppos)++;
8491
8492         return cnt;
8493 }
8494
8495 static const struct file_operations buffer_percent_fops = {
8496         .open           = tracing_open_generic_tr,
8497         .read           = buffer_percent_read,
8498         .write          = buffer_percent_write,
8499         .release        = tracing_release_generic_tr,
8500         .llseek         = default_llseek,
8501 };
8502
8503 static struct dentry *trace_instance_dir;
8504
8505 static void
8506 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
8507
8508 static int
8509 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
8510 {
8511         enum ring_buffer_flags rb_flags;
8512
8513         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
8514
8515         buf->tr = tr;
8516
8517         buf->buffer = ring_buffer_alloc(size, rb_flags);
8518         if (!buf->buffer)
8519                 return -ENOMEM;
8520
8521         buf->data = alloc_percpu(struct trace_array_cpu);
8522         if (!buf->data) {
8523                 ring_buffer_free(buf->buffer);
8524                 buf->buffer = NULL;
8525                 return -ENOMEM;
8526         }
8527
8528         /* Allocate the first page for all buffers */
8529         set_buffer_entries(&tr->array_buffer,
8530                            ring_buffer_size(tr->array_buffer.buffer, 0));
8531
8532         return 0;
8533 }
8534
8535 static int allocate_trace_buffers(struct trace_array *tr, int size)
8536 {
8537         int ret;
8538
8539         ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
8540         if (ret)
8541                 return ret;
8542
8543 #ifdef CONFIG_TRACER_MAX_TRACE
8544         ret = allocate_trace_buffer(tr, &tr->max_buffer,
8545                                     allocate_snapshot ? size : 1);
8546         if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
8547                 ring_buffer_free(tr->array_buffer.buffer);
8548                 tr->array_buffer.buffer = NULL;
8549                 free_percpu(tr->array_buffer.data);
8550                 tr->array_buffer.data = NULL;
8551                 return -ENOMEM;
8552         }
8553         tr->allocated_snapshot = allocate_snapshot;
8554
8555         /*
8556          * Only the top level trace array gets its snapshot allocated
8557          * from the kernel command line.
8558          */
8559         allocate_snapshot = false;
8560 #endif
8561
8562         return 0;
8563 }
8564
8565 static void free_trace_buffer(struct array_buffer *buf)
8566 {
8567         if (buf->buffer) {
8568                 ring_buffer_free(buf->buffer);
8569                 buf->buffer = NULL;
8570                 free_percpu(buf->data);
8571                 buf->data = NULL;
8572         }
8573 }
8574
8575 static void free_trace_buffers(struct trace_array *tr)
8576 {
8577         if (!tr)
8578                 return;
8579
8580         free_trace_buffer(&tr->array_buffer);
8581
8582 #ifdef CONFIG_TRACER_MAX_TRACE
8583         free_trace_buffer(&tr->max_buffer);
8584 #endif
8585 }
8586
8587 static void init_trace_flags_index(struct trace_array *tr)
8588 {
8589         int i;
8590
8591         /* Used by the trace options files */
8592         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
8593                 tr->trace_flags_index[i] = i;
8594 }
8595
8596 static void __update_tracer_options(struct trace_array *tr)
8597 {
8598         struct tracer *t;
8599
8600         for (t = trace_types; t; t = t->next)
8601                 add_tracer_options(tr, t);
8602 }
8603
8604 static void update_tracer_options(struct trace_array *tr)
8605 {
8606         mutex_lock(&trace_types_lock);
8607         __update_tracer_options(tr);
8608         mutex_unlock(&trace_types_lock);
8609 }
8610
8611 /* Must have trace_types_lock held */
8612 struct trace_array *trace_array_find(const char *instance)
8613 {
8614         struct trace_array *tr, *found = NULL;
8615
8616         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8617                 if (tr->name && strcmp(tr->name, instance) == 0) {
8618                         found = tr;
8619                         break;
8620                 }
8621         }
8622
8623         return found;
8624 }
8625
8626 struct trace_array *trace_array_find_get(const char *instance)
8627 {
8628         struct trace_array *tr;
8629
8630         mutex_lock(&trace_types_lock);
8631         tr = trace_array_find(instance);
8632         if (tr)
8633                 tr->ref++;
8634         mutex_unlock(&trace_types_lock);
8635
8636         return tr;
8637 }
8638
8639 static struct trace_array *trace_array_create(const char *name)
8640 {
8641         struct trace_array *tr;
8642         int ret;
8643
8644         ret = -ENOMEM;
8645         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
8646         if (!tr)
8647                 return ERR_PTR(ret);
8648
8649         tr->name = kstrdup(name, GFP_KERNEL);
8650         if (!tr->name)
8651                 goto out_free_tr;
8652
8653         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
8654                 goto out_free_tr;
8655
8656         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
8657
8658         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
8659
8660         raw_spin_lock_init(&tr->start_lock);
8661
8662         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8663
8664         tr->current_trace = &nop_trace;
8665
8666         INIT_LIST_HEAD(&tr->systems);
8667         INIT_LIST_HEAD(&tr->events);
8668         INIT_LIST_HEAD(&tr->hist_vars);
8669         INIT_LIST_HEAD(&tr->err_log);
8670
8671         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
8672                 goto out_free_tr;
8673
8674         tr->dir = tracefs_create_dir(name, trace_instance_dir);
8675         if (!tr->dir)
8676                 goto out_free_tr;
8677
8678         ret = event_trace_add_tracer(tr->dir, tr);
8679         if (ret) {
8680                 tracefs_remove(tr->dir);
8681                 goto out_free_tr;
8682         }
8683
8684         ftrace_init_trace_array(tr);
8685
8686         init_tracer_tracefs(tr, tr->dir);
8687         init_trace_flags_index(tr);
8688         __update_tracer_options(tr);
8689
8690         list_add(&tr->list, &ftrace_trace_arrays);
8691
8692         tr->ref++;
8693
8694
8695         return tr;
8696
8697  out_free_tr:
8698         free_trace_buffers(tr);
8699         free_cpumask_var(tr->tracing_cpumask);
8700         kfree(tr->name);
8701         kfree(tr);
8702
8703         return ERR_PTR(ret);
8704 }
8705
8706 static int instance_mkdir(const char *name)
8707 {
8708         struct trace_array *tr;
8709         int ret;
8710
8711         mutex_lock(&event_mutex);
8712         mutex_lock(&trace_types_lock);
8713
8714         ret = -EEXIST;
8715         if (trace_array_find(name))
8716                 goto out_unlock;
8717
8718         tr = trace_array_create(name);
8719
8720         ret = PTR_ERR_OR_ZERO(tr);
8721
8722 out_unlock:
8723         mutex_unlock(&trace_types_lock);
8724         mutex_unlock(&event_mutex);
8725         return ret;
8726 }
8727
8728 /**
8729  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
8730  * @name: The name of the trace array to be looked up/created.
8731  *
8732  * Returns pointer to trace array with given name.
8733  * NULL, if it cannot be created.
8734  *
8735  * NOTE: This function increments the reference counter associated with the
8736  * trace array returned. This makes sure it cannot be freed while in use.
8737  * Use trace_array_put() once the trace array is no longer needed.
8738  * If the trace_array is to be freed, trace_array_destroy() needs to
8739  * be called after the trace_array_put(), or simply let user space delete
8740  * it from the tracefs instances directory. But until the
8741  * trace_array_put() is called, user space can not delete it.
8742  *
8743  */
8744 struct trace_array *trace_array_get_by_name(const char *name)
8745 {
8746         struct trace_array *tr;
8747
8748         mutex_lock(&event_mutex);
8749         mutex_lock(&trace_types_lock);
8750
8751         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8752                 if (tr->name && strcmp(tr->name, name) == 0)
8753                         goto out_unlock;
8754         }
8755
8756         tr = trace_array_create(name);
8757
8758         if (IS_ERR(tr))
8759                 tr = NULL;
8760 out_unlock:
8761         if (tr)
8762                 tr->ref++;
8763
8764         mutex_unlock(&trace_types_lock);
8765         mutex_unlock(&event_mutex);
8766         return tr;
8767 }
8768 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
8769
8770 static int __remove_instance(struct trace_array *tr)
8771 {
8772         int i;
8773
8774         /* Reference counter for a newly created trace array = 1. */
8775         if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
8776                 return -EBUSY;
8777
8778         list_del(&tr->list);
8779
8780         /* Disable all the flags that were enabled coming in */
8781         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
8782                 if ((1 << i) & ZEROED_TRACE_FLAGS)
8783                         set_tracer_flag(tr, 1 << i, 0);
8784         }
8785
8786         tracing_set_nop(tr);
8787         clear_ftrace_function_probes(tr);
8788         event_trace_del_tracer(tr);
8789         ftrace_clear_pids(tr);
8790         ftrace_destroy_function_files(tr);
8791         tracefs_remove(tr->dir);
8792         free_trace_buffers(tr);
8793
8794         for (i = 0; i < tr->nr_topts; i++) {
8795                 kfree(tr->topts[i].topts);
8796         }
8797         kfree(tr->topts);
8798
8799         free_cpumask_var(tr->tracing_cpumask);
8800         kfree(tr->name);
8801         kfree(tr);
8802         tr = NULL;
8803
8804         return 0;
8805 }
8806
8807 int trace_array_destroy(struct trace_array *this_tr)
8808 {
8809         struct trace_array *tr;
8810         int ret;
8811
8812         if (!this_tr)
8813                 return -EINVAL;
8814
8815         mutex_lock(&event_mutex);
8816         mutex_lock(&trace_types_lock);
8817
8818         ret = -ENODEV;
8819
8820         /* Making sure trace array exists before destroying it. */
8821         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8822                 if (tr == this_tr) {
8823                         ret = __remove_instance(tr);
8824                         break;
8825                 }
8826         }
8827
8828         mutex_unlock(&trace_types_lock);
8829         mutex_unlock(&event_mutex);
8830
8831         return ret;
8832 }
8833 EXPORT_SYMBOL_GPL(trace_array_destroy);
8834
8835 static int instance_rmdir(const char *name)
8836 {
8837         struct trace_array *tr;
8838         int ret;
8839
8840         mutex_lock(&event_mutex);
8841         mutex_lock(&trace_types_lock);
8842
8843         ret = -ENODEV;
8844         tr = trace_array_find(name);
8845         if (tr)
8846                 ret = __remove_instance(tr);
8847
8848         mutex_unlock(&trace_types_lock);
8849         mutex_unlock(&event_mutex);
8850
8851         return ret;
8852 }
8853
8854 static __init void create_trace_instances(struct dentry *d_tracer)
8855 {
8856         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
8857                                                          instance_mkdir,
8858                                                          instance_rmdir);
8859         if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
8860                 return;
8861 }
8862
8863 static void
8864 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
8865 {
8866         struct trace_event_file *file;
8867         int cpu;
8868
8869         trace_create_file("available_tracers", 0444, d_tracer,
8870                         tr, &show_traces_fops);
8871
8872         trace_create_file("current_tracer", 0644, d_tracer,
8873                         tr, &set_tracer_fops);
8874
8875         trace_create_file("tracing_cpumask", 0644, d_tracer,
8876                           tr, &tracing_cpumask_fops);
8877
8878         trace_create_file("trace_options", 0644, d_tracer,
8879                           tr, &tracing_iter_fops);
8880
8881         trace_create_file("trace", 0644, d_tracer,
8882                           tr, &tracing_fops);
8883
8884         trace_create_file("trace_pipe", 0444, d_tracer,
8885                           tr, &tracing_pipe_fops);
8886
8887         trace_create_file("buffer_size_kb", 0644, d_tracer,
8888                           tr, &tracing_entries_fops);
8889
8890         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
8891                           tr, &tracing_total_entries_fops);
8892
8893         trace_create_file("free_buffer", 0200, d_tracer,
8894                           tr, &tracing_free_buffer_fops);
8895
8896         trace_create_file("trace_marker", 0220, d_tracer,
8897                           tr, &tracing_mark_fops);
8898
8899         file = __find_event_file(tr, "ftrace", "print");
8900         if (file && file->dir)
8901                 trace_create_file("trigger", 0644, file->dir, file,
8902                                   &event_trigger_fops);
8903         tr->trace_marker_file = file;
8904
8905         trace_create_file("trace_marker_raw", 0220, d_tracer,
8906                           tr, &tracing_mark_raw_fops);
8907
8908         trace_create_file("trace_clock", 0644, d_tracer, tr,
8909                           &trace_clock_fops);
8910
8911         trace_create_file("tracing_on", 0644, d_tracer,
8912                           tr, &rb_simple_fops);
8913
8914         trace_create_file("timestamp_mode", 0444, d_tracer, tr,
8915                           &trace_time_stamp_mode_fops);
8916
8917         tr->buffer_percent = 50;
8918
8919         trace_create_file("buffer_percent", 0444, d_tracer,
8920                         tr, &buffer_percent_fops);
8921
8922         create_trace_options_dir(tr);
8923
8924 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
8925         trace_create_maxlat_file(tr, d_tracer);
8926 #endif
8927
8928         if (ftrace_create_function_files(tr, d_tracer))
8929                 MEM_FAIL(1, "Could not allocate function filter files");
8930
8931 #ifdef CONFIG_TRACER_SNAPSHOT
8932         trace_create_file("snapshot", 0644, d_tracer,
8933                           tr, &snapshot_fops);
8934 #endif
8935
8936         trace_create_file("error_log", 0644, d_tracer,
8937                           tr, &tracing_err_log_fops);
8938
8939         for_each_tracing_cpu(cpu)
8940                 tracing_init_tracefs_percpu(tr, cpu);
8941
8942         ftrace_init_tracefs(tr, d_tracer);
8943 }
8944
8945 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
8946 {
8947         struct vfsmount *mnt;
8948         struct file_system_type *type;
8949
8950         /*
8951          * To maintain backward compatibility for tools that mount
8952          * debugfs to get to the tracing facility, tracefs is automatically
8953          * mounted to the debugfs/tracing directory.
8954          */
8955         type = get_fs_type("tracefs");
8956         if (!type)
8957                 return NULL;
8958         mnt = vfs_submount(mntpt, type, "tracefs", NULL);
8959         put_filesystem(type);
8960         if (IS_ERR(mnt))
8961                 return NULL;
8962         mntget(mnt);
8963
8964         return mnt;
8965 }
8966
8967 /**
8968  * tracing_init_dentry - initialize top level trace array
8969  *
8970  * This is called when creating files or directories in the tracing
8971  * directory. It is called via fs_initcall() by any of the boot up code
8972  * and expects to return the dentry of the top level tracing directory.
8973  */
8974 struct dentry *tracing_init_dentry(void)
8975 {
8976         struct trace_array *tr = &global_trace;
8977
8978         if (security_locked_down(LOCKDOWN_TRACEFS)) {
8979                 pr_warn("Tracing disabled due to lockdown\n");
8980                 return ERR_PTR(-EPERM);
8981         }
8982
8983         /* The top level trace array uses  NULL as parent */
8984         if (tr->dir)
8985                 return NULL;
8986
8987         if (WARN_ON(!tracefs_initialized()))
8988                 return ERR_PTR(-ENODEV);
8989
8990         /*
8991          * As there may still be users that expect the tracing
8992          * files to exist in debugfs/tracing, we must automount
8993          * the tracefs file system there, so older tools still
8994          * work with the newer kerenl.
8995          */
8996         tr->dir = debugfs_create_automount("tracing", NULL,
8997                                            trace_automount, NULL);
8998
8999         return NULL;
9000 }
9001
9002 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9003 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9004
9005 static void __init trace_eval_init(void)
9006 {
9007         int len;
9008
9009         len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9010         trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9011 }
9012
9013 #ifdef CONFIG_MODULES
9014 static void trace_module_add_evals(struct module *mod)
9015 {
9016         if (!mod->num_trace_evals)
9017                 return;
9018
9019         /*
9020          * Modules with bad taint do not have events created, do
9021          * not bother with enums either.
9022          */
9023         if (trace_module_has_bad_taint(mod))
9024                 return;
9025
9026         trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9027 }
9028
9029 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
9030 static void trace_module_remove_evals(struct module *mod)
9031 {
9032         union trace_eval_map_item *map;
9033         union trace_eval_map_item **last = &trace_eval_maps;
9034
9035         if (!mod->num_trace_evals)
9036                 return;
9037
9038         mutex_lock(&trace_eval_mutex);
9039
9040         map = trace_eval_maps;
9041
9042         while (map) {
9043                 if (map->head.mod == mod)
9044                         break;
9045                 map = trace_eval_jmp_to_tail(map);
9046                 last = &map->tail.next;
9047                 map = map->tail.next;
9048         }
9049         if (!map)
9050                 goto out;
9051
9052         *last = trace_eval_jmp_to_tail(map)->tail.next;
9053         kfree(map);
9054  out:
9055         mutex_unlock(&trace_eval_mutex);
9056 }
9057 #else
9058 static inline void trace_module_remove_evals(struct module *mod) { }
9059 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9060
9061 static int trace_module_notify(struct notifier_block *self,
9062                                unsigned long val, void *data)
9063 {
9064         struct module *mod = data;
9065
9066         switch (val) {
9067         case MODULE_STATE_COMING:
9068                 trace_module_add_evals(mod);
9069                 break;
9070         case MODULE_STATE_GOING:
9071                 trace_module_remove_evals(mod);
9072                 break;
9073         }
9074
9075         return 0;
9076 }
9077
9078 static struct notifier_block trace_module_nb = {
9079         .notifier_call = trace_module_notify,
9080         .priority = 0,
9081 };
9082 #endif /* CONFIG_MODULES */
9083
9084 static __init int tracer_init_tracefs(void)
9085 {
9086         struct dentry *d_tracer;
9087
9088         trace_access_lock_init();
9089
9090         d_tracer = tracing_init_dentry();
9091         if (IS_ERR(d_tracer))
9092                 return 0;
9093
9094         event_trace_init();
9095
9096         init_tracer_tracefs(&global_trace, d_tracer);
9097         ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
9098
9099         trace_create_file("tracing_thresh", 0644, d_tracer,
9100                         &global_trace, &tracing_thresh_fops);
9101
9102         trace_create_file("README", 0444, d_tracer,
9103                         NULL, &tracing_readme_fops);
9104
9105         trace_create_file("saved_cmdlines", 0444, d_tracer,
9106                         NULL, &tracing_saved_cmdlines_fops);
9107
9108         trace_create_file("saved_cmdlines_size", 0644, d_tracer,
9109                           NULL, &tracing_saved_cmdlines_size_fops);
9110
9111         trace_create_file("saved_tgids", 0444, d_tracer,
9112                         NULL, &tracing_saved_tgids_fops);
9113
9114         trace_eval_init();
9115
9116         trace_create_eval_file(d_tracer);
9117
9118 #ifdef CONFIG_MODULES
9119         register_module_notifier(&trace_module_nb);
9120 #endif
9121
9122 #ifdef CONFIG_DYNAMIC_FTRACE
9123         trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
9124                         NULL, &tracing_dyn_info_fops);
9125 #endif
9126
9127         create_trace_instances(d_tracer);
9128
9129         update_tracer_options(&global_trace);
9130
9131         return 0;
9132 }
9133
9134 static int trace_panic_handler(struct notifier_block *this,
9135                                unsigned long event, void *unused)
9136 {
9137         if (ftrace_dump_on_oops)
9138                 ftrace_dump(ftrace_dump_on_oops);
9139         return NOTIFY_OK;
9140 }
9141
9142 static struct notifier_block trace_panic_notifier = {
9143         .notifier_call  = trace_panic_handler,
9144         .next           = NULL,
9145         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
9146 };
9147
9148 static int trace_die_handler(struct notifier_block *self,
9149                              unsigned long val,
9150                              void *data)
9151 {
9152         switch (val) {
9153         case DIE_OOPS:
9154                 if (ftrace_dump_on_oops)
9155                         ftrace_dump(ftrace_dump_on_oops);
9156                 break;
9157         default:
9158                 break;
9159         }
9160         return NOTIFY_OK;
9161 }
9162
9163 static struct notifier_block trace_die_notifier = {
9164         .notifier_call = trace_die_handler,
9165         .priority = 200
9166 };
9167
9168 /*
9169  * printk is set to max of 1024, we really don't need it that big.
9170  * Nothing should be printing 1000 characters anyway.
9171  */
9172 #define TRACE_MAX_PRINT         1000
9173
9174 /*
9175  * Define here KERN_TRACE so that we have one place to modify
9176  * it if we decide to change what log level the ftrace dump
9177  * should be at.
9178  */
9179 #define KERN_TRACE              KERN_EMERG
9180
9181 void
9182 trace_printk_seq(struct trace_seq *s)
9183 {
9184         /* Probably should print a warning here. */
9185         if (s->seq.len >= TRACE_MAX_PRINT)
9186                 s->seq.len = TRACE_MAX_PRINT;
9187
9188         /*
9189          * More paranoid code. Although the buffer size is set to
9190          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
9191          * an extra layer of protection.
9192          */
9193         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
9194                 s->seq.len = s->seq.size - 1;
9195
9196         /* should be zero ended, but we are paranoid. */
9197         s->buffer[s->seq.len] = 0;
9198
9199         printk(KERN_TRACE "%s", s->buffer);
9200
9201         trace_seq_init(s);
9202 }
9203
9204 void trace_init_global_iter(struct trace_iterator *iter)
9205 {
9206         iter->tr = &global_trace;
9207         iter->trace = iter->tr->current_trace;
9208         iter->cpu_file = RING_BUFFER_ALL_CPUS;
9209         iter->array_buffer = &global_trace.array_buffer;
9210
9211         if (iter->trace && iter->trace->open)
9212                 iter->trace->open(iter);
9213
9214         /* Annotate start of buffers if we had overruns */
9215         if (ring_buffer_overruns(iter->array_buffer->buffer))
9216                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
9217
9218         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
9219         if (trace_clocks[iter->tr->clock_id].in_ns)
9220                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
9221 }
9222
9223 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
9224 {
9225         /* use static because iter can be a bit big for the stack */
9226         static struct trace_iterator iter;
9227         static atomic_t dump_running;
9228         struct trace_array *tr = &global_trace;
9229         unsigned int old_userobj;
9230         unsigned long flags;
9231         int cnt = 0, cpu;
9232
9233         /* Only allow one dump user at a time. */
9234         if (atomic_inc_return(&dump_running) != 1) {
9235                 atomic_dec(&dump_running);
9236                 return;
9237         }
9238
9239         /*
9240          * Always turn off tracing when we dump.
9241          * We don't need to show trace output of what happens
9242          * between multiple crashes.
9243          *
9244          * If the user does a sysrq-z, then they can re-enable
9245          * tracing with echo 1 > tracing_on.
9246          */
9247         tracing_off();
9248
9249         local_irq_save(flags);
9250         printk_nmi_direct_enter();
9251
9252         /* Simulate the iterator */
9253         trace_init_global_iter(&iter);
9254         /* Can not use kmalloc for iter.temp */
9255         iter.temp = static_temp_buf;
9256         iter.temp_size = STATIC_TEMP_BUF_SIZE;
9257
9258         for_each_tracing_cpu(cpu) {
9259                 atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9260         }
9261
9262         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
9263
9264         /* don't look at user memory in panic mode */
9265         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
9266
9267         switch (oops_dump_mode) {
9268         case DUMP_ALL:
9269                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9270                 break;
9271         case DUMP_ORIG:
9272                 iter.cpu_file = raw_smp_processor_id();
9273                 break;
9274         case DUMP_NONE:
9275                 goto out_enable;
9276         default:
9277                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
9278                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9279         }
9280
9281         printk(KERN_TRACE "Dumping ftrace buffer:\n");
9282
9283         /* Did function tracer already get disabled? */
9284         if (ftrace_is_dead()) {
9285                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
9286                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
9287         }
9288
9289         /*
9290          * We need to stop all tracing on all CPUS to read the
9291          * the next buffer. This is a bit expensive, but is
9292          * not done often. We fill all what we can read,
9293          * and then release the locks again.
9294          */
9295
9296         while (!trace_empty(&iter)) {
9297
9298                 if (!cnt)
9299                         printk(KERN_TRACE "---------------------------------\n");
9300
9301                 cnt++;
9302
9303                 trace_iterator_reset(&iter);
9304                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
9305
9306                 if (trace_find_next_entry_inc(&iter) != NULL) {
9307                         int ret;
9308
9309                         ret = print_trace_line(&iter);
9310                         if (ret != TRACE_TYPE_NO_CONSUME)
9311                                 trace_consume(&iter);
9312                 }
9313                 touch_nmi_watchdog();
9314
9315                 trace_printk_seq(&iter.seq);
9316         }
9317
9318         if (!cnt)
9319                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
9320         else
9321                 printk(KERN_TRACE "---------------------------------\n");
9322
9323  out_enable:
9324         tr->trace_flags |= old_userobj;
9325
9326         for_each_tracing_cpu(cpu) {
9327                 atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9328         }
9329         atomic_dec(&dump_running);
9330         printk_nmi_direct_exit();
9331         local_irq_restore(flags);
9332 }
9333 EXPORT_SYMBOL_GPL(ftrace_dump);
9334
9335 int trace_run_command(const char *buf, int (*createfn)(int, char **))
9336 {
9337         char **argv;
9338         int argc, ret;
9339
9340         argc = 0;
9341         ret = 0;
9342         argv = argv_split(GFP_KERNEL, buf, &argc);
9343         if (!argv)
9344                 return -ENOMEM;
9345
9346         if (argc)
9347                 ret = createfn(argc, argv);
9348
9349         argv_free(argv);
9350
9351         return ret;
9352 }
9353
9354 #define WRITE_BUFSIZE  4096
9355
9356 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
9357                                 size_t count, loff_t *ppos,
9358                                 int (*createfn)(int, char **))
9359 {
9360         char *kbuf, *buf, *tmp;
9361         int ret = 0;
9362         size_t done = 0;
9363         size_t size;
9364
9365         kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
9366         if (!kbuf)
9367                 return -ENOMEM;
9368
9369         while (done < count) {
9370                 size = count - done;
9371
9372                 if (size >= WRITE_BUFSIZE)
9373                         size = WRITE_BUFSIZE - 1;
9374
9375                 if (copy_from_user(kbuf, buffer + done, size)) {
9376                         ret = -EFAULT;
9377                         goto out;
9378                 }
9379                 kbuf[size] = '\0';
9380                 buf = kbuf;
9381                 do {
9382                         tmp = strchr(buf, '\n');
9383                         if (tmp) {
9384                                 *tmp = '\0';
9385                                 size = tmp - buf + 1;
9386                         } else {
9387                                 size = strlen(buf);
9388                                 if (done + size < count) {
9389                                         if (buf != kbuf)
9390                                                 break;
9391                                         /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
9392                                         pr_warn("Line length is too long: Should be less than %d\n",
9393                                                 WRITE_BUFSIZE - 2);
9394                                         ret = -EINVAL;
9395                                         goto out;
9396                                 }
9397                         }
9398                         done += size;
9399
9400                         /* Remove comments */
9401                         tmp = strchr(buf, '#');
9402
9403                         if (tmp)
9404                                 *tmp = '\0';
9405
9406                         ret = trace_run_command(buf, createfn);
9407                         if (ret)
9408                                 goto out;
9409                         buf += size;
9410
9411                 } while (done < count);
9412         }
9413         ret = done;
9414
9415 out:
9416         kfree(kbuf);
9417
9418         return ret;
9419 }
9420
9421 __init static int tracer_alloc_buffers(void)
9422 {
9423         int ring_buf_size;
9424         int ret = -ENOMEM;
9425
9426
9427         if (security_locked_down(LOCKDOWN_TRACEFS)) {
9428                 pr_warn("Tracing disabled due to lockdown\n");
9429                 return -EPERM;
9430         }
9431
9432         /*
9433          * Make sure we don't accidently add more trace options
9434          * than we have bits for.
9435          */
9436         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
9437
9438         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
9439                 goto out;
9440
9441         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
9442                 goto out_free_buffer_mask;
9443
9444         /* Only allocate trace_printk buffers if a trace_printk exists */
9445         if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
9446                 /* Must be called before global_trace.buffer is allocated */
9447                 trace_printk_init_buffers();
9448
9449         /* To save memory, keep the ring buffer size to its minimum */
9450         if (ring_buffer_expanded)
9451                 ring_buf_size = trace_buf_size;
9452         else
9453                 ring_buf_size = 1;
9454
9455         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
9456         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
9457
9458         raw_spin_lock_init(&global_trace.start_lock);
9459
9460         /*
9461          * The prepare callbacks allocates some memory for the ring buffer. We
9462          * don't free the buffer if the if the CPU goes down. If we were to free
9463          * the buffer, then the user would lose any trace that was in the
9464          * buffer. The memory will be removed once the "instance" is removed.
9465          */
9466         ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
9467                                       "trace/RB:preapre", trace_rb_cpu_prepare,
9468                                       NULL);
9469         if (ret < 0)
9470                 goto out_free_cpumask;
9471         /* Used for event triggers */
9472         ret = -ENOMEM;
9473         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
9474         if (!temp_buffer)
9475                 goto out_rm_hp_state;
9476
9477         if (trace_create_savedcmd() < 0)
9478                 goto out_free_temp_buffer;
9479
9480         /* TODO: make the number of buffers hot pluggable with CPUS */
9481         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
9482                 MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
9483                 goto out_free_savedcmd;
9484         }
9485
9486         if (global_trace.buffer_disabled)
9487                 tracing_off();
9488
9489         if (trace_boot_clock) {
9490                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
9491                 if (ret < 0)
9492                         pr_warn("Trace clock %s not defined, going back to default\n",
9493                                 trace_boot_clock);
9494         }
9495
9496         /*
9497          * register_tracer() might reference current_trace, so it
9498          * needs to be set before we register anything. This is
9499          * just a bootstrap of current_trace anyway.
9500          */
9501         global_trace.current_trace = &nop_trace;
9502
9503         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9504
9505         ftrace_init_global_array_ops(&global_trace);
9506
9507         init_trace_flags_index(&global_trace);
9508
9509         register_tracer(&nop_trace);
9510
9511         /* Function tracing may start here (via kernel command line) */
9512         init_function_trace();
9513
9514         /* All seems OK, enable tracing */
9515         tracing_disabled = 0;
9516
9517         atomic_notifier_chain_register(&panic_notifier_list,
9518                                        &trace_panic_notifier);
9519
9520         register_die_notifier(&trace_die_notifier);
9521
9522         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
9523
9524         INIT_LIST_HEAD(&global_trace.systems);
9525         INIT_LIST_HEAD(&global_trace.events);
9526         INIT_LIST_HEAD(&global_trace.hist_vars);
9527         INIT_LIST_HEAD(&global_trace.err_log);
9528         list_add(&global_trace.list, &ftrace_trace_arrays);
9529
9530         apply_trace_boot_options();
9531
9532         register_snapshot_cmd();
9533
9534         return 0;
9535
9536 out_free_savedcmd:
9537         free_saved_cmdlines_buffer(savedcmd);
9538 out_free_temp_buffer:
9539         ring_buffer_free(temp_buffer);
9540 out_rm_hp_state:
9541         cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
9542 out_free_cpumask:
9543         free_cpumask_var(global_trace.tracing_cpumask);
9544 out_free_buffer_mask:
9545         free_cpumask_var(tracing_buffer_mask);
9546 out:
9547         return ret;
9548 }
9549
9550 void __init early_trace_init(void)
9551 {
9552         if (tracepoint_printk) {
9553                 tracepoint_print_iter =
9554                         kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
9555                 if (MEM_FAIL(!tracepoint_print_iter,
9556                              "Failed to allocate trace iterator\n"))
9557                         tracepoint_printk = 0;
9558                 else
9559                         static_key_enable(&tracepoint_printk_key.key);
9560         }
9561         tracer_alloc_buffers();
9562 }
9563
9564 void __init trace_init(void)
9565 {
9566         trace_event_init();
9567 }
9568
9569 __init static int clear_boot_tracer(void)
9570 {
9571         /*
9572          * The default tracer at boot buffer is an init section.
9573          * This function is called in lateinit. If we did not
9574          * find the boot tracer, then clear it out, to prevent
9575          * later registration from accessing the buffer that is
9576          * about to be freed.
9577          */
9578         if (!default_bootup_tracer)
9579                 return 0;
9580
9581         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
9582                default_bootup_tracer);
9583         default_bootup_tracer = NULL;
9584
9585         return 0;
9586 }
9587
9588 fs_initcall(tracer_init_tracefs);
9589 late_initcall_sync(clear_boot_tracer);
9590
9591 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
9592 __init static int tracing_set_default_clock(void)
9593 {
9594         /* sched_clock_stable() is determined in late_initcall */
9595         if (!trace_boot_clock && !sched_clock_stable()) {
9596                 if (security_locked_down(LOCKDOWN_TRACEFS)) {
9597                         pr_warn("Can not set tracing clock due to lockdown\n");
9598                         return -EPERM;
9599                 }
9600
9601                 printk(KERN_WARNING
9602                        "Unstable clock detected, switching default tracing clock to \"global\"\n"
9603                        "If you want to keep using the local clock, then add:\n"
9604                        "  \"trace_clock=local\"\n"
9605                        "on the kernel command line\n");
9606                 tracing_set_clock(&global_trace, "global");
9607         }
9608
9609         return 0;
9610 }
9611 late_initcall_sync(tracing_set_default_clock);
9612 #endif